author    Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
committer Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
commit    dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree      dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Transforms
parent    220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/Hello/Hello.cpp | 3
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 17
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 9
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 8
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp | 9
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 28
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp | 23
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 269
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp | 11
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp | 11
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp | 7
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 70
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp | 10
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp | 7
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 734
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 11
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 28
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 5
-rw-r--r--  lib/Transforms/IPO/StripDeadPrototypes.cpp | 3
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 107
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 102
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 149
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 305
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 89
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 390
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 51
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 128
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp | 80
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 92
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp | 91
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 85
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 115
-rw-r--r--  lib/Transforms/InstCombine/InstCombineWorklist.h | 7
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 246
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 310
-rw-r--r--  lib/Transforms/Instrumentation/BoundsChecking.cpp | 9
-rw-r--r--  lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 32
-rw-r--r--  lib/Transforms/Instrumentation/DebugIR.cpp | 30
-rw-r--r--  lib/Transforms/Instrumentation/DebugIR.h | 2
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp | 45
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp | 226
-rw-r--r--  lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 14
-rw-r--r--  lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h | 40
-rw-r--r--  lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 5
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 13
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 3
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCContract.cpp | 9
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 4
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 49
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/Android.mk | 6
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 11
-rw-r--r--  lib/Transforms/Scalar/ConstantHoisting.cpp | 24
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/DCE.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 33
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 23
-rw-r--r--  lib/Transforms/Scalar/FlattenCFGPass.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 115
-rw-r--r--  lib/Transforms/Scalar/GlobalMerge.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 80
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 39
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 15
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 65
-rw-r--r--  lib/Transforms/Scalar/LoopInstSimplify.cpp | 17
-rw-r--r--  lib/Transforms/Scalar/LoopRerollPass.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 36
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 683
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 62
-rw-r--r--  lib/Transforms/Scalar/LowerAtomic.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 41
-rw-r--r--  lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 43
-rw-r--r--  lib/Transforms/Scalar/Reg2Mem.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 33
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 106
-rw-r--r--  lib/Transforms/Scalar/SampleProfile.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 79
-rw-r--r--  lib/Transforms/Scalar/Scalarizer.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 623
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/StructurizeCFG.cpp | 27
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 380
-rw-r--r--  lib/Transforms/Utils/AddDiscriminators.cpp | 15
-rw-r--r--  lib/Transforms/Utils/Android.mk | 1
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 117
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp | 15
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp | 31
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp | 7
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 8
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp | 14
-rw-r--r--  lib/Transforms/Utils/CmpInstAnalysis.cpp | 2
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 20
-rw-r--r--  lib/Transforms/Utils/CtorUtils.cpp | 183
-rw-r--r--  lib/Transforms/Utils/DemoteRegToStack.cpp | 14
-rw-r--r--  lib/Transforms/Utils/FlattenCFG.cpp | 14
-rw-r--r--  lib/Transforms/Utils/GlobalStatus.cpp | 4
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 253
-rw-r--r--  lib/Transforms/Utils/IntegerDivision.cpp | 3
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp | 5
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 84
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 42
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 28
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp | 13
-rw-r--r--  lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 3
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 3
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 7
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp | 3
-rw-r--r--  lib/Transforms/Utils/ModuleUtils.cpp | 29
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 29
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 21
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 153
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp | 30
-rw-r--r--  lib/Transforms/Utils/SimplifyInstructions.cpp | 24
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 316
-rw-r--r--  lib/Transforms/Utils/SpecialCaseList.cpp | 6
-rw-r--r--  lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 8
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 20
-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp | 94
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 443
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 257
128 files changed, 5955 insertions, 3110 deletions
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index c514c49..29b9bb8 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -12,13 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hello"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "hello"
+
STATISTIC(HelloCounter, "Counts number of functions greeted");
namespace {
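
The Hello.cpp hunk above is the template for a change repeated across every file in this commit: the pass-local DEBUG_TYPE macro moves from before the includes to after them, so no header is ever compiled with a stray pass-specific macro in scope (part of the modules-friendly header cleanup in the 3.5 cycle). A minimal sketch of the new file layout, with a hypothetical pass name:

    // Includes first; nothing here can observe DEBUG_TYPE.
    #include "llvm/ADT/Statistic.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    // Defined only after all includes; names the pass for -debug-only=
    // and for the STATISTIC machinery below.
    #define DEBUG_TYPE "mypass"

    STATISTIC(MyCounter, "Counts how often the pass fires");
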
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 48d3fba..377fa15 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,7 +29,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "argpromotion"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -49,6 +48,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "argpromotion"
+
STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
@@ -123,14 +124,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
Function *F = CGN->getFunction();
// Make sure that it is local to this module.
- if (!F || !F->hasLocalLinkage()) return 0;
+ if (!F || !F->hasLocalLinkage()) return nullptr;
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument*, 16> PointerArgs;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
if (I->getType()->isPointerTy())
PointerArgs.push_back(I);
- if (PointerArgs.empty()) return 0;
+ if (PointerArgs.empty()) return nullptr;
// Second check: make sure that all callers are direct callers. We can't
// transform functions that have indirect callers. Also see if the function
@@ -139,7 +140,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
for (Use &U : F->uses()) {
CallSite CS(U.getUser());
// Must be a direct call.
- if (CS.getInstruction() == 0 || !CS.isCallee(&U)) return 0;
+ if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
if (CS.getInstruction()->getParent()->getParent() == F)
isSelfRecursive = true;
@@ -207,7 +208,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// No promotable pointer arguments.
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
- return 0;
+ return nullptr;
return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
@@ -660,7 +661,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
@@ -788,10 +789,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
+ Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
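
The other change blanketing this commit is visible above: every pointer sentinel written as 0 or NULL becomes C++11's nullptr. Beyond style, nullptr has its own type, so it cannot be silently captured by an integer overload the way a literal 0 can. A self-contained illustration (names are invented for the example):

    #include <cstdio>

    static void report(int Count) { std::printf("count=%d\n", Count); }
    static void report(const char *Name) {
      std::printf("name=%s\n", Name ? Name : "<none>");
    }

    int main() {
      report(0);       // literal 0 is an int: picks report(int)
      report(nullptr); // nullptr is a pointer: picks report(const char *)
      return 0;
    }
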
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 5c3acea..23be081 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -17,7 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "constmerge"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
@@ -31,6 +30,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "constmerge"
+
STATISTIC(NumMerged, "Number of global constants merged");
namespace {
@@ -66,7 +67,7 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
/// Find values that are marked as llvm.used.
static void FindUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
- if (LLVMUsed == 0) return;
+ if (!LLVMUsed) return;
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
@@ -103,7 +104,7 @@ unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
bool ConstantMerge::runOnModule(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -161,7 +162,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
// it cannot be replaced, but it can be the canonical constant we merge with.
- if (Slot == 0 || IsBetterCanonical(*GV, *Slot))
+ if (!Slot || IsBetterCanonical(*GV, *Slot))
Slot = GV;
}
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1aba3df..284b896 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -17,7 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "deadargelim"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -38,8 +37,11 @@
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <set>
+#include <tuple>
using namespace llvm;
+#define DEBUG_TYPE "deadargelim"
+
STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
STATISTIC(NumArgumentsReplacedWithUndef,
@@ -764,7 +766,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Find out the new return value.
Type *RetTy = FTy->getReturnType();
- Type *NRetTy = NULL;
+ Type *NRetTy = nullptr;
unsigned RetCount = NumRetVals(F);
// -1 means unused, other numbers are the new index
@@ -1050,7 +1052,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Value *RetVal;
if (NFTy->getReturnType()->isVoidTy()) {
- RetVal = 0;
+ RetVal = nullptr;
} else {
assert (RetTy->isStructTy());
// The original return value was a struct, insert
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 4211f12..40ec9fa 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -27,11 +27,10 @@ using namespace llvm;
/// the split module remain valid.
static void makeVisible(GlobalValue &GV, bool Delete) {
bool Local = GV.hasLocalLinkage();
- if (Local)
- GV.setVisibility(GlobalValue::HiddenVisibility);
-
if (Local || Delete) {
GV.setLinkage(GlobalValue::ExternalLinkage);
+ if (Local)
+ GV.setVisibility(GlobalValue::HiddenVisibility);
return;
}
@@ -95,7 +94,7 @@ namespace {
makeVisible(*I, Delete);
if (Delete)
- I->setInitializer(0);
+ I->setInitializer(nullptr);
}
// Visit the Functions.
@@ -134,7 +133,7 @@ namespace {
} else {
Declaration =
new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
- 0, CurI->getName());
+ nullptr, CurI->getName());
}
CurI->replaceAllUsesWith(Declaration);
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index b716718..fed8839 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -18,7 +18,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "functionattrs"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -35,6 +34,8 @@
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "functionattrs"
+
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
@@ -46,7 +47,7 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID), AA(0) {
+ FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) {
initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
}
@@ -160,7 +161,7 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F == 0)
+ if (!F)
// External node - may write memory. Just give up.
return false;
@@ -319,7 +320,7 @@ namespace {
ArgumentGraphNode SyntheticRoot;
public:
- ArgumentGraph() { SyntheticRoot.Definition = 0; }
+ ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator;
@@ -521,7 +522,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F == 0)
+ if (!F)
// External node - only a problem for arguments that we pass to it.
continue;
@@ -600,7 +601,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// captures.
for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
- std::vector<ArgumentGraphNode*> &ArgumentSCC = *I;
+ const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
if (ArgumentSCC.size() == 1) {
if (!ArgumentSCC[0]->Definition) continue; // synthetic root node
@@ -616,8 +617,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
}
bool SCCCaptured = false;
- for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
- E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
+ I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *Node = *I;
if (Node->Uses.empty()) {
if (!Node->Definition->hasNoCaptureAttr())
@@ -629,13 +630,12 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
SmallPtrSet<Argument*, 8> ArgumentSCCNodes;
// Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
// quickly looking up whether a given Argument is in this ArgumentSCC.
- for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
- E = ArgumentSCC.end(); I != E; ++I) {
+ for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) {
ArgumentSCCNodes.insert((*I)->Definition);
}
- for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
- E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
+ I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *N = *I;
for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(),
UE = N->Uses.end(); UI != UE; ++UI) {
@@ -775,7 +775,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F == 0)
+ if (!F)
// External node - skip it;
return false;
@@ -1668,7 +1668,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F != 0 && F->isDeclaration())
+ if (F && F->isDeclaration())
MadeChange |= inferPrototypeAttributes(*F);
}
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 0c081f1..9decddc 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -15,15 +15,18 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "globaldce"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "globaldce"
+
STATISTIC(NumAliases , "Number of global aliases removed");
STATISTIC(NumFunctions, "Number of functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
@@ -53,6 +56,15 @@ namespace {
};
}
+/// Returns true if F contains only a single "ret" instruction.
+static bool isEmptyFunction(Function *F) {
+ BasicBlock &Entry = F->getEntryBlock();
+ if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front()))
+ return false;
+ ReturnInst &RI = cast<ReturnInst>(Entry.front());
+ return RI.getReturnValue() == NULL;
+}
+
char GlobalDCE::ID = 0;
INITIALIZE_PASS(GlobalDCE, "globaldce",
"Dead Global Elimination", false, false)
@@ -61,7 +73,10 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
bool GlobalDCE::runOnModule(Module &M) {
bool Changed = false;
-
+
+ // Remove empty functions from the global ctors list.
+ Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
@@ -99,7 +114,7 @@ bool GlobalDCE::runOnModule(Module &M) {
I != E; ++I)
if (!AliveGlobals.count(I)) {
DeadGlobalVars.push_back(I); // Keep track of dead globals
- I->setInitializer(0);
+ I->setInitializer(nullptr);
}
// The second pass drops the bodies of functions which are dead...
@@ -117,7 +132,7 @@ bool GlobalDCE::runOnModule(Module &M) {
++I)
if (!AliveGlobals.count(I)) {
DeadAliases.push_back(I);
- I->setAliasee(0);
+ I->setAliasee(nullptr);
}
if (!DeadFunctions.empty()) {
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1a510cf..ae80c43 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "globalopt"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -39,11 +38,15 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
+#include <deque>
using namespace llvm;
+#define DEBUG_TYPE "globalopt"
+
STATISTIC(NumMarked , "Number of globals marked constant");
STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
@@ -74,11 +77,9 @@ namespace {
bool runOnModule(Module &M) override;
private:
- GlobalVariable *FindGlobalCtors(Module &M);
bool OptimizeFunctions(Module &M);
bool OptimizeGlobalVars(Module &M);
bool OptimizeGlobalAliases(Module &M);
- bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
const GlobalStatus &GS);
@@ -294,7 +295,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Changed = true;
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
if (CE->getOpcode() == Instruction::GetElementPtr) {
- Constant *SubInit = 0;
+ Constant *SubInit = nullptr;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
@@ -302,7 +303,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
CE->getType()->isPointerTy()) ||
CE->getOpcode() == Instruction::AddrSpaceCast) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, 0, DL, TLI);
+ Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
}
if (CE->use_empty()) {
@@ -313,7 +314,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// Do not transform "gepinst (gep constexpr (GV))" here, because forming
// "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
// and will invalidate our notion of what Init is.
- Constant *SubInit = 0;
+ Constant *SubInit = nullptr;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE =
dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
@@ -370,7 +371,7 @@ static bool isSafeSROAElementUse(Value *V) {
// Otherwise, it must be a GEP.
GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
- if (GEPI == 0) return false;
+ if (!GEPI) return false;
if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
!cast<Constant>(GEPI->getOperand(1))->isNullValue())
@@ -470,7 +471,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
- return 0;
+ return nullptr;
assert(GV->hasLocalLinkage() && !GV->isConstant());
Constant *Init = GV->getInitializer();
@@ -514,7 +515,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
NumElements = cast<VectorType>(STy)->getNumElements();
if (NumElements > 16 && GV->hasNUsesOrMore(16))
- return 0; // It's not worth it.
+ return nullptr; // It's not worth it.
NewGlobals.reserve(NumElements);
uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType());
@@ -541,7 +542,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
}
if (NewGlobals.empty())
- return 0;
+ return nullptr;
DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
@@ -603,7 +604,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (FirstGlobal == i) ++FirstGlobal;
}
- return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
+ return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
}
/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
@@ -785,7 +786,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
Changed |= CleanupPointerRootUsers(GV, TLI);
} else {
Changed = true;
- CleanupConstantGlobalUsers(GV, 0, DL, TLI);
+ CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
}
if (GV->use_empty()) {
DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
@@ -847,7 +848,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// If there are bitcast users of the malloc (which is typical, usually we have
// a malloc + bitcast) then replace them with uses of the new global. Update
// other users to use the global as well.
- BitCastInst *TheBC = 0;
+ BitCastInst *TheBC = nullptr;
while (!CI->use_empty()) {
Instruction *User = cast<Instruction>(CI->user_back());
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
@@ -858,7 +859,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
BCI->setOperand(0, NewGV);
}
} else {
- if (TheBC == 0)
+ if (!TheBC)
TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
User->replaceUsesOfWith(CI, TheBC);
}
@@ -1169,10 +1170,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
- StructType *ST = cast<StructType>(PN->getType()->getPointerElementType());
+ PointerType *PTy = cast<PointerType>(PN->getType());
+ StructType *ST = cast<StructType>(PTy->getElementType());
+
+ unsigned AS = PTy->getAddressSpace();
PHINode *NewPN =
- PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
PN->getNumIncomingValues(),
PN->getName()+".f"+Twine(FieldNo), PN);
Result = NewPN;
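
The two hunks above fix an address-space bug in the heap-SRoA path: PointerType::getUnqual always produces an addrspace(0) pointer, so the rewritten PHIs and field globals silently lost the original pointer's address space. The fix threads the address space through explicitly. A hedged sketch of the repaired pattern (the helper name is illustrative):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    // Build the pointer type of one struct field while preserving the
    // address space of the incoming pointer-to-struct.
    static PointerType *fieldPtrType(PointerType *PTy, unsigned FieldNo) {
      StructType *ST = cast<StructType>(PTy->getElementType());
      Type *FieldTy = ST->getElementType(FieldNo);
      // PointerType::getUnqual(FieldTy) would force addrspace(0) here.
      return PointerType::get(FieldTy, PTy->getAddressSpace());
    }
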
@@ -1284,9 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
std::vector<Value*> FieldGlobals;
std::vector<Value*> FieldMallocs;
+ unsigned AS = GV->getType()->getPointerAddressSpace();
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
Type *FieldTy = STy->getElementType(FieldNo);
- PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+ PointerType *PFieldTy = PointerType::get(FieldTy, AS);
GlobalVariable *NGV =
new GlobalVariable(*GV->getParent(),
@@ -1302,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Type *IntPtrTy = DL->getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
- NElems, 0,
+ NElems, nullptr,
CI->getName() + ".f" + Twine(FieldNo));
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
@@ -1535,7 +1540,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
AllocSize, NumElements,
- 0, CI->getName());
+ nullptr, CI->getName());
Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
@@ -1750,7 +1755,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
->getEntryBlock().begin());
Type *ElemTy = GV->getType()->getElementType();
// FIXME: Pass Global's alignment when globals have alignment
- AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
+ GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
@@ -1957,116 +1963,6 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
return Changed;
}
-/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
-/// initializers have an init priority of 65535.
-GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
- GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
- if (GV == 0) return 0;
-
- // Verify that the initializer is simple enough for us to handle. We are
- // only allowed to optimize the initializer if it is unique.
- if (!GV->hasUniqueInitializer()) return 0;
-
- if (isa<ConstantAggregateZero>(GV->getInitializer()))
- return GV;
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
-
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
- if (isa<ConstantAggregateZero>(*i))
- continue;
- ConstantStruct *CS = cast<ConstantStruct>(*i);
- if (isa<ConstantPointerNull>(CS->getOperand(1)))
- continue;
-
- // Must have a function or null ptr.
- if (!isa<Function>(CS->getOperand(1)))
- return 0;
-
- // Init priority must be standard.
- ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
- if (CI->getZExtValue() != 65535)
- return 0;
- }
-
- return GV;
-}
-
-/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
-/// return a list of the functions and null terminator as a vector.
-static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
- if (GV->getInitializer()->isNullValue())
- return std::vector<Function*>();
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
- std::vector<Function*> Result;
- Result.reserve(CA->getNumOperands());
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
- ConstantStruct *CS = cast<ConstantStruct>(*i);
- Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
- }
- return Result;
-}
-
-/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
-/// specified array, returning the new global to use.
-static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
- const std::vector<Function*> &Ctors) {
- // If we made a change, reassemble the initializer list.
- Constant *CSVals[2];
- CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
- CSVals[1] = 0;
-
- StructType *StructTy =
- cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
-
- // Create the new init list.
- std::vector<Constant*> CAList;
- for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
- if (Ctors[i]) {
- CSVals[1] = Ctors[i];
- } else {
- Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
- false);
- PointerType *PFTy = PointerType::getUnqual(FTy);
- CSVals[1] = Constant::getNullValue(PFTy);
- CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
- 0x7fffffff);
- }
- CAList.push_back(ConstantStruct::get(StructTy, CSVals));
- }
-
- // Create the array initializer.
- Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
- CAList.size()), CAList);
-
- // If we didn't change the number of elements, don't create a new GV.
- if (CA->getType() == GCL->getInitializer()->getType()) {
- GCL->setInitializer(CA);
- return GCL;
- }
-
- // Create the new global and insert it next to the existing list.
- GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
- GCL->getLinkage(), CA, "",
- GCL->getThreadLocalMode());
- GCL->getParent()->getGlobalList().insert(GCL, NGV);
- NGV->takeName(GCL);
-
- // Nuke the old list, replacing any uses with the new one.
- if (!GCL->use_empty()) {
- Constant *V = NGV;
- if (V->getType() != GCL->getType())
- V = ConstantExpr::getBitCast(V, GCL->getType());
- GCL->replaceAllUsesWith(V);
- }
- GCL->eraseFromParent();
-
- if (Ctors.size())
- return NGV;
- else
- return 0;
-}
-
-
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
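
The helpers deleted above (FindGlobalCtors, ParseGlobalCtors, InstallGlobalCtors, plus OptimizeGlobalCtorsList further down) all manipulated llvm.global_ctors, an array of { i32 priority, void ()* ctor } entries. This commit centralizes that machinery in the new lib/Transforms/Utils/CtorUtils.cpp (see the diffstat), which GlobalOpt and GlobalDCE now drive with pass-specific predicates. A sketch of the usage pattern, with the entry point's signature inferred from the call sites in this diff rather than quoted from CtorUtils.h:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/CtorUtils.h"
    using namespace llvm;

    // Placeholder predicate; the real callers plug in GlobalDCE's
    // isEmptyFunction or GlobalOpt's EvaluateStaticConstructor lambda.
    static bool ctorIsRemovable(Function *) { return false; }

    // The shared helper parses llvm.global_ctors, drops every ctor the
    // predicate approves, and reinstalls the pruned list.
    static bool pruneCtors(Module &M) {
      return optimizeGlobalCtorsList(M, ctorIsRemovable);
    }
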
@@ -2271,22 +2167,16 @@ class Evaluator {
public:
Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
: DL(DL), TLI(TLI) {
- ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ ValueStack.emplace_back();
}
~Evaluator() {
- DeleteContainerPointers(ValueStack);
- while (!AllocaTmps.empty()) {
- GlobalVariable *Tmp = AllocaTmps.back();
- AllocaTmps.pop_back();
-
+ for (auto &Tmp : AllocaTmps)
// If there are still users of the alloca, the program is doing something
// silly, e.g. storing the address of the alloca somewhere and using it
// later. Since this is undefined, we'll just make it be null.
if (!Tmp->use_empty())
Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
- delete Tmp;
- }
}
/// EvaluateFunction - Evaluate a call to function F, returning true if
@@ -2302,13 +2192,13 @@ public:
Constant *getVal(Value *V) {
if (Constant *CV = dyn_cast<Constant>(V)) return CV;
- Constant *R = ValueStack.back()->lookup(V);
+ Constant *R = ValueStack.back().lookup(V);
assert(R && "Reference to an uncomputed value!");
return R;
}
void setVal(Value *V, Constant *C) {
- ValueStack.back()->operator[](V) = C;
+ ValueStack.back()[V] = C;
}
const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
@@ -2323,9 +2213,9 @@ private:
Constant *ComputeLoadResult(Constant *P);
/// ValueStack - As we compute SSA register values, we store their contents
- /// here. The back of the vector contains the current function and the stack
+ /// here. The back of the deque contains the current function and the stack
/// contains the values in the calling frames.
- SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
+ std::deque<DenseMap<Value*, Constant*>> ValueStack;
/// CallStack - This is used to detect recursion. In pathological situations
/// we could hit exponential behavior, but at least there is nothing
@@ -2340,7 +2230,7 @@ private:
/// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
/// to represent its body. This vector is needed so we can delete the
/// temporary globals when we are done.
- SmallVector<GlobalVariable*, 32> AllocaTmps;
+ SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
/// Invariants - These global variables have been marked invariant by the
/// static constructor.
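
The Evaluator hunks above are ownership cleanups: the per-call value maps become a std::deque of DenseMaps held by value (emplace_back/pop_back replace new/delete per frame), and the alloca-modelling temporary globals become unique_ptrs, which eliminates the hand-written delete loop in the destructor. A deque, unlike a vector, never relocates existing elements as it grows, which is presumably why it was preferred here. The idiom in miniature, using standard types:

    #include <deque>
    #include <map>
    #include <memory>
    #include <string>

    int main() {
      // Frames held by value: no new/delete, and growth never moves
      // frames already on the stack.
      std::deque<std::map<std::string, int>> ValueStack;
      ValueStack.emplace_back();   // enter a call
      ValueStack.back()["x"] = 42; // bind a value in the current frame
      ValueStack.pop_back();       // leave the call; frame destroyed

      // unique_ptr owns each temporary, so no explicit cleanup loop.
      // (The tree uses llvm::make_unique, predating C++14's version.)
      std::deque<std::unique_ptr<int>> Tmps;
      Tmps.push_back(std::unique_ptr<int>(new int(7)));
      return 0;
    }
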
@@ -2369,7 +2259,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
if (GV->hasDefinitiveInitializer())
return GV->getInitializer();
- return 0;
+ return nullptr;
}
// Handle a constantexpr getelementptr.
@@ -2381,7 +2271,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
}
- return 0; // don't know how to evaluate.
+ return nullptr; // don't know how to evaluate.
}
/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
@@ -2391,7 +2281,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
BasicBlock *&NextBB) {
// This is the main evaluation loop.
while (1) {
- Constant *InstResult = 0;
+ Constant *InstResult = nullptr;
DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
@@ -2517,7 +2407,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
"folding: " << *Ptr << "\n");
}
InstResult = ComputeLoadResult(Ptr);
- if (InstResult == 0) {
+ if (!InstResult) {
DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
"\n");
return false; // Could not evaluate load.
@@ -2530,11 +2420,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false; // Cannot handle array allocs.
}
Type *Ty = AI->getType()->getElementType();
- AllocaTmps.push_back(new GlobalVariable(Ty, false,
- GlobalValue::InternalLinkage,
- UndefValue::get(Ty),
- AI->getName()));
- InstResult = AllocaTmps.back();
+ AllocaTmps.push_back(
+ make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
+ UndefValue::get(Ty), AI->getName()));
+ InstResult = AllocaTmps.back().get();
DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
CallSite CS(CurInst);
@@ -2636,17 +2525,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false;
}
- Constant *RetVal = 0;
+ Constant *RetVal = nullptr;
// Execute the call, if successful, use the return value.
- ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ ValueStack.emplace_back();
if (!EvaluateFunction(Callee, RetVal, Formals)) {
DEBUG(dbgs() << "Failed to evaluate function.\n");
return false;
}
- delete ValueStack.pop_back_val();
+ ValueStack.pop_back();
InstResult = RetVal;
- if (InstResult != NULL) {
+ if (InstResult) {
DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
InstResult << "\n\n");
} else {
@@ -2678,7 +2567,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
else
return false; // Cannot determine.
} else if (isa<ReturnInst>(CurInst)) {
- NextBB = 0;
+ NextBB = nullptr;
} else {
// invoke, unwind, resume, unreachable.
DEBUG(dbgs() << "Can not handle terminator.");
@@ -2743,13 +2632,13 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
BasicBlock::iterator CurInst = CurBB->begin();
while (1) {
- BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
+ BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
if (!EvaluateBlock(CurInst, NextBB))
return false;
- if (NextBB == 0) {
+ if (!NextBB) {
// Successfully running until there's no next block means that we found
// the return. Fill in the return value and pop the call stack.
ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
@@ -2768,7 +2657,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
// Okay, we have never been in this block before. Check to see if there
// are any PHI nodes. If so, evaluate them with information about where
// we came from.
- PHINode *PN = 0;
+ PHINode *PN = nullptr;
for (CurInst = NextBB->begin();
(PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
@@ -2789,6 +2678,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
SmallVector<Constant*, 0>());
if (EvalSuccess) {
+ ++NumCtorsEvaluated;
+
// We succeeded at evaluation: commit the result.
DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
<< F->getName() << "' to " << Eval.getMutatedMemory().size()
@@ -2806,46 +2697,6 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
return EvalSuccess;
}
-/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
-/// Return true if anything changed.
-bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
- std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
- bool MadeChange = false;
- if (Ctors.empty()) return false;
-
- // Loop over global ctors, optimizing them when we can.
- for (unsigned i = 0; i != Ctors.size(); ++i) {
- Function *F = Ctors[i];
- // Found a null terminator in the middle of the list, prune off the rest of
- // the list.
- if (F == 0) {
- if (i != Ctors.size()-1) {
- Ctors.resize(i+1);
- MadeChange = true;
- }
- break;
- }
- DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
-
- // We cannot simplify external ctor functions.
- if (F->empty()) continue;
-
- // If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F, DL, TLI)) {
- Ctors.erase(Ctors.begin()+i);
- MadeChange = true;
- --i;
- ++NumCtorsEvaluated;
- continue;
- }
- }
-
- if (!MadeChange) return false;
-
- GCL = InstallGlobalCtors(GCL, Ctors);
- return true;
-}
-
static int compareNames(Constant *const *A, Constant *const *B) {
return (*A)->getName().compare((*B)->getName());
}
@@ -3010,7 +2861,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
if (!hasUsesToReplace(*J, Used, RenameTarget))
continue;
- J->replaceAllUsesWith(Aliasee);
+ J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
++NumAliasesResolved;
Changed = true;
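
The one-line change above matters because replaceAllUsesWith insists that the replacement have exactly the alias's type, and an alias need not share its aliasee's pointer type. Wrapping the aliasee in a bitcast constant expression keeps the substitution type-correct. Extracted as a sketch:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalAlias.h"
    using namespace llvm;

    // Cast the aliasee to the alias's own type before substituting it;
    // RAUW requires an exact type match.
    static void redirectAliasUses(GlobalAlias &J, Constant *Aliasee) {
      J.replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J.getType()));
    }
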
@@ -3042,12 +2893,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::cxa_atexit))
- return 0;
+ return nullptr;
Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
if (!Fn)
- return 0;
+ return nullptr;
FunctionType *FTy = Fn->getFunctionType();
@@ -3058,7 +2909,7 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
!FTy->getParamType(0)->isPointerTy() ||
!FTy->getParamType(1)->isPointerTy() ||
!FTy->getParamType(2)->isPointerTy())
- return 0;
+ return nullptr;
return Fn;
}
@@ -3160,12 +3011,9 @@ bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
- // Try to find the llvm.globalctors list.
- GlobalVariable *GlobalCtors = FindGlobalCtors(M);
-
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
@@ -3174,8 +3022,9 @@ bool GlobalOpt::runOnModule(Module &M) {
LocalChange |= OptimizeFunctions(M);
// Optimize global_ctors list.
- if (GlobalCtors)
- LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
+ LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
+ return EvaluateStaticConstructor(F, DL, TLI);
+ });
// Optimize non-address-taken globals.
LocalChange |= OptimizeGlobalVars(M);
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 8684796..af541d1 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ipconstprop"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -27,6 +26,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "ipconstprop"
+
STATISTIC(NumArgumentsProped, "Number of args turned into constants");
STATISTIC(NumReturnValProped, "Number of return values turned into constants");
@@ -112,7 +113,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
continue;
Constant *C = dyn_cast<Constant>(*AI);
- if (C && ArgumentConstants[i].first == 0) {
+ if (C && ArgumentConstants[i].first == nullptr) {
ArgumentConstants[i].first = C; // First constant seen.
} else if (C && ArgumentConstants[i].first == C) {
// Still the constant value we think it is.
@@ -139,7 +140,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
continue;
Value *V = ArgumentConstants[i].first;
- if (V == 0) V = UndefValue::get(AI->getType());
+ if (!V) V = UndefValue::get(AI->getType());
AI->replaceAllUsesWith(V);
++NumArgumentsProped;
MadeChange = true;
@@ -209,7 +210,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
}
// Different or no known return value? Don't propagate this return
// value.
- RetVals[i] = 0;
+ RetVals[i] = nullptr;
// All values non-constant? Stop looking.
if (++NumNonConstant == RetVals.size())
return false;
@@ -235,7 +236,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
MadeChange = true;
- if (STy == 0) {
+ if (!STy) {
Value* New = RetVals[0];
if (Argument *A = dyn_cast<Argument>(New))
// Was an argument returned? Then find the corresponding argument in
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 6cf3040..624cb90 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "inline"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
@@ -28,6 +27,8 @@
using namespace llvm;
+#define DEBUG_TYPE "inline"
+
namespace {
/// \brief Inliner pass which only handles "always inline" functions.
@@ -36,12 +37,13 @@ class AlwaysInliner : public Inliner {
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) {
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true),
+ ICA(nullptr) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
AlwaysInliner(bool InsertLifetime)
- : Inliner(ID, -2000000000, InsertLifetime), ICA(0) {
+ : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -93,8 +95,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
// that are viable for inlining. FIXME: We shouldn't even get here for
// declarations.
if (Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline) &&
+ CS.hasFnAttr(Attribute::AlwaysInline) &&
ICA->isInlineViable(*Callee))
return InlineCost::getAlways();
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 7141064..d189756 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "inline"
#include "llvm/Transforms/IPO.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
@@ -26,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "inline"
+
namespace {
/// \brief Actual inliner pass implementation.
@@ -37,12 +38,12 @@ class SimpleInliner : public Inliner {
InlineCostAnalysis *ICA;
public:
- SimpleInliner() : Inliner(ID), ICA(0) {
+ SimpleInliner() : Inliner(ID), ICA(nullptr) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
SimpleInliner(int Threshold)
- : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) {
+ : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index e97fb83..9087ab2 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "inline"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -21,6 +20,7 @@
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
@@ -32,6 +32,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "inline"
+
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
@@ -183,7 +185,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// canonicalized to be an allocation *of* an array), or allocations whose
// type is not itself an array (because we're afraid of pessimizing SRoA).
ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
- if (ATy == 0 || AI->isArrayAllocation())
+ if (!ATy || AI->isArrayAllocation())
continue;
// Get the list of all available allocas for this array type.
@@ -239,7 +241,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
AI->eraseFromParent();
MergedAwayAlloca = true;
++NumMergedAllocas;
- IFI.StaticAllocas[AllocaNo] = 0;
+ IFI.StaticAllocas[AllocaNo] = nullptr;
break;
}
@@ -288,12 +290,24 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
bool ColdCallee = Callee && !Callee->isDeclaration() &&
Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::Cold);
- if (ColdCallee && ColdThreshold < thres)
+ // Command line argument for InlineLimit will override the default
+ // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
+ // do not use the default cold threshold even if it is smaller.
+ if ((InlineLimit.getNumOccurrences() == 0 ||
+ ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
+ ColdThreshold < thres)
thres = ColdThreshold;
return thres;
}
+static void emitAnalysis(CallSite CS, const Twine &Msg) {
+ Function *Caller = CS.getCaller();
+ LLVMContext &Ctx = Caller->getContext();
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+}
+
/// shouldInline - Return true if the inliner should attempt to inline
/// at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
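
Two behavioural changes sit in the hunk above. First, an explicit -inline-threshold now beats the built-in cold-callee default: ColdThreshold is only applied when it was passed on the command line or when InlineLimit was not. Second, the new emitAnalysis helper (hence the added llvm/IR/DiagnosticInfo.h include) reports inline decisions through the optimization-remark machinery instead of only under -debug. A sketch of emitting such a remark, mirroring the helper's call:

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/DebugLoc.h"
    #include "llvm/IR/DiagnosticInfo.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Same shape as emitAnalysis above: attribute a message to a pass
    // name and source location so remark flags can surface it.
    static void reportDecision(Function &F, const DebugLoc &DLoc,
                               const Twine &Msg) {
      emitOptimizationRemarkAnalysis(F.getContext(), "inline", F, DLoc, Msg);
    }
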
@@ -302,12 +316,16 @@ bool Inliner::shouldInline(CallSite CS) {
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
<< ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
+ " should always be inlined (cost=always)");
return true;
}
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+ " should never be inlined (cost=never)"));
return false;
}
@@ -316,6 +334,10 @@ bool Inliner::shouldInline(CallSite CS) {
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+ " too costly to inline (cost=") +
+ Twine(IC.getCost()) + ", threshold=" +
+ Twine(IC.getCostDelta() + IC.getCost()) + ")");
return false;
}
@@ -383,6 +405,11 @@ bool Inliner::shouldInline(CallSite CS) {
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
" Cost = " << IC.getCost() <<
", outer Cost = " << TotalSecondaryCost << '\n');
+ emitAnalysis(
+ CS, Twine("Not inlining. Cost of inlining " +
+ CS.getCalledFunction()->getName() +
+ " increases the cost of inlining " +
+ CS.getCaller()->getName() + " in other contexts"));
return false;
}
}
@@ -390,6 +417,10 @@ bool Inliner::shouldInline(CallSite CS) {
DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << '\n');
+ emitAnalysis(
+ CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
+ CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
+ " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
return true;
}
@@ -410,7 +441,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
SmallPtrSet<Function*, 8> SCCFunctions;
@@ -499,7 +530,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
++NumCallsDeleted;
} else {
// We can only inline direct calls to non-declarations.
- if (Callee == 0 || Callee->isDeclaration()) continue;
+ if (!Callee || Callee->isDeclaration()) continue;
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
@@ -511,18 +542,37 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
continue;
-
+ LLVMContext &CallerCtx = Caller->getContext();
+
+ // Get DebugLoc to report. CS will be invalid after Inliner.
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+
// If the policy determines that we should inline this function,
// try to do so.
- if (!shouldInline(CS))
+ if (!shouldInline(CS)) {
+ emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() +
+ " will not be inlined into " +
+ Caller->getName()));
continue;
+ }
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime, DL))
+ InlineHistoryID, InsertLifetime, DL)) {
+ emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() +
+ " will not be inlined into " +
+ Caller->getName()));
continue;
+ }
++NumInlined;
-
+
+ // Report the inline decision.
+ emitOptimizationRemark(
+ CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() + " inlined into " + Caller->getName()));
+
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
if (!InlineInfo.InlinedCalls.empty()) {
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index c1fe01c..c970a1a 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -19,7 +19,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "internalize"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -35,6 +34,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "internalize"
+
STATISTIC(NumAliases , "Number of aliases internalized");
STATISTIC(NumFunctions, "Number of functions internalized");
STATISTIC(NumGlobals , "Number of global vars internalized");
@@ -131,8 +132,8 @@ static bool shouldInternalize(const GlobalValue &GV,
bool InternalizePass::runOnModule(Module &M) {
CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
- CallGraph *CG = CGPass ? &CGPass->getCallGraph() : 0;
- CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
+ CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
+ CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
bool Changed = false;
SmallPtrSet<GlobalValue *, 8> Used;
@@ -158,6 +159,7 @@ bool InternalizePass::runOnModule(Module &M) {
if (!shouldInternalize(*I, ExternalNames))
continue;
+ I->setVisibility(GlobalValue::DefaultVisibility);
I->setLinkage(GlobalValue::InternalLinkage);
if (ExternalNode)
@@ -194,6 +196,7 @@ bool InternalizePass::runOnModule(Module &M) {
if (!shouldInternalize(*I, ExternalNames))
continue;
+ I->setVisibility(GlobalValue::DefaultVisibility);
I->setLinkage(GlobalValue::InternalLinkage);
Changed = true;
++NumGlobals;
@@ -206,6 +209,7 @@ bool InternalizePass::runOnModule(Module &M) {
if (!shouldInternalize(*I, ExternalNames))
continue;
+ I->setVisibility(GlobalValue::DefaultVisibility);
I->setLinkage(GlobalValue::InternalLinkage);
Changed = true;
++NumAliases;
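
All three Internalize hunks above add the same line ahead of setLinkage: a symbol being internalized may carry hidden or protected visibility, and internal linkage combined with non-default visibility is a pairing the IR verifier rejects in this era of LLVM (the ExtractGV hunk earlier in this diff guards the same invariant from the other direction). The pattern, extracted as a sketch:

    #include "llvm/IR/GlobalValue.h"
    using namespace llvm;

    // Reset visibility before dropping to internal linkage: a
    // local-linkage symbol must have default visibility.
    static void internalize(GlobalValue &GV) {
      GV.setVisibility(GlobalValue::DefaultVisibility);
      GV.setLinkage(GlobalValue::InternalLinkage);
    }
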
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 464aa99..20414aa 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-extract"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopPass.h"
@@ -30,6 +29,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "loop-extract"
+
STATISTIC(NumExtracted, "Number of loops extracted");
namespace {
@@ -136,7 +137,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (NumLoops == 0) return Changed;
--NumLoops;
CodeExtractor Extractor(DT, *L);
- if (Extractor.extractCodeRegion() != 0) {
+ if (Extractor.extractCodeRegion() != nullptr) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
@@ -241,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
if (!Split) continue;
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs);
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs);
}
}
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 8555d2c..c3a2b12 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -43,7 +43,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mergefunc"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
@@ -67,6 +66,8 @@
#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "mergefunc"
+
STATISTIC(NumFunctionsMerged, "Number of functions merged");
STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
@@ -120,12 +121,12 @@ public:
void release() {
assert(Func &&
"Attempted to release function twice, or release empty/tombstone!");
- Func = NULL;
+ Func = nullptr;
}
private:
explicit ComparableFunction(unsigned Hash)
- : Func(NULL), Hash(Hash), DL(NULL) {}
+ : Func(nullptr), Hash(Hash), DL(nullptr) {}
AssertingVH<Function> Func;
unsigned Hash;
@@ -175,19 +176,181 @@ private:
/// Test whether two basic blocks have equivalent behaviour.
bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
+ /// Constants comparison.
+ /// It is analogous to lexicographic comparison between hypothetical numbers
+ /// of the following format:
+ /// <bitcastability-trait><raw-bit-contents>
+ ///
+ /// 1. Bitcastability.
+ /// Check whether L's type could be losslessly bitcast to R's type.
+ /// If a lossless bitcast is not possible, this stage returns -1 or 1, thus
+ /// also defining which type is greater in the context of bitcastability.
+ /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight
+ ///          to the contents comparison.
+ ///          If types differ, remember the types comparison result and check
+ ///          whether we can still bitcast the types.
+ /// Stage 1: Types that satisfy isFirstClassType are always greater than
+ ///          others.
+ /// Stage 2: A vector is greater than a non-vector.
+ ///          If both types are vectors, the vector with the greater bitwidth
+ ///          is greater.
+ ///          If both types are vectors with the same bitwidth, then the types
+ ///          are bitcastable, and we can skip the other stages and go
+ ///          straight to the contents comparison.
+ /// Stage 3: Pointer types are greater than non-pointers. If both types are
+ ///          pointers of the same address space - go to the contents
+ ///          comparison. Different address spaces: the pointer with the
+ ///          greater address space is greater.
+ /// Stage 4: Types are neither vectors nor pointers, and they differ.
+ ///          We don't know how to bitcast them, so we had better not do it,
+ ///          and we return the types comparison result instead (so it
+ ///          determines the relationship among constants we don't know how
+ ///          to bitcast).
+ ///
+ /// Just for clarity, here is how the set of constants could look on a
+ /// single-dimension axis:
+ ///
+ /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+ /// Where: NFCT - Not a FirstClassType
+ ///        FCT  - FirstClassType
+ ///
+ /// 2. Compare raw contents.
+ /// This stage ignores types and only compares the bits of L and R.
+ /// Returns 0 if L and R have equivalent contents,
+ /// -1 or 1 if the values are different.
+ /// Pretty trivial:
+ /// 2.1. If the contents are numbers, compare the numbers.
+ ///      Ints with a greater bitwidth are greater. Ints with the same
+ ///      bitwidth are compared by their contents.
+ /// 2.2. "And so on". To avoid discrepancies between these comments and the
+ ///      code, it is better to read the implementation itself for the
+ ///      remaining cases.
+ /// 3. Once more, the overall picture. This is how the ordered set of
+ ///    constants will look:
+ /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+ ///
+ /// Now look at what could be inside [FCT, "others"], for example:
+ /// [FCT, "others"] =
+ /// [
+ ///   [double 0.1], [double 1.23],
+ ///   [i32 1], [i32 2],
+ ///   { double 1.0 },       ; StructTyID, NumElements = 1
+ ///   { i32 1 },            ; StructTyID, NumElements = 1
+ ///   { double 1, i32 1 },  ; StructTyID, NumElements = 2
+ ///   { i32 1, double 1 }   ; StructTyID, NumElements = 2
+ /// ]
+ ///
+ /// To explain the order: floats are less than integers simply in cmpType
+ /// terms: FloatTyID < IntegerTyID.
+ /// Floats (with the same fltSemantics) are sorted according to their value.
+ /// Then come the integers, which, like the floats, are easily ordered among
+ /// themselves.
+ /// The structures are grouped at the tail, again because of their TypeID:
+ /// StructTyID > IntegerTyID > FloatTyID.
+ /// Structures with a greater number of elements are greater. Of structures
+ /// with the same number of elements, the one whose leading elements are
+ /// greater is greater.
+ /// The same logic applies to vectors, arrays and other possible complex
+ /// types.
+ ///
+ /// Bitcastable constants.
+ /// Assume that some constant belongs to a group of "so-called-equal" values
+ /// with different types, and at the same time belongs to another group of
+ /// constants with equal types and "really" equal values.
+ ///
+ /// Now, prove that this is impossible:
+ ///
+ /// If constant A with type TyA is bitcastable to B with type TyB, then:
+ /// 1. All constants with types equal to TyA are bitcastable to B. Since
+ ///    those must be vectors (if TyA is a vector), pointers (if TyA is a
+ ///    pointer), or otherwise (if TyA equals TyB), those types must be equal
+ ///    to TyB.
+ /// 2. All constants with non-equal but bitcastable types to TyA are
+ ///    bitcastable to B.
+ ///    Once again, because we allow this for vectors and pointers only.
+ ///    This statement can be expanded as follows:
+ /// 2.1. All vectors with bitwidth equal to vector A have bitwidth equal to
+ ///      vector B, and are thus bitcastable to B as well.
+ /// 2.2. All pointers of the same address space, no matter what they point
+ ///      to, are bitcastable to each other. So if C is a pointer, it can be
+ ///      bitcast to A and to B.
+ /// So any constant equal or bitcastable to A is equal or bitcastable to B.
+ /// QED.
+ ///
+ /// In other words, for pointers and vectors we ignore the top-level type
+ /// and look at their particular properties (bitwidth for vectors, and
+ /// address space for pointers).
+ /// If these properties are equal - compare their contents.
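+ ///
+ /// A couple of illustrative consequences of this ordering (examples only,
+ /// not an exhaustive specification):
+ ///   - i32 0 precedes i32 1: the types are equal, so raw contents decide.
+ ///   - double 1.23 precedes i32 1: FloatTyID < IntegerTyID in cmpType.
+ ///   - an i32 constant precedes a <2 x i32> constant: a vector is greater
+ ///     than a non-vector of different bitwidth.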
+ int cmpConstants(const Constant *L, const Constant *R);
+
/// Assign or look up previously assigned numbers for the two values, and
/// return whether the numbers are equal. Numbers are assigned in the order
/// visited.
- bool enumerate(const Value *V1, const Value *V2);
+ /// Comparison order:
+ /// Stage 0: A value that is the function itself is always greater than
+ ///          others.
+ ///          If the left and right values are references to their own
+ ///          functions, then they are equal.
+ /// Stage 1: Constants are greater than non-constants.
+ ///          If both left and right are constants, then the result of
+ ///          cmpConstants is used as the cmpValues result.
+ /// Stage 2: InlineAsm instances are greater than others. If both left and
+ ///          right are InlineAsm instances, the InlineAsm* pointers are cast
+ ///          to integers and compared as numbers.
+ /// Stage 3: For all other cases we compare the order in which we first met
+ ///          these values while scanning their functions. If the right value
+ ///          was met first, then the left value is greater.
+ ///          In other words, we compare serial numbers; for more details
+ ///          see the comments for sn_mapL and sn_mapR.
+ int cmpValues(const Value *L, const Value *R);
+
+ bool enumerate(const Value *V1, const Value *V2) {
+ return cmpValues(V1, V2) == 0;
+ }
/// Compare two Instructions for equivalence, similar to
/// Instruction::isSameOperationAs but with modifications to the type
/// comparison.
+ /// Stages are listed in "most significant stage first" order:
+ /// At each stage below we compare some part of the left and right
+ /// operations. If the parts are not equal, that comparison result becomes
+ /// the result for the whole operation and we return from the method.
+ /// Otherwise we proceed to the next stage.
+ /// Stages:
+ /// 1. Operation opcodes. Compared as numbers.
+ /// 2. Number of operands.
+ /// 3. Operation types. Compared with the cmpType method.
+ /// 4. Operation subclass optional data, compared as a stream of bytes:
+ ///    just convert it to integers and call cmpNumbers.
+ /// 5. Operand types, compared with the cmpType method, in "most significant
+ ///    operand first" order.
+ /// 6. Last stage. Check operations for some specific attributes.
+ ///    For example, for a Load it would be:
+ ///    6.1. Load: volatile (as a boolean flag)
+ ///    6.2. Load: alignment (as integer numbers)
+ ///    6.3. Load: synch-scope (as integer numbers)
+ ///    At this stage it is better to read the code itself, since it is no
+ ///    more than 10-15 lines per instruction and may change from time to
+ ///    time.
+ int cmpOperation(const Instruction *L, const Instruction *R) const;
+
bool isEquivalentOperation(const Instruction *I1,
- const Instruction *I2) const;
+ const Instruction *I2) const {
+ return cmpOperation(I1, I2) == 0;
+ }
/// Compare two GEPs for equivalent pointer arithmetic.
- bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+ /// Parts to be compared at each comparison stage,
+ /// most significant stage first:
+ /// 1. Address space. Compared as numbers.
+ /// 2. Constant offset (if the "DataLayout *DL" field is not null, computed
+ ///    with the GEPOperator::accumulateConstantOffset method).
+ /// 3. Pointer operand type (using the cmpType method).
+ /// 4. Number of operands.
+ /// 5. The operands themselves, compared with the cmpValues method.
+ int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR);
+ int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+ return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+ }
+
+ bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) {
+ return cmpGEP(GEP1, GEP2) == 0;
+ }
bool isEquivalentGEP(const GetElementPtrInst *GEP1,
const GetElementPtrInst *GEP2) {
return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
@@ -241,13 +404,50 @@ private:
int cmpNumbers(uint64_t L, uint64_t R) const;
+ int cmpAPInt(const APInt &L, const APInt &R) const;
+ int cmpAPFloat(const APFloat &L, const APFloat &R) const;
+ int cmpStrings(StringRef L, StringRef R) const;
+ int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
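+
+ // All of the helpers above share the same early-return idiom, so comparison
+ // stages compose by simple chaining. A minimal sketch (illustrative;
+ // cmpRemainingParts stands in for any hypothetical next stage):
+ //   if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
+ //     return Res;                   // widths differ: this stage decides
+ //   return cmpRemainingParts(L, R); // widths equal: fall through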
+
// The two functions undergoing comparison.
const Function *F1, *F2;
const DataLayout *DL;
- DenseMap<const Value *, const Value *> id_map;
- DenseSet<const Value *> seen_values;
+ /// Assign serial numbers to values from the left function, and to values
+ /// from the right function.
+ /// Explanation:
+ /// When comparing functions, we need to compare the values we meet on the
+ /// left and right sides.
+ /// External values are easy to sort out: the left and right values simply
+ /// have to be the same value.
+ /// But for local values (those introduced inside the function body) we have
+ /// to ensure they were introduced at exactly the same place, and play the
+ /// same role.
+ /// So we assign a serial number to each value the first time we meet it.
+ /// Values that were met at the same place get the same serial numbers.
+ /// At this point it is worth explaining a few things about the values
+ /// assigned to BBs and about alternative implementations (see below).
+ ///
+ /// 1. Safety of BB reordering.
+ /// It is safe to change the order of BasicBlocks in a function.
+ /// The relationship with other functions and the serial numbering will not
+ /// change in this case.
+ /// As follows from FunctionComparator::compare(), we do a CFG walk: we
+ /// start from the entry and then take each terminator. So it does not
+ /// matter how the BBs are actually ordered in the function. And since
+ /// cmpValues is called during this walk, the numbering depends only on how
+ /// the BBs are located inside the CFG.
+ /// So the answer is - yes. We will get the same numbering.
+ ///
+ /// 2. Impossibility of using the dominance properties of values.
+ /// If we compare two instruction operands - the first a use of local
+ /// variable AL from function FL, and the second a use of local variable AR
+ /// from FR - we could compare their origins and check whether they are
+ /// defined at the same place.
+ /// But we are still unable to compare the operands of PHI nodes, since
+ /// those could be operands from BBs we have not scanned yet.
+ /// So it is impossible to use dominance properties in general.
+ DenseMap<const Value*, int> sn_mapL, sn_mapR;
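+
+ // To make the numbering concrete, consider this hypothetical pair of
+ // fragments (illustration only):
+ //   ; FL:                      ; FR:
+ //   %a = add i32 %x, %x        %u = add i32 %v, %v
+ //   %b = mul i32 %a, %a        %w = mul i32 %u, %u
+ // %x and %v get serial number 0 on first sight, %a and %u get 1, and so on;
+ // cmpValues(%b, %w) then compares equal serial numbers and returns 0.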
};
}
@@ -258,6 +458,206 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
return 0;
}
+int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const {
+ if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
+ return Res;
+ if (L.ugt(R)) return 1;
+ if (R.ugt(L)) return -1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const {
+ if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
+ (uint64_t)&R.getSemantics()))
+ return Res;
+ return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt());
+}
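+
+// Note: cmpAPFloat compares &getSemantics() as raw pointer values, which
+// yields an arbitrary but run-consistent order between different float kinds
+// (e.g. IEEEsingle vs IEEEdouble); within one fltSemantics the bitcast
+// payload decides. For instance (illustrative), for float 1.0 vs float 2.0
+// the payloads are 0x3f800000 and 0x40000000, so the result is negative.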
+
+int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
+ // Avoid a potentially heavy comparison by comparing sizes first.
+ if (int Res = cmpNumbers(L.size(), R.size()))
+ return Res;
+
+ // Compare the strings lexicographically only when necessary: that is, only
+ // when they are equal in size.
+ return L.compare(R);
+}
+
+int FunctionComparator::cmpAttrs(const AttributeSet L,
+ const AttributeSet R) const {
+ if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
+ return Res;
+
+ for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
+ AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
+ RE = R.end(i);
+ for (; LI != LE && RI != RE; ++LI, ++RI) {
+ Attribute LA = *LI;
+ Attribute RA = *RI;
+ if (LA < RA)
+ return -1;
+ if (RA < LA)
+ return 1;
+ }
+ if (LI != LE)
+ return 1;
+ if (RI != RE)
+ return -1;
+ }
+ return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether the type of constant L can be losslessly bitcast to the
+///    type of R.
+/// 2. Compare the constant contents.
+/// For more details see the declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
+
+ Type *TyL = L->getType();
+ Type *TyR = R->getType();
+
+ // Check whether the types are bitcastable. This part is essentially a
+ // refactored Type::canLosslesslyBitCastTo, but instead of returning
+ // true/false the result also encodes which type is "less" for us.
+ int TypesRes = cmpType(TyL, TyR);
+ if (TypesRes != 0) {
+ // Types are different, but check whether we can bitcast them.
+ if (!TyL->isFirstClassType()) {
+ if (TyR->isFirstClassType())
+ return -1;
+ // Neither TyL nor TyR are values of first class type. Return the result
+ // of comparing the types
+ return TypesRes;
+ }
+ if (!TyR->isFirstClassType()) {
+ if (TyL->isFirstClassType())
+ return 1;
+ return TypesRes;
+ }
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not.
+ unsigned TyLWidth = 0;
+ unsigned TyRWidth = 0;
+
+ if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
+ TyLWidth = VecTyL->getBitWidth();
+ if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
+ TyRWidth = VecTyR->getBitWidth();
+
+ if (TyLWidth != TyRWidth)
+ return cmpNumbers(TyLWidth, TyRWidth);
+
+ // Zero bit-width means neither TyL nor TyR are vectors.
+ if (!TyLWidth) {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+ if (PTyL && PTyR) {
+ unsigned AddrSpaceL = PTyL->getAddressSpace();
+ unsigned AddrSpaceR = PTyR->getAddressSpace();
+ if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
+ return Res;
+ }
+ if (PTyL)
+ return 1;
+ if (PTyR)
+ return -1;
+
+ // TyL and TyR aren't vectors, nor pointers. We don't know how to
+ // bitcast them.
+ return TypesRes;
+ }
+ }
+
+ // OK, types are bitcastable, now check constant contents.
+
+ if (L->isNullValue() && R->isNullValue())
+ return TypesRes;
+ if (L->isNullValue() && !R->isNullValue())
+ return 1;
+ if (!L->isNullValue() && R->isNullValue())
+ return -1;
+
+ if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+ return Res;
+
+ switch (L->getValueID()) {
+ case Value::UndefValueVal: return TypesRes;
+ case Value::ConstantIntVal: {
+ const APInt &LInt = cast<ConstantInt>(L)->getValue();
+ const APInt &RInt = cast<ConstantInt>(R)->getValue();
+ return cmpAPInt(LInt, RInt);
+ }
+ case Value::ConstantFPVal: {
+ const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
+ const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
+ return cmpAPFloat(LAPF, RAPF);
+ }
+ case Value::ConstantArrayVal: {
+ const ConstantArray *LA = cast<ConstantArray>(L);
+ const ConstantArray *RA = cast<ConstantArray>(R);
+ uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
+ uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
+ cast<Constant>(RA->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantStructVal: {
+ const ConstantStruct *LS = cast<ConstantStruct>(L);
+ const ConstantStruct *RS = cast<ConstantStruct>(R);
+ unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (unsigned i = 0; i != NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
+ cast<Constant>(RS->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantVectorVal: {
+ const ConstantVector *LV = cast<ConstantVector>(L);
+ const ConstantVector *RV = cast<ConstantVector>(R);
+ unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
+ cast<Constant>(RV->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantExprVal: {
+ const ConstantExpr *LE = cast<ConstantExpr>(L);
+ const ConstantExpr *RE = cast<ConstantExpr>(R);
+ unsigned NumOperandsL = LE->getNumOperands();
+ unsigned NumOperandsR = RE->getNumOperands();
+ if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
+ return Res;
+ for (unsigned i = 0; i < NumOperandsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
+ cast<Constant>(RE->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::FunctionVal:
+ case Value::GlobalVariableVal:
+ case Value::GlobalAliasVal:
+ default: // Unknown constant, cast L and R pointers to numbers and compare.
+ return cmpNumbers((uint64_t)L, (uint64_t)R);
+ }
+}
+
/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
@@ -350,143 +750,209 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
// Determine whether the two operations are the same except that pointer-to-A
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
-bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
- const Instruction *I2) const {
+// Read method declaration comments for more details.
+int FunctionComparator::cmpOperation(const Instruction *L,
+ const Instruction *R) const {
// Differences from Instruction::isSameOperationAs:
// * replace type comparison with calls to isEquivalentType.
// * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
// * because of the above, we don't test for the tail bit on calls later on
- if (I1->getOpcode() != I2->getOpcode() ||
- I1->getNumOperands() != I2->getNumOperands() ||
- !isEquivalentType(I1->getType(), I2->getType()) ||
- !I1->hasSameSubclassOptionalData(I2))
- return false;
+ if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
+ return Res;
+
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+
+ if (int Res = cmpType(L->getType(), R->getType()))
+ return Res;
+
+ if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
+ R->getRawSubclassOptionalData()))
+ return Res;
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same type
- for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
- if (!isEquivalentType(I1->getOperand(i)->getType(),
- I2->getOperand(i)->getType()))
- return false;
+ for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
+ if (int Res =
+ cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+ return Res;
+ }
// Check special state that is a part of some instructions.
- if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
- return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
- LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() &&
- LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
- LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
- return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
- SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() &&
- SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
- SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope();
- if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
- return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
- if (const CallInst *CI = dyn_cast<CallInst>(I1))
- return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
- CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
- if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
- return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
- CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
- return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
- return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices();
- if (const FenceInst *FI = dyn_cast<FenceInst>(I1))
- return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() &&
- FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
- return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
- CXI->getSuccessOrdering() ==
- cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() &&
- CXI->getFailureOrdering() ==
- cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() &&
- CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
- return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
- RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() &&
- RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() &&
- RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope();
+ if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
+ if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope());
+ }
+ if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
+ if (int Res =
+ cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
+ }
+ if (const CmpInst *CI = dyn_cast<CmpInst>(L))
+ return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
+ if (const CallInst *CI = dyn_cast<CallInst>(L)) {
+ if (int Res = cmpNumbers(CI->getCallingConv(),
+ cast<CallInst>(R)->getCallingConv()))
+ return Res;
+ return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes());
+ }
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
+ if (int Res = cmpNumbers(CI->getCallingConv(),
+ cast<InvokeInst>(R)->getCallingConv()))
+ return Res;
+ return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes());
+ }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = IVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = EVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ }
+ if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
+ if (int Res =
+ cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
+ }
- return true;
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
+ if (int Res = cmpNumbers(CXI->isVolatile(),
+ cast<AtomicCmpXchgInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpNumbers(CXI->getSuccessOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
+ return Res;
+ if (int Res = cmpNumbers(CXI->getFailureOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
+ return Res;
+ return cmpNumbers(CXI->getSynchScope(),
+ cast<AtomicCmpXchgInst>(R)->getSynchScope());
+ }
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
+ if (int Res = cmpNumbers(RMWI->getOperation(),
+ cast<AtomicRMWInst>(R)->getOperation()))
+ return Res;
+ if (int Res = cmpNumbers(RMWI->isVolatile(),
+ cast<AtomicRMWInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpNumbers(RMWI->getOrdering(),
+ cast<AtomicRMWInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(RMWI->getSynchScope(),
+ cast<AtomicRMWInst>(R)->getSynchScope());
+ }
+ return 0;
}
// Determine whether two GEP operations perform the same underlying arithmetic.
-bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
- const GEPOperator *GEP2) {
- unsigned AS = GEP1->getPointerAddressSpace();
- if (AS != GEP2->getPointerAddressSpace())
- return false;
+// Read method declaration comments for more details.
+int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
+ const GEPOperator *GEPR) {
+
+ unsigned int ASL = GEPL->getPointerAddressSpace();
+ unsigned int ASR = GEPR->getPointerAddressSpace();
+ if (int Res = cmpNumbers(ASL, ASR))
+ return Res;
+
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
if (DL) {
- // When we have target data, we can reduce the GEP down to the value in bytes
- // added to the address.
- unsigned BitWidth = DL ? DL->getPointerSizeInBits(AS) : 1;
- APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
- if (GEP1->accumulateConstantOffset(*DL, Offset1) &&
- GEP2->accumulateConstantOffset(*DL, Offset2)) {
- return Offset1 == Offset2;
- }
+ unsigned BitWidth = DL->getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(*DL, OffsetR))
+ return cmpAPInt(OffsetL, OffsetR);
}
- if (GEP1->getPointerOperand()->getType() !=
- GEP2->getPointerOperand()->getType())
- return false;
+ if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
+ (uint64_t)GEPR->getPointerOperand()->getType()))
+ return Res;
- if (GEP1->getNumOperands() != GEP2->getNumOperands())
- return false;
+ if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
+ return Res;
- for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
- if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
- return false;
+ for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
+ if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+ return Res;
}
- return true;
+ return 0;
}
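+
+// For intuition (illustrative): given a DataLayout, GEPs such as
+//   getelementptr { i32, i32 }* %p, i32 0, i32 1
+//   getelementptr i8* %q, i64 4
+// both accumulate a constant offset of 4 bytes, so the offset stage compares
+// 4 against 4 and the differing syntactic forms are not distinguished.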
-// Compare two values used by the two functions under pair-wise comparison. If
-// this is the first time the values are seen, they're added to the mapping so
-// that we will detect mismatches on next use.
-bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
- // Check for function @f1 referring to itself and function @f2 referring to
- // itself, or referring to each other, or both referring to either of them.
- // They're all equivalent if the two functions are otherwise equivalent.
- if (V1 == F1 && V2 == F2)
- return true;
- if (V1 == F2 && V2 == F1)
- return true;
+/// Compare two values used by the two functions under pair-wise comparison. If
+/// this is the first time the values are seen, they're added to the mapping so
+/// that we will detect mismatches on next use.
+/// See comments in declaration for more details.
+int FunctionComparator::cmpValues(const Value *L, const Value *R) {
+ // Catch self-reference case.
+ if (L == F1) {
+ if (R == F2)
+ return 0;
+ return -1;
+ }
+ if (R == F2) {
+ if (L == F1)
+ return 0;
+ return 1;
+ }
- if (const Constant *C1 = dyn_cast<Constant>(V1)) {
- if (V1 == V2) return true;
- const Constant *C2 = dyn_cast<Constant>(V2);
- if (!C2) return false;
- // TODO: constant expressions with GEP or references to F1 or F2.
- if (C1->isNullValue() && C2->isNullValue() &&
- isEquivalentType(C1->getType(), C2->getType()))
- return true;
- // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
- // then they must have equal bit patterns.
- return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
- C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
- }
-
- if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
- return V1 == V2;
-
- // Check that V1 maps to V2. If we find a value that V1 maps to then we simply
- // check whether it's equal to V2. When there is no mapping then we need to
- // ensure that V2 isn't already equivalent to something else. For this
- // purpose, we track the V2 values in a set.
-
- const Value *&map_elem = id_map[V1];
- if (map_elem)
- return map_elem == V2;
- if (!seen_values.insert(V2).second)
- return false;
- map_elem = V2;
- return true;
-}
+ const Constant *ConstL = dyn_cast<Constant>(L);
+ const Constant *ConstR = dyn_cast<Constant>(R);
+ if (ConstL && ConstR) {
+ if (L == R)
+ return 0;
+ return cmpConstants(ConstL, ConstR);
+ }
+
+ if (ConstL)
+ return 1;
+ if (ConstR)
+ return -1;
+
+ const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
+ const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
+
+ if (InlineAsmL && InlineAsmR)
+ return cmpNumbers((uint64_t)L, (uint64_t)R);
+ if (InlineAsmL)
+ return 1;
+ if (InlineAsmR)
+ return -1;
+
+ auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
+ RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
+ return cmpNumbers(LeftSN.first->second, RightSN.first->second);
+}
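+
+// Note the insertion idiom above: DenseMap::insert returns an
+// (iterator, inserted) pair, and the map's size() before the insertion
+// serves as the candidate serial number. A value therefore receives a fresh
+// number the first time it is seen, and its previously assigned number on
+// later visits, so only the two numbers need to be compared.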
// Test whether two basic blocks have equivalent behaviour.
bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
@@ -535,6 +1001,9 @@ bool FunctionComparator::compare() {
// We need to recheck everything, but check the things that weren't included
// in the hash first.
+ sn_mapL.clear();
+ sn_mapR.clear();
+
if (F1->getAttributes() != F2->getAttributes())
return false;
@@ -683,7 +1152,7 @@ ModulePass *llvm::createMergeFunctionsPass() {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -783,8 +1252,23 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
// Helper for writeThunk,
// Selects the proper bitcast operation,
// but a bit simpler than CastInst::getCastOpcode.
-static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
Type *SrcTy = V->getType();
+ if (SrcTy->isStructTy()) {
+ assert(DestTy->isStructTy());
+ assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+ Value *Result = UndefValue::get(DestTy);
+ for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+ Value *Element = createCast(
+ Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)),
+ DestTy->getStructElementType(I));
+
+ Result =
+ Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I));
+ }
+ return Result;
+ }
+ assert(!DestTy->isStructTy());
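+ // Illustrative effect of the struct path above: casting { i32, i8* } to
+ // { i32, i64 } emits per-field conversions, roughly:
+ //   %e1 = extractvalue { i32, i8* } %v, 1
+ //   %c1 = ptrtoint i8* %e1 to i64
+ // and rebuilds the result with insertvalue; scalar values fall through to
+ // the cases below.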
if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
return Builder.CreateIntToPtr(V, DestTy);
else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
@@ -843,9 +1327,9 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
// Replace G with an alias to F and delete G.
void MergeFunctions::writeAlias(Function *F, Function *G) {
- Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
- GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
- BitcastF, G->getParent());
+ PointerType *PTy = G->getType();
+ auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ G->getLinkage(), "", F);
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index ac88aee..76d6dfa 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "partialinlining"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CFG.h"
@@ -24,6 +23,8 @@
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;
+#define DEBUG_TYPE "partialinlining"
+
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
namespace {
@@ -52,10 +53,10 @@ Function* PartialInliner::unswitchFunction(Function* F) {
BasicBlock* entryBlock = F->begin();
BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
if (!BR || BR->isUnconditional())
- return 0;
+ return nullptr;
- BasicBlock* returnBlock = 0;
- BasicBlock* nonReturnBlock = 0;
+ BasicBlock* returnBlock = nullptr;
+ BasicBlock* nonReturnBlock = nullptr;
unsigned returnCount = 0;
for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
SI != SE; ++SI)
@@ -66,7 +67,7 @@ Function* PartialInliner::unswitchFunction(Function* F) {
nonReturnBlock = *SI;
if (returnCount != 1)
- return 0;
+ return nullptr;
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 4a28b34..38e1b8e 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -56,8 +56,9 @@ RunLoopRerolling("reroll-loops", cl::Hidden,
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
- LibraryInfo = 0;
- Inliner = 0;
+ LibraryInfo = nullptr;
+ Inliner = nullptr;
+ DisableTailCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
@@ -128,7 +129,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (OptLevel == 0) {
if (Inliner) {
MPM.add(Inliner);
- Inliner = 0;
+ Inliner = nullptr;
}
// FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
@@ -156,6 +157,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+ addExtensionsToPM(EP_Peephole, MPM);
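+ // EP_Peephole extensions now run after each instcombine; a client would
+ // register one roughly like this (illustrative sketch;
+ // createMyPeepholePass is hypothetical):
+ //   Builder.addExtension(PassManagerBuilder::EP_Peephole,
+ //                        [](const PassManagerBuilder &,
+ //                           PassManagerBase &PM) {
+ //                          PM.add(createMyPeepholePass());
+ //                        });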
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
}
@@ -164,7 +166,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createPruneEHPass()); // Remove dead EH info
if (Inliner) {
MPM.add(Inliner);
- Inliner = 0;
+ Inliner = nullptr;
}
if (!DisableUnitAtATime)
MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
@@ -182,8 +184,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Combine silly seq's
+ addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ if (!DisableTailCalls)
+ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
MPM.add(createLoopRotatePass()); // Rotate Loop
@@ -206,6 +210,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
@@ -220,6 +225,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (BBVectorize) {
MPM.add(createBBVectorizePass());
MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
MPM.add(createGVNPass()); // Remove redundancies
else
@@ -233,6 +239,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
+ addExtensionsToPM(EP_Peephole, MPM);
// FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
// pass manager that we are specifically trying to avoid. To prevent this
@@ -245,6 +252,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// as function calls, so that we can only pass them when the vectorizer
// changed the code.
MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createCFGSimplificationPass());
if (!DisableUnrollLoops)
@@ -297,6 +305,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
PM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, PM);
// Inline small functions
if (RunInliner)
@@ -315,6 +324,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// The IPO passes may leave cruft around. Clean up after them.
PM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
// Break up allocas
@@ -334,11 +344,17 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
- // More loops are countable try to vectorize them.
+ // More loops are countable; try to optimize them.
+ PM.add(createIndVarSimplifyPass());
+ PM.add(createLoopDeletionPass());
PM.add(createLoopVectorizePass(true, true));
+ // More scalar chains could be vectorized due to more alias information
+ PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
// Cleanup and simplify the code after the scalar optimizations.
PM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index c61ec5e..b2c4a09 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "prune-eh"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,6 +29,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "prune-eh"
+
STATISTIC(NumRemoved, "Number of invokes removed");
STATISTIC(NumUnreach, "Number of noreturn calls optimized");
@@ -85,7 +86,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
(!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F == 0) {
+ if (!F) {
SCCMightUnwind = true;
SCCMightReturn = true;
} else if (F->isDeclaration() || F->mayBeOverridden()) {
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 1c6532d..956991a 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -14,13 +14,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "strip-dead-prototypes"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "strip-dead-prototypes"
+
STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
namespace {
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 6d0be8f..1abbccc 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -192,7 +192,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
/// Find values that are marked as llvm.used.
static void findUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
- if (LLVMUsed == 0) return;
+ if (!LLVMUsed) return;
UsedValues.insert(LLVMUsed);
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 822e146..e04b1be 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -20,34 +20,38 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#define DEBUG_TYPE "instcombine"
+
namespace llvm {
- class CallSite;
- class DataLayout;
- class TargetLibraryInfo;
- class DbgDeclareInst;
- class MemIntrinsic;
- class MemSetInst;
+class CallSite;
+class DataLayout;
+class TargetLibraryInfo;
+class DbgDeclareInst;
+class MemIntrinsic;
+class MemSetInst;
/// SelectPatternFlavor - We can match a variety of different patterns for
/// select operations.
enum SelectPatternFlavor {
SPF_UNKNOWN = 0,
- SPF_SMIN, SPF_UMIN,
- SPF_SMAX, SPF_UMAX
- //SPF_ABS - TODO.
+ SPF_SMIN,
+ SPF_UMIN,
+ SPF_SMAX,
+ SPF_UMAX
+ // SPF_ABS - TODO.
};
/// getComplexity: Assign a complexity or rank value to LLVM Values...
/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
static inline unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
- if (BinaryOperator::isNeg(V) ||
- BinaryOperator::isFNeg(V) ||
+ if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
BinaryOperator::isNot(V))
return 3;
return 4;
}
- if (isa<Argument>(V)) return 3;
+ if (isa<Argument>(V))
+ return 3;
return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
}
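+
+// For instance (illustrative): when ordering the operands of a commutative
+// operation, a negation such as "sub i32 0, %x" ranks 3 while a plain
+// constant ranks 1, so the more complex operand is canonicalized to the
+// left-hand side.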
@@ -60,18 +64,18 @@ static inline Constant *SubOne(Constant *C) {
return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
-
/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
/// just like the normal insertion helper, but also adds any new instructions
/// to the instcombine worklist.
class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
: public IRBuilderDefaultInserter<true> {
InstCombineWorklist &Worklist;
+
public:
InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
- void InsertHelper(Instruction *I, const Twine &Name,
- BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
Worklist.Add(I);
}
@@ -79,13 +83,14 @@ public:
/// InstCombiner - The -instcombine pass.
class LLVM_LIBRARY_VISIBILITY InstCombiner
- : public FunctionPass,
- public InstVisitor<InstCombiner, Instruction*> {
+ : public FunctionPass,
+ public InstVisitor<InstCombiner, Instruction *> {
const DataLayout *DL;
TargetLibraryInfo *TLI;
bool MadeIRChange;
LibCallSimplifier *Simplifier;
bool MinimizeSize;
+
public:
/// Worklist - All of the instructions that need to be simplified.
InstCombineWorklist Worklist;
@@ -96,7 +101,7 @@ public:
BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(ID), DL(0), Builder(0) {
+ InstCombiner() : FunctionPass(ID), DL(nullptr), Builder(nullptr) {
MinimizeSize = false;
initializeInstCombinerPass(*PassRegistry::getPassRegistry());
}
@@ -144,9 +149,9 @@ public:
Instruction *visitAnd(BinaryOperator &I);
Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
- Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
- Value *A, Value *B, Value *C);
- Instruction *visitOr (BinaryOperator &I);
+ Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
+ Value *B, Value *C);
+ Instruction *visitOr(BinaryOperator &I);
Instruction *visitXor(BinaryOperator &I);
Instruction *visitShl(BinaryOperator &I);
Instruction *visitAShr(BinaryOperator &I);
@@ -156,12 +161,11 @@ public:
Constant *RHSC);
Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
GlobalVariable *GV, CmpInst &ICI,
- ConstantInt *AndCst = 0);
+ ConstantInt *AndCst = nullptr);
Instruction *visitFCmpInst(FCmpInst &I);
Instruction *visitICmpInst(ICmpInst &I);
Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
- Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
- Instruction *LHS,
+ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS,
ConstantInt *RHS);
Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
@@ -171,7 +175,7 @@ public:
ICmpInst::Predicate Pred);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
- Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I);
Instruction *commonCastTransforms(CastInst &CI);
Instruction *commonPointerCastTransforms(CastInst &CI);
@@ -188,9 +192,8 @@ public:
Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
- Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
- Instruction *FI);
- Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+ Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI);
+ Instruction *FoldSelectIntoOp(SelectInst &SI, Value *, Value *);
Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
Value *A, Value *B, Instruction &Outer,
SelectPatternFlavor SPF2, Value *C);
@@ -209,6 +212,7 @@ public:
Instruction *visitStoreInst(StoreInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitInsertValueInst(InsertValueInst &IV);
Instruction *visitInsertElementInst(InsertElementInst &IE);
Instruction *visitExtractElementInst(ExtractElementInst &EI);
Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
@@ -216,21 +220,21 @@ public:
Instruction *visitLandingPadInst(LandingPadInst &LI);
// visitInstruction - Specify what to return for unhandled instructions...
- Instruction *visitInstruction(Instruction &I) { return 0; }
+ Instruction *visitInstruction(Instruction &I) { return nullptr; }
private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
- Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
+ Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices);
+ SmallVectorImpl<Value *> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
/// results in any code being generated and is interesting to optimize out. If
/// the cast can be eliminated by some other simple transformation, we prefer
/// to do the simplification first.
- bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+ bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V,
Type *Ty);
Instruction *visitCallSite(CallSite CS);
@@ -251,10 +255,10 @@ public:
// in the program. Add the new instruction to the worklist.
//
Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
- assert(New && New->getParent() == 0 &&
+ assert(New && !New->getParent() &&
"New instruction already inserted into a basic block!");
BasicBlock *BB = Old.getParent();
- BB->getInstList().insert(&Old, New); // Insert inst
+ BB->getInstList().insert(&Old, New); // Insert inst
Worklist.Add(New);
return New;
}
@@ -274,7 +278,7 @@ public:
// modified.
//
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
- Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
+ Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
// If we are replacing the instruction with itself, this must be in a
// segment of unreachable code, so just clobber the instruction.
@@ -306,12 +310,12 @@ public:
Worklist.Remove(&I);
I.eraseFromParent();
MadeIRChange = true;
- return 0; // Don't do anything with FI
+ return nullptr; // Don't do anything with FI
}
- void ComputeMaskedBits(Value *V, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth = 0) const {
- return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, DL, Depth);
+ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth = 0) const {
+ return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth);
}
bool MaskedValueIsZero(Value *V, const APInt &Mask,
@@ -323,7 +327,6 @@ public:
}
private:
-
/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
/// operators which are associative or commutative.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
@@ -337,12 +340,10 @@ private:
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
- Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
- APInt& KnownZero, APInt& KnownOne,
- unsigned Depth);
- bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
- APInt& KnownZero, APInt& KnownOne,
- unsigned Depth=0);
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth);
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
/// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
@@ -355,7 +356,9 @@ private:
bool SimplifyDemandedInstructionBits(Instruction &Inst);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
- APInt& UndefElts, unsigned Depth = 0);
+ APInt &UndefElts, unsigned Depth = 0);
+
+ Value *SimplifyVectorOp(BinaryOperator &Inst);
// FoldOpIntoPhi - Given a binary operator, cast instruction, or select
// which has a PHI node as operand #0, see if we can fold the instruction
@@ -372,21 +375,19 @@ private:
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
-
Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
ConstantInt *AndRHS, BinaryOperator &TheAnd);
Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
bool isSub, Instruction &I);
- Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
- bool isSigned, bool Inside);
+ Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned,
+ bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
Instruction *SimplifyMemSet(MemSetInst *MI);
-
Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
/// Descale - Return a value X such that Val = X * Scale, or null if none. If
@@ -394,8 +395,8 @@ private:
Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
};
-
-
} // end namespace llvm.
+#undef DEBUG_TYPE
+
#endif
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 97910c7..c37a9cf 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -20,6 +20,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
namespace {
/// Class representing coefficient of floating-point addend.
@@ -112,12 +114,12 @@ namespace {
///
class FAddend {
public:
- FAddend() { Val = 0; }
+ FAddend() { Val = nullptr; }
Value *getSymVal (void) const { return Val; }
const FAddendCoef &getCoef(void) const { return Coeff; }
- bool isConstant() const { return Val == 0; }
+ bool isConstant() const { return Val == nullptr; }
bool isZero() const { return Coeff.isZero(); }
void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; }
@@ -154,7 +156,7 @@ namespace {
///
class FAddCombine {
public:
- FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
+ FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {}
Value *simplify(Instruction *FAdd);
private:
@@ -348,8 +350,8 @@ Value *FAddendCoef::getValue(Type *Ty) const {
//
unsigned FAddend::drillValueDownOneStep
(Value *Val, FAddend &Addend0, FAddend &Addend1) {
- Instruction *I = 0;
- if (Val == 0 || !(I = dyn_cast<Instruction>(Val)))
+ Instruction *I = nullptr;
+ if (!Val || !(I = dyn_cast<Instruction>(Val)))
return 0;
unsigned Opcode = I->getOpcode();
@@ -359,16 +361,16 @@ unsigned FAddend::drillValueDownOneStep
Value *Opnd0 = I->getOperand(0);
Value *Opnd1 = I->getOperand(1);
if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
- Opnd0 = 0;
+ Opnd0 = nullptr;
if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
- Opnd1 = 0;
+ Opnd1 = nullptr;
if (Opnd0) {
if (!C0)
Addend0.set(1, Opnd0);
else
- Addend0.set(C0, 0);
+ Addend0.set(C0, nullptr);
}
if (Opnd1) {
@@ -376,7 +378,7 @@ unsigned FAddend::drillValueDownOneStep
if (!C1)
Addend.set(1, Opnd1);
else
- Addend.set(C1, 0);
+ Addend.set(C1, nullptr);
if (Opcode == Instruction::FSub)
Addend.negate();
}
@@ -385,7 +387,7 @@ unsigned FAddend::drillValueDownOneStep
return Opnd0 && Opnd1 ? 2 : 1;
// Both operands are zero. Weird!
- Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0);
+ Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr);
return 1;
}
@@ -443,13 +445,13 @@ Value *FAddCombine::performFactorization(Instruction *I) {
Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
- return 0;
+ return nullptr;
bool isMpy = false;
if (I0->getOpcode() == Instruction::FMul)
isMpy = true;
else if (I0->getOpcode() != Instruction::FDiv)
- return 0;
+ return nullptr;
Value *Opnd0_0 = I0->getOperand(0);
Value *Opnd0_1 = I0->getOperand(1);
@@ -461,8 +463,8 @@ Value *FAddCombine::performFactorization(Instruction *I) {
// (x*y) +/- (x*z) x y z
// (y/x) +/- (z/x) x y z
//
- Value *Factor = 0;
- Value *AddSub0 = 0, *AddSub1 = 0;
+ Value *Factor = nullptr;
+ Value *AddSub0 = nullptr, *AddSub1 = nullptr;
if (isMpy) {
if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
@@ -481,7 +483,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
}
if (!Factor)
- return 0;
+ return nullptr;
FastMathFlags Flags;
Flags.setUnsafeAlgebra();
@@ -495,7 +497,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
const APFloat &F = CFP->getValueAPF();
if (!F.isNormal())
- return 0;
+ return nullptr;
} else if (Instruction *II = dyn_cast<Instruction>(NewAddSub))
II->setFastMathFlags(Flags);
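In scalar terms, the factorization being performed here is the following; a minimal model, valid only under the unsafe-algebra fast-math flags set above:

// performFactorization in miniature:
//   (x*y) + (x*z)  ==>  x * (y + z)    (common factor)
//   (y/x) + (z/x)  ==>  (y + z) / x    (common divisor)
double factorMul(double x, double y, double z) {
  return x * (y + z);   // one fmul instead of two, plus the fadd
}
double factorDiv(double x, double y, double z) {
  return (y + z) / x;   // one fdiv instead of two, plus the fadd
}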
@@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) {
// Currently we are not able to handle vector types.
if (I->getType()->isVectorTy())
- return 0;
+ return nullptr;
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
@@ -568,7 +570,7 @@ Value *FAddCombine::simplify(Instruction *I) {
// been optimized into "I = Y - X" in the previous steps.
//
const FAddendCoef &CE = Opnd0.getCoef();
- return CE.isOne() ? Opnd0.getSymVal() : 0;
+ return CE.isOne() ? Opnd0.getSymVal() : nullptr;
}
// step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
@@ -614,7 +616,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
// constant close to super-expr(s) will potentially reveal some optimization
// opportunities in super-expr(s).
//
- const FAddend *ConstAdd = 0;
+ const FAddend *ConstAdd = nullptr;
// Simplified addends are placed in <SimpVect>.
AddendVect SimpVect;
@@ -647,7 +649,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
if (T && T->getSymVal() == Val) {
// Set null such that the next iteration of the outer loop will not process
// this addend again.
- Addends[SameSymIdx] = 0;
+ Addends[SameSymIdx] = nullptr;
SimpVect.push_back(T);
}
}
@@ -661,7 +663,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
// Pop all addends being folded and push the resulting folded addend.
SimpVect.resize(StartIdx);
- if (Val != 0) {
+ if (Val) {
if (!R.isZero()) {
SimpVect.push_back(&R);
}
@@ -698,7 +700,7 @@ Value *FAddCombine::createNaryFAdd
//
unsigned InstrNeeded = calcInstrNumber(Opnds);
if (InstrNeeded > InstrQuota)
- return 0;
+ return nullptr;
initCreateInstNum();
@@ -710,7 +712,7 @@ Value *FAddCombine::createNaryFAdd
// N-ary addition has at most two instructions, and we don't need to worry
// about tree-height when constructing the N-ary addition.
- Value *LastVal = 0;
+ Value *LastVal = nullptr;
bool LastValNeedNeg = false;
// Iterate the addends, creating fadd/fsub using adjacent two addends.
@@ -870,10 +872,10 @@ Value *FAddCombine::createAddendVal
//
static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) {
if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy())
- return 0;
+ return nullptr;
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return 0;
+ if (!I) return nullptr;
if (I->getOpcode() == Instruction::Mul)
if ((CST = dyn_cast<Constant>(I->getOperand(1))))
@@ -884,7 +886,7 @@ static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) {
CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST);
return I->getOperand(0);
}
- return 0;
+ return nullptr;
}
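What this matcher accepts, as a scalar model: a shift-left by a constant is treated as the equivalent power-of-two multiply, which is exactly what the ConstantExpr::getShl line above computes.

#include <cstdint>

uint32_t mulForm(uint32_t x) { return x * 8; }    // matched with CST = 8
uint32_t shlForm(uint32_t x) { return x << 3; }   // also CST = 8, via 1 << 3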
@@ -918,6 +920,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
I.hasNoUnsignedWrap(), DL))
return ReplaceInstUsesWith(I, V);
@@ -942,7 +947,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (ZI->getSrcTy()->isIntegerTy(1))
return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
- Value *XorLHS = 0; ConstantInt *XorRHS = 0;
+ Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr;
if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
const APInt &RHSVal = CI->getValue();
@@ -974,7 +979,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
IntegerType *IT = cast<IntegerType>(I.getType());
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne);
if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
@@ -1042,11 +1047,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
if (LHSKnownZero != 0) {
APInt RHSKnownOne(IT->getBitWidth(), 0);
APInt RHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
// No bits in common -> bitwise or.
if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
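A concrete instance of this fold, with assumed masks:

#include <cstdint>

// lo can only set bits 0-3 and hi only bits 4-7, so their known-zero sets
// cover the whole word between them; no carry is possible and the add is
// rewritten to the bitwise or below.
uint32_t addAsOr(uint32_t x, uint32_t y) {
  uint32_t lo = x & 0x0Fu;
  uint32_t hi = y & 0xF0u;
  return lo | hi;   // equal to lo + hi for all x, y
}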
@@ -1174,7 +1179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// Check for (x & y) + (x ^ y)
{
- Value *A = 0, *B = 0;
+ Value *A = nullptr, *B = nullptr;
if (match(RHS, m_Xor(m_Value(A), m_Value(B))) &&
(match(LHS, m_And(m_Specific(A), m_Specific(B))) ||
match(LHS, m_And(m_Specific(B), m_Specific(A)))))
@@ -1186,13 +1191,16 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateOr(A, B);
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL))
return ReplaceInstUsesWith(I, V);
@@ -1266,7 +1274,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
if (C1 == C2) {
- Constant *Z1=0, *Z2=0;
+ Constant *Z1=nullptr, *Z2=nullptr;
Value *A, *B, *C=C1;
if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
Z1 = dyn_cast<Constant>(A1); A = A2;
@@ -1290,7 +1298,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
@@ -1305,7 +1313,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
- GEPOperator *GEP1 = 0, *GEP2 = 0;
+ GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;
// For now we require one side to be the base pointer "A" or a constant
// GEP derived from it.
@@ -1343,9 +1351,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
// Avoid duplicating the arithmetic if GEP2 has non-constant indices and
// multiple users.
- if (GEP1 == 0 ||
- (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
- return 0;
+ if (!GEP1 ||
+ (GEP2 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
+ return nullptr;
// Emit the offset of the GEP and an intptr_t.
Value *Result = EmitGEPOffset(GEP1);
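In source terms the optimization amounts to cancelling the shared base pointer; a sketch with illustrative names:

#include <cstddef>

// Both addresses are GEPs off the same base, so their difference reduces to
// the difference of the offsets; EmitGEPOffset materializes exactly that.
ptrdiff_t elementDistance(int *base, size_t i, size_t j) {
  return &base[i] - &base[j];   // folds to (ptrdiff_t)(i - j)
}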
@@ -1368,6 +1376,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
I.hasNoUnsignedWrap(), DL))
return ReplaceInstUsesWith(I, V);
@@ -1393,7 +1404,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Constant *C = dyn_cast<Constant>(Op0)) {
// C - ~X == X + (1+C)
- Value *X = 0;
+ Value *X = nullptr;
if (match(Op1, m_Not(m_Value(X))))
return BinaryOperator::CreateAdd(X, AddOne(C));
@@ -1451,9 +1462,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
}
if (Op1->hasOneUse()) {
- Value *X = 0, *Y = 0, *Z = 0;
- Constant *C = 0;
- Constant *CI = 0;
+ Value *X = nullptr, *Y = nullptr, *Z = nullptr;
+ Constant *C = nullptr;
+ Constant *CI = nullptr;
// (X - (Y - Z)) --> (X + (Z - Y)).
if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
@@ -1532,12 +1543,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Res);
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL))
return ReplaceInstUsesWith(I, V);
@@ -1574,5 +1588,5 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
}
- return 0;
+ return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2c1bfc7..4f5d65a 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -20,6 +20,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// isFreeToInvert - Return true if the specified value is free to invert (apply
/// ~ to). This happens in cases where the ~ can be eliminated.
static inline bool isFreeToInvert(Value *V) {
@@ -50,7 +52,7 @@ static inline Value *dyn_castNotVal(Value *V) {
// Constants can be considered to be not'ed values...
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantInt::get(C->getType(), ~C->getValue());
- return 0;
+ return nullptr;
}
/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
@@ -123,7 +125,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
ConstantInt *AndRHS,
BinaryOperator &TheAnd) {
Value *X = Op->getOperand(0);
- Constant *Together = 0;
+ Constant *Together = nullptr;
if (!Op->isShift())
Together = ConstantExpr::getAnd(AndRHS, OpRHS);
@@ -250,7 +252,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
}
break;
}
- return 0;
+ return nullptr;
}
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
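The hunk cuts this comment short; the Inside form of the emitted test uses the standard unsigned wraparound trick, shown here as a scalar model (assuming Lo <= Hi):

#include <cstdint>

// V >= Lo && V < Hi  <=>  (V - Lo) <u (Hi - Lo): subtracting Lo shifts the
// range down to start at zero, so one unsigned compare replaces two.
bool insideRange(uint32_t V, uint32_t Lo, uint32_t Hi) {
  return (V - Lo) < (Hi - Lo);
}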
@@ -332,12 +334,12 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
Instruction &I) {
Instruction *LHSI = dyn_cast<Instruction>(LHS);
if (!LHSI || LHSI->getNumOperands() != 2 ||
- !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
+ !isa<ConstantInt>(LHSI->getOperand(1))) return nullptr;
ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
switch (LHSI->getOpcode()) {
- default: return 0;
+ default: return nullptr;
case Instruction::And:
if (ConstantExpr::getAnd(N, Mask) == Mask) {
// If the AndRHS is a power of two minus one (0+1+), this is simple.
@@ -357,7 +359,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
break;
}
}
- return 0;
+ return nullptr;
case Instruction::Or:
case Instruction::Xor:
// If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
@@ -365,7 +367,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
&& ConstantExpr::getAnd(N, Mask)->isNullValue())
break;
- return 0;
+ return nullptr;
}
if (isSub)
@@ -418,12 +420,12 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
- bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+ bool icmp_abit = (ACst && !ACst->isZero() &&
ACst->getValue().isPowerOf2());
- bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+ bool icmp_bbit = (BCst && !BCst->isZero() &&
BCst->getValue().isPowerOf2());
unsigned result = 0;
- if (CCst != 0 && CCst->isZero()) {
+ if (CCst && CCst->isZero()) {
// if C is zero, then both A and B qualify as mask
result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
FoldMskICmp_Mask_AllZeroes |
@@ -455,7 +457,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
FoldMskICmp_AMask_NotMixed)
: (FoldMskICmp_Mask_AllZeroes |
FoldMskICmp_AMask_Mixed));
- } else if (ACst != 0 && CCst != 0 &&
+ } else if (ACst && CCst &&
ConstantExpr::getAnd(ACst, CCst) == CCst) {
result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
: FoldMskICmp_AMask_NotMixed);
@@ -470,7 +472,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
FoldMskICmp_BMask_NotMixed)
: (FoldMskICmp_Mask_AllZeroes |
FoldMskICmp_BMask_Mixed));
- } else if (BCst != 0 && CCst != 0 &&
+ } else if (BCst && CCst &&
ConstantExpr::getAnd(BCst, CCst) == CCst) {
result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
: FoldMskICmp_BMask_NotMixed);
@@ -570,12 +572,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
Value *L11,*L12,*L21,*L22;
// Check whether the icmp can be decomposed into a bit test.
if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) {
- L21 = L22 = L1 = 0;
+ L21 = L22 = L1 = nullptr;
} else {
// Look for ANDs in the LHS icmp.
if (!L1->getType()->isIntegerTy()) {
// You can icmp pointers, for example. They really aren't masks.
- L11 = L12 = 0;
+ L11 = L12 = nullptr;
} else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
// Any icmp can be viewed as being trivially masked; if it allows us to
// remove one, it's worth it.
@@ -585,7 +587,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
if (!L2->getType()->isIntegerTy()) {
// You can icmp pointers, for example. They really aren't masks.
- L21 = L22 = 0;
+ L21 = L22 = nullptr;
} else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
L21 = L2;
L22 = Constant::getAllOnesValue(L2->getType());
@@ -608,7 +610,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
} else {
return 0;
}
- E = R2; R1 = 0; ok = true;
+ E = R2; R1 = nullptr; ok = true;
} else if (R1->getType()->isIntegerTy()) {
if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
// As before, model no mask as a trivial mask if it'll let us do an
@@ -665,11 +667,11 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
/// into a single (icmp(A & X) ==/!= Y)
static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy* Builder) {
- Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS,
LHSCC, RHSCC);
- if (mask == 0) return 0;
+ if (mask == 0) return nullptr;
assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
"foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
@@ -722,9 +724,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// their actual values. This isn't strictly necessary, just a "handle the
// easy cases for now" decision.
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- if (BCst == 0) return 0;
+ if (!BCst) return nullptr;
ConstantInt *DCst = dyn_cast<ConstantInt>(D);
- if (DCst == 0) return 0;
+ if (!DCst) return nullptr;
if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) {
// (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
@@ -763,11 +765,11 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D having a single bit set
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
- if (CCst == 0) return 0;
+ if (!CCst) return nullptr;
if (LHSCC != NEWCC)
CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
ConstantInt *ECst = dyn_cast<ConstantInt>(E);
- if (ECst == 0) return 0;
+ if (!ECst) return nullptr;
if (RHSCC != NEWCC)
ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
ConstantInt* MCst = dyn_cast<ConstantInt>(
@@ -776,13 +778,13 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// if there is a conflict we should actually return false for the
// whole construct
if (!MCst->isZero())
- return 0;
+ return nullptr;
Value *newOr1 = Builder->CreateOr(B, D);
Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
Value *newAnd = Builder->CreateAnd(A, newOr1);
return Builder->CreateICmp(NEWCC, newAnd, newOr2);
}
- return 0;
+ return nullptr;
}
/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
@@ -811,7 +813,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
- if (LHSCst == 0 || RHSCst == 0) return 0;
+ if (!LHSCst || !RHSCst) return nullptr;
if (LHSCst == RHSCst && LHSCC == RHSCC) {
// (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
@@ -835,7 +837,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC &&
LHS->hasOneUse() && RHS->hasOneUse()) {
Value *V;
- ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
+ ConstantInt *AndCst, *SmallCst = nullptr, *BigCst = nullptr;
// (trunc x) == C1 & (and x, CA) == C2
// (and x, CA) == C2 & (trunc x) == C1
@@ -866,14 +868,14 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// From here on, we only handle:
// (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
- if (Val != Val2) return 0;
+ if (Val != Val2) return nullptr;
// ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
- return 0;
+ return nullptr;
// Make a constant range that's the intersection of the two icmp ranges.
// If the intersection is empty, we know that the result is false.
@@ -887,7 +889,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// We can't fold (ugt x, C) & (sgt x, C2).
if (!PredicatesFoldable(LHSCC, RHSCC))
- return 0;
+ return nullptr;
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
@@ -1016,7 +1018,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
break;
}
- return 0;
+ return nullptr;
}
/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of
@@ -1026,7 +1028,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
- return 0;
+ return nullptr;
// (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
@@ -1043,7 +1045,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
isa<ConstantAggregateZero>(RHS->getOperand(1)))
return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
- return 0;
+ return nullptr;
}
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
@@ -1096,7 +1098,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
}
}
- return 0;
+ return nullptr;
}
@@ -1104,6 +1106,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyAndInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1198,7 +1203,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// If this is an integer truncation, and if the source is an 'and' with
// immediate, transform it. This frequently occurs for bitfield accesses.
{
- Value *X = 0; ConstantInt *YC = 0;
+ Value *X = nullptr; ConstantInt *YC = nullptr;
if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
// Change: and (trunc (and X, YC) to T), C2
// into : and (trunc X to T), trunc(YC) & C2
@@ -1231,7 +1236,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
{
- Value *A = 0, *B = 0, *C = 0, *D = 0;
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
// (A|B) & ~(A&B) -> A^B
if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
@@ -1339,7 +1344,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
{
- Value *X = 0;
+ Value *X = nullptr;
bool OpsSwapped = false;
// Canonicalize SExt or Not to the LHS
if (match(Op1, m_SExt(m_Value())) ||
@@ -1366,7 +1371,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
std::swap(Op0, Op1);
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
/// CollectBSwapParts - Analyze the specified subexpression and see if it is
@@ -1498,7 +1503,7 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
if (!ITy || ITy->getBitWidth() % 16 ||
// ByteMask only allows up to 32-byte values.
ITy->getBitWidth() > 32*8)
- return 0; // Can only bswap pairs of bytes. Can't do vectors.
+ return nullptr; // Can only bswap pairs of bytes. Can't do vectors.
/// ByteValues - For each byte of the result, we keep track of which value
/// defines each byte.
@@ -1508,16 +1513,16 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
// Try to find all the pieces corresponding to the bswap.
uint32_t ByteMask = ~0U >> (32-ByteValues.size());
if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
- return 0;
+ return nullptr;
// Check to see if all of the bytes come from the same value.
Value *V = ByteValues[0];
- if (V == 0) return 0; // Didn't find a byte? Must be zero.
+ if (!V) return nullptr; // Didn't find a byte? Must be zero.
// Check to make sure that all of the bytes come from the same value.
for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
if (ByteValues[i] != V)
- return 0;
+ return nullptr;
Module *M = I.getParent()->getParent()->getParent();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
return CallInst::Create(F, V);
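A reference form of the pattern being matched: once every byte of the result traces back to the mirrored byte of a single value V, the whole mask/shift/or tree collapses into one llvm.bswap call on V.

#include <cstdint>

uint32_t bswap32(uint32_t x) {
  return ((x & 0x000000FFu) << 24) | ((x & 0x0000FF00u) << 8) |
         ((x & 0x00FF0000u) >> 8)  | ((x & 0xFF000000u) >> 24);
}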
@@ -1529,10 +1534,10 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
Value *C, Value *D) {
// If A is not a select of -1/0, this cannot match.
- Value *Cond = 0;
+ Value *Cond = nullptr;
if (!match(A, m_SExt(m_Value(Cond))) ||
!Cond->getType()->isIntegerTy(1))
- return 0;
+ return nullptr;
// ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
if (match(D, m_Not(m_SExt(m_Specific(Cond)))))
@@ -1545,7 +1550,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return SelectInst::Create(Cond, C, D);
if (match(B, m_SExt(m_Not(m_Specific(Cond)))))
return SelectInst::Create(Cond, C, D);
- return 0;
+ return nullptr;
}
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
@@ -1566,8 +1571,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
LAnd->getOpcode() == Instruction::And &&
RAnd->getOpcode() == Instruction::And) {
- Value *Mask = 0;
- Value *Masked = 0;
+ Value *Mask = nullptr;
+ Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
isKnownToBeAPowerOfTwo(LAnd->getOperand(1)) &&
isKnownToBeAPowerOfTwo(RAnd->getOperand(1))) {
@@ -1608,7 +1613,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
if (LHS->hasOneUse() || RHS->hasOneUse()) {
// (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
// (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
- Value *A = 0, *B = 0;
+ Value *A = nullptr, *B = nullptr;
if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) {
B = Val;
if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1))
@@ -1632,7 +1637,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
- if (LHSCst == 0 || RHSCst == 0) return 0;
+ if (!LHSCst || !RHSCst) return nullptr;
if (LHSCst == RHSCst && LHSCC == RHSCC) {
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
@@ -1653,18 +1658,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// From here on, we only handle:
// (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
- if (Val != Val2) return 0;
+ if (Val != Val2) return nullptr;
// ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
- return 0;
+ return nullptr;
// We can't fold (ugt x, C) | (sgt x, C2).
if (!PredicatesFoldable(LHSCC, RHSCC))
- return 0;
+ return nullptr;
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
@@ -1809,7 +1814,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
break;
}
- return 0;
+ return nullptr;
}
/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of
@@ -1837,7 +1842,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
isa<ConstantAggregateZero>(RHS->getOperand(1)))
return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
- return 0;
+ return nullptr;
}
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
@@ -1869,7 +1874,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder);
}
}
- return 0;
+ return nullptr;
}
/// FoldOrWithConstants - This helper function folds:
@@ -1884,27 +1889,30 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
Value *A, Value *B, Value *C) {
ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
- if (!CI1) return 0;
+ if (!CI1) return nullptr;
- Value *V1 = 0;
- ConstantInt *CI2 = 0;
- if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+ Value *V1 = nullptr;
+ ConstantInt *CI2 = nullptr;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return nullptr;
APInt Xor = CI1->getValue() ^ CI2->getValue();
- if (!Xor.isAllOnesValue()) return 0;
+ if (!Xor.isAllOnesValue()) return nullptr;
if (V1 == A || V1 == B) {
Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
return BinaryOperator::CreateOr(NewOp, V1);
}
- return 0;
+ return nullptr;
}
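The hunk truncates this helper's doc comment; the fold is ((A | B) & C1) | (B & C2) into (A & C1) | B when C1 ^ C2 is all ones, which matches the Xor.isAllOnesValue() guard above. A scalar check of that identity:

#include <cassert>
#include <cstdint>

// Per bit: where C1 is set the expression is a|b on both sides; where C2
// (= ~C1) is set it is just b on both sides.
void checkFold(uint32_t A, uint32_t B, uint32_t C1) {
  uint32_t C2 = ~C1;
  assert((((A | B) & C1) | (B & C2)) == ((A & C1) | B));
}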
Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyOrInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1918,7 +1926,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return &I;
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- ConstantInt *C1 = 0; Value *X = 0;
+ ConstantInt *C1 = nullptr; Value *X = nullptr;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
// iff (C1 & C2) == 0.
if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
@@ -1949,8 +1957,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return NV;
}
- Value *A = 0, *B = 0;
- ConstantInt *C1 = 0, *C2 = 0;
+ Value *A = nullptr, *B = nullptr;
+ ConstantInt *C1 = nullptr, *C2 = nullptr;
// (A | B) | C and A | (B | C) -> bswap if possible.
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
@@ -1981,10 +1989,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
// (A & C)|(B & D)
- Value *C = 0, *D = 0;
+ Value *C = nullptr, *D = nullptr;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
- Value *V1 = 0, *V2 = 0;
+ Value *V1 = nullptr, *V2 = nullptr;
C1 = dyn_cast<ConstantInt>(C);
C2 = dyn_cast<ConstantInt>(D);
if (C1 && C2) { // (A & C1)|(B & C2)
@@ -2028,7 +2036,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
// iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
- ConstantInt *C3 = 0, *C4 = 0;
+ ConstantInt *C3 = nullptr, *C4 = nullptr;
if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
(C3->getValue() & ~C1->getValue()) == 0 &&
match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
@@ -2220,7 +2228,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// Since this OR statement hasn't been optimized further yet, we hope
// that this transformation will allow the new ORs to be optimized.
{
- Value *X = 0, *Y = 0;
+ Value *X = nullptr, *Y = nullptr;
if (Op0->hasOneUse() && Op1->hasOneUse() &&
match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
@@ -2230,13 +2238,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyXorInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -2494,5 +2505,5 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0bc3ac7..d4b583b 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -22,6 +22,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
STATISTIC(NumSimplified, "Number of library calls simplified");
/// getPromotedType - Return the specified type promoted as it would be to pass
@@ -70,7 +72,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
- if (MemOpLength == 0) return 0;
+ if (!MemOpLength) return nullptr;
// Source and destination pointer types are always "i8*" for the intrinsic. See
// if the size is something we can handle with a single primitive load/store.
@@ -80,7 +82,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
assert(Size && "0-sized memory transferring should be removed already.");
if (Size > 8 || (Size&(Size-1)))
- return 0; // If not 1/2/4/8 bytes, exit.
+ return nullptr; // If not 1/2/4/8 bytes, exit.
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
@@ -99,7 +101,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// dest address will be promotable. See if we can find a better type than the
// integer datatype.
Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
- MDNode *CopyMD = 0;
+ MDNode *CopyMD = nullptr;
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
@@ -163,7 +165,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
- return 0;
+ return nullptr;
uint64_t Len = LenC->getLimitedValue();
Alignment = MI->getAlignment();
assert(Len && "0-sized memory setting should be removed already.");
@@ -191,7 +193,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return MI;
}
- return 0;
+ return nullptr;
}
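Scalar models of the two simplifications above, assuming constant lengths of 8 and 4 bytes:

#include <cstdint>
#include <cstring>

// SimplifyMemTransfer: an 8-byte memcpy becomes one integer load and store.
void copy8(void *dst, const void *src) {
  uint64_t tmp;
  std::memcpy(&tmp, src, sizeof tmp);   // the single load
  std::memcpy(dst, &tmp, sizeof tmp);   // the single store
}

// SimplifyMemSet: a 4-byte memset of a constant byte becomes one store of
// that byte replicated across the word.
void set4(void *dst, uint8_t fill) {
  uint32_t v = fill * 0x01010101u;      // splat the fill byte
  std::memcpy(dst, &v, sizeof v);       // the single store
}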
/// visitCallInst - CallInst simplification. This mostly only handles folding
@@ -233,7 +235,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// No other transformations apply to volatile transfers.
if (MI->isVolatile())
- return 0;
+ return nullptr;
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
@@ -276,11 +278,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint64_t Size;
if (getObjectSize(II->getArgOperand(0), Size, DL, TLI))
return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
- return 0;
+ return nullptr;
}
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
- Value *X = 0;
+ Value *X = nullptr;
// bswap(bswap(x)) -> x
if (match(IIOperand, m_BSwap(m_Value(X))))
@@ -320,7 +322,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
+ computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
unsigned TrailingZeros = KnownOne.countTrailingZeros();
APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
if ((Mask & KnownZero) == Mask)
@@ -338,7 +340,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
+ computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
unsigned LeadingZeros = KnownOne.countLeadingZeros();
APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
if ((Mask & KnownZero) == Mask)
@@ -353,14 +355,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
if (LHSKnownNegative || LHSKnownPositive) {
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
if (LHSKnownNegative && RHSKnownNegative) {
@@ -447,10 +449,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
// Get the largest possible values for each operand.
APInt LHSMax = ~LHSKnownZero;
@@ -554,6 +556,79 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ // Constant fold <A x Bi> << Ci.
+  // FIXME: We don't handle _dq because it's a shift of an i128, but it is
+  // represented in the IR as <2 x i64>. A per-element shift is wrong.
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w: {
+    // Simplify if the count is constant: fold to 0 if the count is >= the
+    // element bit width, otherwise to a plain shl/lshr.
+ auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
+ auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
+ if (!CDV && !CInt)
+ break;
+ ConstantInt *Count;
+ if (CDV)
+ Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
+ else
+ Count = CInt;
+
+ auto Vec = II->getArgOperand(0);
+ auto VT = cast<VectorType>(Vec->getType());
+ if (Count->getZExtValue() >
+ VT->getElementType()->getPrimitiveSizeInBits() - 1)
+ return ReplaceInstUsesWith(
+ CI, ConstantAggregateZero::get(Vec->getType()));
+
+ bool isPackedShiftLeft = true;
+ switch (II->getIntrinsicID()) {
+ default : break;
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
+ }
+
+ unsigned VWidth = VT->getNumElements();
+ // Get a constant vector of the same type as the first operand.
+ auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
+ if (isPackedShiftLeft)
+ return BinaryOperator::CreateShl(Vec,
+ Builder->CreateVectorSplat(VWidth, VTCI));
+
+ return BinaryOperator::CreateLShr(Vec,
+ Builder->CreateVectorSplat(VWidth, VTCI));
+ }
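A per-lane scalar model of the new fold, using pslli.d (i32 lanes) as the example:

#include <array>
#include <cstdint>

// A constant count of the element width or more zeroes every lane (the
// ConstantAggregateZero case above); otherwise the intrinsic is a plain
// per-lane shl by a splat of the count (the CreateShl case).
std::array<uint32_t, 4> pslliD(std::array<uint32_t, 4> v, unsigned count) {
  if (count > 31)
    return {0, 0, 0, 0};
  for (uint32_t &lane : v)
    lane <<= count;
  return v;
}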
case Intrinsic::x86_sse41_pmovsxbw:
case Intrinsic::x86_sse41_pmovsxwd:
@@ -576,6 +651,153 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::x86_sse4a_insertqi: {
+ // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
+ // ones undef
+ // TODO: eventually we should lower this intrinsic to IR
+ if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
+ if (CIWidth->equalsInt(64) && CIStart->isZero()) {
+ Value *Vec = II->getArgOperand(1);
+ Value *Undef = UndefValue::get(Vec->getType());
+ const uint32_t Mask[] = { 0, 2 };
+ return ReplaceInstUsesWith(
+ CI,
+ Builder->CreateShuffleVector(
+ Vec, Undef, ConstantDataVector::get(
+ II->getContext(), ArrayRef<uint32_t>(Mask))));
+
+ } else if (auto Source =
+ dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
+ if (Source->hasOneUse() &&
+ Source->getArgOperand(1) == II->getArgOperand(1)) {
+ // If the source of the insert has only one use and it's another
+ // insert (and they're both inserting from the same vector), try to
+ // bundle both together.
+ auto CISourceWidth =
+ dyn_cast<ConstantInt>(Source->getArgOperand(2));
+ auto CISourceStart =
+ dyn_cast<ConstantInt>(Source->getArgOperand(3));
+ if (CISourceStart && CISourceWidth) {
+ unsigned Start = CIStart->getZExtValue();
+ unsigned Width = CIWidth->getZExtValue();
+ unsigned End = Start + Width;
+ unsigned SourceStart = CISourceStart->getZExtValue();
+ unsigned SourceWidth = CISourceWidth->getZExtValue();
+ unsigned SourceEnd = SourceStart + SourceWidth;
+ unsigned NewStart, NewWidth;
+ bool ShouldReplace = false;
+ if (Start <= SourceStart && SourceStart <= End) {
+ NewStart = Start;
+ NewWidth = std::max(End, SourceEnd) - NewStart;
+ ShouldReplace = true;
+ } else if (SourceStart <= Start && Start <= SourceEnd) {
+ NewStart = SourceStart;
+ NewWidth = std::max(SourceEnd, End) - NewStart;
+ ShouldReplace = true;
+ }
+
+ if (ShouldReplace) {
+ Constant *ConstantWidth = ConstantInt::get(
+ II->getArgOperand(2)->getType(), NewWidth, false);
+ Constant *ConstantStart = ConstantInt::get(
+ II->getArgOperand(3)->getType(), NewStart, false);
+ Value *Args[4] = { Source->getArgOperand(0),
+ II->getArgOperand(1), ConstantWidth,
+ ConstantStart };
+ Module *M = CI.getParent()->getParent()->getParent();
+ Value *F =
+ Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
+ return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
+ }
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+
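The range arithmetic above, extracted into a standalone model (not LLVM code) for clarity:

#include <algorithm>

// Two insertqi ops from the same source whose bit ranges [Start, Start+Width)
// touch or overlap collapse into one insert covering the union; disjoint
// ranges are left alone.
bool mergeInsertRanges(unsigned Start, unsigned Width, unsigned SourceStart,
                       unsigned SourceWidth, unsigned &NewStart,
                       unsigned &NewWidth) {
  unsigned End = Start + Width, SourceEnd = SourceStart + SourceWidth;
  if (Start <= SourceStart && SourceStart <= End) {
    NewStart = Start;
    NewWidth = std::max(End, SourceEnd) - NewStart;
    return true;
  }
  if (SourceStart <= Start && Start <= SourceEnd) {
    NewStart = SourceStart;
    NewWidth = std::max(SourceEnd, End) - NewStart;
    return true;
  }
  return false;
}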
+ case Intrinsic::x86_sse41_pblendvb:
+ case Intrinsic::x86_sse41_blendvps:
+ case Intrinsic::x86_sse41_blendvpd:
+ case Intrinsic::x86_avx_blendv_ps_256:
+ case Intrinsic::x86_avx_blendv_pd_256:
+ case Intrinsic::x86_avx2_pblendvb: {
+ // Convert blendv* to vector selects if the mask is constant.
+ // This optimization is convoluted because the intrinsic is defined as
+    // taking a vector of floats or doubles for the ps and pd versions.
+ // FIXME: That should be changed.
+ Value *Mask = II->getArgOperand(2);
+ if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
+ auto Tyi1 = Builder->getInt1Ty();
+ auto SelectorType = cast<VectorType>(Mask->getType());
+ auto EltTy = SelectorType->getElementType();
+ unsigned Size = SelectorType->getNumElements();
+ unsigned BitWidth =
+ EltTy->isFloatTy()
+ ? 32
+ : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth());
+ assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) &&
+ "Wrong arguments for variable blend intrinsic");
+ SmallVector<Constant *, 32> Selectors;
+ for (unsigned I = 0; I < Size; ++I) {
+ // The intrinsics only read the top bit
+ uint64_t Selector;
+ if (BitWidth == 8)
+ Selector = C->getElementAsInteger(I);
+ else
+ Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue();
+ Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
+ }
+ auto NewSelector = ConstantVector::get(Selectors);
+ return SelectInst::Create(NewSelector, II->getArgOperand(1),
+ II->getArgOperand(0), "blendv");
+ } else {
+ break;
+ }
+ }
+
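The per-lane semantics the select construction relies on, as a scalar model:

#include <cstdint>

// Only the sign (top) bit of each mask lane matters, and a set bit picks
// the second source; that is why the SelectInst above passes ArgOperand(1)
// as the true value and ArgOperand(0) as the false value.
uint8_t blendvLane(uint8_t a, uint8_t b, uint8_t maskLane) {
  return (maskLane >> 7) ? b : a;
}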
+ case Intrinsic::x86_avx_vpermilvar_ps:
+ case Intrinsic::x86_avx_vpermilvar_ps_256:
+ case Intrinsic::x86_avx_vpermilvar_pd:
+ case Intrinsic::x86_avx_vpermilvar_pd_256: {
+ // Convert vpermil* to shufflevector if the mask is constant.
+ Value *V = II->getArgOperand(1);
+ unsigned Size = cast<VectorType>(V->getType())->getNumElements();
+ assert(Size == 8 || Size == 4 || Size == 2);
+ uint32_t Indexes[8];
+ if (auto C = dyn_cast<ConstantDataVector>(V)) {
+      // The intrinsics only read one or two bits; clear the rest.
+ for (unsigned I = 0; I < Size; ++I) {
+ uint32_t Index = C->getElementAsInteger(I) & 0x3;
+ if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
+ II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
+ Index >>= 1;
+ Indexes[I] = Index;
+ }
+ } else if (isa<ConstantAggregateZero>(V)) {
+ for (unsigned I = 0; I < Size; ++I)
+ Indexes[I] = 0;
+ } else {
+ break;
+ }
+ // The _256 variants are a bit trickier since the mask bits always index
+    // into the corresponding 128-bit half. In order to convert to a generic
+ // shuffle, we have to make that explicit.
+ if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
+ II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
+ for (unsigned I = Size / 2; I < Size; ++I)
+ Indexes[I] += Size / 2;
+ }
+ auto NewC =
+ ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size));
+ auto V1 = II->getArgOperand(0);
+ auto V2 = UndefValue::get(V1->getType());
+ auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
+ return ReplaceInstUsesWith(CI, Shuffle);
+ }
+
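The mask decoding above, restated as a standalone function over one lane:

#include <cstdint>

uint32_t decodeLane(uint32_t maskLane, bool isPD, bool is256, unsigned lane,
                    unsigned numLanes) {
  uint32_t idx = maskLane & 0x3;   // the intrinsics read at most two bits
  if (isPD)
    idx >>= 1;                     // pd variants select with a single bit
  if (is256 && lane >= numLanes / 2)
    idx += numLanes / 2;           // the upper half indexes its own half
  return idx;
}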
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
@@ -586,8 +808,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
bool AllEltsOk = true;
for (unsigned i = 0; i != 16; ++i) {
Constant *Elt = Mask->getAggregateElement(i);
- if (Elt == 0 ||
- !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
+ if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
AllEltsOk = false;
break;
}
@@ -612,7 +833,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
- if (ExtractedElts[Idx] == 0) {
+ if (!ExtractedElts[Idx]) {
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
Builder->getInt32(Idx&15));
@@ -655,8 +876,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu:
- case Intrinsic::arm64_neon_smull:
- case Intrinsic::arm64_neon_umull: {
+ case Intrinsic::aarch64_neon_smull:
+ case Intrinsic::aarch64_neon_umull: {
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
@@ -667,7 +888,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Check for constant LHS & RHS - in this case we just simplify.
bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
- II->getIntrinsicID() == Intrinsic::arm64_neon_umull);
+ II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
VectorType *NewVT = cast<VectorType>(II->getType());
if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
@@ -776,14 +997,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
- if (CI->getCalledFunction() == 0) return 0;
+ if (!CI->getCalledFunction()) return nullptr;
if (Value *With = Simplifier->optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
}
- return 0;
+ return nullptr;
}
static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
@@ -792,35 +1013,35 @@ static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
Value *Underlying = TrampMem->stripPointerCasts();
if (Underlying != TrampMem &&
(!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
- return 0;
+ return nullptr;
if (!isa<AllocaInst>(Underlying))
- return 0;
+ return nullptr;
- IntrinsicInst *InitTrampoline = 0;
+ IntrinsicInst *InitTrampoline = nullptr;
for (User *U : TrampMem->users()) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
if (!II)
- return 0;
+ return nullptr;
if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
if (InitTrampoline)
// More than one init_trampoline writes to this value. Give up.
- return 0;
+ return nullptr;
InitTrampoline = II;
continue;
}
if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
// Allow any number of calls to adjust.trampoline.
continue;
- return 0;
+ return nullptr;
}
// No call to init.trampoline found.
if (!InitTrampoline)
- return 0;
+ return nullptr;
// Check that the alloca is being used in the expected way.
if (InitTrampoline->getOperand(0) != TrampMem)
- return 0;
+ return nullptr;
return InitTrampoline;
}
@@ -837,9 +1058,9 @@ static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
II->getOperand(0) == TrampMem)
return II;
if (Inst->mayWriteToMemory())
- return 0;
+ return nullptr;
}
- return 0;
+ return nullptr;
}
// Given a call to llvm.adjust.trampoline, find and return the corresponding
@@ -851,7 +1072,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
if (!AdjustTramp ||
AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
- return 0;
+ return nullptr;
Value *TrampMem = AdjustTramp->getOperand(0);
@@ -859,7 +1080,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
return IT;
if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
return IT;
- return 0;
+ return nullptr;
}
// visitCallSite - Improvements for call and invoke instructions.
@@ -874,7 +1095,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// arguments of the call/invoke.
Value *Callee = CS.getCalledValue();
if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
- return 0;
+ return nullptr;
if (Function *CalleeF = dyn_cast<Function>(Callee))
// If the call and callee calling conventions don't match, this call must
@@ -899,7 +1120,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// change the callee to a null pointer.
cast<InvokeInst>(OldCall)->setCalledFunction(
Constant::getNullValue(CalleeF->getType()));
- return 0;
+ return nullptr;
}
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
@@ -911,7 +1132,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
if (isa<InvokeInst>(CS.getInstruction())) {
// Can't remove an invoke because we cannot change the CFG.
- return 0;
+ return nullptr;
}
// This instruction is not reachable, just remove it. We insert a store to
@@ -959,7 +1180,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
if (I) return EraseInstFromFunction(*I);
}
- return Changed ? CS.getInstruction() : 0;
+ return Changed ? CS.getInstruction() : nullptr;
}
// transformConstExprCastCall - If the callee is a constexpr cast of a function,
@@ -968,7 +1189,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Function *Callee =
dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
- if (Callee == 0)
+ if (!Callee)
return false;
Instruction *Caller = CS.getInstruction();
const AttributeSet &CallerPAL = CS.getAttributes();
@@ -1044,7 +1265,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || DL == 0)
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
return false;
Type *CurElTy = ActTy->getPointerElementType();
@@ -1235,7 +1456,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
if (Attrs.hasAttrSomewhere(Attribute::Nest))
- return 0;
+ return nullptr;
assert(Tramp &&
"transformCallThroughTrampoline called with incorrect CallSite.");
@@ -1247,7 +1468,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
const AttributeSet &NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
unsigned NestIdx = 1;
- Type *NestTy = 0;
+ Type *NestTy = nullptr;
AttributeSet NestAttr;
// Look for a parameter marked with the 'nest' attribute.
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index c2b862a..356803a 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -19,6 +19,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
/// expression. If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
@@ -79,7 +81,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
// This requires DataLayout to get the alloca alignment and size information.
- if (!DL) return 0;
+ if (!DL) return nullptr;
PointerType *PTy = cast<PointerType>(CI.getType());
@@ -89,26 +91,26 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// Get the type really allocated and the type casted to.
Type *AllocElTy = AI.getAllocatedType();
Type *CastElTy = PTy->getElementType();
- if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy);
unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy);
- if (CastElTyAlign < AllocElTyAlign) return 0;
+ if (CastElTyAlign < AllocElTyAlign) return nullptr;
// If the allocation has multiple uses, only promote it if we are strictly
// increasing the alignment of the resultant allocation. If we keep it the
// same, we open the door to infinite loops of various kinds.
- if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
+ if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy);
uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy);
- if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+ if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy);
uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy);
- if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
@@ -120,10 +122,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// If we can now satisfy the modulus, by using a non-1 scale, we really can
// do the xform.
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
- (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr;
unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
- Value *Amt = 0;
+ Value *Amt = nullptr;
if (Scale == 1) {
Amt = NumElements;
} else {
@@ -141,6 +143,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
+ New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
// If the allocation has multiple real uses, insert a cast and change all
// things that used it to use the new cast. This will also hack on CI, but it
@@ -169,7 +172,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
// Otherwise, it must be an instruction.
Instruction *I = cast<Instruction>(V);
- Instruction *Res = 0;
+ Instruction *Res = nullptr;
unsigned Opc = I->getOpcode();
switch (Opc) {
case Instruction::Add:
@@ -245,11 +248,11 @@ isEliminableCastPair(
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(SrcTy) : 0;
+ DL->getIntPtrType(SrcTy) : nullptr;
Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(MidTy) : 0;
+ DL->getIntPtrType(MidTy) : nullptr;
Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(DstTy) : 0;
+ DL->getIntPtrType(DstTy) : nullptr;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy, SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy);
@@ -318,7 +321,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
return NV;
}
- return 0;
+ return nullptr;
}
/// CanEvaluateTruncated - Return true if we can evaluate the specified
@@ -470,7 +473,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
}
// Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
- Value *A = 0; ConstantInt *Cst = 0;
+ Value *A = nullptr; ConstantInt *Cst = nullptr;
if (Src->hasOneUse() &&
match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
// We have three types to worry about here, the type of A, the source of
@@ -502,7 +505,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
ConstantExpr::getTrunc(Cst, CI.getType()));
}
- return 0;
+ return nullptr;
}
/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
@@ -550,7 +553,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// If Op1C some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
+ computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
@@ -598,8 +601,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS);
- ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS);
+ computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS);
+ computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS);
if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
APInt KnownBits = KnownZeroLHS | KnownOneLHS;
@@ -627,7 +630,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
}
}
- return 0;
+ return nullptr;
}
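// A minimal sketch of the power-of-2 case above (value names illustrative):
//   %a = and i32 %x, 4
//   %c = icmp ne i32 %a, 0
//   %z = zext i1 %c to i32
// becomes:
//   %z = lshr i32 %a, 2            ; %a is either 0 or 4, so %z is 0 or 1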
/// CanEvaluateZExtd - Determine if the specified value can be computed in the
@@ -758,7 +761,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// If this zero extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this zext.
if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
- return 0;
+ return nullptr;
// If one of the common conversions will work, do it.
if (Instruction *Result = commonCastTransforms(CI))
@@ -883,7 +886,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
- return 0;
+ return nullptr;
}
/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
@@ -918,7 +921,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
unsigned BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(Op0, KnownZero, KnownOne);
+ computeKnownBits(Op0, KnownZero, KnownOne);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) {
@@ -967,7 +970,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
}
}
- return 0;
+ return nullptr;
}
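// A minimal sketch of one such transform (value names illustrative):
//   %c = icmp slt i32 %x, 0
//   %s = sext i1 %c to i32
// becomes a shift that smears the sign bit across the result:
//   %s = ashr i32 %x, 31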
/// CanEvaluateSExtd - Return true if we can take the specified value
@@ -1039,7 +1042,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// If this sign extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this sext.
if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
- return 0;
+ return nullptr;
if (Instruction *I = commonCastTransforms(CI))
return I;
@@ -1107,9 +1110,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// into:
// %a = shl i32 %i, 30
// %d = ashr i32 %a, 30
- Value *A = 0;
+ Value *A = nullptr;
// TODO: Eventually this could be subsumed by EvaluateInDifferentType.
- ConstantInt *BA = 0, *CA = 0;
+ ConstantInt *BA = nullptr, *CA = nullptr;
if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
m_ConstantInt(CA))) &&
BA == CA && A->getType() == CI.getType()) {
@@ -1121,7 +1124,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
return BinaryOperator::CreateAShr(A, ShAmtV);
}
- return 0;
+ return nullptr;
}
@@ -1133,7 +1136,7 @@ static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
(void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
if (!losesInfo)
return ConstantFP::get(CFP->getContext(), F);
- return 0;
+ return nullptr;
}
/// LookThroughFPExtensions - If this is an fp extension instruction, look
@@ -1345,7 +1348,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitFPExt(CastInst &CI) {
@@ -1354,7 +1357,7 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) {
Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
- if (OpI == 0)
+ if (!OpI)
return commonCastTransforms(FI);
// fptoui(uitofp(X)) --> X
@@ -1374,7 +1377,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
- if (OpI == 0)
+ if (!OpI)
return commonCastTransforms(FI);
// fptosi(sitofp(X)) --> X
@@ -1421,7 +1424,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
- return 0;
+ return nullptr;
}
/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
@@ -1520,7 +1523,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
// there yet.
if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
DestTy->getElementType()->getPrimitiveSizeInBits())
- return 0;
+ return nullptr;
SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
@@ -1598,7 +1601,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
ElementIndex = Elements.size() - ElementIndex - 1;
// Fail if multiple elements are inserted into this slot.
- if (Elements[ElementIndex] != 0)
+ if (Elements[ElementIndex])
return false;
Elements[ElementIndex] = V;
@@ -1638,7 +1641,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
if (!V->hasOneUse()) return false;
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return false;
+ if (!I) return false;
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
@@ -1659,7 +1662,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
- if (CI == 0) return false;
+ if (!CI) return false;
Shift += CI->getZExtValue();
if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
return CollectInsertionElements(I->getOperand(0), Shift,
@@ -1687,7 +1690,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
// We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return 0;
+ if (!IC.getDataLayout()) return nullptr;
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
@@ -1695,14 +1698,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
DestVecTy->getElementType(), IC))
- return 0;
+ return nullptr;
// If we succeeded, we know that all of the elements are specified by Elements
// or are zero if Elements has a null entry. Recast this as a set of
// insertions.
Value *Result = Constant::getNullValue(CI.getType());
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
- if (Elements[i] == 0) continue; // Unset element.
+ if (!Elements[i]) continue; // Unset element.
Result = IC.Builder->CreateInsertElement(Result, Elements[i],
IC.Builder->getInt32(i));
@@ -1716,14 +1719,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// bitcast. The various long double bitcasts can't get in here.
static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
// We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return 0;
+ if (!IC.getDataLayout()) return nullptr;
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
// If this is a bitcast from int to float, check to see if the int is an
// extraction from a vector.
- Value *VecInput = 0;
+ Value *VecInput = nullptr;
// bitcast(trunc(bitcast(somevector)))
if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
isa<VectorType>(VecInput->getType())) {
@@ -1747,7 +1750,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
// bitcast(trunc(lshr(bitcast(somevector), cst)))
- ConstantInt *ShAmt = 0;
+ ConstantInt *ShAmt = nullptr;
if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
m_ConstantInt(ShAmt)))) &&
isa<VectorType>(VecInput->getType())) {
@@ -1769,7 +1772,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
- return 0;
+ return nullptr;
}
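// A sketch of the first pattern above, on a little-endian target
// (value names illustrative):
//   %b = bitcast <2 x float> %v to i64
//   %t = trunc i64 %b to i32
//   %f = bitcast i32 %t to float
// becomes:
//   %f = extractelement <2 x float> %v, i32 0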
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 8c0ad52..02e8bf1 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -24,6 +24,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
static ConstantInt *getOne(Constant *C) {
return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
}
@@ -218,15 +220,15 @@ Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
// We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && DL == 0)
- return 0;
+ if (!GEP->isInBounds() && !DL)
+ return nullptr;
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
- return 0;
+ return nullptr;
uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
- if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays.
+ if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays.
// There are many forms of this optimization we can handle, for now, just do
// the simple index into a single-dimensional array.
@@ -236,7 +238,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
!isa<ConstantInt>(GEP->getOperand(1)) ||
!cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
isa<Constant>(GEP->getOperand(2)))
- return 0;
+ return nullptr;
// Check that indices after the variable are constants and in-range for the
// type they index. Collect the indices. This is typically for arrays of
@@ -246,18 +248,18 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
Type *EltTy = Init->getType()->getArrayElementType();
for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (Idx == 0) return 0; // Variable index.
+ if (!Idx) return nullptr; // Variable index.
uint64_t IdxVal = Idx->getZExtValue();
- if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index.
+ if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
if (StructType *STy = dyn_cast<StructType>(EltTy))
EltTy = STy->getElementType(IdxVal);
else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
- if (IdxVal >= ATy->getNumElements()) return 0;
+ if (IdxVal >= ATy->getNumElements()) return nullptr;
EltTy = ATy->getElementType();
} else {
- return 0; // Unknown type.
+ return nullptr; // Unknown type.
}
LaterIndices.push_back(IdxVal);
@@ -296,7 +298,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
Constant *Elt = Init->getAggregateElement(i);
- if (Elt == 0) return 0;
+ if (!Elt) return nullptr;
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty())
@@ -321,7 +323,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// If we can't compute the result for any of the elements, we have to give
// up evaluating the entire conditional.
- if (!isa<ConstantInt>(C)) return 0;
+ if (!isa<ConstantInt>(C)) return nullptr;
// Otherwise, we know if the comparison is true or false for this element,
// update our state machines.
@@ -375,7 +377,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
FalseRangeEnd == Overdefined)
- return 0;
+ return nullptr;
}
// Now that we've scanned the entire array, emit our new comparison(s). We
@@ -467,7 +469,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
{
- Type *Ty = 0;
+ Type *Ty = nullptr;
// Look for an appropriate type:
// - The type of Idx if the magic fits
@@ -480,7 +482,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
else if (ArrayElementCount <= 32)
Ty = Type::getInt32Ty(Init->getContext());
- if (Ty != 0) {
+ if (Ty) {
Value *V = Builder->CreateIntCast(Idx, Ty, false);
V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
@@ -488,7 +490,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
}
}
- return 0;
+ return nullptr;
}
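// A minimal sketch of the single-true-element case (names illustrative):
//   @G = constant [3 x i32] [i32 10, i32 20, i32 30]
//   %p = getelementptr inbounds [3 x i32]* @G, i32 0, i32 %i
//   %v = load i32* %p
//   %c = icmp eq i32 %v, 20
// folds to:
//   %c = icmp eq i32 %i, 1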
@@ -533,7 +535,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// If there are no variable indices, we must have a constant offset, just
// evaluate it the general way.
- if (i == e) return 0;
+ if (i == e) return nullptr;
Value *VariableIdx = GEP->getOperand(i);
// Determine the scale factor of the variable element. For example, this is
@@ -543,7 +545,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// Verify that there are no other variable indices. If so, emit the hard way.
for (++i, ++GTI; i != e; ++i, ++GTI) {
ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!CI) return 0;
+ if (!CI) return nullptr;
// Compute the aggregate offset of constant indices.
if (CI->isZero()) continue;
@@ -587,7 +589,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// multiple of the variable scale.
int64_t NewOffs = Offset / (int64_t)VariableScale;
if (Offset != NewOffs*(int64_t)VariableScale)
- return 0;
+ return nullptr;
// Okay, we can do this evaluation. Start by converting the index to intptr.
if (VariableIdx->getType() != IntPtrTy)
@@ -608,7 +610,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
// the maximum signed value for the pointer type.
if (ICmpInst::isSigned(Cond))
- return 0;
+ return nullptr;
// Look through bitcasts.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
@@ -623,7 +625,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
// If not, synthesize the offset the hard way.
- if (Offset == 0)
+ if (!Offset)
Offset = EmitGEPOffset(GEPLHS);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
Constant::getNullValue(Offset->getType()));
@@ -661,7 +663,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Otherwise, the base pointers are different and the indices are
// different, bail out.
- return 0;
+ return nullptr;
}
// If one of the GEPs has all zero indices, recurse.
@@ -729,7 +731,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
}
}
- return 0;
+ return nullptr;
}
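// A minimal sketch of the common-base case (value names illustrative):
//   %g1 = getelementptr inbounds i32* %p, i64 %i
//   %g2 = getelementptr inbounds i32* %p, i64 %j
//   %c  = icmp ult i32* %g1, %g2
// folds to a comparison of the indices under the signed predicate:
//   %c  = icmp slt i64 %i, %j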
/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
@@ -812,11 +814,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// if it finds it.
bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
- return 0;
+ return nullptr;
if (DivRHS->isZero())
- return 0; // The ProdOV computation fails on divide by zero.
+ return nullptr; // The ProdOV computation fails on divide by zero.
if (DivIsSigned && DivRHS->isAllOnesValue())
- return 0; // The overflow computation also screws up here
+ return nullptr; // The overflow computation also screws up here
if (DivRHS->isOne()) {
// This eliminates some funny cases with INT_MIN.
ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
@@ -850,7 +852,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// overflow variable is set to 0 if its corresponding bound variable is valid,
// -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
int LoOverflow = 0, HiOverflow = 0;
- Constant *LoBound = 0, *HiBound = 0;
+ Constant *LoBound = nullptr, *HiBound = nullptr;
if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
@@ -890,7 +892,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
if (HiBound == DivRHS) { // -INTMIN = INTMIN
HiOverflow = 1; // [INTMIN+1, overflow)
- HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
+ HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN
}
} else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
// e.g. X/-5 op 3 --> [-19, -14)
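// For instance, the unsigned case sketched above (names illustrative):
//   %d = udiv i32 %x, 5
//   %c = icmp eq i32 %d, 3
// becomes the range test [15, 20):
//   %o = add i32 %x, -15
//   %c = icmp ult i32 %o, 5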
@@ -964,20 +966,20 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
uint32_t TypeBits = CmpRHSV.getBitWidth();
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
if (ShAmtVal >= TypeBits || ShAmtVal == 0)
- return 0;
+ return nullptr;
if (!ICI.isEquality()) {
// If we have an unsigned comparison and an ashr, we can't simplify this.
// Similarly for signed comparisons with lshr.
if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
- return 0;
+ return nullptr;
// Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
// by a power of 2. Since we already have logic to simplify these,
// transform to div and then simplify the resultant comparison.
if (Shr->getOpcode() == Instruction::AShr &&
(!Shr->isExact() || ShAmtVal == TypeBits - 1))
- return 0;
+ return nullptr;
// Revisit the shift (to delete it).
Worklist.Add(Shr);
@@ -994,7 +996,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
// If the builder folded the binop, just return it.
BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
- if (TheDiv == 0)
+ if (!TheDiv)
return &ICI;
// Otherwise, fold this div/compare.
@@ -1037,7 +1039,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
Mask, Shr->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
}
- return 0;
+ return nullptr;
}
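// A minimal sketch of the equality case above (value names illustrative):
//   %s = lshr i32 %x, 2
//   %c = icmp eq i32 %s, 5
// becomes a mask-and-compare:
//   %m = and i32 %x, -4            ; clear the shifted-out low bits
//   %c = icmp eq i32 %m, 20        ; 5 << 2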
@@ -1056,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
- ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne);
+ computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne);
// If all the high bits are known, we can do this xform.
if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
@@ -1181,10 +1183,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// access.
BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
if (Shift && !Shift->isShift())
- Shift = 0;
+ Shift = nullptr;
ConstantInt *ShAmt;
- ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+ ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr;
// This seemingly simple opportunity to fold away a shift turns out to
// be rather complicated. See PR17827
@@ -1777,7 +1779,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
}
- return 0;
+ return nullptr;
}
/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
@@ -1794,7 +1796,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// integer type is the same size as the pointer type.
if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
- Value *RHSOp = 0;
+ Value *RHSOp = nullptr;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
} else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
@@ -1812,7 +1814,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Enforce this.
if (LHSCI->getOpcode() != Instruction::ZExt &&
LHSCI->getOpcode() != Instruction::SExt)
- return 0;
+ return nullptr;
bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
bool isSignedCmp = ICI.isSigned();
@@ -1821,12 +1823,12 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Not an extension from the same type?
RHSCIOp = CI->getOperand(0);
if (RHSCIOp->getType() != LHSCIOp->getType())
- return 0;
+ return nullptr;
// If the signedness of the two casts doesn't agree (i.e. one is a sext
// and the other is a zext), then we can't handle this.
if (CI->getOpcode() != LHSCI->getOpcode())
- return 0;
+ return nullptr;
// Deal with equality cases early.
if (ICI.isEquality())
@@ -1844,7 +1846,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// If we aren't dealing with a constant on the RHS, exit early
ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
if (!CI)
- return 0;
+ return nullptr;
// Compute the constant that would happen if we truncated to SrcTy then
// reextended to DestTy.
@@ -1873,7 +1875,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// by SimplifyICmpInst, so only deal with the tricky case.
if (isSignedCmp || !isSignedExt)
- return 0;
+ return nullptr;
// Evaluate the comparison for LT (we invert for GT below). LE and GE cases
// should have been folded away previously and not enter in here.
@@ -1909,12 +1911,12 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// In order to eliminate the add-with-constant, the compare can be its only
// use.
Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
- if (!AddWithCst->hasOneUse()) return 0;
+ if (!AddWithCst->hasOneUse()) return nullptr;
// If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
- if (!CI2->getValue().isPowerOf2()) return 0;
+ if (!CI2->getValue().isPowerOf2()) return nullptr;
unsigned NewWidth = CI2->getValue().countTrailingZeros();
- if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
+ if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr;
// The width of the new add formed is 1 more than the bias.
++NewWidth;
@@ -1922,7 +1924,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// Check to see that CI1 is an all-ones value with NewWidth bits.
if (CI1->getBitWidth() == NewWidth ||
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
- return 0;
+ return nullptr;
// This is only really a signed overflow check if the inputs have been
// sign-extended; check for that condition. For example, if CI2 is 2^31 and
@@ -1930,7 +1932,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
if (IC.ComputeNumSignBits(A) < NeededSignBits ||
IC.ComputeNumSignBits(B) < NeededSignBits)
- return 0;
+ return nullptr;
// In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
@@ -1946,8 +1948,8 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// original add had another add which was then immediately truncated, we
// could still do the transformation.
TruncInst *TI = dyn_cast<TruncInst>(U);
- if (TI == 0 ||
- TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
+ if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
+ return nullptr;
}
// If the pattern matches, truncate the inputs to the narrower type and
@@ -1983,11 +1985,11 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
InstCombiner &IC) {
// Don't bother doing this transformation for pointers, and don't do it for
// vectors.
- if (!isa<IntegerType>(OrigAddV->getType())) return 0;
+ if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
// If the add is a constant expr, then we don't bother transforming it.
Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
- if (OrigAdd == 0) return 0;
+ if (!OrigAdd) return nullptr;
Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
@@ -2008,6 +2010,236 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
return ExtractValueInst::Create(Call, 1, "uadd.overflow");
}
+/// \brief Recognize and process idiom involving test for multiplication
+/// overflow.
+///
+/// The caller has matched a pattern of the form:
+/// I = cmp u (mul(zext A, zext B)), V
+/// The function checks if this is a test for overflow and, if so, replaces
+/// the multiplication with a call to the 'mul.with.overflow' intrinsic.
+///
+/// \param I Compare instruction.
+/// \param MulVal Result of the 'mul' instruction. It is one of the arguments
+/// of the compare instruction. Must be of integer type.
+/// \param OtherVal The other argument of the compare instruction.
+/// \returns Instruction which must replace the compare instruction, or null
+/// if no replacement is required.
+static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
+ Value *OtherVal, InstCombiner &IC) {
+ assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
+ assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
+ assert(isa<IntegerType>(MulVal->getType()));
+ Instruction *MulInstr = cast<Instruction>(MulVal);
+ assert(MulInstr->getOpcode() == Instruction::Mul);
+
+ Instruction *LHS = cast<Instruction>(MulInstr->getOperand(0)),
+ *RHS = cast<Instruction>(MulInstr->getOperand(1));
+ assert(LHS->getOpcode() == Instruction::ZExt);
+ assert(RHS->getOpcode() == Instruction::ZExt);
+ Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
+
+ // Calculate type and width of the result produced by mul.with.overflow.
+ Type *TyA = A->getType(), *TyB = B->getType();
+ unsigned WidthA = TyA->getPrimitiveSizeInBits(),
+ WidthB = TyB->getPrimitiveSizeInBits();
+ unsigned MulWidth;
+ Type *MulType;
+ if (WidthB > WidthA) {
+ MulWidth = WidthB;
+ MulType = TyB;
+ } else {
+ MulWidth = WidthA;
+ MulType = TyA;
+ }
+
+ // In order to replace the original mul with a narrower mul.with.overflow,
+ // all uses must ignore upper bits of the product. The number of used low
+ // bits must not be greater than the width of mul.with.overflow.
+ if (MulVal->hasNUsesOrMore(2))
+ for (User *U : MulVal->users()) {
+ if (U == &I)
+ continue;
+ if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+ // Check if truncation ignores bits above MulWidth.
+ unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
+ if (TruncWidth > MulWidth)
+ return nullptr;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+ // Check if AND ignores bits above MulWidth.
+ if (BO->getOpcode() != Instruction::And)
+ return nullptr;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ const APInt &CVal = CI->getValue();
+ if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
+ return nullptr;
+ }
+ } else {
+ // Other uses prohibit this transformation.
+ return nullptr;
+ }
+ }
+
+ // Recognize patterns
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp eq/neq mulval, zext trunc mulval
+ if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal))
+ if (Zext->hasOneUse()) {
+ Value *ZextArg = Zext->getOperand(0);
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg))
+ if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth)
+ break; // Recognized
+ }
+
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp eq/neq mulval, and(mulval, mask), where mask selects the low MulWidth bits.
+ ConstantInt *CI;
+ Value *ValToMask;
+ if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
+ if (ValToMask != MulVal)
+ return nullptr;
+ const APInt &CVal = CI->getValue() + 1;
+ if (CVal.isPowerOf2()) {
+ unsigned MaskWidth = CVal.logBase2();
+ if (MaskWidth == MulWidth)
+ break; // Recognized
+ }
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_UGT:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp ugt mulval, max
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getMaxValue(MulWidth);
+ MaxVal = MaxVal.zext(CI->getBitWidth());
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_UGE:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp uge mulval, max+1
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_ULE:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp ule mulval, max
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getMaxValue(MulWidth);
+ MaxVal = MaxVal.zext(CI->getBitWidth());
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_ULT:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp ult mulval, max + 1
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ default:
+ return nullptr;
+ }
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+ Builder->SetInsertPoint(MulInstr);
+ Module *M = I.getParent()->getParent()->getParent();
+
+ // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
+ Value *MulA = A, *MulB = B;
+ if (WidthA < MulWidth)
+ MulA = Builder->CreateZExt(A, MulType);
+ if (WidthB < MulWidth)
+ MulB = Builder->CreateZExt(B, MulType);
+ Value *F =
+ Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType);
+ CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul");
+ IC.Worklist.Add(MulInstr);
+
+ // If there are uses of the mul result other than the comparison, we know
+ // that they are truncations or binary ANDs. Change them to use the result
+ // of mul.with.overflow and adjust the mask/size accordingly.
+ if (MulVal->hasNUsesOrMore(2)) {
+ Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value");
+ for (User *U : MulVal->users()) {
+ if (U == &I || U == OtherVal)
+ continue;
+ if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+ if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
+ IC.ReplaceInstUsesWith(*TI, Mul);
+ else
+ TI->setOperand(0, Mul);
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+ assert(BO->getOpcode() == Instruction::And);
+ // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+ APInt ShortMask = CI->getValue().trunc(MulWidth);
+ Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask);
+ Instruction *Zext =
+ cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType()));
+ IC.Worklist.Add(Zext);
+ IC.ReplaceInstUsesWith(*BO, Zext);
+ } else {
+ llvm_unreachable("Unexpected Binary operation");
+ }
+ IC.Worklist.Add(cast<Instruction>(U));
+ }
+ }
+ if (isa<Instruction>(OtherVal))
+ IC.Worklist.Add(cast<Instruction>(OtherVal));
+
+ // The original icmp gets replaced with the overflow value, maybe inverted
+ // depending on predicate.
+ bool Inverse = false;
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_NE:
+ break;
+ case ICmpInst::ICMP_EQ:
+ Inverse = true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (I.getOperand(0) == MulVal)
+ break;
+ Inverse = true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (I.getOperand(1) == MulVal)
+ break;
+ Inverse = true;
+ break;
+ default:
+ llvm_unreachable("Unexpected predicate");
+ }
+ if (Inverse) {
+ Value *Res = Builder->CreateExtractValue(Call, 1);
+ return BinaryOperator::CreateNot(Res);
+ }
+
+ return ExtractValueInst::Create(Call, 1);
+}
+
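+// A minimal sketch of the ICMP_UGT form recognized above (value names
+// illustrative):
+//   %za = zext i32 %a to i64
+//   %zb = zext i32 %b to i64
+//   %m  = mul i64 %za, %zb
+//   %c  = icmp ugt i64 %m, 4294967295    ; max value of the narrow type
+// becomes:
+//   %t = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+//   %c = extractvalue { i32, i1 } %t, 1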
// DemandedBitsLHSMask - When performing a comparison against a constant,
// it is possible that not all the bits in the LHS are demanded. This helper
// method computes the mask that IS demanded.
@@ -2178,7 +2410,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// See if we are doing a comparison with a constant.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- Value *A = 0, *B = 0;
+ Value *A = nullptr, *B = nullptr;
// Match the following pattern, which is a common idiom when writing
// overflow-safe integer arithmetic functions. The source performs an
@@ -2293,15 +2525,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
APInt Op0KnownZeroInverted = ~Op0KnownZero;
if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
// If the LHS is an AND with the same constant, look through it.
- Value *LHS = 0;
- ConstantInt *LHSC = 0;
+ Value *LHS = nullptr;
+ ConstantInt *LHSC = nullptr;
if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
LHSC->getValue() != Op0KnownZeroInverted)
LHS = Op0;
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) == 0" into "x != 3".
- Value *X = 0;
+ Value *X = nullptr;
if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
return new ICmpInst(ICmpInst::ICMP_NE, X,
@@ -2330,15 +2562,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
APInt Op0KnownZeroInverted = ~Op0KnownZero;
if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
// If the LHS is an AND with the same constant, look through it.
- Value *LHS = 0;
- ConstantInt *LHSC = 0;
+ Value *LHS = nullptr;
+ ConstantInt *LHSC = nullptr;
if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
LHSC->getValue() != Op0KnownZeroInverted)
LHS = Op0;
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) != 0" into "x == 3".
- Value *X = 0;
+ Value *X = nullptr;
if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
return new ICmpInst(ICmpInst::ICMP_EQ, X,
@@ -2470,7 +2702,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
(SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
- return 0;
+ return nullptr;
// See if we are doing a comparison between a constant and an instruction that
// can be folded into the comparison.
@@ -2506,7 +2738,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If either operand of the select is a constant, we can fold the
// comparison into the select arms, which will cause one to be
// constant folded and the select turned into a bitwise or.
- Value *Op1 = 0, *Op2 = 0;
+ Value *Op1 = nullptr, *Op2 = nullptr;
if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
@@ -2618,7 +2850,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Analyze the case when either Op0 or Op1 is an add instruction.
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
- Value *A = 0, *B = 0, *C = 0, *D = 0;
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
if (BO0 && BO0->getOpcode() == Instruction::Add)
A = BO0->getOperand(0), B = BO0->getOperand(1);
if (BO1 && BO1->getOpcode() == Instruction::Add)
@@ -2713,7 +2945,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Analyze the case when either Op0 or Op1 is a sub instruction.
// Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
- A = 0; B = 0; C = 0; D = 0;
+ A = nullptr; B = nullptr; C = nullptr; D = nullptr;
if (BO0 && BO0->getOpcode() == Instruction::Sub)
A = BO0->getOperand(0), B = BO0->getOperand(1);
if (BO1 && BO1->getOpcode() == Instruction::Sub)
@@ -2739,7 +2971,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
BO0->hasOneUse() && BO1->hasOneUse())
return new ICmpInst(Pred, D, B);
- BinaryOperator *SRem = NULL;
+ // icmp (0-X) < cst --> X > -cst
+ if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
+ Value *X;
+ if (match(BO0, m_Neg(m_Value(X))))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ if (!RHSC->isMinValue(/*isSigned=*/true))
+ return new ICmpInst(I.getSwappedPredicate(), X,
+ ConstantExpr::getNeg(RHSC));
+ }
+
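+ // A minimal sketch, with cst = 17 (value names illustrative):
+ //   %n = sub nsw i32 0, %x
+ //   %c = icmp slt i32 %n, 17
+ // becomes:
+ //   %c = icmp sgt i32 %x, -17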
+ BinaryOperator *SRem = nullptr;
// icmp (srem X, Y), Y
if (BO0 && BO0->getOpcode() == Instruction::SRem &&
Op1 == BO0->getOperand(1))
@@ -2877,6 +3119,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
(Op0 == A || Op0 == B))
if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
return R;
+
+ // (zext a) * (zext b) --> llvm.umul.with.overflow.
+ if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
+ if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this))
+ return R;
+ }
+ if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
+ if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this))
+ return R;
+ }
}
if (I.isEquality()) {
@@ -2918,7 +3170,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
- Value *X = 0, *Y = 0, *Z = 0;
+ Value *X = nullptr, *Y = nullptr, *Z = nullptr;
if (A == C) {
X = B; Y = D; Z = A;
@@ -3009,7 +3261,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate());
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
@@ -3017,13 +3269,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Instruction *LHSI,
Constant *RHSC) {
- if (!isa<ConstantFP>(RHSC)) return 0;
+ if (!isa<ConstantFP>(RHSC)) return nullptr;
const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
// Get the width of the mantissa. We don't want to hack on conversions that
// might lose information from the integer, e.g. "i64 -> float"
int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
- if (MantissaWidth == -1) return 0; // Unknown.
+ if (MantissaWidth == -1) return nullptr; // Unknown.
// Check to see that the input is converted from an integer type that is small
// enough to preserve all bits. TODO: check here for "known" sign bits.
@@ -3037,7 +3289,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// If the conversion would lose info, don't hack on this.
if ((int)InputSize > MantissaWidth)
- return 0;
+ return nullptr;
// Otherwise, we can potentially simplify the comparison. We know that it
// will always come through as an integer value and we know the constant is
@@ -3383,5 +3635,5 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
RHSExt->getOperand(0));
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index dcc8b0f..66d0938 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -20,6 +20,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "instcombine"
+
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
@@ -29,10 +31,13 @@ STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
static bool pointsToConstantGlobal(Value *V) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return GV->isConstant();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::AddrSpaceCast ||
CE->getOpcode() == Instruction::GetElementPtr)
return pointsToConstantGlobal(CE->getOperand(0));
+ }
return false;
}
@@ -60,9 +65,9 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
// If uses of the bitcast are ok, we are ok.
- if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
+ if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset))
return false;
continue;
}
@@ -112,7 +117,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
// If this isn't our memcpy/memmove, reject it as something we can't
// handle.
MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
- if (MI == 0)
+ if (!MI)
return false;
// If the transfer is using the alloca as a source of the transfer, then
@@ -148,10 +153,10 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
SmallVectorImpl<Instruction *> &ToDelete) {
- MemTransferInst *TheCopy = 0;
+ MemTransferInst *TheCopy = nullptr;
if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
return TheCopy;
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
@@ -172,7 +177,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
+ AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
New->setAlignment(AI.getAlignment());
// Scan to the end of the allocation instructions, to skip over a block of
@@ -295,7 +300,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
// If the address spaces don't match, don't eliminate the cast.
if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
- return 0;
+ return nullptr;
Type *SrcPTy = SrcTy->getElementType();
@@ -346,7 +351,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
@@ -373,7 +378,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
- if (!LI.isSimple()) return 0;
+ if (!LI.isSimple()) return nullptr;
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
@@ -455,7 +460,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
}
}
}
- return 0;
+ return nullptr;
}
/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
@@ -467,12 +472,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
- if (SrcTy == 0) return 0;
+ if (!SrcTy) return nullptr;
Type *SrcPTy = SrcTy->getElementType();
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
- return 0;
+ return nullptr;
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
/// to its first element. This allows us to handle things like:
@@ -506,20 +511,20 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
}
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
- return 0;
+ return nullptr;
// If the pointers point into different address spaces don't do the
// transformation.
if (SrcTy->getAddressSpace() !=
cast<PointerType>(CI->getType())->getAddressSpace())
- return 0;
+ return nullptr;
// If the pointers point to values of different sizes don't do the
// transformation.
if (!IC.getDataLayout() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
- return 0;
+ return nullptr;
// If the pointers point to pointers to different address spaces don't do the
// transformation. It is not safe to introduce an addrspacecast instruction in
@@ -527,7 +532,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// cast.
if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
- return 0;
+ return nullptr;
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before
@@ -607,7 +612,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// Don't hack volatile/atomic stores.
// FIXME: Some bits are legal for atomic stores; needs refactoring.
- if (!SI.isSimple()) return 0;
+ if (!SI.isSimple()) return nullptr;
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
@@ -674,7 +679,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (Instruction *U = dyn_cast<Instruction>(Val))
Worklist.Add(U); // Dropped a use.
}
- return 0; // Do not modify these!
+ return nullptr; // Do not modify these!
}
// store undef, Ptr -> noop
@@ -703,9 +708,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
- return 0; // xform done!
+ return nullptr; // xform done!
- return 0;
+ return nullptr;
}
/// SimplifyStoreAtEndOfBlock - Turn things like:
@@ -728,7 +733,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// the other predecessor.
pred_iterator PI = pred_begin(DestBB);
BasicBlock *P = *PI;
- BasicBlock *OtherBB = 0;
+ BasicBlock *OtherBB = nullptr;
if (P != StoreBB)
OtherBB = P;
@@ -758,7 +763,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// If the other block ends in an unconditional branch, check for the 'if then
// else' case. There is an instruction before the branch.
- StoreInst *OtherStore = 0;
+ StoreInst *OtherStore = nullptr;
if (OtherBr->isUnconditional()) {
--BBI;
// Skip over debugging info.
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 71fbb6c..9996ebc 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -19,6 +19,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// simplifyValueKnownNonZero - The specific integer value is used in a context
/// where it is known to be non-zero. If this allows us to simplify the
@@ -27,13 +29,13 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreached
// code.
- if (!V->hasOneUse()) return 0;
+ if (!V->hasOneUse()) return nullptr;
bool MadeChange = false;
// ((1 << A) >>u B) --> (1 << (A-B))
// Because V cannot be zero, we know that B is less than A.
- Value *A = 0, *B = 0, *PowerOf2 = 0;
+ Value *A = nullptr, *B = nullptr, *PowerOf2 = nullptr;
if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
m_Value(B))) &&
// The "1" can be any value known to be a power of 2.
@@ -68,7 +70,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// If V is a phi node, we can call this on each of its operands.
// "select cond, X, 0" can simplify to "X".
- return MadeChange ? V : 0;
+ return MadeChange ? V : nullptr;
}
@@ -107,7 +109,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) {
Constant *Elt = CV->getElementAsConstant(I);
if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2())
- return 0;
+ return nullptr;
Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2()));
}
@@ -118,6 +120,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyMulInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -139,7 +144,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2));
if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
- Constant *NewCst = 0;
+ Constant *NewCst = nullptr;
if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2())
// Replace X*(2^C) with X << C, where C is either a scalar or a splat.
NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2());
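// e.g. %m = mul i32 %x, 8 becomes %m = shl i32 %x, 3 (names illustrative).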
@@ -165,10 +170,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
const APInt & Val = CI->getValue();
const APInt &PosVal = Val.abs();
if (Val.isNegative() && PosVal.isPowerOf2()) {
- Value *X = 0, *Y = 0;
+ Value *X = nullptr, *Y = nullptr;
if (Op0->hasOneUse()) {
ConstantInt *C1;
- Value *Sub = 0;
+ Value *Sub = nullptr;
if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
Sub = Builder->CreateSub(X, Y, "suba");
else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
@@ -268,7 +273,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
// -2 is "-1 << 1" so it is all bits set except the low one.
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
- Value *BoolCast = 0, *OtherOp = 0;
+ Value *BoolCast = nullptr, *OtherOp = nullptr;
if (MaskedValueIsZero(Op0, Negative2))
BoolCast = Op0, OtherOp = Op1;
else if (MaskedValueIsZero(Op1, Negative2))
@@ -281,7 +286,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
//
@@ -384,7 +389,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C,
Constant *C0 = dyn_cast<Constant>(Opnd0);
Constant *C1 = dyn_cast<Constant>(Opnd1);
- BinaryOperator *R = 0;
+ BinaryOperator *R = nullptr;
// (X * C0) * C => X * (C0*C)
if (FMulOrDiv->getOpcode() == Instruction::FMul) {
@@ -426,6 +431,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (isa<Constant>(Op0))
std::swap(Op0, Op1);
@@ -483,7 +491,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Value *M1 = ConstantExpr::getFMul(C1, C);
Value *M0 = isNormalFp(cast<Constant>(M1)) ?
foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
- 0;
+ nullptr;
if (M0 && M1) {
if (Swap && FAddSub->getOpcode() == Instruction::FSub)
std::swap(M0, M1);
@@ -503,8 +511,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
// Under unsafe algebra do:
// X * log2(0.5*Y) = X*log2(Y) - X
if (I.hasUnsafeAlgebra()) {
- Value *OpX = NULL;
- Value *OpY = NULL;
+ Value *OpX = nullptr;
+ Value *OpY = nullptr;
IntrinsicInst *Log2;
detectLog2OfHalf(Op0, OpY, Log2);
if (OpY) {
@@ -567,7 +575,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Value *Opnd0_0, *Opnd0_1;
if (Opnd0->hasOneUse() &&
match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
- Value *Y = 0;
+ Value *Y = nullptr;
if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
Y = Opnd0_1;
else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
@@ -621,7 +629,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
break;
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
@@ -682,12 +690,12 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
// If we are past the instruction, quit looking for it.
if (&*BBI == SI)
- SI = 0;
+ SI = nullptr;
if (&*BBI == SelectCond)
- SelectCond = 0;
+ SelectCond = nullptr;
// If we ran out of things to eliminate, break out of the loop.
- if (SelectCond == 0 && SI == 0)
+ if (!SelectCond && !SI)
break;
}
@@ -719,7 +727,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
if (MultiplyOverflows(RHS, LHSRHS,
- I.getOpcode()==Instruction::SDiv))
+ I.getOpcode() == Instruction::SDiv))
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
ConstantExpr::getMul(RHS, LHSRHS));
@@ -735,12 +743,31 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
}
}
+ if (ConstantInt *One = dyn_cast<ConstantInt>(Op0)) {
+ if (One->isOne() && !I.getType()->isIntegerTy(1)) {
+ bool isSigned = I.getOpcode() == Instruction::SDiv;
+ if (isSigned) {
+ // If Op1 is 0 the behaviour is undefined; if Op1 is 1 the result is
+ // one; if Op1 is -1 the result is minus one; otherwise it is zero.
+ Value *Inc = Builder->CreateAdd(Op1, One);
+ Value *Cmp = Builder->CreateICmpULT(
+ Inc, ConstantInt::get(I.getType(), 3));
+ return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0));
+ } else {
+ // If Op1 is 0 the behaviour is undefined. If Op1 is 1 the result is
+ // one; otherwise it is zero.
+ return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType());
+ }
+ }
+ }
+
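+ // A minimal sketch of both cases (value names illustrative):
+ //   udiv i32 1, %x  -->  %c = icmp eq i32 %x, 1
+ //                        %r = zext i1 %c to i32
+ //   sdiv i32 1, %x  -->  %i = add i32 %x, 1
+ //                        %c = icmp ult i32 %i, 3   ; %x is -1, 0 or 1
+ //                        %r = select i1 %c, i32 %x, i32 0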
// See if we can fold away this div instruction.
if (SimplifyDemandedInstructionBits(I))
return &I;
// (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
- Value *X = 0, *Z = 0;
+ Value *X = nullptr, *Z = nullptr;
if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
bool isSigned = I.getOpcode() == Instruction::SDiv;
if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
@@ -748,7 +775,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
return BinaryOperator::Create(I.getOpcode(), X, Op1);
}
- return 0;
+ return nullptr;
}
/// dyn_castZExtVal - Checks if V is a zext or constant that can
@@ -761,7 +788,7 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) {
if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth())
return ConstantExpr::getTrunc(C, Ty);
}
- return 0;
+ return nullptr;
}
namespace {
@@ -786,7 +813,7 @@ struct UDivFoldAction {
};
UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand)
- : FoldAction(FA), OperandToFold(InputOperand), FoldResult(0) {}
+ : FoldAction(FA), OperandToFold(InputOperand), FoldResult(nullptr) {}
UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS)
: FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {}
};
@@ -865,7 +892,8 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
if (size_t LHSIdx = visitUDivOperand(Op0, SI->getOperand(1), I, Actions))
if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions)) {
- Actions.push_back(UDivFoldAction((FoldUDivOperandCb)0, Op1, LHSIdx-1));
+ Actions.push_back(UDivFoldAction((FoldUDivOperandCb)nullptr, Op1,
+ LHSIdx-1));
return Actions.size();
}
@@ -875,6 +903,9 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyUDivInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -928,12 +959,15 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return Inst;
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifySDivInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -983,7 +1017,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C is not a special
@@ -997,7 +1031,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
Constant *Divisor,
bool AllowReciprocal) {
if (!isa<ConstantFP>(Divisor)) // TODO: handle vectors.
- return 0;
+ return nullptr;
const APFloat &FpVal = cast<ConstantFP>(Divisor)->getValueAPF();
APFloat Reciprocal(FpVal.getSemantics());
@@ -1010,7 +1044,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
}
if (!Cvt)
- return 0;
+ return nullptr;
ConstantFP *R;
R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal);
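// e.g. %d = fdiv fast float %x, 4.0 becomes %d = fmul fast float %x, 0.25,
// since the reciprocal of 4.0 is exact (value names illustrative).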
@@ -1020,6 +1054,9 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyFDivInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1037,10 +1074,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
return R;
if (AllowReassociate) {
- Constant *C1 = 0;
+ Constant *C1 = nullptr;
Constant *C2 = Op1C;
Value *X;
- Instruction *Res = 0;
+ Instruction *Res = nullptr;
if (match(Op0, m_FMul(m_Value(X), m_Constant(C1)))) {
// (X*C1)/C2 => X * (C1/C2)
@@ -1071,12 +1108,12 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
return T;
}
- return 0;
+ return nullptr;
}
if (AllowReassociate && isa<Constant>(Op0)) {
Constant *C1 = cast<Constant>(Op0), *C2;
- Constant *Fold = 0;
+ Constant *Fold = nullptr;
Value *X;
bool CreateDiv = true;
@@ -1098,13 +1135,13 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
R->setFastMathFlags(I.getFastMathFlags());
return R;
}
- return 0;
+ return nullptr;
}
if (AllowReassociate) {
Value *X, *Y;
- Value *NewInst = 0;
- Instruction *SimpR = 0;
+ Value *NewInst = nullptr;
+ Instruction *SimpR = nullptr;
if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) {
// (X/Y) / Z => X / (Y*Z)
@@ -1140,7 +1177,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
/// This function implements the transforms common to both integer remainder
@@ -1176,12 +1213,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitURem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyURemInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1208,12 +1248,15 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Ext);
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifySRemInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1250,7 +1293,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
bool hasMissing = false;
for (unsigned i = 0; i != VWidth; ++i) {
Constant *Elt = C->getAggregateElement(i);
- if (Elt == 0) {
+ if (!Elt) {
hasMissing = true;
break;
}
@@ -1279,12 +1322,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyFRemInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1292,5 +1338,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
return &I;
- return 0;
+ return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 0ab657a..46f7b8a 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -18,6 +18,8 @@
#include "llvm/IR/DataLayout.h"
using namespace llvm;
+#define DEBUG_TYPE "instcombine"
+
/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
/// and if a/b/c and the adds all have a single use, turn this into a phi
/// and a single binop.
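A standalone model of that fold (illustrative C++ with made-up names, not the pass): choosing between a+b and a+c is the same as adding a to the chosen operand, which is exactly the phi-plus-one-binop shape.

    #include <cassert>

    int phiOfAdds(bool fromPred0, int a, int b, int c) {
      return fromPred0 ? (a + b) : (a + c);  // before: an add per incoming edge
    }

    int addOfPhi(bool fromPred0, int a, int b, int c) {
      int phi = fromPred0 ? b : c;           // phi of the varying operand
      return a + phi;                        // after: one shared add
    }

    int main() {
      for (int p = 0; p <= 1; ++p)
        assert(phiOfAdds(p, 7, 3, 9) == addOfPhi(p, 7, 3, 9));
      return 0;
    }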
@@ -48,12 +50,12 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
// types.
I->getOperand(0)->getType() != LHSType ||
I->getOperand(1)->getType() != RHSType)
- return 0;
+ return nullptr;
// If they are CmpInst instructions, check their predicates
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
- return 0;
+ return nullptr;
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
@@ -63,8 +65,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
isExact = cast<PossiblyExactOperator>(I)->isExact();
// Keep track of which operand needs a phi node.
- if (I->getOperand(0) != LHSVal) LHSVal = 0;
- if (I->getOperand(1) != RHSVal) RHSVal = 0;
+ if (I->getOperand(0) != LHSVal) LHSVal = nullptr;
+ if (I->getOperand(1) != RHSVal) RHSVal = nullptr;
}
// If both LHS and RHS would need a PHI, don't do this transformation,
@@ -72,14 +74,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
// which leads to higher register pressure. This is especially
// bad when the PHIs are in the header of a loop.
if (!LHSVal && !RHSVal)
- return 0;
+ return nullptr;
// Otherwise, this is safe to transform!
Value *InLHS = FirstInst->getOperand(0);
Value *InRHS = FirstInst->getOperand(1);
- PHINode *NewLHS = 0, *NewRHS = 0;
- if (LHSVal == 0) {
+ PHINode *NewLHS = nullptr, *NewRHS = nullptr;
+ if (!LHSVal) {
NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(0)->getName() + ".pn");
NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
@@ -87,7 +89,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
LHSVal = NewLHS;
}
- if (RHSVal == 0) {
+ if (!RHSVal) {
NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(1)->getName() + ".pn");
NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
@@ -148,7 +150,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
GEP->getNumOperands() != FirstInst->getNumOperands())
- return 0;
+ return nullptr;
AllInBounds &= GEP->isInBounds();
@@ -170,19 +172,19 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// for struct indices, which must always be constant.
if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
isa<ConstantInt>(GEP->getOperand(op)))
- return 0;
+ return nullptr;
if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
- return 0;
+ return nullptr;
// If we already needed a PHI for an earlier operand, and another operand
// also requires a PHI, we'd be introducing more PHIs than we're
// eliminating, which increases register pressure on entry to the PHI's
// block.
if (NeededPhi)
- return 0;
+ return nullptr;
- FixedOperands[op] = 0; // Needs a PHI.
+ FixedOperands[op] = nullptr; // Needs a PHI.
NeededPhi = true;
}
}
@@ -194,7 +196,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// load up into the predecessors so that we have a load of a gep of an alloca,
// which can usually all be folded into the load.
if (AllBasePointersAreAllocas)
- return 0;
+ return nullptr;
// Otherwise, this is safe to transform. Insert PHI nodes for each operand
// that is variable.
@@ -288,7 +290,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// FIXME: This is overconservative; this transform is allowed in some cases
// for atomic operations.
if (FirstLI->isAtomic())
- return 0;
+ return nullptr;
// When processing loads, we need to propagate two bits of information to the
// sunk load: whether it is volatile, and what its alignment is. We currently
@@ -303,20 +305,20 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// load and the PHI.
if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
!isSafeAndProfitableToSinkLoad(FirstLI))
- return 0;
+ return nullptr;
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
if (isVolatile &&
FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
- return 0;
+ return nullptr;
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
if (!LI || !LI->hasOneUse())
- return 0;
+ return nullptr;
// We can't sink the load if the loaded value could be modified between
// the load and the PHI.
@@ -324,12 +326,12 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LI->getParent() != PN.getIncomingBlock(i) ||
LI->getPointerAddressSpace() != LoadAddrSpace ||
!isSafeAndProfitableToSinkLoad(LI))
- return 0;
+ return nullptr;
// If some of the loads have an alignment specified but not all of them,
// we can't do the transformation.
if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
- return 0;
+ return nullptr;
LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
@@ -338,7 +340,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// the path through the other successor.
if (isVolatile &&
LI->getParent()->getTerminator()->getNumSuccessors() != 1)
- return 0;
+ return nullptr;
}
// Okay, they are all the same operation. Create a new PHI node of the
@@ -354,7 +356,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
if (NewInVal != InVal)
- InVal = 0;
+ InVal = nullptr;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
@@ -398,8 +400,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// If all input operands to the phi are the same instruction (e.g. a cast from
// the same type or "+42") we can pull the operation through the PHI, reducing
// code size and simplifying code.
- Constant *ConstantOp = 0;
- Type *CastSrcTy = 0;
+ Constant *ConstantOp = nullptr;
+ Type *CastSrcTy = nullptr;
bool isNUW = false, isNSW = false, isExact = false;
if (isa<CastInst>(FirstInst)) {
@@ -409,13 +411,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// the code by turning an i32 into an i1293.
if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
if (!ShouldChangeType(PN.getType(), CastSrcTy))
- return 0;
+ return nullptr;
}
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
// Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
- if (ConstantOp == 0)
+ if (!ConstantOp)
return FoldPHIArgBinOpIntoPHI(PN);
if (OverflowingBinaryOperator *BO =
@@ -426,19 +428,19 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
dyn_cast<PossiblyExactOperator>(FirstInst))
isExact = PEO->isExact();
} else {
- return 0; // Cannot fold this operation.
+ return nullptr; // Cannot fold this operation.
}
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
- if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
- return 0;
+ if (!I || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ return nullptr;
if (CastSrcTy) {
if (I->getOperand(0)->getType() != CastSrcTy)
- return 0; // Cast operation must match.
+ return nullptr; // Cast operation must match.
} else if (I->getOperand(1) != ConstantOp) {
- return 0;
+ return nullptr;
}
if (isNUW)
@@ -462,7 +464,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
if (NewInVal != InVal)
- InVal = 0;
+ InVal = nullptr;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
@@ -587,10 +589,10 @@ namespace llvm {
template<>
struct DenseMapInfo<LoweredPHIRecord> {
static inline LoweredPHIRecord getEmptyKey() {
- return LoweredPHIRecord(0, 0);
+ return LoweredPHIRecord(nullptr, 0);
}
static inline LoweredPHIRecord getTombstoneKey() {
- return LoweredPHIRecord(0, 1);
+ return LoweredPHIRecord(nullptr, 1);
}
static unsigned getHashValue(const LoweredPHIRecord &Val) {
return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
@@ -637,14 +639,14 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// bail out.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
- if (II == 0) continue;
+ if (!II) continue;
if (II->getParent() != PN->getIncomingBlock(i))
continue;
// If we have a phi, and if it's directly in the predecessor, then we have
// a critical edge where we need to put the truncate. Since we can't
// split the edge in instcombine, we have to bail out.
- return 0;
+ return nullptr;
}
for (User *U : PN->users()) {
@@ -667,7 +669,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (UserI->getOpcode() != Instruction::LShr ||
!UserI->hasOneUse() || !isa<TruncInst>(UserI->user_back()) ||
!isa<ConstantInt>(UserI->getOperand(1)))
- return 0;
+ return nullptr;
unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue();
PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back()));
@@ -705,7 +707,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// If we've already lowered a user like this, reuse the previously lowered
// value.
- if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == nullptr) {
// Otherwise, Create the new PHI node for this user.
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
@@ -894,5 +896,5 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
- return 0;
+ return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index e74d912..9a41e4b 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -18,16 +18,18 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
/// returning the kind and providing the out parameter results if we
/// successfully match.
static SelectPatternFlavor
MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
SelectInst *SI = dyn_cast<SelectInst>(V);
- if (SI == 0) return SPF_UNKNOWN;
+ if (!SI) return SPF_UNKNOWN;
ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
- if (ICI == 0) return SPF_UNKNOWN;
+ if (!ICI) return SPF_UNKNOWN;
LHS = ICI->getOperand(0);
RHS = ICI->getOperand(1);
@@ -129,15 +131,15 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
if (TI->isCast()) {
Type *FIOpndTy = FI->getOperand(0)->getType();
if (TI->getOperand(0)->getType() != FIOpndTy)
- return 0;
+ return nullptr;
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
Type *CondTy = SI.getCondition()->getType();
if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() ||
CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
- return 0;
+ return nullptr;
} else {
- return 0; // unknown unary op.
+ return nullptr; // unknown unary op.
}
// Fold this by inserting a select from the input values.
@@ -149,7 +151,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
// Only handle binary operators here.
if (!isa<BinaryOperator>(TI))
- return 0;
+ return nullptr;
// Figure out if the operations have any operands in common.
Value *MatchOp, *OtherOpT, *OtherOpF;
@@ -165,7 +167,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
OtherOpF = FI->getOperand(0);
MatchIsOpZero = false;
} else if (!TI->isCommutative()) {
- return 0;
+ return nullptr;
} else if (TI->getOperand(0) == FI->getOperand(1)) {
MatchOp = TI->getOperand(0);
OtherOpT = TI->getOperand(1);
@@ -177,7 +179,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
OtherOpF = FI->getOperand(1);
MatchIsOpZero = true;
} else {
- return 0;
+ return nullptr;
}
// If we reach here, they do have operations in common.
@@ -282,7 +284,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
}
}
- return 0;
+ return nullptr;
}
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
@@ -296,7 +298,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
- return 0;
+ return nullptr;
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
@@ -347,7 +349,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
}
}
- return 0;
+ return nullptr;
}
/// foldSelectICmpAndOr - We want to turn:
@@ -368,18 +370,18 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
- return 0;
+ return nullptr;
Value *CmpLHS = IC->getOperand(0);
Value *CmpRHS = IC->getOperand(1);
if (!match(CmpRHS, m_Zero()))
- return 0;
+ return nullptr;
Value *X;
const APInt *C1;
if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
- return 0;
+ return nullptr;
const APInt *C2;
bool OrOnTrueVal = false;
@@ -388,7 +390,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2)));
if (!OrOnFalseVal && !OrOnTrueVal)
- return 0;
+ return nullptr;
Value *V = CmpLHS;
Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
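The identity this matcher sets up can be checked exhaustively on a small range; the masks below are arbitrary powers of two picked for illustration.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 1u << 4, C2 = 1u << 9;
      for (uint32_t X = 0; X < 64; ++X)
        for (uint32_t Y = 0; Y < 64; ++Y) {
          uint32_t sel = ((X & C1) == 0) ? Y : (Y | C2);   // select form
          uint32_t orv = Y | (((X & C1) >> 4) << 9);       // shifted-or form
          assert(sel == orv);
        }
      return 0;
    }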
@@ -527,7 +529,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
if (TrueVal->getType() == Ty) {
if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
- ConstantInt *C1 = NULL, *C2 = NULL;
+ ConstantInt *C1 = nullptr, *C2 = nullptr;
if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
C1 = dyn_cast<ConstantInt>(TrueVal);
C2 = dyn_cast<ConstantInt>(FalseVal);
@@ -586,7 +588,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
return ReplaceInstUsesWith(SI, V);
- return Changed ? &SI : 0;
+ return Changed ? &SI : nullptr;
}
@@ -606,7 +608,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
// If the value is a non-instruction value like a constant or argument, it
// can always be mapped.
const Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return true;
+ if (!I) return true;
// If V is a PHI node defined in the same block as the condition PHI, we can
// map the arguments.
@@ -649,11 +651,35 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
return ReplaceInstUsesWith(Outer, C);
}
- // TODO: MIN(MIN(A, 23), 97)
- return 0;
+ if (SPF1 == SPF2) {
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(B)) {
+ if (ConstantInt *CC = dyn_cast<ConstantInt>(C)) {
+ APInt ACB = CB->getValue();
+ APInt ACC = CC->getValue();
+
+ // MIN(MIN(A, 23), 97) -> MIN(A, 23)
+ // MAX(MAX(A, 97), 23) -> MAX(A, 97)
+ if ((SPF1 == SPF_UMIN && ACB.ule(ACC)) ||
+ (SPF1 == SPF_SMIN && ACB.sle(ACC)) ||
+ (SPF1 == SPF_UMAX && ACB.uge(ACC)) ||
+ (SPF1 == SPF_SMAX && ACB.sge(ACC)))
+ return ReplaceInstUsesWith(Outer, Inner);
+
+ // MIN(MIN(A, 97), 23) -> MIN(A, 23)
+ // MAX(MAX(A, 23), 97) -> MAX(A, 97)
+ if ((SPF1 == SPF_UMIN && ACB.ugt(ACC)) ||
+ (SPF1 == SPF_SMIN && ACB.sgt(ACC)) ||
+ (SPF1 == SPF_UMAX && ACB.ult(ACC)) ||
+ (SPF1 == SPF_SMAX && ACB.slt(ACC))) {
+ Outer.replaceUsesOfWith(Inner, A);
+ return &Outer;
+ }
+ }
+ }
+ }
+ return nullptr;
}
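A standalone check of the signed-min instance of the new rule (plain C++, std::min standing in for the select pattern): when the inner bound is already the tighter one the outer clamp is dead, otherwise the outer bound wins.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int64_t foldedSMin(int64_t a, int64_t b, int64_t c) {
      // MIN(MIN(a, b), c): keep the inner form when b <= c, else clamp to c.
      return (b <= c) ? std::min(a, b) : std::min(a, c);
    }

    int main() {
      for (int64_t a = -100; a <= 100; ++a) {
        assert(foldedSMin(a, 23, 97) == std::min(std::min(a, int64_t(23)), int64_t(97)));
        assert(foldedSMin(a, 97, 23) == std::min(std::min(a, int64_t(97)), int64_t(23)));
      }
      return 0;
    }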
-
/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
/// both be) and we have an icmp instruction with zero, and we have an 'and'
/// with the non-constant value and a power of two we can turn the select
@@ -663,27 +689,27 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
- return 0;
+ return nullptr;
if (!match(IC->getOperand(1), m_Zero()))
- return 0;
+ return nullptr;
ConstantInt *AndRHS;
Value *LHS = IC->getOperand(0);
if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
- return 0;
+ return nullptr;
// If both select arms are non-zero see if we have a select of the form
// 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
// for 'x ? 2^n : 0' and fix the thing up at the end.
- ConstantInt *Offset = 0;
+ ConstantInt *Offset = nullptr;
if (!TrueVal->isZero() && !FalseVal->isZero()) {
if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
Offset = FalseVal;
else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
Offset = TrueVal;
else
- return 0;
+ return nullptr;
// Adjust TrueVal and FalseVal to the offset.
TrueVal = ConstantInt::get(Builder->getContext(),
@@ -696,7 +722,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
if (!AndRHS->getValue().isPowerOf2() ||
(!TrueVal->getValue().isPowerOf2() &&
!FalseVal->getValue().isPowerOf2()))
- return 0;
+ return nullptr;
// Determine which shift is needed to transform result of the 'and' into the
// desired result.
@@ -708,7 +734,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
// or a trunc of the 'and'. The trunc case requires that all of the truncated
// bits are zero, we can figure that out by looking at the 'and' mask.
if (AndZeros >= ValC->getBitWidth())
- return 0;
+ return nullptr;
Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType());
if (ValZeros > AndZeros)
@@ -866,7 +892,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
if (TI->hasOneUse() && FI->hasOneUse()) {
- Instruction *AddOp = 0, *SubOp = 0;
+ Instruction *AddOp = nullptr, *SubOp = nullptr;
// Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
if (TI->getOpcode() == FI->getOpcode())
@@ -888,7 +914,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
if (AddOp) {
- Value *OtherAddOp = 0;
+ Value *OtherAddOp = nullptr;
if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
OtherAddOp = AddOp->getOperand(1);
} else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
@@ -969,7 +995,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
if (TrueSI->getCondition() == CondVal) {
if (SI.getTrueValue() == TrueSI->getTrueValue())
- return 0;
+ return nullptr;
SI.setOperand(1, TrueSI->getTrueValue());
return &SI;
}
@@ -977,7 +1003,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
if (FalseSI->getCondition() == CondVal) {
if (SI.getFalseValue() == FalseSI->getFalseValue())
- return 0;
+ return nullptr;
SI.setOperand(2, FalseSI->getFalseValue());
return &SI;
}
@@ -1005,5 +1031,5 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
}
- return 0;
+ return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 8273dfd..cc6665c 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -19,6 +19,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -33,7 +35,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
- if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
+ if (Constant *CUI = dyn_cast<Constant>(Op1))
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;
@@ -50,7 +52,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
return &I;
}
- return 0;
+ return nullptr;
}
/// CanEvaluateShifted - See if we can compute the specified value, but shifted
@@ -78,7 +80,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// if the needed bits are already zero in the input. This allows us to reuse
// the value which means that we don't care if the shift has multiple uses.
// TODO: Handle opposite shift by exact value.
- ConstantInt *CI = 0;
+ ConstantInt *CI = nullptr;
if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
(!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
if (CI->getZExtValue() == NumBits) {
@@ -115,7 +117,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Shl: {
// We can often fold the shift into shifts-by-a-constant.
CI = dyn_cast<ConstantInt>(I->getOperand(1));
- if (CI == 0) return false;
+ if (!CI) return false;
// We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
if (isLeftShift) return true;
@@ -139,7 +141,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::LShr: {
// We can often fold the shift into shifts-by-a-constant.
CI = dyn_cast<ConstantInt>(I->getOperand(1));
- if (CI == 0) return false;
+ if (!CI) return false;
// We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
if (!isLeftShift) return true;
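The shift-combining identities quoted in these comments are plain modular arithmetic, valid while the summed amount stays below the bit width:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEF;
      assert(((x << 3) << 5) == (x << 8));   // shl(c1)+shl(c2) -> shl(c1+c2)
      assert(((x >> 3) >> 5) == (x >> 8));   // lshr(c1)+lshr(c2) -> lshr(c1+c2)
      return 0;
    }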
@@ -309,37 +311,38 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
-Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I) {
bool isLeftShift = I.getOpcode() == Instruction::Shl;
+ ConstantInt *COp1 = nullptr;
+ if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(Op1))
+ COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
+ else if (ConstantVector *CV = dyn_cast<ConstantVector>(Op1))
+ COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
+ else
+ COp1 = dyn_cast<ConstantInt>(Op1);
+
+ if (!COp1)
+ return nullptr;
// See if we can propagate this shift into the input, this covers the trivial
// cast of lshr(shl(x,c1),c2) as well as other more complex cases.
if (I.getOpcode() != Instruction::AShr &&
- CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+ CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this)) {
DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
return ReplaceInstUsesWith(I,
- GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+ GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
}
-
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
- // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
- // a signed shift.
- //
- if (Op1->uge(TypeBits)) {
- if (I.getOpcode() != Instruction::AShr)
- return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
- // ashr i32 X, 32 --> ashr i32 X, 31
- I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
- return &I;
- }
+ assert(!COp1->uge(TypeBits) &&
+ "Shift over the type width should have been removed already");
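Extracting a single ConstantInt from a splat is sound because a vector shift by splat(c) is just the scalar shift applied per lane; a small model of that claim (plain C++ arrays, not LLVM vectors):

    #include <array>
    #include <cassert>
    #include <cstdint>

    int main() {
      std::array<uint32_t, 4> v{1, 2, 3, 4};
      for (uint32_t &lane : v) lane <<= 5;   // <4 x i32> shl splat(5), lanewise
      std::array<uint32_t, 4> expect{1u << 5, 2u << 5, 3u << 5, 4u << 5};
      assert(v == expect);
      return 0;
    }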
// ((X*C1) << C2) == (X * (C1 << C2))
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
@@ -367,7 +370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
isa<ConstantInt>(TrOp->getOperand(1))) {
// Okay, we'll do this xform. Make the shift of shift.
- Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ Constant *ShAmt = ConstantExpr::getZExt(COp1, TrOp->getType());
// (shift2 (shift1 & 0x00FF), c2)
Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
@@ -384,10 +387,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// shift. We know that it is a logical shift by a constant, so adjust the
// mask as appropriate.
if (I.getOpcode() == Instruction::Shl)
- MaskV <<= Op1->getZExtValue();
+ MaskV <<= COp1->getZExtValue();
else {
assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
- MaskV = MaskV.lshr(Op1->getZExtValue());
+ MaskV = MaskV.lshr(COp1->getZExtValue());
}
// shift1 & 0x00FF
@@ -421,9 +424,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X + (Y << C))
Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
Op0BO->getOperand(1)->getName());
- uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
- APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ uint32_t Op1Val = COp1->getLimitedValue(TypeBits);
+
+ APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
+ Constant *Mask = ConstantInt::get(I.getContext(), Bits);
+ if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
+ Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
+ return BinaryOperator::CreateAnd(X, Mask);
}
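The high-bits mask in this hunk (and its sibling below) encodes a masked-add identity; checked exhaustively here for 8-bit values and C = 3, which is all the visible code depends on:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned C = 3;
      const uint8_t Mask = (uint8_t)(0xFFu << C);  // getHighBitsSet(8, 8 - C)
      for (unsigned X = 0; X < 256; ++X)
        for (unsigned Y = 0; Y < 256; ++Y) {
          uint8_t lhs = (uint8_t)((Y + (X >> C)) << C);
          uint8_t rhs = (uint8_t)((X + (Y << C)) & Mask);
          assert(lhs == rhs);   // (Y + (X>>C)) << C == (X + (Y<<C)) & Mask
        }
      return 0;
    }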
// Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
@@ -453,9 +460,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X + (Y << C))
Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
Op0BO->getOperand(0)->getName());
- uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
- APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ uint32_t Op1Val = COp1->getLimitedValue(TypeBits);
+
+ APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
+ Constant *Mask = ConstantInt::get(I.getContext(), Bits);
+ if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
+ Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
+ return BinaryOperator::CreateAnd(X, Mask);
}
// Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
@@ -523,7 +534,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// Find out if this is a shift of a shift by a constant.
BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
if (ShiftOp && !ShiftOp->isShift())
- ShiftOp = 0;
+ ShiftOp = nullptr;
if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
@@ -541,9 +552,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
- uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits);
assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
- if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
+ if (ShiftAmt1 == 0) return nullptr; // Will be simplified in the future.
Value *X = ShiftOp->getOperand(0);
IntegerType *Ty = cast<IntegerType>(I.getType());
@@ -671,10 +682,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
}
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
DL))
@@ -709,10 +723,13 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
match(I.getOperand(1), m_Constant(C2)))
return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
I.isExact(), DL))
return ReplaceInstUsesWith(I, V);
@@ -749,10 +766,13 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
I.isExact(), DL))
return ReplaceInstUsesWith(I, V);
@@ -805,6 +825,5 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (NumSignBits == Op0->getType()->getScalarSizeInBits())
return ReplaceInstUsesWith(I, Op0);
- return 0;
+ return nullptr;
}
-
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index a47b709..1b42d3d 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-
#include "InstCombine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -21,6 +20,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
/// are any bits set in the constant that are not demanded. If so, shrink the
@@ -57,7 +58,7 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
KnownZero, KnownOne, 0);
- if (V == 0) return false;
+ if (!V) return false;
if (V == &Inst) return true;
ReplaceInstUsesWith(Inst, V);
return true;
@@ -71,7 +72,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
unsigned Depth) {
Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
KnownZero, KnownOne, Depth);
- if (NewVal == 0) return false;
+ if (!NewVal) return false;
U = NewVal;
return true;
}
@@ -101,7 +102,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
- assert(V != 0 && "Null pointer of Value???");
+ assert(V != nullptr && "Null pointer of Value???");
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = V->getType();
@@ -118,33 +119,33 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
KnownZero = ~KnownOne & DemandedMask;
- return 0;
+ return nullptr;
}
if (isa<ConstantPointerNull>(V)) {
// We know all of the bits for a constant!
KnownOne.clearAllBits();
KnownZero = DemandedMask;
- return 0;
+ return nullptr;
}
KnownZero.clearAllBits();
KnownOne.clearAllBits();
if (DemandedMask == 0) { // Not demanding any bits from V.
if (isa<UndefValue>(V))
- return 0;
+ return nullptr;
return UndefValue::get(VTy);
}
if (Depth == 6) // Limit search depth.
- return 0;
+ return nullptr;
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
- return 0; // Only analyze instructions.
+ computeKnownBits(V, KnownZero, KnownOne, Depth);
+ return nullptr; // Only analyze instructions.
}
// If there are multiple uses of this value and we aren't at the root, then
@@ -157,8 +158,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
@@ -179,8 +180,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
@@ -204,8 +205,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known zero on one side, return the
// other.
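The xor shortcut above is easy to sanity-check: bits known zero on one input pass the other input through, so the whole xor can be replaced wherever only those bits are demanded.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t Demanded = 0xF0;   // only the high nibble is demanded
      const uint8_t X = 0x0C;          // high nibble known zero
      for (uint32_t Y = 0; Y < 256; ++Y)
        assert(((X ^ (uint8_t)Y) & Demanded) == ((uint8_t)Y & Demanded));
      return 0;
    }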
@@ -216,8 +217,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
// Compute the KnownZero/KnownOne bits to simplify things downstream.
- ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
- return 0;
+ computeKnownBits(I, KnownZero, KnownOne, Depth);
+ return nullptr;
}
// If this is the root being simplified, allow it to have multiple uses,
@@ -229,7 +230,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
switch (I->getOpcode()) {
default:
- ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
+ computeKnownBits(I, KnownZero, KnownOne, Depth);
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
@@ -409,20 +410,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Instruction::BitCast:
if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
- return 0; // vector->int or fp->int?
+ return nullptr; // vector->int or fp->int?
if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
if (VectorType *SrcVTy =
dyn_cast<VectorType>(I->getOperand(0)->getType())) {
if (DstVTy->getNumElements() != SrcVTy->getNumElements())
// Don't touch a bitcast between vectors of different element counts.
- return 0;
+ return nullptr;
} else
// Don't touch a scalar-to-vector bitcast.
- return 0;
+ return nullptr;
} else if (I->getOperand(0)->getType()->isVectorTy())
// Don't touch a vector-to-scalar bitcast.
- return 0;
+ return nullptr;
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
@@ -578,9 +579,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I;
}
- // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // Otherwise just hand the sub off to computeKnownBits to fill in
// the known zeros and ones.
- ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, Depth);
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
@@ -751,7 +752,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setBit(KnownZero.getBitWidth() - 1);
@@ -810,10 +811,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
- return 0;
+ return nullptr;
}
}
- ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, Depth);
break;
}
@@ -821,7 +822,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// constant.
if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
return Constant::getIntegerValue(VTy, KnownOne);
- return 0;
+ return nullptr;
}
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
@@ -847,13 +848,13 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
if (!ShlOp1 || !ShrOp1)
- return 0; // Noop.
+ return nullptr; // Noop.
Value *VarX = Shr->getOperand(0);
Type *Ty = VarX->getType();
unsigned BitWidth = Ty->getIntegerBitWidth();
if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
- return 0; // Undef.
+ return nullptr; // Undef.
unsigned ShlAmt = ShlOp1.getZExtValue();
unsigned ShrAmt = ShrOp1.getZExtValue();
@@ -882,7 +883,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
return VarX;
if (!Shr->hasOneUse())
- return 0;
+ return nullptr;
BinaryOperator *New;
if (ShrAmt < ShlAmt) {
@@ -902,7 +903,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
return InsertNewInstWith(New, *Shl);
}
- return 0;
+ return nullptr;
}
/// SimplifyDemandedVectorElts - The specified value produces a vector with
@@ -923,7 +924,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (isa<UndefValue>(V)) {
// If the entire vector is undefined, just return this info.
UndefElts = EltMask;
- return 0;
+ return nullptr;
}
if (DemandedElts == 0) { // If nothing is demanded, provide undef.
@@ -938,7 +939,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Check if this is identity. If so, return nullptr since we are not simplifying
// anything.
if (DemandedElts.isAllOnesValue())
- return 0;
+ return nullptr;
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Undef = UndefValue::get(EltTy);
@@ -952,7 +953,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
Constant *Elt = C->getAggregateElement(i);
- if (Elt == 0) return 0;
+ if (!Elt) return nullptr;
if (isa<UndefValue>(Elt)) { // Already undef.
Elts.push_back(Undef);
@@ -964,12 +965,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// If we changed the constant, return it.
Constant *NewCV = ConstantVector::get(Elts);
- return NewCV != C ? NewCV : 0;
+ return NewCV != C ? NewCV : nullptr;
}
// Limit search depth.
if (Depth == 10)
- return 0;
+ return nullptr;
// If multiple users are using the root value, proceed with
// simplification conservatively assuming that all elements
@@ -980,14 +981,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// the main instcombine process.
if (Depth != 0)
// TODO: Just compute the UndefElts information recursively.
- return 0;
+ return nullptr;
// Conservatively assume that all elements are needed.
DemandedElts = EltMask;
}
Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return 0; // Only analyze instructions.
+ if (!I) return nullptr; // Only analyze instructions.
bool MadeChange = false;
APInt UndefElts2(VWidth, 0);
@@ -999,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// If this is a variable index, we don't know which element it overwrites.
// Demand exactly the same input as we produce.
ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
- if (Idx == 0) {
+ if (!Idx) {
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
@@ -1281,5 +1282,5 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
}
- return MadeChange ? I : 0;
+ return MadeChange ? I : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 521dc9c..8c5e202 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -17,6 +17,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation. isConstant indicates whether we're
/// extracting one known element. If false we're extracting a variable index.
@@ -73,7 +75,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
if (!isa<ConstantInt>(III->getOperand(2)))
- return 0;
+ return nullptr;
unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
// If this is an insert to the element we are looking for, return the
@@ -97,14 +99,14 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
// Extract a value from a vector add operation with a constant zero.
- Value *Val = 0; Constant *Con = 0;
+ Value *Val = nullptr; Constant *Con = nullptr;
if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
if (Con->getAggregateElement(EltNo)->isNullValue())
return FindScalarElement(Val, EltNo);
}
// Otherwise, we don't know.
- return 0;
+ return nullptr;
}
// If we have a PHI node with a vector type that has only 2 uses: feed
@@ -113,7 +115,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// Verify that the PHI node has exactly 2 uses. Otherwise return nullptr.
if (!PN->hasNUses(2))
- return NULL;
+ return nullptr;
// If so, it's known at this point that one operand is PHI and the other is
// an extractelement node. Find the PHI user that is not the extractelement
@@ -128,7 +130,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// otherwise return nullptr.
if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
!(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
- return NULL;
+ return nullptr;
// Create a scalar PHI node that will replace the vector PHI node
// just before the current PHI node.
@@ -318,7 +320,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
}
}
}
- return 0;
+ return nullptr;
}
/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
@@ -440,10 +442,10 @@ static ShuffleOps CollectShuffleElements(Value *V,
// Either the extracted from or inserted into vector must be RHSVec,
// otherwise we'd end up with a shuffle of three inputs.
- if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) {
+ if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
Value *RHS = EI->getOperand(0);
ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
- assert(LR.second == 0 || LR.second == RHS);
+ assert(LR.second == nullptr || LR.second == RHS);
if (LR.first->getType() != RHS->getType()) {
// We tried our best, but we can't find anything compatible with RHS
@@ -488,6 +490,41 @@ static ShuffleOps CollectShuffleElements(Value *V,
return std::make_pair(V, nullptr);
}
+/// Try to find redundant insertvalue instructions, like the following ones:
+/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
+/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
+/// Here the second instruction inserts values at the same indices as the
+/// first one, making the first one redundant.
+/// It should be transformed to:
+/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
+Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
+ bool IsRedundant = false;
+ ArrayRef<unsigned int> FirstIndices = I.getIndices();
+
+ // If there is a chain of insertvalue instructions (each of them except the
+  // last one has only one use, which is the next insertvalue insn in the
+  // chain), check if any of the 'children' uses the same indices as the first
+ // instruction. In this case, the first one is redundant.
+ Value *V = &I;
+ unsigned Depth = 0;
+ while (V->hasOneUse() && Depth < 10) {
+ User *U = V->user_back();
+ auto UserInsInst = dyn_cast<InsertValueInst>(U);
+ if (!UserInsInst || U->getOperand(0) != V)
+ break;
+ if (UserInsInst->getIndices() == FirstIndices) {
+ IsRedundant = true;
+ break;
+ }
+ V = UserInsInst;
+ Depth++;
+ }
+
+ if (IsRedundant)
+ return ReplaceInstUsesWith(I, I.getOperand(0));
+ return nullptr;
+}
+
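A toy model of the redundancy being detected (a plain C++ struct standing in for { i8, i32 }; not LLVM API): once a later link in the single-use chain rewrites the same index, the first insert contributes nothing.

    #include <cassert>

    struct Agg { char a; int b; };
    static Agg insertAt0(Agg v, char x) { v.a = x; return v; }

    int main() {
      Agg base{};                         // stands in for undef
      Agg v0 = insertAt0(base, 'x');      // %0 = insertvalue undef, i8 %x, 0
      Agg v1 = insertAt0(v0, 'y');        // %1 = insertvalue %0,    i8 %y, 0
      Agg direct = insertAt0(base, 'y');  // the transformed form
      assert(v1.a == direct.a && v1.b == direct.b);
      return 0;
    }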
Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
@@ -523,13 +560,14 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
SmallVector<Constant*, 16> Mask;
- ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0);
+ ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr);
// The proposed shuffle may be trivial, in which case we shouldn't
// perform the combine.
if (LR.first != &IE && LR.second != &IE) {
// We now have a shuffle of LHS, RHS, Mask.
- if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType());
+ if (LR.second == nullptr)
+ LR.second = UndefValue::get(LR.first->getType());
return new ShuffleVectorInst(LR.first, LR.second,
ConstantVector::get(Mask));
}
@@ -546,7 +584,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
return &IE;
}
- return 0;
+ return nullptr;
}
/// Return true if we can evaluate the specified expression tree if the vector
@@ -801,6 +839,20 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
llvm_unreachable("failed to reorder elements of vector instruction!");
}
+static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
+ bool &isLHSID, bool &isRHSID) {
+ isLHSID = isRHSID = true;
+
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == (int)i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
+}
+
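A standalone copy of that scan (lowercase name to mark it as illustrative; -1 plays the role of an undef lane) with the two interesting mask shapes exercised:

    #include <cassert>
    #include <vector>

    static void recognizeIdentityMask(const std::vector<int> &Mask,
                                      bool &isLHSID, bool &isRHSID) {
      isLHSID = isRHSID = true;
      int e = (int)Mask.size();
      for (int i = 0; i != e; ++i) {
        if (Mask[i] < 0) continue;        // ignore undef lanes
        isLHSID &= (Mask[i] == i);        // lane i reads LHS lane i
        isRHSID &= (Mask[i] - e == i);    // lane i reads RHS lane i
      }
    }

    int main() {
      bool L, R;
      recognizeIdentityMask({0, 1, -1, 3}, L, R);  // LHS identity, undef ok
      assert(L && !R);
      recognizeIdentityMask({4, 5, 6, 7}, L, R);   // RHS identity
      assert(!L && R);
      return 0;
    }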
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
@@ -864,16 +916,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (VWidth == LHSWidth) {
    // Analyze the shuffle, are the LHS or RHS identity shuffles?
- bool isLHSID = true, isRHSID = true;
-
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] < 0) continue; // Ignore undef values.
- // Is this an identity shuffle of the LHS value?
- isLHSID &= (Mask[i] == (int)i);
-
- // Is this an identity shuffle of the RHS value?
- isRHSID &= (Mask[i]-e == i);
- }
+ bool isLHSID, isRHSID;
+ RecognizeIdentityMask(Mask, isLHSID, isRHSID);
// Eliminate identity shuffles.
if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
@@ -932,16 +976,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
if (LHSShuffle)
if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
- LHSShuffle = NULL;
+ LHSShuffle = nullptr;
if (RHSShuffle)
if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
- RHSShuffle = NULL;
+ RHSShuffle = nullptr;
if (!LHSShuffle && !RHSShuffle)
- return MadeChange ? &SVI : 0;
+ return MadeChange ? &SVI : nullptr;
- Value* LHSOp0 = NULL;
- Value* LHSOp1 = NULL;
- Value* RHSOp0 = NULL;
+ Value* LHSOp0 = nullptr;
+ Value* LHSOp1 = nullptr;
+ Value* RHSOp0 = nullptr;
unsigned LHSOp0Width = 0;
unsigned RHSOp0Width = 0;
if (LHSShuffle) {
@@ -973,11 +1017,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// case 4
if (LHSOp0 == RHSOp0) {
newLHS = LHSOp0;
- newRHS = NULL;
+ newRHS = nullptr;
}
if (newLHS == LHS && newRHS == RHS)
- return MadeChange ? &SVI : 0;
+ return MadeChange ? &SVI : nullptr;
SmallVector<int, 16> LHSMask;
SmallVector<int, 16> RHSMask;
@@ -1037,7 +1081,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// If newRHS == newLHS, we want to remap any references from newRHS to
// newLHS so that we can properly identify splats that may occur due to
// obfuscation across the two vectors.
- if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
+ if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
eltMask += newLHSWidth;
}
@@ -1063,10 +1107,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
}
}
- if (newRHS == NULL)
+ if (!newRHS)
newRHS = UndefValue::get(newLHS->getType());
return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
}
- return MadeChange ? &SVI : 0;
+ // If the result mask is an identity, replace uses of this instruction with
+ // corresponding argument.
+ bool isLHSID, isRHSID;
+ RecognizeIdentityMask(newMask, isLHSID, isRHSID);
+ if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS);
+ if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS);
+
+ return MadeChange ? &SVI : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 8c780b5..1ab7db3 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -10,7 +10,6 @@
#ifndef INSTCOMBINE_WORKLIST_H
#define INSTCOMBINE_WORKLIST_H
-#define DEBUG_TYPE "instcombine"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
@@ -18,6 +17,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#define DEBUG_TYPE "instcombine"
+
namespace llvm {
/// InstCombineWorklist - This is the worklist management logic for
@@ -68,7 +69,7 @@ public:
if (It == WorklistMap.end()) return; // Not in worklist.
// Don't bother moving everything down, just null out the slot.
- Worklist[It->second] = 0;
+ Worklist[It->second] = nullptr;
WorklistMap.erase(It);
}
@@ -101,4 +102,6 @@ public:
} // end namespace llvm.
+#undef DEBUG_TYPE
+
#endif
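Moving DEBUG_TYPE below the includes and pairing the define with an #undef keeps a header from leaking the macro into every translation unit that includes it; a minimal single-file illustration of the pattern (hypothetical names):

    #include <cstdio>

    #define DEBUG_TYPE "instcombine"     // header-scoped, as in the hunk above
    static const char *worklistDebugType() { return DEBUG_TYPE; }
    #undef DEBUG_TYPE                    // includers keep their own DEBUG_TYPE

    int main() {
      std::printf("%s\n", worklistDebugType());
      return 0;
    }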
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0cab81b..4c36887 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -33,7 +33,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Scalar.h"
#include "InstCombine.h"
#include "llvm-c/Initialization.h"
@@ -58,6 +57,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
@@ -512,7 +513,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
@@ -530,7 +531,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
if (C->getType()->getElementType()->isIntegerTy())
return ConstantExpr::getNeg(C);
- return 0;
+ return nullptr;
}
// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
@@ -549,7 +550,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
if (C->getType()->getElementType()->isFloatingPointTy())
return ConstantExpr::getFNeg(C);
- return 0;
+ return nullptr;
}
static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
@@ -595,13 +596,13 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
// not have a second operand.
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
// Don't modify shared select instructions
- if (!SI->hasOneUse()) return 0;
+ if (!SI->hasOneUse()) return nullptr;
Value *TV = SI->getOperand(1);
Value *FV = SI->getOperand(2);
if (isa<Constant>(TV) || isa<Constant>(FV)) {
// Bool selects with constant operands can be folded to logical ops.
- if (SI->getType()->isIntegerTy(1)) return 0;
+ if (SI->getType()->isIntegerTy(1)) return nullptr;
// If it's a bitcast involving vectors, make sure it has the same number of
// elements on both sides.
@@ -610,10 +611,10 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
// Verify that either both or neither are vectors.
- if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+ if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr;
// If vectors, verify that they have the same number of elements.
if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
- return 0;
+ return nullptr;
}
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
@@ -622,7 +623,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return SelectInst::Create(SI->getCondition(),
SelectTrueVal, SelectFalseVal);
}
- return 0;
+ return nullptr;
}
@@ -634,7 +635,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
PHINode *PN = cast<PHINode>(I.getOperand(0));
unsigned NumPHIValues = PN->getNumIncomingValues();
if (NumPHIValues == 0)
- return 0;
+ return nullptr;
// We normally only transform phis with a single use. However, if a PHI has
// multiple uses and they are all the same operation, we can fold *all* of the
@@ -644,7 +645,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
for (User *U : PN->users()) {
Instruction *UI = cast<Instruction>(U);
if (UI != &I && !I.isIdenticalTo(UI))
- return 0;
+ return nullptr;
}
// Otherwise, we can replace *all* users with the new PHI we form.
}
@@ -654,14 +655,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// remember the BB it is in. If there is more than one or if *it* is a PHI,
// bail out. We don't do arbitrary constant expressions here because moving
// their computation can be expensive without a cost model.
- BasicBlock *NonConstBB = 0;
+ BasicBlock *NonConstBB = nullptr;
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InVal = PN->getIncomingValue(i);
if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
continue;
- if (isa<PHINode>(InVal)) return 0; // Itself a phi.
- if (NonConstBB) return 0; // More than one non-const value.
+ if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
+ if (NonConstBB) return nullptr; // More than one non-const value.
NonConstBB = PN->getIncomingBlock(i);
@@ -669,22 +670,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// insert a computation after it without breaking the edge.
if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
if (II->getParent() == NonConstBB)
- return 0;
+ return nullptr;
// If the incoming non-constant value is in I's block, we will remove one
// instruction, but insert another equivalent one, leading to infinite
// instcombine.
if (NonConstBB == I.getParent())
- return 0;
+ return nullptr;
}
// If there is exactly one non-constant value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would be
// inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
- if (NonConstBB != 0) {
+ if (NonConstBB != nullptr) {
BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
- if (!BI || !BI->isUnconditional()) return 0;
+ if (!BI || !BI->isUnconditional()) return nullptr;
}
// Okay, we can do the transformation: create the new PHI node.
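
For orientation, a before/after sketch of the fold FoldOpIntoPhi performs once these checks pass, written as LLVM IR inside C++ comments with illustrative values: the single non-constant incoming value pays for one hoisted copy of the operation in its predecessor block, and every constant lane folds at compile time.

  // before:
  //   merge:
  //     %p = phi i32 [ %x, %bb1 ], [ 7, %bb0 ]
  //     %r = add i32 %p, 1
  //
  // after (the add is hoisted into %bb1, the constant lane folds to 8):
  //   bb1:
  //     %x1 = add i32 %x, 1
  //     br label %merge
  //   merge:
  //     %r = phi i32 [ %x1, %bb1 ], [ 8, %bb0 ]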
@@ -708,7 +709,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
BasicBlock *ThisBB = PN->getIncomingBlock(i);
Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
- Value *InV = 0;
+ Value *InV = nullptr;
// Beware of ConstantExpr: it may eventually evaluate to getNullValue,
// even if currently isNullValue gives false.
Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
@@ -722,7 +723,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
} else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
Constant *C = cast<Constant>(I.getOperand(1));
for (unsigned i = 0; i != NumPHIValues; ++i) {
- Value *InV = 0;
+ Value *InV = nullptr;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
else if (isa<ICmpInst>(CI))
@@ -736,7 +737,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
} else if (I.getNumOperands() == 2) {
Constant *C = cast<Constant>(I.getOperand(1));
for (unsigned i = 0; i != NumPHIValues; ++i) {
- Value *InV = 0;
+ Value *InV = nullptr;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = ConstantExpr::get(I.getOpcode(), InC, C);
else
@@ -776,11 +777,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
assert(PtrTy->isPtrOrPtrVectorTy());
if (!DL)
- return 0;
+ return nullptr;
Type *Ty = PtrTy->getPointerElementType();
if (!Ty->isSized())
- return 0;
+ return nullptr;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
@@ -806,7 +807,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
while (Offset) {
// Indexing into tail padding between struct/array elements.
if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
- return 0;
+ return nullptr;
if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = DL->getStructLayout(STy);
@@ -827,7 +828,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
Ty = AT->getElementType();
} else {
// Otherwise, we can't index into the middle of this atomic type, bail.
- return 0;
+ return nullptr;
}
}
@@ -859,7 +860,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// If Scale is zero then it does not divide Val.
if (Scale.isMinValue())
- return 0;
+ return nullptr;
// Look through chains of multiplications, searching for a constant that is
// divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
@@ -902,7 +903,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
if (!Remainder.isMinValue())
// Not divisible by Scale.
- return 0;
+ return nullptr;
// Replace with the quotient in the parent.
Op = ConstantInt::get(CI->getType(), Quotient);
NoSignedWrap = true;
@@ -915,7 +916,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Multiplication.
NoSignedWrap = BO->hasNoSignedWrap();
if (RequireNoSignedWrap && !NoSignedWrap)
- return 0;
+ return nullptr;
// There are three cases for multiplication: multiplication by exactly
// the scale, multiplication by a constant different to the scale, and
@@ -934,7 +935,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Otherwise drill down into the constant.
if (!Op->hasOneUse())
- return 0;
+ return nullptr;
Parent = std::make_pair(BO, 1);
continue;
@@ -943,7 +944,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Multiplication by something else. Drill down into the left-hand side
// since that's where the reassociate pass puts the good stuff.
if (!Op->hasOneUse())
- return 0;
+ return nullptr;
Parent = std::make_pair(BO, 0);
continue;
@@ -954,7 +955,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Multiplication by a power of 2.
NoSignedWrap = BO->hasNoSignedWrap();
if (RequireNoSignedWrap && !NoSignedWrap)
- return 0;
+ return nullptr;
Value *LHS = BO->getOperand(0);
int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
@@ -968,7 +969,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
break;
}
if (Amt < logScale || !Op->hasOneUse())
- return 0;
+ return nullptr;
// Multiplication by more than the scale. Reduce the multiplying amount
// by the scale in the parent.
@@ -979,7 +980,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
}
if (!Op->hasOneUse())
- return 0;
+ return nullptr;
if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
if (Cast->getOpcode() == Instruction::SExt) {
@@ -993,7 +994,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Scale and the multiplication Y * SmallScale should not overflow.
if (SmallScale.sext(Scale.getBitWidth()) != Scale)
// SmallScale does not sign-extend to Scale.
- return 0;
+ return nullptr;
assert(SmallScale.exactLogBase2() == logScale);
// Require that Y * SmallScale must not overflow.
RequireNoSignedWrap = true;
@@ -1012,7 +1013,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// trunc (Y * sext Scale) does not, so nsw flags need to be cleared
// from this point up in the expression (see later).
if (RequireNoSignedWrap)
- return 0;
+ return nullptr;
// Drill down through the cast.
unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
@@ -1026,7 +1027,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
}
// Unsupported expression, bail out.
- return 0;
+ return nullptr;
}
// We know that we can successfully descale, so from here on we can safely
@@ -1082,6 +1083,101 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
} while (1);
}
+/// \brief Creates a node of a binary operation with the same attributes as
+/// the specified one, but with the given operands.
+static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *B) {
+ Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
+ if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) {
+ if (isa<OverflowingBinaryOperator>(NewBO)) {
+ NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap());
+ NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap());
+ }
+ if (isa<PossiblyExactOperator>(NewBO))
+ NewBO->setIsExact(Inst.isExact());
+ }
+ return BORes;
+}
+
+/// \brief Performs a transformation of a binary operation that is specific
+/// to vector types.
+/// \param Inst Binary operator to transform.
+/// \return Pointer to the node that must replace the original binary
+/// operator, or a null pointer if no transformation was made.
+Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
+ if (!Inst.getType()->isVectorTy()) return nullptr;
+
+ unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
+ Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
+ assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
+ assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth);
+
+ // If both arguments of the binary operation are shuffles that use the same
+ // mask and shuffle within a single vector, it is worthwhile to move the
+ // shuffle after the binary operation:
+ // Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m)
+ if (isa<ShuffleVectorInst>(LHS) && isa<ShuffleVectorInst>(RHS)) {
+ ShuffleVectorInst *LShuf = cast<ShuffleVectorInst>(LHS);
+ ShuffleVectorInst *RShuf = cast<ShuffleVectorInst>(RHS);
+ if (isa<UndefValue>(LShuf->getOperand(1)) &&
+ isa<UndefValue>(RShuf->getOperand(1)) &&
+ LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType() &&
+ LShuf->getMask() == RShuf->getMask()) {
+ Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
+ RShuf->getOperand(0), Builder);
+ Value *Res = Builder->CreateShuffleVector(NewBO,
+ UndefValue::get(NewBO->getType()), LShuf->getMask());
+ return Res;
+ }
+ }
+
+ // If one argument is a shuffle within one vector and the other is a
+ // constant, try moving the shuffle after the binary operation.
+ ShuffleVectorInst *Shuffle = nullptr;
+ Constant *C1 = nullptr;
+ if (isa<ShuffleVectorInst>(LHS)) Shuffle = cast<ShuffleVectorInst>(LHS);
+ if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS);
+ if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS);
+ if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS);
+ if (Shuffle && C1 && isa<UndefValue>(Shuffle->getOperand(1)) &&
+ Shuffle->getType() == Shuffle->getOperand(0)->getType()) {
+ SmallVector<int, 16> ShMask = Shuffle->getShuffleMask();
+ // Find a constant C2 with the property:
+ // shuffle(C2, ShMask) = C1
+ // If no such constant exists (example: ShMask=<0,0> and C1=<1,2>),
+ // the reorder is not possible.
+ SmallVector<Constant*, 16> C2M(VWidth,
+ UndefValue::get(C1->getType()->getScalarType()));
+ bool MayChange = true;
+ for (unsigned I = 0; I < VWidth; ++I) {
+ if (ShMask[I] >= 0) {
+ assert(ShMask[I] < (int)VWidth);
+ if (!isa<UndefValue>(C2M[ShMask[I]])) {
+ MayChange = false;
+ break;
+ }
+ C2M[ShMask[I]] = C1->getAggregateElement(I);
+ }
+ }
+ if (MayChange) {
+ Constant *C2 = ConstantVector::get(C2M);
+ Value *NewLHS, *NewRHS;
+ if (isa<Constant>(LHS)) {
+ NewLHS = C2;
+ NewRHS = Shuffle->getOperand(0);
+ } else {
+ NewLHS = Shuffle->getOperand(0);
+ NewRHS = C2;
+ }
+ Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
+ Value *Res = Builder->CreateShuffleVector(NewBO,
+ UndefValue::get(Inst.getType()), Shuffle->getMask());
+ return Res;
+ }
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
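
The lane bookkeeping in the constant case of SimplifyVectorOp is compact enough to model standalone. A minimal sketch, with a hypothetical helper that is not part of the patch and plain ints standing in for Constant/UndefValue (it assumes C1's elements are non-negative so that -1 can represent undef): invert the shuffle by writing C1's lane I into slot ShMask[I] of C2, and fail when two lanes compete for the same slot, which is exactly the ShMask=<0,0>, C1=<1,2> case called out in the comment above.

  #include <cstddef>
  #include <vector>

  static bool buildC2(const std::vector<int> &ShMask,  // -1 marks an undef lane
                      const std::vector<int> &C1,
                      std::vector<int> &C2) {
    const int Undef = -1;
    C2.assign(ShMask.size(), Undef);
    for (std::size_t I = 0; I < ShMask.size(); ++I) {
      if (ShMask[I] < 0)
        continue;                   // undef result lane: unconstrained
      if (C2[ShMask[I]] != Undef)
        return false;               // slot already claimed by another lane
      C2[ShMask[I]] = C1[I];
    }
    return true;                    // now shuffle(C2, ShMask) reproduces C1
  }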
@@ -1130,7 +1226,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
//
if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
- return 0;
+ return nullptr;
// Note that if our source is a gep chain itself then we wait for that
// chain to be resolved before we perform this transformation. This
@@ -1138,7 +1234,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (GEPOperator *SrcGEP =
dyn_cast<GEPOperator>(Src->getOperand(0)))
if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
- return 0; // Wait until our source is folded to completion.
+ return nullptr; // Wait until our source is folded to completion.
SmallVector<Value*, 8> Indices;
@@ -1166,7 +1262,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// intptr_t). Just avoid transforming this until the input has been
// normalized.
if (SO1->getType() != GO1->getType())
- return 0;
+ return nullptr;
Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
}
@@ -1216,7 +1312,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// We do not handle pointer-vector geps here.
if (!StrippedPtrTy)
- return 0;
+ return nullptr;
if (StrippedPtr != PtrOp) {
bool HasZeroPointerIndex = false;
@@ -1241,7 +1337,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst *Res =
GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
- return Res;
+ if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
+ return Res;
+ // Insert Res, and create an addrspacecast.
+ // e.g.,
+ // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
+ // ->
+ // %0 = GEP i8 addrspace(1)* X, ...
+ // addrspacecast i8 addrspace(1)* %0 to i8*
+ return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType());
}
if (ArrayType *XATy =
@@ -1253,8 +1357,24 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// to an array of the same type as the destination pointer
// array. Because the array type is never stepped over (there
// is a leading zero) we can fold the cast into this GEP.
- GEP.setOperand(0, StrippedPtr);
- return &GEP;
+ if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
+ GEP.setOperand(0, StrippedPtr);
+ return &GEP;
+ }
+ // Cannot replace the base pointer directly because StrippedPtr's
+ // address space is different. Instead, create a new GEP followed by
+ // an addrspacecast.
+ // e.g.,
+ // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
+ // i32 0, ...
+ // ->
+ // %0 = GEP [10 x i8] addrspace(1)* X, ...
+ // addrspacecast i8 addrspace(1)* %0 to i8*
+ SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
}
@@ -1360,7 +1480,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
if (!DL)
- return 0;
+ return nullptr;
/// See if we can simplify:
/// X = bitcast A* to B*
@@ -1412,7 +1532,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- return 0;
+ return nullptr;
}
static bool
@@ -1527,7 +1647,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
}
return EraseInstFromFunction(MI);
}
- return 0;
+ return nullptr;
}
/// \brief Move the call to free before a NULL test.
@@ -1556,30 +1676,30 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) {
// would duplicate the call to free in each predecessor and it may
// not be profitable even for code size.
if (!PredBB)
- return 0;
+ return nullptr;
// Validate constraint #2: Does this block contain only the call to
// free and an unconditional branch?
// FIXME: We could check if we can speculate everything in the
// predecessor block
if (FreeInstrBB->size() != 2)
- return 0;
+ return nullptr;
BasicBlock *SuccBB;
if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
- return 0;
+ return nullptr;
// Validate the rest of constraint #1 by matching on the pred branch.
TerminatorInst *TI = PredBB->getTerminator();
BasicBlock *TrueBB, *FalseBB;
ICmpInst::Predicate Pred;
if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
- return 0;
+ return nullptr;
if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
- return 0;
+ return nullptr;
// Validate constraint #3: Ensure the null case just falls through.
if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
- return 0;
+ return nullptr;
assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
"Broken CFG: missing edge from predecessor to successor");
@@ -1614,14 +1734,14 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
return I;
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
// Change br (not X), label True, label False to: br X, label False, True
- Value *X = 0;
+ Value *X = nullptr;
BasicBlock *TrueDest;
BasicBlock *FalseDest;
if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
@@ -1664,7 +1784,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
@@ -1688,7 +1808,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
return &SI;
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
@@ -1705,7 +1825,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// first index
return ExtractValueInst::Create(C2, EV.getIndices().slice(1));
}
- return 0; // Can't handle other constants
+ return nullptr; // Can't handle other constants
}
if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
@@ -1838,7 +1958,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// and if again single-use then via load (gep (gep)) to load (gep).
// However, double extracts from e.g. function arguments or return values
// aren't handled yet.
- return 0;
+ return nullptr;
}
enum Personality_Type {
@@ -2177,7 +2297,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return &LI;
}
- return 0;
+ return nullptr;
}
@@ -2270,7 +2390,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
i != e; ++i) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
- if (CE == 0) continue;
+ if (CE == nullptr) continue;
Constant*& FoldRes = FoldedConstants[CE];
if (!FoldRes)
@@ -2374,7 +2494,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
while (!Worklist.isEmpty()) {
Instruction *I = Worklist.RemoveOne();
- if (I == 0) continue; // skip null values.
+ if (I == nullptr) continue; // skip null values.
// Check to see if we can DCE the instruction.
if (isInstructionTriviallyDead(I, TLI)) {
@@ -2516,7 +2636,7 @@ bool InstCombiner::runOnFunction(Function &F) {
return false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
// Minimizing size?
MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -2543,7 +2663,7 @@ bool InstCombiner::runOnFunction(Function &F) {
while (DoOneIteration(F, Iteration++))
EverMadeChange = true;
- Builder = 0;
+ Builder = nullptr;
return EverMadeChange;
}
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index bbfa4c5..95fca75 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -13,8 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asan"
-
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -53,8 +51,11 @@
using namespace llvm;
+#define DEBUG_TYPE "asan"
+
static const uint64_t kDefaultShadowScale = 3;
static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
+static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
@@ -79,6 +80,7 @@ static const char *const kAsanUnregisterGlobalsName =
static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *const kAsanInitName = "__asan_init_v3";
+static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init";
static const char *const kAsanCovName = "__sanitizer_cov";
static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
@@ -135,10 +137,12 @@ static cl::opt<bool> ClGlobals("asan-globals",
static cl::opt<int> ClCoverage("asan-coverage",
cl::desc("ASan coverage. 0: none, 1: entry block, 2: all blocks"),
cl::Hidden, cl::init(false));
+static cl::opt<int> ClCoverageBlockThreshold("asan-coverage-block-threshold",
+ cl::desc("Add coverage instrumentation only to the entry block if there "
+ "are more than this number of blocks."),
+ cl::Hidden, cl::init(1500));
static cl::opt<bool> ClInitializers("asan-initialization-order",
cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
-static cl::opt<bool> ClMemIntrin("asan-memintrin",
- cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair",
cl::desc("Instrument <, <=, >, >=, - with pointer operands"),
cl::Hidden, cl::init(false));
@@ -148,6 +152,16 @@ static cl::opt<unsigned> ClRealignStack("asan-realign-stack",
static cl::opt<std::string> ClBlacklistFile("asan-blacklist",
cl::desc("File containing the list of objects to ignore "
"during instrumentation"), cl::Hidden);
+static cl::opt<int> ClInstrumentationWithCallsThreshold(
+ "asan-instrumentation-with-call-threshold",
+ cl::desc("If the function being instrumented contains more than "
+ "this number of memory accesses, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
+ cl::Hidden, cl::init(7000));
+static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
+ "asan-memory-access-callback-prefix",
+ cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+ cl::init("__asan_"));
// This is an experimental feature that will allow to choose between
// instrumented and non-instrumented code at link-time.
@@ -238,7 +252,7 @@ struct ShadowMapping {
static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
llvm::Triple TargetTriple(M.getTargetTriple());
bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
- // bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
+ bool IsIOS = TargetTriple.getOS() == llvm::Triple::IOS;
bool IsFreeBSD = TargetTriple.getOS() == llvm::Triple::FreeBSD;
bool IsLinux = TargetTriple.getOS() == llvm::Triple::Linux;
bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 ||
@@ -256,6 +270,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
Mapping.Offset = kMIPS32_ShadowOffset32;
else if (IsFreeBSD)
Mapping.Offset = kFreeBSD_ShadowOffset32;
+ else if (IsIOS)
+ Mapping.Offset = kIOSShadowOffset32;
else
Mapping.Offset = kDefaultShadowOffset32;
} else { // LongSize == 64
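
All of these per-platform constants parameterize one mapping, applied later by memToShadow: shadow = (addr >> Scale) + Offset. A standalone model using the 32-bit defaults from this file (the function itself is illustrative, not from the patch):

  #include <cstdint>

  static const uint64_t kScale  = 3;           // kDefaultShadowScale
  static const uint64_t kOffset = 1ULL << 29;  // kDefaultShadowOffset32

  // Each shadow byte describes 2^kScale == 8 application bytes.
  uint64_t memToShadowModel(uint64_t Addr) {
    return (Addr >> kScale) + kOffset;
  }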
@@ -303,20 +319,17 @@ struct AddressSanitizer : public FunctionPass {
const char *getPassName() const override {
return "AddressSanitizerFunctionPass";
}
- void instrumentMop(Instruction *I);
+ void instrumentMop(Instruction *I, bool UseCalls);
void instrumentPointerComparisonOrSubtraction(Instruction *I);
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument);
+ Value *SizeArgument, bool UseCalls);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex,
Value *SizeArgument);
- bool instrumentMemIntrinsic(MemIntrinsic *MI);
- void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
- Value *Size,
- Instruction *InsertBefore, bool IsWrite);
+ void instrumentMemIntrinsic(MemIntrinsic *MI);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F) override;
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
@@ -349,8 +362,11 @@ struct AddressSanitizer : public FunctionPass {
std::unique_ptr<SpecialCaseList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
+ Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes];
// This array is indexed by AccessIsWrite.
- Function *AsanErrorCallbackSized[2];
+ Function *AsanErrorCallbackSized[2],
+ *AsanMemoryAccessCallbackSized[2];
+ Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
InlineAsm *EmptyAsm;
SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
@@ -393,6 +409,7 @@ class AddressSanitizerModule : public ModulePass {
Function *AsanUnpoisonGlobals;
Function *AsanRegisterGlobals;
Function *AsanUnregisterGlobals;
+ Function *AsanCovModuleInit;
};
// Stack poisoning does not play well with exception handling.
@@ -443,11 +460,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
bool runOnFunction() {
if (!ClStack) return false;
// Collect alloca, ret, lifetime instructions etc.
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
- BasicBlock *BB = *DI;
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
visit(*BB);
- }
+
if (AllocaVec.empty()) return false;
initializeCallbacks(*F.getParent());
@@ -590,72 +605,54 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
}
-void AddressSanitizer::instrumentMemIntrinsicParam(
- Instruction *OrigIns,
- Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
- IRBuilder<> IRB(InsertBefore);
- if (Size->getType() != IntptrTy)
- Size = IRB.CreateIntCast(Size, IntptrTy, false);
- // Check the first byte.
- instrumentAddress(OrigIns, InsertBefore, Addr, 8, IsWrite, Size);
- // Check the last byte.
- IRB.SetInsertPoint(InsertBefore);
- Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1));
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
- Value *AddrLast = IRB.CreateAdd(AddrLong, SizeMinusOne);
- instrumentAddress(OrigIns, InsertBefore, AddrLast, 8, IsWrite, Size);
-}
-
// Instrument memset/memmove/memcpy
-bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
- Value *Dst = MI->getDest();
- MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
- Value *Src = MemTran ? MemTran->getSource() : 0;
- Value *Length = MI->getLength();
-
- Constant *ConstLength = dyn_cast<Constant>(Length);
- Instruction *InsertBefore = MI;
- if (ConstLength) {
- if (ConstLength->isNullValue()) return false;
- } else {
- // The size is not a constant so it could be zero -- check at run-time.
- IRBuilder<> IRB(InsertBefore);
-
- Value *Cmp = IRB.CreateICmpNE(Length,
- Constant::getNullValue(Length->getType()));
- InsertBefore = SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
+void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ IRBuilder<> IRB(MI);
+ if (isa<MemTransferInst>(MI)) {
+ IRB.CreateCall3(
+ isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
+ IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
+ } else if (isa<MemSetInst>(MI)) {
+ IRB.CreateCall3(
+ AsanMemset,
+ IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
}
-
- instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
- if (Src)
- instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
- return true;
+ MI->eraseFromParent();
}
// If I is an interesting memory access, return the PointerOperand
-// and set IsWrite. Otherwise return NULL.
-static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
+// and set IsWrite/Alignment. Otherwise return NULL.
+static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
+ unsigned *Alignment) {
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads) return NULL;
+ if (!ClInstrumentReads) return nullptr;
*IsWrite = false;
+ *Alignment = LI->getAlignment();
return LI->getPointerOperand();
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites) return NULL;
+ if (!ClInstrumentWrites) return nullptr;
*IsWrite = true;
+ *Alignment = SI->getAlignment();
return SI->getPointerOperand();
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics) return NULL;
+ if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *Alignment = 0;
return RMW->getPointerOperand();
}
if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics) return NULL;
+ if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *Alignment = 0;
return XCHG->getPointerOperand();
}
- return NULL;
+ return nullptr;
}
static bool isPointerOperand(Value *V) {
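
The old scheme checked only the first and last byte of the range inline; the rewritten code forwards the whole intrinsic to the sanitizer runtime, which can check every byte and perform the copy itself. What the emitted IR corresponds to in source terms, as a sketch (__asan_memcpy is the real runtime entry point; the wrapper is illustrative):

  #include <cstddef>

  extern "C" void *__asan_memcpy(void *Dst, const void *Src, std::size_t N);

  void copyInstrumented(void *Dst, const void *Src, std::size_t N) {
    // Replaces the llvm.memcpy intrinsic outright; the original intrinsic
    // is erased via MI->eraseFromParent() above.
    __asan_memcpy(Dst, Src, N);
  }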
@@ -700,9 +697,10 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
IRB.CreateCall2(F, Param[0], Param[1]);
}
-void AddressSanitizer::instrumentMop(Instruction *I) {
+void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
bool IsWrite = false;
- Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
+ unsigned Alignment = 0;
+ Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment);
assert(Addr);
if (ClOpt && ClOptGlobals) {
if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
@@ -737,22 +735,29 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
else
NumInstrumentedReads++;
- // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check.
- if (TypeSize == 8 || TypeSize == 16 ||
- TypeSize == 32 || TypeSize == 64 || TypeSize == 128)
- return instrumentAddress(I, I, Addr, TypeSize, IsWrite, 0);
- // Instrument unusual size (but still multiple of 8).
+ unsigned Granularity = 1 << Mapping.Scale;
+ // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
+ // if the data is properly aligned.
+ if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
+ TypeSize == 128) &&
+ (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
+ return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls);
+ // Instrument unusual size or unusual alignment.
// We cannot do it with a single check, so we do a 1-byte check for the first
// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
// to report the actual access size.
IRBuilder<> IRB(I);
- Value *LastByte = IRB.CreateIntToPtr(
- IRB.CreateAdd(IRB.CreatePointerCast(Addr, IntptrTy),
- ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
- OrigPtrTy);
Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
- instrumentAddress(I, I, Addr, 8, IsWrite, Size);
- instrumentAddress(I, I, LastByte, 8, IsWrite, Size);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ if (UseCalls) {
+ IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite], AddrLong, Size);
+ } else {
+ Value *LastByte = IRB.CreateIntToPtr(
+ IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ OrigPtrTy);
+ instrumentAddress(I, I, Addr, 8, IsWrite, Size, false);
+ instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false);
+ }
}
// Validate the result of Module::getOrInsertFunction called for an interface
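
The new admission test for a single inline check, isolated as a predicate that mirrors the condition above (a sketch; Alignment == 0 means the ABI alignment applies, which is sufficient):

  #include <cstdint>

  bool singleCheckOK(uint32_t TypeSizeBits, unsigned Alignment,
                     unsigned Granularity /* 1 << Mapping.Scale */) {
    bool SizeOK = TypeSizeBits == 8 || TypeSizeBits == 16 ||
                  TypeSizeBits == 32 || TypeSizeBits == 64 ||
                  TypeSizeBits == 128;
    bool AlignOK = Alignment >= Granularity || Alignment == 0 ||
                   Alignment >= TypeSizeBits / 8;
    return SizeOK && AlignOK;
  }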
@@ -800,11 +805,18 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
}
void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
- Instruction *InsertBefore,
- Value *Addr, uint32_t TypeSize,
- bool IsWrite, Value *SizeArgument) {
+ Instruction *InsertBefore, Value *Addr,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls) {
IRBuilder<> IRB(InsertBefore);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+
+ if (UseCalls) {
+ IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][AccessSizeIndex],
+ AddrLong);
+ return;
+ }
Type *ShadowTy = IntegerType::get(
*C, std::max(8U, TypeSize >> Mapping.Scale));
@@ -815,9 +827,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
- size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
size_t Granularity = 1 << Mapping.Scale;
- TerminatorInst *CrashTerm = 0;
+ TerminatorInst *CrashTerm = nullptr;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
TerminatorInst *CheckTerm =
@@ -842,8 +853,29 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
void AddressSanitizerModule::createInitializerPoisonCalls(
Module &M, GlobalValue *ModuleName) {
- // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
- Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
+ // We do all of our poisoning and unpoisoning within a global constructor.
+ // These are called _GLOBAL__(sub_)?I_.*.
+ // TODO: Consider looking through the functions in
+ // M.getGlobalVariable("llvm.global_ctors") instead of using this stringly
+ // typed approach.
+ Function *GlobalInit = nullptr;
+ for (auto &F : M.getFunctionList()) {
+ StringRef FName = F.getName();
+
+ const char kGlobalPrefix[] = "_GLOBAL__";
+ if (!FName.startswith(kGlobalPrefix))
+ continue;
+ FName = FName.substr(strlen(kGlobalPrefix));
+
+ const char kOptionalSub[] = "sub_";
+ if (FName.startswith(kOptionalSub))
+ FName = FName.substr(strlen(kOptionalSub));
+
+ if (FName.startswith("I_")) {
+ GlobalInit = &F;
+ break;
+ }
+ }
// If that function is not present, this TU contains no globals, or they have
// all been optimized away
if (!GlobalInit)
@@ -858,7 +890,7 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
// Add calls to unpoison all globals before each return instruction.
for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
- I != E; ++I) {
+ I != E; ++I) {
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
CallInst::Create(AsanUnpoisonGlobals, "", RI);
}
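
The scan above accepts both the classic "_GLOBAL__I_foo" spelling and the newer "_GLOBAL__sub_I_foo" one. The same matching, modeled standalone (illustrative; the patch itself works on llvm::StringRef):

  #include <string>

  bool isGlobalInitName(const std::string &Name) {
    const std::string Prefix = "_GLOBAL__";
    if (Name.compare(0, Prefix.size(), Prefix) != 0)
      return false;
    std::string Rest = Name.substr(Prefix.size());
    const std::string Sub = "sub_";
    if (Rest.compare(0, Sub.size(), Sub) == 0)
      Rest = Rest.substr(Sub.size());          // the "sub_" piece is optional
    return Rest.compare(0, 2, "I_") == 0;
  }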
@@ -902,8 +934,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// Ignore the globals from the __OBJC section. The ObjC runtime assumes
// those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
// them.
- if ((Section.find("__OBJC,") == 0) ||
- (Section.find("__DATA, __objc_") == 0)) {
+ if (Section.startswith("__OBJC,") ||
+ Section.startswith("__DATA, __objc_")) {
DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n");
return false;
}
@@ -915,16 +947,26 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// is placed into __DATA,__cfstring
// Therefore there's no point in placing redzones into __DATA,__cfstring.
// Moreover, it causes the linker to crash on OS X 10.7
- if (Section.find("__DATA,__cfstring") == 0) {
+ if (Section.startswith("__DATA,__cfstring")) {
DEBUG(dbgs() << "Ignoring CFString: " << *G << "\n");
return false;
}
// The linker merges the contents of cstring_literals and removes the
// trailing zeroes.
- if (Section.find("__TEXT,__cstring,cstring_literals") == 0) {
+ if (Section.startswith("__TEXT,__cstring,cstring_literals")) {
DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n");
return false;
}
+
+ // Callbacks put into the CRT initializer/terminator sections
+ // should not be instrumented.
+ // See https://code.google.com/p/address-sanitizer/issues/detail?id=305
+ // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx
+ if (Section.startswith(".CRT")) {
+ DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n");
+ return false;
+ }
+
// Globals from llvm.metadata aren't emitted, do not instrument them.
if (Section == "llvm.metadata") return false;
}
@@ -950,6 +992,10 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
kAsanUnregisterGlobalsName,
IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+ AsanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanCovModuleInitName,
+ IRB.getVoidTy(), IntptrTy, NULL));
+ AsanCovModuleInit->setLinkage(Function::ExternalLinkage);
}
// This function replaces all global variables with new variables that have
@@ -980,6 +1026,14 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
GlobalsToChange.push_back(G);
}
+ Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
+ assert(CtorFunc);
+ IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
+
+ Function *CovFunc = M.getFunction(kAsanCovName);
+ int nCov = CovFunc ? CovFunc->getNumUses() : 0;
+ IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov));
+
size_t n = GlobalsToChange.size();
if (n == 0) return false;
@@ -996,10 +1050,6 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n);
- Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
- assert(CtorFunc);
- IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
-
bool HasDynamicallyInitializedGlobals = false;
// We shouldn't merge same module names, as this string serves as unique
@@ -1110,12 +1160,16 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
// IsWrite and TypeSize are encoded in the function name.
- std::string FunctionName = std::string(kAsanReportErrorTemplate) +
+ std::string Suffix =
(AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
- // If we are merging crash callbacks, they have two parameters.
AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
- checkInterfaceFunction(M.getOrInsertFunction(
- FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
+ checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix,
+ IRB.getVoidTy(), IntptrTy, NULL));
+ AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
+ checkInterfaceFunction(
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix,
+ IRB.getVoidTy(), IntptrTy, NULL));
}
}
AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
@@ -1123,8 +1177,25 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
- AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+ AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction(
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction(
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+
+ AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL));
+ AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL));
+ AsanMemset = checkInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, NULL));
+
+ AsanHandleNoReturnFunc = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction(
kAsanCovName, IRB.getVoidTy(), NULL));
AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
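
The refactoring lets one suffix computation serve two callback families: the reporting functions (kAsanReportErrorTemplate + Suffix) and the new instrumentation-with-calls entry points (ClMemoryAccessCallbackPrefix + Suffix, "__asan_" by default). The encoding, isolated as a sketch:

  #include <string>

  std::string accessSuffix(bool IsWrite, unsigned AccessSizeIndex) {
    // A 4-byte store has AccessSizeIndex == 2, yielding "store4"; the full
    // names become "__asan_report_store4" and "__asan_store4".
    return std::string(IsWrite ? "store" : "load") +
           std::to_string(1u << AccessSizeIndex);
  }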
@@ -1142,7 +1213,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
if (!DLP)
- return false;
+ report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
@@ -1241,7 +1312,8 @@ bool AddressSanitizer::InjectCoverage(Function &F,
const ArrayRef<BasicBlock *> AllBlocks) {
if (!ClCoverage) return false;
- if (ClCoverage == 1) {
+ if (ClCoverage == 1 ||
+ (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) {
InjectCoverageAtBlock(F, F.getEntryBlock());
} else {
for (size_t i = 0, n = AllBlocks.size(); i < n; i++)
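
The effect of the new threshold, reduced to a predicate (a sketch of the condition above): -asan-coverage=2 still requests every block, but very large functions silently degrade to an entry-block-only probe.

  #include <cstddef>

  bool coverEntryBlockOnly(int ClCoverage, std::size_t NumBlocks,
                           std::size_t Threshold /* default 1500 */) {
    return ClCoverage == 1 || NumBlocks > Threshold;
  }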
@@ -1275,6 +1347,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
SmallVector<Instruction*, 16> PointerComparisonsOrSubtracts;
int NumAllocas = 0;
bool IsWrite;
+ unsigned Alignment;
// Fill the set of memory operations to instrument.
for (Function::iterator FI = F.begin(), FE = F.end();
@@ -1285,7 +1358,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
if (LooksLikeCodeInBug11395(BI)) return false;
- if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite)) {
+ if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite, &Alignment)) {
if (ClOpt && ClOptSameTemp) {
if (!TempsToInstrument.insert(Addr))
continue; // We've seen this temp in the current BB.
@@ -1294,7 +1367,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
isInterestingPointerComparisonOrSubtraction(BI)) {
PointerComparisonsOrSubtracts.push_back(BI);
continue;
- } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
+ } else if (isa<MemIntrinsic>(BI)) {
// ok, take it.
} else {
if (isa<AllocaInst>(BI))
@@ -1315,7 +1388,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
}
}
- Function *UninstrumentedDuplicate = 0;
+ Function *UninstrumentedDuplicate = nullptr;
bool LikelyToInstrument =
!NoReturnCalls.empty() || !ToInstrument.empty() || (NumAllocas > 0);
if (ClKeepUninstrumented && LikelyToInstrument) {
@@ -1326,14 +1399,19 @@ bool AddressSanitizer::runOnFunction(Function &F) {
F.getParent()->getFunctionList().push_back(UninstrumentedDuplicate);
}
+ bool UseCalls = false;
+ if (ClInstrumentationWithCallsThreshold >= 0 &&
+ ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold)
+ UseCalls = true;
+
// Instrument.
int NumInstrumented = 0;
for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
Instruction *Inst = ToInstrument[i];
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
- if (isInterestingMemoryAccess(Inst, &IsWrite))
- instrumentMop(Inst);
+ if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment))
+ instrumentMop(Inst, UseCalls);
else
instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
}
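
The inline-versus-callback decision, in isolation (a sketch of the condition above): beyond the threshold, every access in the function is checked through the out-of-line __asan_load*/__asan_store* routines, trading check speed for code size.

  #include <cstddef>

  bool useCallbacks(int Threshold /* -1 disables callbacks entirely */,
                    std::size_t NumAccessesToInstrument) {
    return Threshold >= 0 &&
           NumAccessesToInstrument > (std::size_t)Threshold;
  }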
@@ -1464,12 +1542,23 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
}
}
+static DebugLoc getFunctionEntryDebugLocation(Function &F) {
+ BasicBlock::iterator I = F.getEntryBlock().begin(),
+ E = F.getEntryBlock().end();
+ for (; I != E; ++I)
+ if (!isa<AllocaInst>(I))
+ break;
+ return I->getDebugLoc();
+}
+
void FunctionStackPoisoner::poisonStack() {
int StackMallocIdx = -1;
+ DebugLoc EntryDebugLocation = getFunctionEntryDebugLocation(F);
assert(AllocaVec.size() > 0);
Instruction *InsBefore = AllocaVec[0];
IRBuilder<> IRB(InsBefore);
+ IRB.SetCurrentDebugLocation(EntryDebugLocation);
SmallVector<ASanStackVariableDescription, 16> SVD;
SVD.reserve(AllocaVec.size());
@@ -1493,6 +1582,7 @@ void FunctionStackPoisoner::poisonStack() {
Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
AllocaInst *MyAlloca =
new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
+ MyAlloca->setDebugLoc(EntryDebugLocation);
assert((ClRealignStack & (ClRealignStack - 1)) == 0);
size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack);
MyAlloca->setAlignment(FrameAlignment);
@@ -1513,11 +1603,13 @@ void FunctionStackPoisoner::poisonStack() {
Instruction *Term = SplitBlockAndInsertIfThen(Cmp, InsBefore, false);
BasicBlock *CmpBlock = cast<Instruction>(Cmp)->getParent();
IRBuilder<> IRBIf(Term);
+ IRBIf.SetCurrentDebugLocation(EntryDebugLocation);
LocalStackBase = IRBIf.CreateCall2(
AsanStackMallocFunc[StackMallocIdx],
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
BasicBlock *SetBlock = cast<Instruction>(LocalStackBase)->getParent();
IRB.SetInsertPoint(InsBefore);
+ IRB.SetCurrentDebugLocation(EntryDebugLocation);
PHINode *Phi = IRB.CreatePHI(IntptrTy, 2);
Phi->addIncoming(OrigStackBase, CmpBlock);
Phi->addIncoming(LocalStackBase, SetBlock);
@@ -1654,7 +1746,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
// We're interested only in allocas we can handle.
- return isInterestingAlloca(*AI) ? AI : 0;
+ return isInterestingAlloca(*AI) ? AI : nullptr;
// See if we've already calculated (or started to calculate) alloca for a
// given value.
AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
@@ -1662,8 +1754,8 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
return I->second;
// Store 0 while we're calculating alloca for value V to avoid
// infinite recursion if the value references itself.
- AllocaForValue[V] = 0;
- AllocaInst *Res = 0;
+ AllocaForValue[V] = nullptr;
+ AllocaInst *Res = nullptr;
if (CastInst *CI = dyn_cast<CastInst>(V))
Res = findAllocaForValue(CI->getOperand(0));
else if (PHINode *PN = dyn_cast<PHINode>(V)) {
@@ -1673,12 +1765,12 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (IncValue == PN) continue;
AllocaInst *IncValueAI = findAllocaForValue(IncValue);
// AI for incoming values should exist and should all be equal.
- if (IncValueAI == 0 || (Res != 0 && IncValueAI != Res))
- return 0;
+ if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
+ return nullptr;
Res = IncValueAI;
}
}
- if (Res != 0)
+ if (Res)
AllocaForValue[V] = Res;
return Res;
}
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 505fb83..9a5cea8 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "bounds-checking"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -28,6 +27,8 @@
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "bounds-checking"
+
static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
cl::desc("Use one trap block per function"));
@@ -61,7 +62,7 @@ namespace {
BasicBlock *TrapBB;
BasicBlock *getTrapBB();
- void emitBranchToTrap(Value *Cmp = 0);
+ void emitBranchToTrap(Value *Cmp = nullptr);
bool instrument(Value *Ptr, Value *Val);
};
}
@@ -103,7 +104,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
if (!C->getZExtValue())
return;
else
- Cmp = 0; // unconditional branch
+ Cmp = nullptr; // unconditional branch
}
++ChecksAdded;
@@ -167,7 +168,7 @@ bool BoundsChecking::runOnFunction(Function &F) {
DL = &getAnalysis<DataLayoutPass>().getDataLayout();
TLI = &getAnalysis<TargetLibraryInfo>();
- TrapBB = 0;
+ TrapBB = nullptr;
BuilderTy TheBuilder(F.getContext(), TargetFolder(DL));
Builder = &TheBuilder;
ObjectSizeOffsetEvaluator TheObjSizeEval(DL, TLI, F.getContext(),
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index df1549d..7f468f7 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -211,7 +211,8 @@ class DataFlowSanitizer : public ModulePass {
public:
DataFlowSanitizer(StringRef ABIListFile = StringRef(),
- void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0);
+ void *(*getArgTLS)() = nullptr,
+ void *(*getRetValTLS)() = nullptr);
static char ID;
bool doInitialization(Module &M) override;
bool runOnModule(Module &M) override;
@@ -233,8 +234,8 @@ struct DFSanFunction {
DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
: DFS(DFS), F(F), IA(DFS.getInstrumentedABI()),
- IsNativeABI(IsNativeABI), ArgTLSPtr(0), RetvalTLSPtr(0),
- LabelReturnAlloca(0) {}
+ IsNativeABI(IsNativeABI), ArgTLSPtr(nullptr), RetvalTLSPtr(nullptr),
+ LabelReturnAlloca(nullptr) {}
Value *getArgTLSPtr();
Value *getArgTLS(unsigned Index, Instruction *Pos);
Value *getRetvalTLS();
@@ -303,7 +304,7 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
ArgTypes.push_back(ShadowPtrTy);
Type *RetType = T->getReturnType();
if (!RetType->isVoidTy())
- RetType = StructType::get(RetType, ShadowTy, (Type *)0);
+ RetType = StructType::get(RetType, ShadowTy, (Type *)nullptr);
return FunctionType::get(RetType, ArgTypes, T->isVarArg());
}
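
For orientation, what the args-ABI rewrite in getArgsFunctionType does to a signature, expressed in source terms. This is a sketch under stated assumptions: dfsan_label stands in for the runtime's shadow type, the bitwise-or is purely illustrative (the real runtime computes label unions through a lookup structure), and the helper is not the pass's actual output.

  typedef unsigned short dfsan_label;  // assumed stand-in for the shadow type

  struct IntWithLabel { int V; dfsan_label L; };

  // original:     int add(int X, int Y);
  // instrumented: one trailing label per parameter, and a non-void return
  //               becomes a {value, label} pair, as built by StructType::get
  //               above.
  IntWithLabel addInstrumented(int X, int Y, dfsan_label XL, dfsan_label YL) {
    return { X + Y, (dfsan_label)(XL | YL) };  // illustrative label merge
  }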
@@ -345,7 +346,7 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
bool DataFlowSanitizer::doInitialization(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
if (!DLP)
- return false;
+ report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
Mod = &M;
@@ -373,18 +374,20 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
if (GetArgTLSPtr) {
Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
- ArgTLS = 0;
+ ArgTLS = nullptr;
GetArgTLS = ConstantExpr::getIntToPtr(
ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
PointerType::getUnqual(
- FunctionType::get(PointerType::getUnqual(ArgTLSTy), (Type *)0)));
+ FunctionType::get(PointerType::getUnqual(ArgTLSTy),
+ (Type *)nullptr)));
}
if (GetRetvalTLSPtr) {
- RetvalTLS = 0;
+ RetvalTLS = nullptr;
GetRetvalTLS = ConstantExpr::getIntToPtr(
ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
PointerType::getUnqual(
- FunctionType::get(PointerType::getUnqual(ShadowTy), (Type *)0)));
+ FunctionType::get(PointerType::getUnqual(ShadowTy),
+ (Type *)nullptr)));
}
ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
@@ -554,7 +557,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
++i;
// Don't stop on weak. We assume people aren't playing games with the
// instrumentedness of overridden weak aliases.
- if (Function *F = dyn_cast<Function>(GA->getAliasedGlobal())) {
+ if (Function *F = dyn_cast<Function>(GA->getAliasee())) {
bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
if (GAInst && FInst) {
addGlobalNamePrefix(GA);
@@ -629,7 +632,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// function... yet.
} else if (FT->isVarArg()) {
UnwrappedFnMap[&F] = &F;
- *i = 0;
+ *i = nullptr;
} else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
// Build a wrapper function for F. The wrapper simply calls F, and is
// added to FnsToInstrument so that any instrumentation according to its
@@ -680,9 +683,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// DFSanVisitor may create new basic blocks, which confuses df_iterator.
// Build a copy of the list before iterating over it.
- llvm::SmallVector<BasicBlock *, 4> BBList;
- std::copy(df_begin(&(*i)->getEntryBlock()), df_end(&(*i)->getEntryBlock()),
- std::back_inserter(BBList));
+ llvm::SmallVector<BasicBlock *, 4> BBList(
+ depth_first(&(*i)->getEntryBlock()));
for (llvm::SmallVector<BasicBlock *, 4>::iterator i = BBList.begin(),
e = BBList.end();
@@ -1313,7 +1315,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
}
}
- Instruction *Next = 0;
+ Instruction *Next = nullptr;
if (!CS.getType()->isVoidTy()) {
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
if (II->getNormalDest()->getSinglePredecessor()) {
diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp
index 069886e..18bda1a 100644
--- a/lib/Transforms/Instrumentation/DebugIR.cpp
+++ b/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -16,8 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "debug-ir"
-
#include "llvm/IR/ValueMap.h"
#include "DebugIR.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -42,6 +40,8 @@
using namespace llvm;
+#define DEBUG_TYPE "debug-ir"
+
namespace {
/// Builds a map of Value* to line numbers on which the Value appears in a
@@ -118,7 +118,7 @@ public:
void visitInstruction(Instruction &I) {
if (I.getMetadata(LLVMContext::MD_dbg))
- I.setMetadata(LLVMContext::MD_dbg, 0);
+ I.setMetadata(LLVMContext::MD_dbg, nullptr);
}
void run(Module *M) {
@@ -168,11 +168,11 @@ class DIUpdater : public InstVisitor<DIUpdater> {
public:
DIUpdater(Module &M, StringRef Filename = StringRef(),
- StringRef Directory = StringRef(), const Module *DisplayM = 0,
- const ValueToValueMapTy *VMap = 0)
+ StringRef Directory = StringRef(), const Module *DisplayM = nullptr,
+ const ValueToValueMapTy *VMap = nullptr)
: Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap),
- Finder(), Filename(Filename), Directory(Directory), FileNode(0),
- LexicalBlockFileNode(0), CUNode(0) {
+ Finder(), Filename(Filename), Directory(Directory), FileNode(nullptr),
+ LexicalBlockFileNode(nullptr), CUNode(nullptr) {
Finder.processModule(M);
visit(&M);
}
@@ -184,7 +184,7 @@ public:
report_fatal_error("DebugIR pass supports only a signle compile unit per "
"Module.");
createCompileUnit(Finder.compile_unit_count() == 1 ?
- (MDNode*)*Finder.compile_units().begin() : 0);
+ (MDNode*)*Finder.compile_units().begin() : nullptr);
}
void visitFunction(Function &F) {
@@ -232,7 +232,7 @@ public:
/// If a ValueToValueMap is provided, use it to get the real instruction as
/// the line table was generated on a clone of the module on which we are
/// operating.
- Value *RealInst = 0;
+ Value *RealInst = nullptr;
if (VMap)
RealInst = VMap->lookup(&I);
@@ -256,7 +256,7 @@ public:
NewLoc = DebugLoc::get(Line, Col, Loc.getScope(RealInst->getContext()),
Loc.getInlinedAt(RealInst->getContext()));
else if (MDNode *scope = findScope(&I))
- NewLoc = DebugLoc::get(Line, Col, scope, 0);
+ NewLoc = DebugLoc::get(Line, Col, scope, nullptr);
else {
DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I
<< ". no DebugLoc will be present."
@@ -334,7 +334,7 @@ private:
}
DEBUG(dbgs() << "unable to find DISubprogram node for function "
<< F->getName().str() << "\n");
- return 0;
+ return nullptr;
}
/// Sets Line to the line number on which V appears and returns true. If a
@@ -366,7 +366,7 @@ private:
TypeNodeIter i = TypeDescriptors.find(T);
if (i != TypeDescriptors.end())
return i->second;
- return 0;
+ return nullptr;
}
/// Returns a DebugInfo type from an LLVM type T.
@@ -375,12 +375,12 @@ private:
if (N)
return DIDerivedType(N);
else if (T->isVoidTy())
- return DIDerivedType(0);
+ return DIDerivedType(nullptr);
else if (T->isStructTy()) {
N = Builder.createStructType(
DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode),
0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0,
- DIType(0), DIArray(0)); // filled in later
+ DIType(nullptr), DIArray(nullptr)); // filled in later
// N is added to the map (early) so that element search below can find it,
// so as to avoid infinite recursion for structs that contain pointers to
@@ -535,7 +535,7 @@ void DebugIR::writeDebugBitcode(const Module *M, int *fd) {
Out.reset(new raw_fd_ostream(*fd, true));
}
- M->print(*Out, 0);
+ M->print(*Out, nullptr);
Out->close();
}
diff --git a/lib/Transforms/Instrumentation/DebugIR.h b/lib/Transforms/Instrumentation/DebugIR.h
index 3f57da5..02831ed 100644
--- a/lib/Transforms/Instrumentation/DebugIR.h
+++ b/lib/Transforms/Instrumentation/DebugIR.h
@@ -90,7 +90,7 @@ private:
/// Write M to disk, optionally passing in an fd to an open file which is
/// closed by this function after writing. If no fd is specified, a new file
/// is opened, written, and closed.
- void writeDebugBitcode(const llvm::Module *M, int *fd = 0);
+ void writeDebugBitcode(const llvm::Module *M, int *fd = nullptr);
};
} // llvm namespace
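The 0 -> nullptr substitution that recurs throughout these hunks is the C++11 idiom for null pointers; unlike the integer literal 0, nullptr participates in overload resolution only as a pointer. A minimal standalone sketch of the difference (illustrative, not part of the patch):

    #include <cstdio>

    void f(int)   { std::puts("int overload"); }     // (1)
    void f(int *) { std::puts("pointer overload"); } // (2)

    int main() {
      f(0);             // selects (1): 0 is an int first, a null pointer second
      f(nullptr);       // selects (2): nullptr converts only to pointer types
      int *p = nullptr; // self-documenting in initializers and default arguments
      (void)p;
    }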
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index bd00ec8..8330a9b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -14,8 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "insert-gcov-profiling"
-
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
@@ -39,10 +37,13 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
+#include <memory>
#include <string>
#include <utility>
using namespace llvm;
+#define DEBUG_TYPE "insert-gcov-profiling"
+
static cl::opt<std::string>
DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
cl::ValueRequired);
@@ -77,9 +78,6 @@ namespace {
"GCOVProfiler asked to do nothing?");
init();
}
- ~GCOVProfiler() {
- DeleteContainerPointers(Funcs);
- }
const char *getPassName() const override {
return "GCOV Profiler";
}
@@ -141,7 +139,7 @@ namespace {
Module *M;
LLVMContext *Ctx;
- SmallVector<GCOVFunction *, 16> Funcs;
+ SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
};
}
@@ -449,6 +447,21 @@ bool GCOVProfiler::runOnModule(Module &M) {
return false;
}
+static bool functionHasLines(Function *F) {
+  // Check whether this function actually has any source lines. Emitting
+  // records for functions without lines not only wastes space, it can also
+  // crash gcov.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
+ I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Loc.getLine() != 0)
+ return true;
+ }
+ }
+ return false;
+}
+
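Distilled, the new guard keeps a function only when some instruction carries a known debug location with a non-zero line; line 0 is the "no source line" sentinel. A self-contained model of the predicate over assumed (location-known, line) pairs, not the LLVM API:

    #include <cassert>
    #include <utility>
    #include <vector>

    // Model: one (LocationKnown, Line) pair per instruction in the function.
    bool functionHasLinesModel(const std::vector<std::pair<bool, unsigned>> &Locs) {
      for (const auto &L : Locs)
        if (L.first && L.second != 0) // known location with a real line number
          return true;
      return false; // only unknown or line-0 locations: skip this function
    }

    int main() {
      assert(!functionHasLinesModel({{false, 0}, {true, 0}})); // skipped
      assert(functionHasLinesModel({{true, 0}, {true, 42}}));  // instrumented
    }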
void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -474,6 +487,7 @@ void GCOVProfiler::emitProfileNotes() {
Function *F = SP.getFunction();
if (!F) continue;
+ if (!functionHasLines(F)) continue;
// gcov expects every function to start with an entry block that has a
// single successor, so split the entry block to make sure of that.
@@ -483,19 +497,19 @@ void GCOVProfiler::emitProfileNotes() {
++It;
EntryBlock.splitBasicBlock(It);
- GCOVFunction *Func =
- new GCOVFunction(SP, &out, i, Options.UseCfgChecksum);
- Funcs.push_back(Func);
+ Funcs.push_back(
+ make_unique<GCOVFunction>(SP, &out, i, Options.UseCfgChecksum));
+ GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func->getBlock(BB);
+ GCOVBlock &Block = Func.getBlock(BB);
TerminatorInst *TI = BB->getTerminator();
if (int successors = TI->getNumSuccessors()) {
for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func->getBlock(TI->getSuccessor(i)));
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
}
} else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func->getReturnBlock());
+ Block.addEdge(Func.getReturnBlock());
}
uint32_t Line = 0;
@@ -511,7 +525,7 @@ void GCOVProfiler::emitProfileNotes() {
Lines.addLine(Loc.getLine());
}
}
- EdgeDestinations += Func->getEdgeDestinations();
+ EdgeDestinations += Func.getEdgeDestinations();
}
FileChecksums.push_back(hash_value(EdgeDestinations));
@@ -519,9 +533,7 @@ void GCOVProfiler::emitProfileNotes() {
out.write(ReversedVersion, 4);
out.write(reinterpret_cast<char*>(&FileChecksums.back()), 4);
- for (SmallVectorImpl<GCOVFunction *>::iterator I = Funcs.begin(),
- E = Funcs.end(); I != E; ++I) {
- GCOVFunction *Func = *I;
+ for (auto &Func : Funcs) {
Func->setCfgChecksum(FileChecksums.back());
Func->writeOut();
}
@@ -549,6 +561,7 @@ bool GCOVProfiler::emitProfileArcs() {
continue;
Function *F = SP.getFunction();
if (!F) continue;
+ if (!functionHasLines(F)) continue;
if (!Result) Result = true;
unsigned Edges = 0;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index ec1a195..b8e632e 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -93,12 +93,11 @@
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "msan"
-
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -122,6 +121,8 @@
using namespace llvm;
+#define DEBUG_TYPE "msan"
+
static const uint64_t kShadowMask32 = 1ULL << 31;
static const uint64_t kShadowMask64 = 1ULL << 46;
static const uint64_t kOriginOffset32 = 1ULL << 30;
@@ -129,6 +130,9 @@ static const uint64_t kOriginOffset64 = 1ULL << 45;
static const unsigned kMinOriginAlignment = 4;
static const unsigned kShadowTLSAlignment = 8;
+// Access sizes are powers of two: 1, 2, 4, 8.
+static const size_t kNumberOfAccessSizes = 4;
+
/// \brief Track origins of uninitialized values.
///
/// Adds a section to MemorySanitizer report that points to the allocation
@@ -178,6 +182,14 @@ static cl::opt<std::string> ClBlacklistFile("msan-blacklist",
cl::desc("File containing the list of functions where MemorySanitizer "
"should not report bugs"), cl::Hidden);
+static cl::opt<int> ClInstrumentationWithCallThreshold(
+ "msan-instrumentation-with-call-threshold",
+ cl::desc(
+ "If the function being instrumented requires more than "
+ "this number of checks and origin stores, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
+ cl::Hidden, cl::init(3500));
+
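The decision this flag controls is computed later in the patch as InstrumentationList.size() + StoreList.size() > threshold. A hedged model with a worked example (function and parameter names are illustrative):

    #include <cassert>
    #include <cstddef>

    // -1 disables callbacks; otherwise callbacks kick in above the threshold.
    bool useCallbacks(int Threshold, size_t NumChecks, size_t NumOriginStores) {
      return Threshold >= 0 &&
             NumChecks + NumOriginStores > static_cast<size_t>(Threshold);
    }

    int main() {
      assert(useCallbacks(3500, 2000, 2000));  // 4000 > 3500: use callbacks
      assert(!useCallbacks(3500, 1000, 1000)); // 2000 <= 3500: inline checks
      assert(!useCallbacks(-1, 100000, 0));    // -1: never use callbacks
    }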
// Experimental. Wraps all indirect calls in the instrumented code with
// a call to the given function. This is needed to assist the dynamic
// helper tool (MSanDR) to regain control on transition between instrumented and
@@ -203,8 +215,8 @@ class MemorySanitizer : public FunctionPass {
StringRef BlacklistFile = StringRef())
: FunctionPass(ID),
TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
- DL(0),
- WarningFn(0),
+ DL(nullptr),
+ WarningFn(nullptr),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile),
WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {}
const char *getPassName() const override { return "MemorySanitizer"; }
@@ -245,6 +257,10 @@ class MemorySanitizer : public FunctionPass {
/// \brief The run-time callback to print a warning.
Value *WarningFn;
+ // These arrays are indexed by log2(AccessSize).
+ Value *MaybeWarningFn[kNumberOfAccessSizes];
+ Value *MaybeStoreOriginFn[kNumberOfAccessSizes];
+
/// \brief Run-time helper that generates a new origin value for a stack
/// allocation.
Value *MsanSetAllocaOrigin4Fn;
@@ -321,6 +337,20 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
: "__msan_warning_noreturn";
WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL);
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ unsigned AccessSize = 1 << AccessSizeIndex;
+ std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
+ MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
+ IRB.getInt32Ty(), NULL);
+
+ FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
+ MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), NULL);
+ }
+
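Read back from the getOrInsertFunction calls above, the runtime is expected to export one warning and one origin-store entry point per access size. The C-level shapes below are inferred from the IR types (iN for the shadow, i8* for the address, i32 for the origin); they are an assumption, not quoted from the runtime:

    #include <stdint.h>

    extern "C" {
    // Inferred prototypes; uintN_t stands in for the iN shadow types and
    // void* for i8*. Only a representative subset of the eight is shown.
    void __msan_maybe_warning_1(uint8_t shadow, uint32_t origin);
    void __msan_maybe_warning_8(uint64_t shadow, uint32_t origin);
    void __msan_maybe_store_origin_1(uint8_t shadow, void *addr, uint32_t origin);
    void __msan_maybe_store_origin_8(uint64_t shadow, void *addr, uint32_t origin);
    }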
MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
"__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
IRB.getInt8PtrTy(), IntptrTy, NULL);
@@ -341,31 +371,32 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
// Create globals.
RetvalTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 8), false,
- GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0,
+ GlobalVariable::ExternalLinkage, nullptr, "__msan_retval_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
RetvalOriginTLS = new GlobalVariable(
- M, OriginTy, false, GlobalVariable::ExternalLinkage, 0,
- "__msan_retval_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
+ M, OriginTy, false, GlobalVariable::ExternalLinkage, nullptr,
+ "__msan_retval_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
ParamTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
- GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0,
+ GlobalVariable::ExternalLinkage, nullptr, "__msan_param_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
ParamOriginTLS = new GlobalVariable(
M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
- 0, "__msan_param_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
+ nullptr, "__msan_param_origin_tls", nullptr,
+ GlobalVariable::InitialExecTLSModel);
VAArgTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
- GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0,
+ GlobalVariable::ExternalLinkage, nullptr, "__msan_va_arg_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
VAArgOverflowSizeTLS = new GlobalVariable(
- M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0,
- "__msan_va_arg_overflow_size_tls", 0,
+ M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
+ "__msan_va_arg_overflow_size_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
OriginTLS = new GlobalVariable(
- M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0,
- "__msan_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
+ M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
+ "__msan_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
// We insert an empty inline asm after __msan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
@@ -379,14 +410,14 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL);
}
- if (ClWrapIndirectCallsFast) {
+ if (WrapIndirectCalls && ClWrapIndirectCallsFast) {
MsandrModuleStart = new GlobalVariable(
M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
- 0, "__executable_start");
+ nullptr, "__executable_start");
MsandrModuleStart->setVisibility(GlobalVariable::HiddenVisibility);
MsandrModuleEnd = new GlobalVariable(
M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
- 0, "_end");
+ nullptr, "_end");
MsandrModuleEnd->setVisibility(GlobalVariable::HiddenVisibility);
}
}
@@ -397,7 +428,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
bool MemorySanitizer::doInitialization(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
if (!DLP)
- return false;
+ report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
@@ -474,6 +505,11 @@ VarArgHelper*
CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
MemorySanitizerVisitor &Visitor);
+unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
+ if (TypeSize <= 8) return 0;
+ return Log2_32_Ceil(TypeSize / 8);
+}
+
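A worked check of the mapping this helper implements, with a local stand-in for llvm::Log2_32_Ceil (a sketch under that one assumption):

    #include <cassert>

    static unsigned log2_32_ceil(unsigned X) { // stand-in for llvm::Log2_32_Ceil
      unsigned R = 0;
      while ((1u << R) < X) ++R;
      return R;
    }

    static unsigned typeSizeToSizeIndex(unsigned TypeSizeInBits) {
      if (TypeSizeInBits <= 8) return 0;
      return log2_32_ceil(TypeSizeInBits / 8);
    }

    int main() {
      assert(typeSizeToSizeIndex(8)   == 0); // 1-byte access -> index 0
      assert(typeSizeToSizeIndex(16)  == 1); // 2-byte access -> index 1
      assert(typeSizeToSizeIndex(32)  == 2); // 4-byte access -> index 2
      assert(typeSizeToSizeIndex(64)  == 3); // 8-byte access -> index 3
      assert(typeSizeToSizeIndex(128) == 4); // 16 bytes: not < kNumberOfAccessSizes,
                                             // so callers fall back to inline checks
    }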
/// This class does all the work for a given function. Store and Load
/// instructions store and load corresponding shadow and origin
/// values. Most instructions propagate shadow from arguments to their
@@ -529,9 +565,42 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return IRB.CreateCall(MS.MsanChainOriginFn, V);
}
- void materializeStores() {
+ void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
+ unsigned Alignment, bool AsCall) {
+ if (isa<StructType>(Shadow->getType())) {
+ IRB.CreateAlignedStore(updateOrigin(Origin, IRB), getOriginPtr(Addr, IRB),
+ Alignment);
+ } else {
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+ // TODO(eugenis): handle non-zero constant shadow by inserting an
+      // unconditional check (cannot simply fail compilation as this could
+ // be in the dead code).
+ if (isa<Constant>(ConvertedShadow)) return;
+ unsigned TypeSizeInBits =
+ MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+ unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
+ if (AsCall && SizeIndex < kNumberOfAccessSizes) {
+ Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
+ Value *ConvertedShadow2 = IRB.CreateZExt(
+ ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
+ IRB.CreateCall3(Fn, ConvertedShadow2,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ updateOrigin(Origin, IRB));
+ } else {
+ Value *Cmp = IRB.CreateICmpNE(
+ ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
+ IRBuilder<> IRBNew(CheckTerm);
+ IRBNew.CreateAlignedStore(updateOrigin(Origin, IRBNew),
+ getOriginPtr(Addr, IRBNew), Alignment);
+ }
+ }
+ }
+
+ void materializeStores(bool InstrumentWithCalls) {
for (size_t i = 0, n = StoreList.size(); i < n; i++) {
- StoreInst& I = *dyn_cast<StoreInst>(StoreList[i]);
+ StoreInst &I = *dyn_cast<StoreInst>(StoreList[i]);
IRBuilder<> IRB(&I);
Value *Val = I.getValueOperand();
@@ -540,53 +609,41 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
StoreInst *NewSI =
- IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
(void)NewSI;
- if (ClCheckAccessAddress)
- insertShadowCheck(Addr, &I);
+ if (ClCheckAccessAddress) insertShadowCheck(Addr, &I);
- if (I.isAtomic())
- I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ if (I.isAtomic()) I.setOrdering(addReleaseOrdering(I.getOrdering()));
if (MS.TrackOrigins) {
unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
- if (isa<StructType>(Shadow->getType())) {
- IRB.CreateAlignedStore(updateOrigin(getOrigin(Val), IRB),
- getOriginPtr(Addr, IRB), Alignment);
- } else {
- Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
-
- // TODO(eugenis): handle non-zero constant shadow by inserting an
- // unconditional check (can not simply fail compilation as this could
- // be in the dead code).
- if (isa<Constant>(ConvertedShadow))
- continue;
-
- Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
- getCleanShadow(ConvertedShadow), "_mscmp");
- Instruction *CheckTerm =
- SplitBlockAndInsertIfThen(Cmp, &I, false, MS.OriginStoreWeights);
- IRBuilder<> IRBNew(CheckTerm);
- IRBNew.CreateAlignedStore(updateOrigin(getOrigin(Val), IRBNew),
- getOriginPtr(Addr, IRBNew), Alignment);
- }
+ storeOrigin(IRB, Addr, Shadow, getOrigin(Val), Alignment,
+ InstrumentWithCalls);
}
}
}
- void materializeChecks() {
- for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
- Value *Shadow = InstrumentationList[i].Shadow;
- Instruction *OrigIns = InstrumentationList[i].OrigIns;
- IRBuilder<> IRB(OrigIns);
- DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
- Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
- DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
- // See the comment in materializeStores().
- if (isa<Constant>(ConvertedShadow))
- continue;
+ void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
+ bool AsCall) {
+ IRBuilder<> IRB(OrigIns);
+ DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+ DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
+ // See the comment in materializeStores().
+ if (isa<Constant>(ConvertedShadow)) return;
+ unsigned TypeSizeInBits =
+ MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+ unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
+ if (AsCall && SizeIndex < kNumberOfAccessSizes) {
+ Value *Fn = MS.MaybeWarningFn[SizeIndex];
+ Value *ConvertedShadow2 =
+ IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
+ IRB.CreateCall2(Fn, ConvertedShadow2, MS.TrackOrigins && Origin
+ ? Origin
+ : (Value *)IRB.getInt32(0));
+ } else {
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
@@ -595,14 +652,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.SetInsertPoint(CheckTerm);
if (MS.TrackOrigins) {
- Value *Origin = InstrumentationList[i].Origin;
- IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
+ IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0),
MS.OriginTLS);
}
IRB.CreateCall(MS.WarningFn);
IRB.CreateCall(MS.EmptyAsm);
DEBUG(dbgs() << " CHECK: " << *Cmp << "\n");
}
+ }
+
+ void materializeChecks(bool InstrumentWithCalls) {
+ for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
+ Instruction *OrigIns = InstrumentationList[i].OrigIns;
+ Value *Shadow = InstrumentationList[i].Shadow;
+ Value *Origin = InstrumentationList[i].Origin;
+ materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
+ }
DEBUG(dbgs() << "DONE:\n" << F);
}
@@ -662,17 +727,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Iterate all BBs in depth-first order and create shadow instructions
// for all instructions (where applicable).
// For PHI nodes we create dummy shadow PHIs which will be finalized later.
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
- BasicBlock *BB = *DI;
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
visit(*BB);
- }
+
// Finalize PHI nodes.
for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) {
PHINode *PN = ShadowPHINodes[i];
PHINode *PNS = cast<PHINode>(getShadow(PN));
- PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : 0;
+ PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
size_t NumValues = PN->getNumIncomingValues();
for (size_t v = 0; v < NumValues; v++) {
PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
@@ -683,12 +746,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
VAHelper->finalizeInstrumentation();
+ bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
+ InstrumentationList.size() + StoreList.size() >
+ (unsigned)ClInstrumentationWithCallThreshold;
+
// Delayed instrumentation of StoreInst.
// This may add new checks to be inserted later.
- materializeStores();
+ materializeStores(InstrumentWithCalls);
// Insert shadow value checks.
- materializeChecks();
+ materializeChecks(InstrumentWithCalls);
// Wrap indirect calls.
materializeIndirectCalls();
@@ -704,7 +771,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Compute the shadow type that corresponds to a given Type.
Type *getShadowTy(Type *OrigTy) {
if (!OrigTy->isSized()) {
- return 0;
+ return nullptr;
}
// For integer type, shadow is the same as the original type.
// This may return weird-sized types like i1.
@@ -784,7 +851,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Compute the origin address for a given function argument.
Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
int ArgOffset) {
- if (!MS.TrackOrigins) return 0;
+ if (!MS.TrackOrigins) return nullptr;
Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
@@ -825,7 +892,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Constant *getCleanShadow(Value *V) {
Type *ShadowTy = getShadowTy(V);
if (!ShadowTy)
- return 0;
+ return nullptr;
return Constant::getNullValue(ShadowTy);
}
@@ -845,7 +912,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Constant *getPoisonedShadow(Value *V) {
Type *ShadowTy = getShadowTy(V);
if (!ShadowTy)
- return 0;
+ return nullptr;
return getPoisonedShadow(ShadowTy);
}
@@ -936,7 +1003,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Get the origin for a value.
Value *getOrigin(Value *V) {
- if (!MS.TrackOrigins) return 0;
+ if (!MS.TrackOrigins) return nullptr;
if (isa<Instruction>(V) || isa<Argument>(V)) {
Value *Origin = OriginMap[V];
if (!Origin) {
@@ -1234,7 +1301,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
public:
Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) :
- Shadow(0), Origin(0), IRB(IRB), MSV(MSV) {}
+ Shadow(nullptr), Origin(nullptr), IRB(IRB), MSV(MSV) {}
/// \brief Add a pair of shadow and origin values to the mix.
Combiner &Add(Value *OpShadow, Value *OpOrigin) {
@@ -1265,7 +1332,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Add an application value to the mix.
Combiner &Add(Value *V) {
Value *OpShadow = MSV->getShadow(V);
- Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : 0;
+ Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
return Add(OpShadow, OpOrigin);
}
@@ -1480,7 +1547,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleSignedRelationalComparison(ICmpInst &I) {
Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
- Value* op = NULL;
+ Value* op = nullptr;
CmpInst::Predicate pre = I.getPredicate();
if (constOp0 && constOp0->isNullValue() &&
(pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) {
@@ -1789,7 +1856,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
case 1:
ConvertOp = I.getArgOperand(0);
- CopyOp = NULL;
+ CopyOp = nullptr;
break;
default:
llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
@@ -1803,7 +1870,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// FIXME: consider propagating shadow of ConvertOp, at least in the case of
// int->any conversion.
Value *ConvertShadow = getShadow(ConvertOp);
- Value *AggShadow = 0;
+ Value *AggShadow = nullptr;
if (ConvertOp->getType()->isVectorTy()) {
AggShadow = IRB.CreateExtractElement(
ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
@@ -2055,7 +2122,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
continue;
}
unsigned Size = 0;
- Value *Store = 0;
+ Value *Store = nullptr;
// Compute the Shadow for arg even if it is ByVal, because
// in that case getShadow() will copy the actual arg shadow to
// __msan_param_tls.
@@ -2080,7 +2147,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateStore(getOrigin(A),
getOriginPtrForArgument(A, IRB, ArgOffset));
(void)Store;
- assert(Size != 0 && Store != 0);
+ assert(Size != 0 && Store != nullptr);
DEBUG(dbgs() << " Param:" << *Store << "\n");
ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
}
@@ -2098,7 +2165,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Until we have full dynamic coverage, make sure the retval shadow is 0.
Value *Base = getShadowPtrForRetval(&I, IRBBefore);
IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
- Instruction *NextInsn = 0;
+ Instruction *NextInsn = nullptr;
if (CS.isCall()) {
NextInsn = I.getNextNode();
} else {
@@ -2318,7 +2385,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
MemorySanitizerVisitor &MSV)
- : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { }
+ : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+ VAArgOverflowSize(nullptr) {}
enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 5ffb17c..8fe9bca 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -19,8 +19,6 @@
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "tsan"
-
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
@@ -46,6 +44,8 @@
using namespace llvm;
+#define DEBUG_TYPE "tsan"
+
static cl::opt<std::string> ClBlacklistFile("tsan-blacklist",
cl::desc("Blacklist file"), cl::Hidden);
static cl::opt<bool> ClInstrumentMemoryAccesses(
@@ -78,7 +78,7 @@ namespace {
struct ThreadSanitizer : public FunctionPass {
ThreadSanitizer(StringRef BlacklistFile = StringRef())
: FunctionPass(ID),
- DL(0),
+ DL(nullptr),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
: BlacklistFile) { }
const char *getPassName() const override;
@@ -174,8 +174,8 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
- TsanAtomicRMW[op][i] = NULL;
- const char *NamePart = NULL;
+ TsanAtomicRMW[op][i] = nullptr;
+ const char *NamePart = nullptr;
if (op == AtomicRMWInst::Xchg)
NamePart = "_exchange";
else if (op == AtomicRMWInst::Add)
@@ -226,7 +226,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
bool ThreadSanitizer::doInitialization(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
if (!DLP)
- return false;
+ report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
@@ -518,7 +518,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
if (Idx < 0)
return false;
Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
- if (F == NULL)
+ if (!F)
return false;
const size_t ByteSize = 1 << Idx;
const size_t BitSize = ByteSize * 8;
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index 4eac39d..4098428 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -43,34 +43,34 @@ public:
EPT_RetainAutoreleaseRV
};
- ARCRuntimeEntryPoints() : TheModule(0),
- AutoreleaseRV(0),
- Release(0),
- Retain(0),
- RetainBlock(0),
- Autorelease(0),
- StoreStrong(0),
- RetainRV(0),
- RetainAutorelease(0),
- RetainAutoreleaseRV(0) { }
+ ARCRuntimeEntryPoints() : TheModule(nullptr),
+ AutoreleaseRV(nullptr),
+ Release(nullptr),
+ Retain(nullptr),
+ RetainBlock(nullptr),
+ Autorelease(nullptr),
+ StoreStrong(nullptr),
+ RetainRV(nullptr),
+ RetainAutorelease(nullptr),
+ RetainAutoreleaseRV(nullptr) { }
~ARCRuntimeEntryPoints() { }
void Initialize(Module *M) {
TheModule = M;
- AutoreleaseRV = 0;
- Release = 0;
- Retain = 0;
- RetainBlock = 0;
- Autorelease = 0;
- StoreStrong = 0;
- RetainRV = 0;
- RetainAutorelease = 0;
- RetainAutoreleaseRV = 0;
+ AutoreleaseRV = nullptr;
+ Release = nullptr;
+ Retain = nullptr;
+ RetainBlock = nullptr;
+ Autorelease = nullptr;
+ StoreStrong = nullptr;
+ RetainRV = nullptr;
+ RetainAutorelease = nullptr;
+ RetainAutoreleaseRV = nullptr;
}
Constant *get(const EntryPointType entry) {
- assert(TheModule != 0 && "Not initialized.");
+ assert(TheModule != nullptr && "Not initialized.");
switch (entry) {
case EPT_AutoreleaseRV:
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 8780359..08c8842 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -20,7 +20,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-dependency"
#include "ObjCARC.h"
#include "DependencyAnalysis.h"
#include "ProvenanceAnalysis.h"
@@ -29,6 +28,8 @@
using namespace llvm;
using namespace llvm::objcarc;
+#define DEBUG_TYPE "objc-arc-dependency"
+
/// Test whether the given instruction can result in a reference count
/// modification (positive or negative) for the pointer's object.
bool
@@ -223,7 +224,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
if (PI == PE)
// If we've reached the function entry, produce a null dependence.
- DependingInsts.insert(0);
+ DependingInsts.insert(nullptr);
else
// Add the predecessors to the worklist.
do {
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index cb7e4da..1a25391 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -24,7 +24,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-ap-elim"
#include "ObjCARC.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
@@ -34,6 +33,8 @@
using namespace llvm;
using namespace llvm::objcarc;
+#define DEBUG_TYPE "objc-arc-ap-elim"
+
namespace {
/// \brief Autorelease pool elimination.
class ObjCARCAPElim : public ModulePass {
@@ -93,7 +94,7 @@ bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
bool Changed = false;
- Instruction *Push = 0;
+ Instruction *Push = nullptr;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = I++;
switch (GetBasicInstructionClass(Inst)) {
@@ -112,11 +113,11 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
Inst->eraseFromParent();
Push->eraseFromParent();
}
- Push = 0;
+ Push = nullptr;
break;
case IC_CallOrUser:
if (MayAutorelease(ImmutableCallSite(Inst)))
- Push = 0;
+ Push = nullptr;
break;
default:
break;
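The push/pop pairing above removes an autorelease-pool push that reaches its matching pop with no intervening instruction that may autorelease. A self-contained illustration against the real runtime entry points (the two declared calls exist in the ObjC runtime; the example function is hypothetical):

    extern "C" void *objc_autoreleasePoolPush(void);
    extern "C" void objc_autoreleasePoolPop(void *);

    void emptyPoolScope() {
      void *Pool = objc_autoreleasePoolPush(); // removable push ...
      // ... nothing here may autorelease ...
      objc_autoreleasePoolPop(Pool);           // ... so the whole pair is deleted
    }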
@@ -154,8 +155,8 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
OI != OE; ++OI) {
Value *Op = *OI;
- // llvm.global_ctors is an array of pairs where the second members
- // are constructor functions.
+ // llvm.global_ctors is an array of three-field structs where the second
+ // members are constructor functions.
Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
// If the user used a constructor function with the wrong signature and
// it got bitcasted or whatever, look the other way.
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index d18667b..2c09e70 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -20,7 +20,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-aa"
#include "ObjCARC.h"
#include "ObjCARCAliasAnalysis.h"
#include "llvm/IR/Instruction.h"
@@ -28,6 +27,8 @@
#include "llvm/PassAnalysisSupport.h"
#include "llvm/PassSupport.h"
+#define DEBUG_TYPE "objc-arc-aa"
+
namespace llvm {
class Function;
class Value;
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 3da5a0e..f48d53d 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -26,7 +26,6 @@
// TODO: ObjCARCContract could insert PHI nodes when uses aren't
// dominated by single calls.
-#define DEBUG_TYPE "objc-arc-contract"
#include "ObjCARC.h"
#include "ARCRuntimeEntryPoints.h"
#include "DependencyAnalysis.h"
@@ -40,6 +39,8 @@
using namespace llvm;
using namespace llvm::objcarc;
+#define DEBUG_TYPE "objc-arc-contract"
+
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
@@ -157,7 +158,7 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
// Check that there are no instructions between the retain and the autorelease
// (such as an autorelease_pop) which may change the count.
- CallInst *Retain = 0;
+ CallInst *Retain = nullptr;
if (Class == IC_AutoreleaseRV)
FindDependencies(RetainAutoreleaseRVDep, Arg,
Autorelease->getParent(), Autorelease,
@@ -218,7 +219,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
BasicBlock::iterator I = Load, End = BB->end();
++I;
AliasAnalysis::Location Loc = AA->getLocation(Load);
- StoreInst *Store = 0;
+ StoreInst *Store = nullptr;
bool SawRelease = false;
for (; !Store || !SawRelease; ++I) {
if (I == End)
@@ -300,7 +301,7 @@ bool ObjCARCContract::doInitialization(Module &M) {
EP.Initialize(&M);
// Initialize RetainRVMarker.
- RetainRVMarker = 0;
+ RetainRVMarker = nullptr;
if (NamedMDNode *NMD =
M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
if (NMD->getNumOperands() == 1) {
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 8bec699..bf9fcbb 100644
--- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -23,8 +23,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-expand"
-
#include "ObjCARC.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
@@ -40,6 +38,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#define DEBUG_TYPE "objc-arc-expand"
+
namespace llvm {
class Module;
}
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index eed3cb2..dd4dd50 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -24,7 +24,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-opts"
#include "ObjCARC.h"
#include "ARCRuntimeEntryPoints.h"
#include "DependencyAnalysis.h"
@@ -44,6 +43,8 @@
using namespace llvm;
using namespace llvm::objcarc;
+#define DEBUG_TYPE "objc-arc-opts"
+
/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific.
/// @{
@@ -156,7 +157,7 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
return FindSingleUseIdentifiedObject(
cast<CallInst>(Arg)->getArgOperand(0));
if (!IsObjCIdentifiedObject(Arg))
- return 0;
+ return nullptr;
return Arg;
}
@@ -165,12 +166,12 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
if (IsObjCIdentifiedObject(Arg)) {
for (const User *U : Arg->users())
if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
- return 0;
+ return nullptr;
return Arg;
}
- return 0;
+ return nullptr;
}
/// This is a wrapper around getUnderlyingObjCPtr along the lines of
@@ -373,7 +374,7 @@ namespace {
bool CFGHazardAfflicted;
RRInfo() :
- KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0),
+ KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr),
CFGHazardAfflicted(false) {}
void clear();
@@ -388,7 +389,7 @@ namespace {
void RRInfo::clear() {
KnownSafe = false;
IsTailCallRelease = false;
- ReleaseMetadata = 0;
+ ReleaseMetadata = nullptr;
Calls.clear();
ReverseInsertPts.clear();
CFGHazardAfflicted = false;
@@ -397,7 +398,7 @@ void RRInfo::clear() {
bool RRInfo::Merge(const RRInfo &Other) {
// Conservatively merge the ReleaseMetadata information.
if (ReleaseMetadata != Other.ReleaseMetadata)
- ReleaseMetadata = 0;
+ ReleaseMetadata = nullptr;
// Conservatively merge the boolean state.
KnownSafe &= Other.KnownSafe;
@@ -456,7 +457,7 @@ namespace {
}
bool IsTrackingImpreciseReleases() const {
- return RRI.ReleaseMetadata != 0;
+ return RRI.ReleaseMetadata != nullptr;
}
const MDNode *GetReleaseMetadata() const {
@@ -818,7 +819,7 @@ ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
/// arc annotation processor tool. If the function is an
static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
Value *Ptr) {
- MDString *Hash = 0;
+ MDString *Hash = nullptr;
// If pointer is a result of an instruction and it does not have a source
// MDNode attached to it, attach a new MDNode onto it. If pointer is a result of
@@ -880,7 +881,7 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId,
MDString *PtrSourceMDNodeID,
Sequence OldSeq,
Sequence NewSeq) {
- MDNode *Node = 0;
+ MDNode *Node = nullptr;
Value *tmp[3] = {PtrSourceMDNodeID,
SequenceToMDString(Inst->getContext(),
OldSeq),
@@ -916,7 +917,7 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
Value *PtrName;
StringRef Tmp = Ptr->getName();
- if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
Tmp + "_STR");
PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
@@ -925,7 +926,7 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
Value *S;
std::string SeqStr = SequenceToString(Seq);
- if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
SeqStr + "_STR");
S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
@@ -959,7 +960,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
Value *PtrName;
StringRef Tmp = Ptr->getName();
- if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
Tmp + "_STR");
PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
@@ -968,7 +969,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
Value *S;
std::string SeqStr = SequenceToString(Seq);
- if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
SeqStr + "_STR");
S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
@@ -1718,7 +1719,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
BBState &MyStates) {
bool NestingDetected = false;
InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
+ const Value *Arg = nullptr;
DEBUG(dbgs() << "Class: " << Class << "\n");
@@ -1974,7 +1975,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
BBState &MyStates) {
bool NestingDetected = false;
InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
+ const Value *Arg = nullptr;
switch (Class) {
case IC_RetainBlock:
@@ -2026,7 +2027,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
switch (OldSeq) {
case S_Retain:
case S_CanRelease:
- if (OldSeq == S_Retain || ReleaseMetadata != 0)
+ if (OldSeq == S_Retain || ReleaseMetadata != nullptr)
S.ClearReverseInsertPts();
// FALL THROUGH
case S_Use:
@@ -2432,7 +2433,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
} else {
if (ReleasesToMove.ReleaseMetadata !=
NewRetainReleaseRRI.ReleaseMetadata)
- ReleasesToMove.ReleaseMetadata = 0;
+ ReleasesToMove.ReleaseMetadata = nullptr;
if (ReleasesToMove.IsTailCallRelease !=
NewRetainReleaseRRI.IsTailCallRelease)
ReleasesToMove.IsTailCallRelease = false;
@@ -2884,7 +2885,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
FindDependencies(CanChangeRetainCount, Arg,
BB, Autorelease, DepInsts, Visited, PA);
if (DepInsts.size() != 1)
- return 0;
+ return nullptr;
CallInst *Retain =
dyn_cast_or_null<CallInst>(*DepInsts.begin());
@@ -2893,7 +2894,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
if (!Retain ||
!IsRetain(GetBasicInstructionClass(Retain)) ||
GetObjCArg(Retain) != Arg) {
- return 0;
+ return nullptr;
}
return Retain;
@@ -2911,17 +2912,17 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
FindDependencies(NeedsPositiveRetainCount, Arg,
BB, Ret, DepInsts, V, PA);
if (DepInsts.size() != 1)
- return 0;
+ return nullptr;
CallInst *Autorelease =
dyn_cast_or_null<CallInst>(*DepInsts.begin());
if (!Autorelease)
- return 0;
+ return nullptr;
InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
if (!IsAutorelease(AutoreleaseClass))
- return 0;
+ return nullptr;
if (GetObjCArg(Autorelease) != Arg)
- return 0;
+ return nullptr;
return Autorelease;
}
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index fa8b598..1a3a4aa 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "adce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -28,6 +27,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "adce"
+
STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
index 3894f93..079cc86 100644
--- a/lib/Transforms/Scalar/Android.mk
+++ b/lib/Transforms/Scalar/Android.mk
@@ -32,6 +32,7 @@ transforms_scalar_SRC_FILES := \
Scalar.cpp \
Scalarizer.cpp \
ScalarReplAggregates.cpp \
+ SeparateConstOffsetFromGEP.cpp \
SimplifyCFGPass.cpp \
Sink.cpp \
StructurizeCFG.cpp \
@@ -60,11 +61,6 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES)
LOCAL_MODULE:= libLLVMScalarOpts
-# Override the default optimization level to work around a SIGSEGV
-# on x86 target builds for SROA.cpp.
-# Bug: 8047767
-LOCAL_CFLAGS_x86 += -O1
-
LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 27434c1..3ad1488 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -5,19 +5,19 @@ add_llvm_library(LLVMScalarOpts
CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
- Scalarizer.cpp
EarlyCSE.cpp
- GlobalMerge.cpp
+ FlattenCFGPass.cpp
GVN.cpp
+ GlobalMerge.cpp
IndVarSimplify.cpp
JumpThreading.cpp
LICM.cpp
LoopDeletion.cpp
LoopIdiomRecognize.cpp
LoopInstSimplify.cpp
+ LoopRerollPass.cpp
LoopRotation.cpp
LoopStrengthReduce.cpp
- LoopRerollPass.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
LowerAtomic.cpp
@@ -25,13 +25,14 @@ add_llvm_library(LLVMScalarOpts
PartiallyInlineLibCalls.cpp
Reassociate.cpp
Reg2Mem.cpp
- SampleProfile.cpp
SCCP.cpp
SROA.cpp
+ SampleProfile.cpp
Scalar.cpp
ScalarReplAggregates.cpp
+ Scalarizer.cpp
+ SeparateConstOffsetFromGEP.cpp
SimplifyCFGPass.cpp
- FlattenCFGPass.cpp
Sink.cpp
StructurizeCFG.cpp
TailRecursionElimination.cpp
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 57a1521..763d02b 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -33,7 +33,6 @@
// %0 = load i64* inttoptr (i64 big_constant to i64*)
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "consthoist"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -44,9 +43,12 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include <tuple>
using namespace llvm;
+#define DEBUG_TYPE "consthoist"
+
STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
STATISTIC(NumConstantsRebased, "Number of constants rebased");
@@ -117,7 +119,8 @@ class ConstantHoisting : public FunctionPass {
SmallVector<ConstantInfo, 8> ConstantVec;
public:
static char ID; // Pass identification, replacement for typeid
- ConstantHoisting() : FunctionPass(ID), TTI(0), DT(0), Entry(0) {
+ ConstantHoisting() : FunctionPass(ID), TTI(nullptr), DT(nullptr),
+ Entry(nullptr) {
initializeConstantHoistingPass(*PassRegistry::getPassRegistry());
}
@@ -206,7 +209,16 @@ bool ConstantHoisting::runOnFunction(Function &Fn) {
/// \brief Find the constant materialization insertion point.
Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
unsigned Idx) const {
- // The simple and common case.
+ // If the operand is a cast instruction, then we have to materialize the
+ // constant before the cast instruction.
+ if (Idx != ~0U) {
+ Value *Opnd = Inst->getOperand(Idx);
+ if (auto CastInst = dyn_cast<Instruction>(Opnd))
+ if (CastInst->isCast())
+ return CastInst;
+ }
+
+ // The simple and common case. This also includes constant expressions.
if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst))
return Inst;
@@ -228,7 +240,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
SmallPtrSet<BasicBlock *, 8> BBs;
for (auto const &RCI : ConstInfo.RebasedConstants)
for (auto const &U : RCI.Uses)
- BBs.insert(U.Inst->getParent());
+ BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
if (BBs.count(Entry))
return &Entry->front();
@@ -487,8 +499,8 @@ void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset,
ClonedCastInst->insertAfter(CastInst);
// Use the same debug location as the original cast instruction.
ClonedCastInst->setDebugLoc(CastInst->getDebugLoc());
- DEBUG(dbgs() << "Clone instruction: " << *ClonedCastInst << '\n'
- << "To : " << *CastInst << '\n');
+ DEBUG(dbgs() << "Clone instruction: " << *CastInst << '\n'
+ << "To : " << *ClonedCastInst << '\n');
}
DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 7045b36..dd51ce1 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -18,7 +18,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "constprop"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -31,6 +30,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "constprop"
+
STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
@@ -68,7 +69,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
}
bool Changed = false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 0490767..0829462 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "correlated-value-propagation"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -26,6 +25,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "correlated-value-propagation"
+
STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
@@ -138,7 +139,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
}
bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
- Value *Pointer = 0;
+ Value *Pointer = nullptr;
if (LoadInst *L = dyn_cast<LoadInst>(I))
Pointer = L->getPointerOperand();
else
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8377fd9..99fac75 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/InstIterator.h"
@@ -26,6 +25,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "dce"
+
STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
STATISTIC(DCEEliminated, "Number of insts removed");
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f54c00d..3af8ee7 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dse"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "dse"
+
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
@@ -49,7 +50,7 @@ namespace {
const TargetLibraryInfo *TLI;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
+ DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) {
initializeDSEPass(*PassRegistry::getPassRegistry());
}
@@ -69,7 +70,7 @@ namespace {
if (DT->isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
- AA = 0; MD = 0; DT = 0;
+ AA = nullptr; MD = nullptr; DT = nullptr;
return Changed;
}
@@ -111,9 +112,9 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
/// If ValueSet is non-null, remove any deleted instructions from it as well.
///
static void DeleteDeadInstruction(Instruction *I,
- MemoryDependenceAnalysis &MD,
- const TargetLibraryInfo *TLI,
- SmallSetVector<Value*, 16> *ValueSet = 0) {
+ MemoryDependenceAnalysis &MD,
+ const TargetLibraryInfo *TLI,
+ SmallSetVector<Value*, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
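The loop that follows drops each operand of the dead instruction and re-queues any operand whose use count falls to zero. A minimal model of that cascade over a toy value graph (types and names are illustrative, not LLVM's):

    #include <vector>

    struct Node {
      std::vector<Node *> Operands;
      int Uses = 0;
      bool Dead = false;
    };

    void cascadeDelete(Node *Root) {
      std::vector<Node *> Worklist{Root};
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        N->Dead = true; // "erase" N, then drop its references to its operands
        for (Node *Op : N->Operands)
          if (--Op->Uses == 0)      // operand just lost its last use
            Worklist.push_back(Op); // it is now dead too
      }
    }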
@@ -131,7 +132,7 @@ static void DeleteDeadInstruction(Instruction *I,
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
+ DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
@@ -203,13 +204,13 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// memset/memcpy, which writes more than an i8.
- if (Loc.Size == AliasAnalysis::UnknownSize && DL == 0)
+ if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr)
return AliasAnalysis::Location();
return Loc;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
- if (II == 0) return AliasAnalysis::Location();
+ if (!II) return AliasAnalysis::Location();
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
@@ -217,7 +218,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// init.trampoline, which writes more than an i8.
- if (DL == 0) return AliasAnalysis::Location();
+ if (!DL) return AliasAnalysis::Location();
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
@@ -359,7 +360,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// If we have no DataLayout information around, then the size of the store
// is inferrable from the pointee type. If they are the same type, then
// we know that the store is safe.
- if (DL == 0 && Later.Ptr->getType() == Earlier.Ptr->getType())
+ if (DL == nullptr && Later.Ptr->getType() == Earlier.Ptr->getType())
return OverwriteComplete;
return OverwriteUnknown;
@@ -373,7 +374,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize || DL == 0)
+ Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
@@ -461,7 +462,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
// Self reads can only happen for instructions that read memory. Get the
// location read.
AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
- if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
+ if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
@@ -528,7 +529,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
DeleteDeadInstruction(SI, *MD, TLI);
- if (NextInst == 0) // Next instruction deleted.
+ if (!NextInst) // Next instruction deleted.
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
@@ -543,7 +544,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
// If we didn't get a useful location, fail.
- if (Loc.Ptr == 0)
+ if (!Loc.Ptr)
continue;
while (InstDep.isDef() || InstDep.isClobber()) {
@@ -557,7 +558,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *DepWrite = InstDep.getInst();
AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
// If we didn't get a useful location, or if it isn't a size, bail out.
- if (DepLoc.Ptr == 0)
+ if (!DepLoc.Ptr)
break;
// If we find a write that is a) removable (i.e., non-volatile), b) is
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index af2c3d1..735f5c1 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "early-cse"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
@@ -29,6 +28,8 @@
#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "early-cse"
+
STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
@@ -207,7 +208,7 @@ namespace {
return false;
CallInst *CI = dyn_cast<CallInst>(Inst);
- if (CI == 0 || !CI->onlyReadsMemory())
+ if (!CI || !CI->onlyReadsMemory())
return false;
return true;
}
@@ -405,14 +406,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// have invalidated the live-out memory values of our parent value. For now,
// just be conservative and invalidate memory if this block has multiple
// predecessors.
- if (BB->getSinglePredecessor() == 0)
+ if (!BB->getSinglePredecessor())
++CurrentGeneration;
/// LastStore - Keep track of the last non-volatile store that we saw... for
  /// as long as there is no instruction that reads memory. If we see a store
/// to the same location, we delete the dead store. This zaps trivial dead
/// stores which can occur in bitfield code among other things.
- StoreInst *LastStore = 0;
+ StoreInst *LastStore = nullptr;
bool Changed = false;
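A minimal source-level instance of the "trivial dead store" this zaps (illustrative): the first store below is overwritten by the second with no read of the location in between, so it is deleted. Bitfield lowering commonly produces such back-to-back stores to the same word.

    void initTwice(int *P) {
      *P = 0;  // dead: nothing reads *P before ...
      *P = 42; // ... this store overwrites it
    }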
@@ -462,7 +463,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Ignore volatile loads.
if (!LI->isSimple()) {
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
@@ -470,7 +471,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal =
AvailableLoads->lookup(Inst->getOperand(0));
- if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
<< *InVal.first << '\n');
if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
@@ -483,20 +484,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Otherwise, remember that we have this instruction.
AvailableLoads->insert(Inst->getOperand(0),
std::pair<Value*, unsigned>(Inst, CurrentGeneration));
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
// If this instruction may read from memory, forget LastStore.
if (Inst->mayReadFromMemory())
- LastStore = 0;
+ LastStore = nullptr;
// If this is a read-only call, process it.
if (CallValue::canHandle(Inst)) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
- if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
<< *InVal.first << '\n');
if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
@@ -528,7 +529,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LastStore->eraseFromParent();
Changed = true;
++NumDSE;
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
@@ -558,7 +559,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
std::vector<StackNode *> nodesToProcess;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index e7f2564..0430c18 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "flattencfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
@@ -19,6 +18,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "flattencfg"
+
namespace {
struct FlattenCFGPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 33c387c..6d07ddd 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -15,11 +15,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -50,6 +50,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "gvn"
+
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
@@ -213,13 +215,13 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode,
}
Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
- assert(EI != 0 && "Not an ExtractValueInst?");
+ assert(EI && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
e.opcode = 0;
IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
- if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+ if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
// EI might be an extract from one of our recognised intrinsics. If it
// is we'll synthesize a semantically equivalent expression instead of
// an extract value expression.
@@ -327,7 +329,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
// FIXME: Move the checking logic to MemDep!
- CallInst* cdep = 0;
+ CallInst* cdep = nullptr;
// Check to see if we have a single dominating call instruction that is
// identical to C.
@@ -338,8 +340,8 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
// We don't handle non-definitions. If we already have a call, reject
// instruction dependencies.
- if (!I->getResult().isDef() || cdep != 0) {
- cdep = 0;
+ if (!I->getResult().isDef() || cdep != nullptr) {
+ cdep = nullptr;
break;
}
@@ -350,7 +352,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
continue;
}
- cdep = 0;
+ cdep = nullptr;
break;
}
@@ -551,7 +553,7 @@ namespace {
static AvailableValueInBlock getUndef(BasicBlock *BB) {
AvailableValueInBlock Res;
Res.BB = BB;
- Res.Val.setPointer(0);
+ Res.Val.setPointer(nullptr);
Res.Val.setInt(UndefVal);
Res.Offset = 0;
return Res;
@@ -611,7 +613,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ : FunctionPass(ID), NoLoads(noloads), MD(nullptr) {
initializeGVNPass(*PassRegistry::getPassRegistry());
}
@@ -649,7 +651,7 @@ namespace {
/// removeFromLeaderTable - Scan the list of values corresponding to a given
/// value number, and remove the given instruction if encountered.
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
- LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Prev = nullptr;
LeaderTableEntry* Curr = &LeaderTable[N];
while (Curr->Val != I || Curr->BB != BB) {
@@ -661,8 +663,8 @@ namespace {
Prev->Next = Curr->Next;
} else {
if (!Curr->Next) {
- Curr->Val = 0;
- Curr->BB = 0;
+ Curr->Val = nullptr;
+ Curr->BB = nullptr;
} else {
LeaderTableEntry* Next = Curr->Next;
Curr->Val = Next->Val;
@@ -855,7 +857,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
Instruction *InsertPt,
const DataLayout &DL) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL))
- return 0;
+ return nullptr;
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
@@ -1060,7 +1062,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
const DataLayout &DL) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
- if (SizeCst == 0) return -1;
+ if (!SizeCst) return -1;
uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
// If this is memset, we just need to see if the offset is valid in the size
@@ -1075,10 +1077,10 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemTransferInst *MTI = cast<MemTransferInst>(MI);
Constant *Src = dyn_cast<Constant>(MTI->getSource());
- if (Src == 0) return -1;
+ if (!Src) return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL));
- if (GV == 0 || !GV->isConstant()) return -1;
+ if (!GV || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
@@ -1420,8 +1422,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
if (DepLI != LI && Address && DL) {
- int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
- LI->getPointerOperand(),
+ int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address,
DepLI, *DL);
if (Offset != -1) {
@@ -1469,8 +1470,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (DL == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), *DL)) {
+ if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), *DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1486,7 +1487,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LD->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (DL == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)){
+ if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1539,7 +1540,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check to see how many predecessors have the loaded value fully
// available.
- DenseMap<BasicBlock*, Value*> PredLoads;
+ MapVector<BasicBlock *, Value *> PredLoads;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
@@ -1553,7 +1554,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
continue;
}
- PredLoads[Pred] = 0;
if (Pred->getTerminator()->getNumSuccessors() != 1) {
if (isa<IndirectBrInst>(Pred->getTerminator())) {
@@ -1570,11 +1570,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
}
CriticalEdgePred.push_back(Pred);
+ } else {
+ // Only add the predecessors that will not be split for now.
+ PredLoads[Pred] = nullptr;
}
}
// Decide whether PRE is profitable for this load.
- unsigned NumUnavailablePreds = PredLoads.size();
+ unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should already be eliminated!");
@@ -1586,12 +1589,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
// Split critical edges, and update the unavailable predecessors accordingly.
- for (SmallVectorImpl<BasicBlock *>::iterator I = CriticalEdgePred.begin(),
- E = CriticalEdgePred.end(); I != E; I++) {
- BasicBlock *OrigPred = *I;
+ for (BasicBlock *OrigPred : CriticalEdgePred) {
BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
- PredLoads.erase(OrigPred);
- PredLoads[NewPred] = 0;
+ assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
+ PredLoads[NewPred] = nullptr;
DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
<< LoadBB->getName() << '\n');
}
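
Replacing DenseMap with MapVector for PredLoads is presumably about determinism rather than speed: MapVector iterates in insertion order, so the PRE loads below are materialized in the same order on every run, while DenseMap iteration follows hash order. A tiny standalone illustration against the LLVM ADT header:

    #include "llvm/ADT/MapVector.h"
    #include <cstdio>

    int main() {
      llvm::MapVector<int, const char *> PredLoads;
      PredLoads[3] = "pred.a"; // insertion order is remembered,
      PredLoads[1] = "pred.b"; // unlike DenseMap's hash order
      for (auto &KV : PredLoads) // prints "3 -> pred.a" then "1 -> pred.b"
        std::printf("%d -> %s\n", KV.first, KV.second);
      return 0;
    }
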
@@ -1599,9 +1600,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
- for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
- E = PredLoads.end(); I != E; ++I) {
- BasicBlock *UnavailablePred = I->first;
+ for (auto &PredLoad : PredLoads) {
+ BasicBlock *UnavailablePred = PredLoad.first;
// Do PHI translation to get its value in the predecessor if necessary. The
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
@@ -1610,20 +1610,20 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), DL);
- Value *LoadPtr = 0;
+ Value *LoadPtr = nullptr;
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
- if (LoadPtr == 0) {
+ if (!LoadPtr) {
DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
<< *LI->getPointerOperand() << "\n");
CanDoPRE = false;
break;
}
- I->second = LoadPtr;
+ PredLoad.second = LoadPtr;
}
if (!CanDoPRE) {
@@ -1632,8 +1632,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (MD) MD->removeInstruction(I);
I->eraseFromParent();
}
- // HINT:Don't revert the edge-splitting as following transformation may
- // also need to split these critial edges.
+ // HINT: Don't revert the edge-splitting as following transformation may
+ // also need to split these critical edges.
return !CriticalEdgePred.empty();
}
@@ -1654,10 +1654,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
VN.lookup_or_add(NewInsts[i]);
}
- for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
- E = PredLoads.end(); I != E; ++I) {
- BasicBlock *UnavailablePred = I->first;
- Value *LoadPtr = I->second;
+ for (const auto &PredLoad : PredLoads) {
+ BasicBlock *UnavailablePred = PredLoad.first;
+ Value *LoadPtr = PredLoad.second;
Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
LI->getAlignment(),
@@ -1776,7 +1775,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
MDNode *ReplMD = Metadata[i].second;
switch(Kind) {
default:
- ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata
+ ReplInst->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_dbg:
llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
@@ -1832,7 +1831,7 @@ bool GVN::processLoad(LoadInst *L) {
// a common base + constant offset, and if the previous store (or memset)
// completely covers this load. This sort of thing can happen in bitfield
// access code.
- Value *AvailVal = 0;
+ Value *AvailVal = nullptr;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
L->getPointerOperand(),
@@ -1920,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
if (DL) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
L, *DL);
- if (StoredVal == 0)
+ if (!StoredVal)
return false;
DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
@@ -1949,7 +1948,7 @@ bool GVN::processLoad(LoadInst *L) {
if (DL) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
L, *DL);
- if (AvailableVal == 0)
+ if (!AvailableVal)
return false;
DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
@@ -1999,9 +1998,9 @@ bool GVN::processLoad(LoadInst *L) {
// a few comparisons of DFS numbers.
Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
- if (!Vals.Val) return 0;
+ if (!Vals.Val) return nullptr;
- Value *Val = 0;
+ Value *Val = nullptr;
if (DT->dominates(Vals.BB, BB)) {
Val = Vals.Val;
if (isa<Constant>(Val)) return Val;
@@ -2052,7 +2051,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
const BasicBlock *Src = E.getStart();
assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
(void)Src;
- return Pred != 0;
+ return Pred != nullptr;
}
/// propagateEquality - The given values are known to be equal in every block
@@ -2296,7 +2295,7 @@ bool GVN::processInstruction(Instruction *I) {
// Perform fast-path value-number based elimination of values inherited from
// dominators.
Value *repl = findLeader(I->getParent(), Num);
- if (repl == 0) {
+ if (!repl) {
// Failure, just remember this instance for future use.
addToLeaderTable(Num, I, I->getParent());
return false;
@@ -2319,7 +2318,7 @@ bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -2421,10 +2420,7 @@ bool GVN::processBlock(BasicBlock *BB) {
bool GVN::performPRE(Function &F) {
bool Changed = false;
SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
- BasicBlock *CurrentBlock = *DI;
-
+ for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
// Nothing to PRE in the entry block.
if (CurrentBlock == &F.getEntryBlock()) continue;
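
The range form depth_first(...) used here (and in iterateOnFunction further down) comes from llvm/ADT/DepthFirstIterator.h and is equivalent to the old df_begin/df_end pair. A minimal sketch of its use on a function's CFG:

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/IR/CFG.h" // GraphTraits for BasicBlock graphs
    #include "llvm/IR/Function.h"

    // Count every block reachable from the entry, in depth-first preorder.
    static unsigned countReachableBlocks(llvm::Function &F) {
      unsigned N = 0;
      for (llvm::BasicBlock *BB : llvm::depth_first(&F.getEntryBlock())) {
        (void)BB;
        ++N;
      }
      return N;
    }
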
@@ -2464,7 +2460,7 @@ bool GVN::performPRE(Function &F) {
// more complicated to get right.
unsigned NumWith = 0;
unsigned NumWithout = 0;
- BasicBlock *PREPred = 0;
+ BasicBlock *PREPred = nullptr;
predMap.clear();
for (pred_iterator PI = pred_begin(CurrentBlock),
@@ -2482,8 +2478,8 @@ bool GVN::performPRE(Function &F) {
}
Value* predV = findLeader(P, ValNo);
- if (predV == 0) {
- predMap.push_back(std::make_pair(static_cast<Value *>(0), P));
+ if (!predV) {
+ predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
++NumWithout;
} else if (predV == CurInst) {
@@ -2637,9 +2633,8 @@ bool GVN::iterateOnFunction(Function &F) {
//
std::vector<BasicBlock *> BBVect;
BBVect.reserve(256);
- for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
- DE = df_end(DT->getRootNode()); DI != DE; ++DI)
- BBVect.push_back(DI->getBlock());
+ for (DomTreeNode *x : depth_first(DT->getRootNode()))
+ BBVect.push_back(x->getBlock());
for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
I != E; I++)
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 8ffd64b..990d067 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -51,7 +51,6 @@
// note that we saved 2 registers here almost "for free".
// ===---------------------------------------------------------------------===//
-#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -70,6 +69,8 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+#define DEBUG_TYPE "global-merge"
+
cl::opt<bool>
EnableGlobalMerge("global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
@@ -107,7 +108,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- explicit GlobalMerge(const TargetMachine *TM = 0)
+ explicit GlobalMerge(const TargetMachine *TM = nullptr)
: FunctionPass(ID), TM(TM) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -173,7 +174,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
GlobalValue::InternalLinkage,
MergedInit, "_MergedGlobals",
- 0, GlobalVariable::NotThreadLocal,
+ nullptr,
+ GlobalVariable::NotThreadLocal,
AddrSpace);
for (size_t k = i; k < j; ++k) {
Constant *Idx[2] = {
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7537632..e83a5c4 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -24,7 +24,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "indvars"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -50,6 +49,8 @@
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
+#define DEBUG_TYPE "indvars"
+
STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
@@ -79,8 +80,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), DL(0),
- Changed(false) {
+ IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr),
+ DL(nullptr), Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -196,7 +197,7 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
if (!PHI)
return User;
- Instruction *InsertPt = 0;
+ Instruction *InsertPt = nullptr;
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
if (PHI->getIncomingValue(i) != Def)
continue;
@@ -257,13 +258,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// an add or increment value cannot be represented by an integer.
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
+ if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
// If this is not an add of the PHI with a constantfp, or if the constant fp
// is not an integer, bail out.
ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
int64_t IncValue;
- if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
!ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
return;
@@ -280,7 +281,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
if (!Compare)
Compare = dyn_cast<FCmpInst>(U2);
- if (Compare == 0 || !Compare->hasOneUse() ||
+ if (!Compare || !Compare->hasOneUse() ||
!isa<BranchInst>(Compare->user_back()))
return;
@@ -301,7 +302,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// transform it.
ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
int64_t ExitValue;
- if (ExitValueVal == 0 ||
+ if (ExitValueVal == nullptr ||
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
return;
@@ -651,7 +652,8 @@ namespace {
Type *WidestNativeType; // Widest integer type created [sz]ext
bool IsSigned; // Was an sext user seen before a zext?
- WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
+ WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
+ IsSigned(false) {}
};
}
@@ -693,7 +695,7 @@ struct NarrowIVDefUse {
Instruction *NarrowUse;
Instruction *WideDef;
- NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
+ NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}
NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
@@ -736,9 +738,9 @@ public:
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
- WidePhi(0),
- WideInc(0),
- WideIncExpr(0),
+ WidePhi(nullptr),
+ WideInc(nullptr),
+ WideIncExpr(nullptr),
DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
@@ -793,7 +795,7 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
- return 0;
+ return nullptr;
case Instruction::Add:
case Instruction::Mul:
case Instruction::UDiv:
@@ -838,14 +840,14 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
// Handle the common case of add<nsw/nuw>
if (DU.NarrowUse->getOpcode() != Instruction::Add)
- return 0;
+ return nullptr;
// One operand (NarrowDef) has already been extended to WideDef. Now determine
// if extending the other will lead to a recurrence.
unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
- const SCEV *ExtendOperExpr = 0;
+ const SCEV *ExtendOperExpr = nullptr;
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);
if (IsSigned && OBO->hasNoSignedWrap())
@@ -855,7 +857,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
ExtendOperExpr = SE->getZeroExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else
- return 0;
+ return nullptr;
// When creating this AddExpr, don't apply the current operation's NSW or NUW
// flags. This instruction may be guarded by control flow that the no-wrap
@@ -866,7 +868,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
- return 0;
+ return nullptr;
return AddRec;
}
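
GetExtendedOperandRecurrence only widens the add when the matching no-wrap flag is present, because only then does extension commute with the narrow add. A standalone numeric check of that rule in plain C++ (the int8_t narrowing below relies on two's complement wrap, which holds on all mainstream targets):

    #include <cassert>
    #include <cstdint>

    int main() {
      // No signed wrap: sext(c + d) == sext(c) + sext(d).
      int8_t c = 50, d = 20;
      assert(int64_t(int8_t(c + d)) == int64_t(c) + int64_t(d));
      // Signed wrap: 150 becomes -106 in int8_t, so the equality breaks
      // and widening the expression would change its value.
      int8_t a = 100, b = 50;
      assert(int64_t(int8_t(a + b)) != int64_t(a) + int64_t(b));
      return 0;
    }
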
@@ -877,14 +879,14 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
/// recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
- return 0;
+ return nullptr;
const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
if (SE->getTypeSizeInBits(NarrowExpr->getType())
>= SE->getTypeSizeInBits(WideType)) {
// NarrowUse implicitly widens its operand. e.g. a gep with a narrow
// index. So don't follow this use.
- return 0;
+ return nullptr;
}
const SCEV *WideExpr = IsSigned ?
@@ -892,7 +894,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
SE->getZeroExtendExpr(NarrowExpr, WideType);
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
- return 0;
+ return nullptr;
return AddRec;
}
@@ -930,7 +932,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
<< " to " << *WidePhi << "\n");
}
- return 0;
+ return nullptr;
}
}
// Our raison d'etre! Eliminate sign and zero extension.
@@ -968,7 +970,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
- return 0;
+ return nullptr;
}
// Does this user itself evaluate to a recurrence after widening?
@@ -981,7 +983,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
truncateIVUse(DU, DT);
- return 0;
+ return nullptr;
}
// Assume block terminators cannot evaluate to a recurrence. We can't
// insert a Trunc after a terminator if there happens to be a critical edge.
@@ -990,14 +992,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
- Instruction *WideUse = 0;
+ Instruction *WideUse = nullptr;
if (WideAddRec == WideIncExpr
&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
WideUse = CloneIVUser(DU);
if (!WideUse)
- return 0;
+ return nullptr;
}
// Evaluation of WideAddRec ensured that the narrow expression could be
// extended outside the loop without overflow. This suggests that the wide use
@@ -1008,7 +1010,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
DeadInsts.push_back(WideUse);
- return 0;
+ return nullptr;
}
// Returning WideUse pushes it on the worklist.
@@ -1043,7 +1045,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
- return NULL;
+ return nullptr;
// Widen the induction variable expression.
const SCEV *WideIVExpr = IsSigned ?
@@ -1056,7 +1058,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Can the IV be extended outside the loop without overflow?
AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
if (!AddRec || AddRec->getLoop() != L)
- return NULL;
+ return nullptr;
// An AddRec must have loop-invariant operands. Since this AddRec is
// materialized by a loop header phi, the expression cannot have any post-loop
@@ -1282,7 +1284,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
- return 0;
+ return nullptr;
switch (IncI->getOpcode()) {
case Instruction::Add:
@@ -1293,17 +1295,17 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
if (IncI->getNumOperands() == 2)
break;
default:
- return 0;
+ return nullptr;
}
PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
if (Phi && Phi->getParent() == L->getHeader()) {
if (isLoopInvariant(IncI->getOperand(1), L, DT))
return Phi;
- return 0;
+ return nullptr;
}
if (IncI->getOpcode() == Instruction::GetElementPtr)
- return 0;
+ return nullptr;
// Allow add/sub to be commuted.
Phi = dyn_cast<PHINode>(IncI->getOperand(1));
@@ -1311,7 +1313,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
if (isLoopInvariant(IncI->getOperand(0), L, DT))
return Phi;
}
- return 0;
+ return nullptr;
}
/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
@@ -1321,7 +1323,7 @@ static ICmpInst *getLoopTest(Loop *L) {
BasicBlock *LatchBlock = L->getLoopLatch();
// Don't bother with LFTR if the loop is not properly simplified.
if (!LatchBlock)
- return 0;
+ return nullptr;
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
assert(BI && "expected exit branch");
@@ -1446,8 +1448,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
// Loop over all of the PHI nodes, looking for a simple counter.
- PHINode *BestPhi = 0;
- const SCEV *BestInit = 0;
+ PHINode *BestPhi = nullptr;
+ const SCEV *BestInit = nullptr;
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "needsLFTR should guarantee a loop latch");
@@ -1571,7 +1573,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// IVInit integer and IVCount pointer would only occur if a canonical IV
// were generated on top of case #2, which is not expected.
- const SCEV *IVLimit = 0;
+ const SCEV *IVLimit = nullptr;
// For unit stride, IVCount = Start + BECount with 2's complement overflow.
// For non-zero Start, compute IVCount here.
if (AR->getStart()->isZero())
@@ -1813,7 +1815,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DeadInsts.clear();
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 067deb7..230a381 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jump-threading"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "jump-threading"
+
STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
@@ -153,7 +154,7 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
@@ -308,7 +309,7 @@ void JumpThreading::FindLoopHeaders(Function &F) {
/// Returns null if Val is null or not an appropriate constant.
static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
if (!Val)
- return 0;
+ return nullptr;
// Undef is "known" enough.
if (UndefValue *U = dyn_cast<UndefValue>(Val))
@@ -352,7 +353,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If V is a non-instruction value, or an instruction in a different block,
// then it can't be derived from a PHI.
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || I->getParent() != BB) {
+ if (!I || I->getParent() != BB) {
// Okay, if this is a live-in value, see if it has a known value at the end
// of any of our predecessors.
@@ -495,7 +496,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
- if (Res == 0) {
+ if (!Res) {
if (!isa<Constant>(RHS))
continue;
@@ -581,7 +582,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// Either operand will do, so be sure to pick the one that's a known
// constant.
// FIXME: Do this more cleverly if both values are known constants?
- KnownCond = (TrueVal != 0);
+ KnownCond = (TrueVal != nullptr);
}
// See if the select has a known constant value for this predecessor.
@@ -737,7 +738,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
Instruction *CondInst = dyn_cast<Instruction>(Condition);
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0) {
+ if (!CondInst) {
// FIXME: Unify this with code below.
if (ProcessThreadableEdges(Condition, BB, Preference))
return true;
@@ -890,7 +891,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
AvailablePredsTy AvailablePreds;
- BasicBlock *OneUnavailablePred = 0;
+ BasicBlock *OneUnavailablePred = nullptr;
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
@@ -904,16 +905,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- MDNode *ThisTBAATag = 0;
+ MDNode *ThisTBAATag = nullptr;
Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
- 0, &ThisTBAATag);
+ nullptr, &ThisTBAATag);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
// If tbaa tags disagree or are not present, forget about them.
- if (TBAATag != ThisTBAATag) TBAATag = 0;
+ if (TBAATag != ThisTBAATag) TBAATag = nullptr;
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
@@ -929,7 +930,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// predecessor, we want to insert a merge block for those common predecessors.
// This ensures that we only have to insert one reload, thus not increasing
// code size.
- BasicBlock *UnavailablePred = 0;
+ BasicBlock *UnavailablePred = nullptr;
// If there is exactly one predecessor where the value is unavailable, the
// already computed 'OneUnavailablePred' block is it. If it ends in an
@@ -996,7 +997,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
- std::make_pair(P, (Value*)0));
+ std::make_pair(P, (Value*)nullptr));
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
@@ -1103,7 +1104,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
SmallPtrSet<BasicBlock*, 16> SeenPreds;
SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
- BasicBlock *OnlyDest = 0;
+ BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
@@ -1120,7 +1121,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
- DestBB = 0;
+ DestBB = nullptr;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
@@ -1171,7 +1172,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// If the threadable edges are branching on an undefined value, we get to pick
// the destination that these predecessors should get to.
- if (MostPopularDest == 0)
+ if (!MostPopularDest)
MostPopularDest = BB->getTerminator()->
getSuccessor(GetBestDestForJumpOnUndef(BB));
@@ -1273,7 +1274,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
}
// Determine which value to split on, true, false, or undef if neither.
- ConstantInt *SplitVal = 0;
+ ConstantInt *SplitVal = nullptr;
if (NumTrue > NumFalse)
SplitVal = ConstantInt::getTrue(BB->getContext());
else if (NumTrue != 0 || NumFalse != 0)
@@ -1294,7 +1295,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// help us. However, we can just replace the LHS or RHS with the constant.
if (BlocksToFoldInto.size() ==
cast<PHINode>(BB->front()).getNumIncomingValues()) {
- if (SplitVal == 0) {
+ if (!SplitVal) {
// If all preds provide undef, just nuke the xor, because it is undef too.
BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
BO->eraseFromParent();
@@ -1531,7 +1532,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// can just clone the bits from BB into the end of the new PredBB.
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
- if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index b69f2dc..0a8d16f 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -30,7 +30,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "licm"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -60,6 +59,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "licm"
+
STATISTIC(NumSunk , "Number of instructions sunk out of loop");
STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
@@ -223,7 +224,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -315,8 +316,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
"Parent loop not left in LCSSA form after LICM!");
// Clear out the loop's state information for the next iteration
- CurLoop = 0;
- Preheader = 0;
+ CurLoop = nullptr;
+ Preheader = nullptr;
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
@@ -334,7 +335,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
/// iteration.
///
void LICM::SinkRegion(DomTreeNode *N) {
- assert(N != 0 && "Null dominator tree node?");
+ assert(N != nullptr && "Null dominator tree node?");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
@@ -381,7 +382,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
/// before uses, allowing us to hoist a loop body in one pass without iteration.
///
void LICM::HoistRegion(DomTreeNode *N) {
- assert(N != 0 && "Null dominator tree node?");
+ assert(N != nullptr && "Null dominator tree node?");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
@@ -774,7 +775,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
- MDNode *TBAATag = 0;
+ MDNode *TBAATag = nullptr;
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 9a520c8..5ab686a 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-delete"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -23,6 +22,8 @@
#include "llvm/IR/Dominators.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-delete"
+
STATISTIC(NumDeleted, "Number of loops deleted");
namespace {
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index e5e8b84..26a83df 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -41,7 +41,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -61,6 +60,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-idiom"
+
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
@@ -114,7 +115,7 @@ namespace {
Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
/// Return true iff the idiom is detected in the loop, and 1) \p CntInst
- /// is set to the instruction counting the pupulation bit. 2) \p CntPhi
+ /// is set to the instruction counting the population bit. 2) \p CntPhi
/// is set to the corresponding phi node. 3) \p Var is set to the value
/// whose population bits are being counted.
bool detectIdiom
@@ -138,7 +139,7 @@ namespace {
static char ID;
explicit LoopIdiomRecognize() : LoopPass(ID) {
initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- DL = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
+ DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr;
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -182,7 +183,7 @@ namespace {
if (DL)
return DL;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
return DL;
}
@@ -247,7 +248,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
+ DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
@@ -292,9 +293,9 @@ bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
BranchInst *Br = getBranch(BB);
- return Br && Br->isConditional() ? BB : 0;
+ return Br && Br->isConditional() ? BB : nullptr;
}
- return 0;
+ return nullptr;
}
//===----------------------------------------------------------------------===//
@@ -304,7 +305,7 @@ BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
//===----------------------------------------------------------------------===//
NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
- LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
+ LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) {
}
bool NclPopcountRecognize::preliminaryScreen() {
@@ -341,25 +342,25 @@ bool NclPopcountRecognize::preliminaryScreen() {
return true;
}
-Value *NclPopcountRecognize::matchCondition (BranchInst *Br,
- BasicBlock *LoopEntry) const {
+Value *NclPopcountRecognize::matchCondition(BranchInst *Br,
+ BasicBlock *LoopEntry) const {
if (!Br || !Br->isConditional())
- return 0;
+ return nullptr;
ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
if (!Cond)
- return 0;
+ return nullptr;
ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!CmpZero || !CmpZero->isZero())
- return 0;
+ return nullptr;
ICmpInst::Predicate Pred = Cond->getPredicate();
if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
(Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
return Cond->getOperand(0);
- return 0;
+ return nullptr;
}
bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
@@ -390,9 +391,9 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
Value *VarX1, *VarX0;
PHINode *PhiX, *CountPhi;
- DefX2 = CountInst = 0;
- VarX1 = VarX0 = 0;
- PhiX = CountPhi = 0;
+ DefX2 = CountInst = nullptr;
+ VarX1 = VarX0 = nullptr;
+ PhiX = CountPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
// step 1: Check if the loop-back branch is in desirable form.
@@ -439,7 +440,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
// step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
{
- CountInst = NULL;
+ CountInst = nullptr;
for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
IterE = LoopEntry->end(); Iter != IterE; Iter++) {
Instruction *Inst = Iter;
@@ -744,7 +745,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
// If processing the store invalidated our iterator, start over from the
// top of the block.
- if (InstPtr == 0)
+ if (!InstPtr)
I = BB->begin();
continue;
}
@@ -757,7 +758,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
// If processing the memset invalidated our iterator, start over from the
// top of the block.
- if (InstPtr == 0)
+ if (!InstPtr)
I = BB->begin();
continue;
}
@@ -784,7 +785,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
// random store we can't handle.
const SCEVAddRecExpr *StoreEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return false;
// Check to see if the stride matches the size of the store. If so, then we
@@ -792,7 +793,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
unsigned StoreSize = (unsigned)SizeInBits >> 3;
const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
- if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
+ if (!Stride || StoreSize != Stride->getValue()->getValue()) {
// TODO: Could also handle negative stride here someday, that will require
// the validity check in mayLoopAccessLocation to be updated though.
// Enable this to print exact negative strides.
@@ -841,7 +842,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
- if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
return false;
// Reject memsets that are so large that they overflow an unsigned.
@@ -855,7 +856,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// TODO: Could also handle negative stride here someday, that will require the
// validity check in mayLoopAccessLocation to be updated though.
- if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ if (!Stride || MSI->getLength() != Stride->getValue())
return false;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
@@ -908,23 +909,23 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
Constant *C = dyn_cast<Constant>(V);
- if (C == 0) return 0;
+ if (!C) return nullptr;
// Only handle simple values that are a power of two bytes in size.
uint64_t Size = DL.getTypeSizeInBits(V->getType());
if (Size == 0 || (Size & 7) || (Size & (Size-1)))
- return 0;
+ return nullptr;
// Don't care enough about darwin/ppc to implement this.
if (DL.isBigEndian())
- return 0;
+ return nullptr;
// Convert to size in bytes.
Size /= 8;
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
// if the top and bottom are the same (e.g. for vectors and large integers).
- if (Size > 16) return 0;
+ if (Size > 16) return nullptr;
// If the constant is exactly 16 bytes, just use it.
if (Size == 16) return C;
@@ -949,7 +950,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
- Constant *PatternValue = 0;
+ Constant *PatternValue = nullptr;
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
@@ -960,13 +961,13 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
- PatternValue = 0;
+ PatternValue = nullptr;
} else if (DestAS == 0 &&
TLI->has(LibFunc::memset_pattern16) &&
(PatternValue = getMemSetPatternValue(StoredVal, *DL))) {
// Don't create memset_pattern16s with address spaces.
// It looks like we can use PatternValue!
- SplatValue = 0;
+ SplatValue = nullptr;
} else {
// Otherwise, this isn't an idiom we can transform. For example, we can't
// do anything with a 3-byte store.
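
The SplatValue/PatternValue split above chooses between a plain memset, when the stored value is one byte repeated, and memset_pattern16 for other small constants. A standalone sketch of the byte-splat test in plain C++ (bytewiseSplat is a hypothetical stand-in for LLVM's isBytewiseValue):

    #include <cstdint>

    // Return true and set B when every byte of V equals the low byte.
    static bool bytewiseSplat(uint32_t V, uint8_t &B) {
      B = uint8_t(V & 0xff);
      return V == B * 0x01010101u; // replicate the byte into all four lanes
    }
    // bytewiseSplat(0x01010101, B) -> true  (plain memset with 0x01)
    // bytewiseSplat(0x01020304, B) -> false (memset_pattern16 territory)
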
@@ -1033,7 +1034,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
Int8PtrTy,
Int8PtrTy,
IntPtr,
- (void*)0);
+ (void*)nullptr);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// a constant array of 16 bytes. Plop the value into a mergeable global.
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 263ba93..ab1a939 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-instsimplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -26,6 +25,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-instsimplify"
+
STATISTIC(NumSimplified, "Number of redundant instructions simplified");
namespace {
@@ -70,10 +71,10 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
LoopInfo *LI = &getAnalysis<LoopInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -126,7 +127,15 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
++NumSimplified;
}
}
- LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ if (res) {
+ // RecursivelyDeleteTriviallyDeadInstructions can remove
+ // more than one instruction, so simply incrementing the
+ // iterator does not work. When instructions get deleted,
+ // re-iterate instead.
+ BI = BB->begin(); BE = BB->end();
+ LocalChanged |= res;
+ }
if (IsSubloopHeader && !isa<PHINode>(I))
break;
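
The restart is required because RecursivelyDeleteTriviallyDeadInstructions may erase instructions other than the current one, so no local iterator fix-up is safe. For contrast, a minimal sketch in plain C++ of the ordinary single-erase pattern, where the returned iterator is enough:

    #include <list>

    // Safe only when each step erases the element at hand and nothing else:
    // erase() hands back the next valid iterator.
    static void eraseEvens(std::list<int> &L) {
      for (std::list<int>::iterator It = L.begin(); It != L.end();) {
        if (*It % 2 == 0)
          It = L.erase(It);
        else
          ++It;
      }
    }
    // When a callee may also erase other elements as a side effect, even
    // this pattern is unsound; restarting the scan, as the patch does, is
    // the simple correct answer.
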
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 81c1e42..8b5e036 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-reroll"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -36,6 +35,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-reroll"
+
STATISTIC(NumRerolledLoops, "Number of rerolled loops");
static cl::opt<unsigned>
@@ -945,7 +946,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
bool InReduction = Reductions.isPairInSame(J1, J2);
if (!(InReduction && J1->isAssociative())) {
- bool Swapped = false, SomeOpMatched = false;;
+ bool Swapped = false, SomeOpMatched = false;
for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
Value *Op2 = J2->getOperand(j);
@@ -1133,7 +1134,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
SE = &getAnalysis<ScalarEvolution>();
TLI = &getAnalysis<TargetLibraryInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock *Header = L->getHeader();
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index fde6bac..2ce5831 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-rotate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -24,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -31,7 +31,11 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
-#define MAX_HEADER_SIZE 16
+#define DEBUG_TYPE "loop-rotate"
+
+static cl::opt<unsigned>
+DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
+ cl::desc("The default maximum header size for automatic loop rotation"));
STATISTIC(NumRotated, "Number of loops rotated");
namespace {
@@ -39,8 +43,12 @@ namespace {
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(ID) {
+ LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ if (SpecifiedMaxHeaderSize == -1)
+ MaxHeaderSize = DefaultRotationThreshold;
+ else
+ MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
}
// LCSSA form makes instruction renaming easier.
@@ -61,6 +69,7 @@ namespace {
bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
+ unsigned MaxHeaderSize;
LoopInfo *LI;
const TargetTransformInfo *TTI;
};
@@ -74,7 +83,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
+ return new LoopRotate(MaxHeaderSize);
+}
/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.
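
With this change the header-size cap can be set in two ways: programmatically through createLoopRotatePass, or on the command line via the new flag (for example, opt -loop-rotate -rotation-max-header-size=32). A minimal sketch of the programmatic side:

    #include "llvm/Transforms/Scalar.h"

    // -1 (the default) defers to the -rotation-max-header-size flag;
    // any non-negative value overrides it for this pass instance.
    llvm::Pass *makeLoopRotate(bool AllowLargeHeaders) {
      return AllowLargeHeaders ? llvm::createLoopRotatePass(32)
                               : llvm::createLoopRotatePass();
    }
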
@@ -82,6 +93,9 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))
return false;
+ // Save the loop metadata.
+ MDNode *LoopMD = L->getLoopID();
+
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
@@ -96,6 +110,12 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
MadeChange = true;
SimplifiedLatch = false;
}
+
+ // Restore the loop metadata.
+ // NB! We presume LoopRotation DOESN'T ADD its own metadata.
+ if ((MadeChange || SimplifiedLatch) && LoopMD)
+ L->setLoopID(LoopMD);
+
return MadeChange;
}
@@ -281,7 +301,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
BasicBlock *OrigLatch = L->getLoopLatch();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- if (BI == 0 || BI->isUnconditional())
+ if (!BI || BI->isUnconditional())
return false;
// If the loop header is not one of the loop exiting blocks then
@@ -292,7 +312,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0)
+ if (!OrigLatch)
return false;
// Rotate if either the loop latch does *not* exit the loop, or if the loop
@@ -310,7 +330,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
<< " instructions: "; L->dump());
return false;
}
- if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ if (Metrics.NumInsts > MaxHeaderSize)
return false;
}
@@ -319,7 +339,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
- if (OrigPreheader == 0)
+ if (!OrigPreheader)
return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 272a16d..914b56a 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -53,7 +53,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
@@ -78,6 +77,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "loop-reduce"
+
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
@@ -237,7 +238,15 @@ struct Formula {
int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty,
+ /// non-empty, these registers are summed into the formula's value. The
+ /// canonical representation of a formula is
+ /// 1. BaseRegs.size() > 1 implies ScaledReg != NULL and
+ /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
+ /// #1 enforces that the scaled register is always used when at least two
+ /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1*reg2.
+ /// #2 enforces that 1 * reg is reg.
+ /// This invariant can be temporarily broken while building a formula.
+ /// However, every formula inserted into the LSRInstance must be in canonical
+ /// form.
SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
@@ -250,12 +259,18 @@ struct Formula {
int64_t UnfoldedOffset;
Formula()
- : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
- UnfoldedOffset(0) {}
+ : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
+ ScaledReg(nullptr), UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
- unsigned getNumRegs() const;
+ bool isCanonical() const;
+
+ void Canonicalize();
+
+ bool Unscale();
+
+ size_t getNumRegs() const;
Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
@@ -345,12 +360,58 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
+ Canonicalize();
+}
+
+/// \brief Check whether or not this formula satisfies the canonical
+/// representation.
+/// \see Formula::BaseRegs.
+bool Formula::isCanonical() const {
+ if (ScaledReg)
+ return Scale != 1 || !BaseRegs.empty();
+ return BaseRegs.size() <= 1;
+}
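To make the two rules concrete, a sketch of which Formula shapes pass isCanonical(); Reg, Reg1, and Reg2 stand for hypothetical const SCEV pointers:

    // Rule #1: two registers must occupy one base slot plus the scaled slot.
    //   BaseRegs = {Reg1}, ScaledReg = Reg2, Scale = 1  -> canonical
    //   BaseRegs = {Reg1, Reg2}, ScaledReg = nullptr    -> not canonical
    // Rule #2: 1*reg must be held as a plain reg.
    //   BaseRegs = {}, ScaledReg = Reg, Scale = 1       -> not canonical
    //   BaseRegs = {Reg}, ScaledReg = nullptr           -> canonical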
+
+/// \brief Helper method to morph a formula into its canonical representation.
+/// \see Formula::BaseRegs.
+/// Every formula having more than one base register must use the ScaledReg
+/// field. Otherwise, we would have to handle special cases everywhere in LSR
+/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
+/// On the other hand, 1*reg should be canonicalized into reg.
+void Formula::Canonicalize() {
+ if (isCanonical())
+ return;
+ // So far we have not needed this case. It would be easy to implement, but it
+ // is useless to maintain dead code. Besides, it could hurt compile time.
+ assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
+ // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg.
+ ScaledReg = BaseRegs.back();
+ BaseRegs.pop_back();
+ Scale = 1;
+ size_t BaseRegsSize = BaseRegs.size();
+ size_t Try = 0;
+ // If ScaledReg is an invariant, try to find a variant expression.
+ while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
+ std::swap(ScaledReg, BaseRegs[Try++]);
+}
+
+/// \brief Get rid of the scale in the formula.
+/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
+/// \return true if it was possible to get rid of the scale, false otherwise.
+/// \note After this operation the formula may not be in the canonical form.
+bool Formula::Unscale() {
+ if (Scale != 1)
+ return false;
+ Scale = 0;
+ BaseRegs.push_back(ScaledReg);
+ ScaledReg = nullptr;
+ return true;
}
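A round-trip sketch of the two helpers above, assuming Reg1 and Reg2 are arbitrary const SCEV values:

    Formula F;                   // build F = Reg1 + 1*Reg2 (canonical)
    F.BaseRegs.push_back(Reg1);
    F.ScaledReg = Reg2;
    F.Scale = 1;
    F.Unscale();                 // F = Reg1 + Reg2, Scale == 0
    assert(!F.isCanonical());    // two base regs and no scaled reg
    F.Canonicalize();            // one register moves back into the scaled slot
    assert(F.isCanonical());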
/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
-unsigned Formula::getNumRegs() const {
+size_t Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
@@ -360,7 +421,7 @@ Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
BaseGV ? BaseGV->getType() :
- 0;
+ nullptr;
}
/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
@@ -487,11 +548,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
- return 0;
+ return nullptr;
const APInt &LA = C->getValue()->getValue();
const APInt &RA = RC->getValue()->getValue();
if (LA.srem(RA) != 0)
- return 0;
+ return nullptr;
return SE.getConstant(LA.sdiv(RA));
}
@@ -500,16 +561,16 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
- if (!Step) return 0;
+ if (!Step) return nullptr;
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
- if (!Start) return 0;
+ if (!Start) return nullptr;
// FlagNW is independent of the start value, step direction, and is
// preserved with smaller magnitude steps.
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
}
- return 0;
+ return nullptr;
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -520,12 +581,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
I != E; ++I) {
const SCEV *Op = getExactSDiv(*I, RHS, SE,
IgnoreSignificantBits);
- if (!Op) return 0;
+ if (!Op) return nullptr;
Ops.push_back(Op);
}
return SE.getAddExpr(Ops);
}
- return 0;
+ return nullptr;
}
// Check for a multiply operand that we can pull RHS out of.
@@ -544,13 +605,13 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
}
Ops.push_back(S);
}
- return Found ? SE.getMulExpr(Ops) : 0;
+ return Found ? SE.getMulExpr(Ops) : nullptr;
}
- return 0;
+ return nullptr;
}
// Otherwise we don't know.
- return 0;
+ return nullptr;
}
/// ExtractImmediate - If S involves the addition of a constant integer value,
@@ -604,7 +665,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
SCEV::FlagAnyWrap);
return Result;
}
- return 0;
+ return nullptr;
}
/// isAddressUse - Returns true if the specified instruction is using the
@@ -755,12 +816,12 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
Value *V = DeadInsts.pop_back_val();
Instruction *I = dyn_cast_or_null<Instruction>(V);
- if (I == 0 || !isInstructionTriviallyDead(I))
+ if (!I || !isInstructionTriviallyDead(I))
continue;
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- *OI = 0;
+ *OI = nullptr;
if (U->use_empty())
DeadInsts.push_back(U);
}
@@ -775,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
namespace {
class LSRUse;
}
-// Check if it is legal to fold 2 base registers.
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F);
+
+/// \brief Check if the addressing mode defined by \p F is completely
+/// folded in \p LU at isel time.
+/// This includes address-mode folding and special icmp tricks.
+/// This function returns true if \p LU can accommodate what \p F
+/// defines plus up to one base register, one scaled register, and an offset.
+/// In other words, if \p F has several base registers, this function may
+/// still return true. Therefore, users still need to account for
+/// additional base registers and/or unfolded offsets to derive an
+/// accurate cost model.
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F);
// Get the cost of the scaling factor used in F for LU.
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
@@ -828,7 +898,7 @@ public:
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
- SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = nullptr);
void print(raw_ostream &OS) const;
void dump() const;
@@ -921,6 +991,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ assert(F.isCanonical() && "Cost is accurate only for canonical formula");
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
@@ -944,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
}
// Determine how many (unfolded) adds we'll need inside the loop.
- size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+ size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
// Do not count the base and a possible second register if the target
// allows folding two registers.
- NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+ NumBaseAdds +=
+ NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+ NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
ScaleCost += getScalingFactorCost(TTI, LU, F);
@@ -1047,7 +1120,8 @@ struct LSRFixup {
}
LSRFixup::LSRFixup()
- : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
+ : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
+ Offset(0) {}
/// isUseFullyOutsideLoop - Test whether this fixup always uses its
/// value outside of the given loop.
@@ -1183,7 +1257,7 @@ public:
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
RigidFormula(false),
- WidestFixupType(0) {}
+ WidestFixupType(nullptr) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
@@ -1208,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
+/// The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F) {
+ assert(F.isCanonical() && "Invalid canonical representation");
+
if (!Formulae.empty() && RigidFormula)
return false;
@@ -1234,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) {
// Record registers now being used by this use.
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ if (F.ScaledReg)
+ Regs.insert(F.ScaledReg);
return true;
}
@@ -1300,12 +1379,10 @@ void LSRUse::dump() const {
}
#endif
-/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
-/// be completely folded into the user instruction at isel time. This includes
-/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
- Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
@@ -1356,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
// Check for overflow.
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
@@ -1370,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
return false;
MaxOffset = (uint64_t)BaseOffset + MaxOffset;
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
- Scale) &&
- isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
+ HasBaseReg, Scale) &&
+ isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
+ HasBaseReg, Scale);
+}
+
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+ // For the purpose of isAMCompletelyFolded, either having a canonical formula
+ // or a scale not equal to zero is correct.
+ // Problems may arise from non-canonical formulae having a scale == 0.
+ // Strictly speaking, it would be best to just rely on canonical formulae.
+ // However, when we generate the scaled formulae, we first check that the
+ // scaling factor is profitable before computing the actual ScaledReg for
+ // compile time's sake.
+ assert((F.isCanonical() || F.Scale != 0));
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
+}
+
+/// isLegalUse - Test whether we know how to expand the current formula.
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
+ // We know how to expand completely foldable formulae.
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale) ||
+ // Or formulae that use a base register produced by a sum of base
+ // registers.
+ (Scale == 1 &&
+ isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ BaseGV, BaseOffset, true, 0));
}
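A worked reading of the fallback clause, as a sketch of hypothetical target behavior:

    // Suppose the target folds base + offset but rejects any scaled index:
    //   isLegalAddressingMode(Ty, GV, Off, HasBaseReg, /*Scale=*/1) -> false
    //   isLegalAddressingMode(Ty, GV, Off, /*HasBaseReg=*/true, 0)  -> true
    // Then for a canonical reg1 + 1*reg2 the first isAMCompletelyFolded query
    // fails, but the Scale == 1 clause still accepts the formula: a 1-scaled
    // register can be expanded as one more base register, i.e. a plain add
    // that LSR always knows how to emit.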
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
@@ -1382,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F) {
- // If F is used as an Addressing Mode, it may fold one Base plus one
- // scaled register. If the scaled register is nil, do as if another
- // element of the base regs is a 1-scaled register.
- // This is possible if BaseRegs has at least 2 registers.
-
- // If this is not an address calculation, this is not an addressing mode
- // use.
- if (LU.Kind != LSRUse::Address)
- return false;
-
- // F is already scaled.
- if (F.Scale != 0)
- return false;
-
- // We need to keep one register for the base and one to scale.
- if (F.BaseRegs.size() < 2)
- return false;
-
- return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
- F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
- }
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F) {
+ return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
+ F.Scale);
+}
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
if (!F.Scale)
return 0;
- assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, F) && "Illegal formula in use.");
+
+ // If the use is not completely folded in that instruction, we will have to
+ // pay an extra cost only when Scale != 1.
+ if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F))
+ return F.Scale != 1;
switch (LU.Kind) {
case LSRUse::Address: {
@@ -1430,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
}
case LSRUse::ICmpZero:
- // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
- // Therefore, return 0 in case F.Scale == -1.
- return F.Scale != -1;
-
case LSRUse::Basic:
case LSRUse::Special:
+ // The use is completely folded, i.e., everything is folded into the
+ // instruction.
return 0;
}
@@ -1460,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
HasBaseReg = true;
}
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
+ HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
@@ -1485,8 +1581,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
// base and a scale.
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
- BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
namespace {
@@ -1515,7 +1611,7 @@ struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
- IVChain() : ExprBase(0) {}
+ IVChain() : ExprBase(nullptr) {}
IVChain(const IVInc &Head, const SCEV *Base)
: Incs(1, Head), ExprBase(Base) {}
@@ -1642,8 +1738,19 @@ class LSRInstance {
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
+
+ void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, unsigned Depth,
+ size_t Idx, bool IsScaledReg = false);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg = false);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist,
+ size_t Idx, bool IsScaledReg = false);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
@@ -1721,7 +1828,7 @@ void LSRInstance::OptimizeShadowIV() {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
- Type *DestTy = 0;
+ Type *DestTy = nullptr;
bool IsSigned = false;
/* If shadow use is an int->float cast then insert a second IV
@@ -1783,7 +1890,7 @@ void LSRInstance::OptimizeShadowIV() {
continue;
/* Initialize new IV, double d = 0.0 in above example. */
- ConstantInt *C = 0;
+ ConstantInt *C = nullptr;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
@@ -1905,7 +2012,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// for ICMP_ULE here because the comparison would be with zero, which
// isn't interesting.
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
- const SCEVNAryExpr *Max = 0;
+ const SCEVNAryExpr *Max = nullptr;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
@@ -1948,7 +2055,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
- Value *NewRHS = 0;
+ Value *NewRHS = nullptr;
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
@@ -2018,7 +2125,7 @@ LSRInstance::OptimizeLoopTermCond() {
continue;
// Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
+ IVStrideUse *CondUse = nullptr;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
continue;
@@ -2071,12 +2178,12 @@ LSRInstance::OptimizeLoopTermCond() {
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
Scale = -Scale;
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
@@ -2146,23 +2253,25 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// the uses will have all its uses outside the loop, for example.
if (LU.Kind != Kind)
return false;
+
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
- // Check for a mismatched access type, and fall back conservatively as needed.
- // TODO: Be less conservative when the type is similar and can use the same
- // addressing modes.
- if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
- NewAccessTy = Type::getVoidTy(AccessTy->getContext());
// Update the use.
LU.MinOffset = NewMinOffset;
@@ -2183,7 +2292,7 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
@@ -2267,7 +2376,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
}
// Nothing looked good.
- return 0;
+ return nullptr;
}
void LSRInstance::CollectInterestingTypesAndFactors() {
@@ -2385,7 +2494,7 @@ static const SCEV *getExprBase(const SCEV *S) {
default: // including scUnknown.
return S;
case scConstant:
- return 0;
+ return nullptr;
case scTruncate:
return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
case scZeroExtend:
@@ -2476,7 +2585,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;
}
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
@@ -2535,7 +2644,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
// Visit all existing chains. Check if its IVOper can be computed as a
// profitable loop invariant increment from the last link in the Chain.
unsigned ChainIdx = 0, NChains = IVChainVec.size();
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
for (; ChainIdx < NChains; ++ChainIdx) {
IVChain &Chain = IVChainVec[ChainIdx];
@@ -2755,7 +2864,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address,
- getAccessType(UserInst), /*BaseGV=*/ 0,
+ getAccessType(UserInst), /*BaseGV=*/ nullptr,
IncOffset, /*HasBaseReg=*/ false))
return false;
@@ -2773,7 +2882,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
- Value *IVSrc = 0;
+ Value *IVSrc = nullptr;
while (IVOpIter != IVOpEnd) {
IVSrc = getWideOperand(*IVOpIter);
@@ -2800,7 +2909,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
- const SCEV *LeftOverExpr = 0;
+ const SCEV *LeftOverExpr = nullptr;
for (IVChain::const_iterator IncI = Chain.begin(),
IncE = Chain.end(); IncI != IncE; ++IncI) {
@@ -2831,7 +2940,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
- LeftOverExpr = 0;
+ LeftOverExpr = nullptr;
}
}
Type *OperTy = IncI->IVOperand->getType();
@@ -2886,7 +2995,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- Type *AccessTy = 0;
+ Type *AccessTy = nullptr;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -2917,7 +3026,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
- N = TransformForPostIncUse(Normalize, N, CI, 0,
+ N = TransformForPostIncUse(Normalize, N, CI, nullptr,
LF.PostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
@@ -2992,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ // Do not insert a formula that we will not be able to expand.
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
+ "Formula is illegal");
if (!LU.InsertFormula(F))
return false;
@@ -3068,7 +3180,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = U;
- std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
@@ -3107,7 +3219,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
- return 0;
+ return nullptr;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (AR->getStart()->isZero())
@@ -3119,7 +3231,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
// does not pertain to this loop.
if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
- Remainder = 0;
+ Remainder = nullptr;
}
if (Remainder != AR->getStart()) {
if (!Remainder)
@@ -3141,90 +3253,110 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(SE.getMulExpr(C, Remainder));
- return 0;
+ return nullptr;
}
}
return S;
}
-/// GenerateReassociations - Split out subexpressions from adds and the bases of
-/// addrecs.
-void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
- Formula Base,
- unsigned Depth) {
- // Arbitrarily cap recursion to protect compile time.
- if (Depth >= 3) return;
-
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *BaseReg = Base.BaseRegs[i];
+/// \brief Helper function for LSRInstance::GenerateReassociations.
+void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ unsigned Depth, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ SmallVector<const SCEV *, 8> AddOps;
+ const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
+
+ if (AddOps.size() == 1)
+ return;
- SmallVector<const SCEV *, 8> AddOps;
- const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
- if (Remainder)
- AddOps.push_back(Remainder);
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end();
+ J != JE; ++J) {
- if (AddOps.size() == 1) continue;
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
+ continue;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
- JE = AddOps.end(); J != JE; ++J) {
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
+ continue;
- // Loop-variant "unknown" values are uninteresting; we won't be able to
- // do anything meaningful with them.
- if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
- continue;
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps(
+ ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append(std::next(J),
+ ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
+ continue;
- // Don't pull a constant into a register if the constant could be folded
- // into an immediate field.
- if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, *J, Base.getNumRegs() > 1))
- continue;
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
+ Formula F = Base;
- // Collect all operands except *J.
- SmallVector<const SCEV *, 8> InnerAddOps(
- ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
- InnerAddOps.append(std::next(J),
- ((const SmallVector<const SCEV *, 8> &)AddOps).end());
-
- // Don't leave just a constant behind in a register if the constant could
- // be folded into an immediate field.
- if (InnerAddOps.size() == 1 &&
- isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
- continue;
+ // Add the remaining pieces of the add back into the new formula.
+ const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+ if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
+ if (IsScaledReg)
+ F.ScaledReg = nullptr;
+ else
+ F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
+ } else if (IsScaledReg)
+ F.ScaledReg = InnerSum;
+ else
+ F.BaseRegs[Idx] = InnerSum;
+
+ // Add J as its own register, or an unfolded immediate.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
+ else
+ F.BaseRegs.push_back(*J);
+ // We may have changed the number of registers in the base regs; adjust the
+ // formula accordingly.
+ F.Canonicalize();
+
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
+ }
+}
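An illustrative trace of the helper, assuming the single base register is a sum of two loop-invariant subexpressions A and B:

    // Base: BaseRegs = {A + B}. CollectSubexprs yields AddOps = {A, B}.
    //   J = A: InnerSum = B, so the candidate has BaseRegs = {B, A}
    //   J = B: InnerSum = A, so the candidate has BaseRegs = {A, B}
    // Each candidate is re-canonicalized (one operand moves into the 1-scaled
    // slot) before InsertFormula, and a constant subexpression is absorbed
    // into UnfoldedOffset instead when TTI.isLegalAddImmediate allows it.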
- const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
- if (InnerSum->isZero())
- continue;
- Formula F = Base;
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base, unsigned Depth) {
+ assert(Base.isCanonical() && "Input must be in the canonical form");
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return;
- // Add the remaining pieces of the add back into the new formula.
- const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (InnerSumSC &&
- SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue())) {
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue();
- F.BaseRegs.erase(F.BaseRegs.begin() + i);
- } else
- F.BaseRegs[i] = InnerSum;
-
- // Add J as its own register, or an unfolded immediate.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue()))
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue();
- else
- F.BaseRegs.push_back(*J);
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
- if (InsertFormula(LU, LUIdx, F))
- // If that formula hadn't been seen before, recurse to find more like
- // it.
- GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
- }
- }
+ if (Base.Scale == 1)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
+ /* Idx */ -1, /* IsScaledReg */ true);
}
/// GenerateCombinations - Generate a formula consisting of all of the
@@ -3232,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
- if (Base.BaseRegs.size() <= 1) return;
+ if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
+ return;
+ // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
+ // processing the formula.
+ Base.Unscale();
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
@@ -3253,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
+ F.Canonicalize();
(void)InsertFormula(LU, LUIdx, F);
}
}
}
+/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
+void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ return;
+ Formula F = Base;
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+}
+
/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
if (Base.BaseGV) return;
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
- GlobalValue *GV = ExtractSymbol(G, SE);
- if (G->isZero() || !GV)
- continue;
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
+ if (Base.Scale == 1)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
+ /* IsScaledReg */ true);
+}
+
+/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
+void LSRInstance::GenerateConstantOffsetsImpl(
+ LSRUse &LU, unsigned LUIdx, const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end();
+ I != E; ++I) {
Formula F = Base;
- F.BaseGV = GV;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ // If it cancelled out, drop the base register, otherwise update it.
+ if (NewG->isZero()) {
+ if (IsScaledReg) {
+ F.Scale = 0;
+ F.ScaledReg = nullptr;
+ } else
+ F.DeleteBaseReg(F.BaseRegs[Idx]);
+ F.Canonicalize();
+ } else if (IsScaledReg)
+ F.ScaledReg = NewG;
+ else
+ F.BaseRegs[Idx] = NewG;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
}
+
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ return;
+ Formula F = Base;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
}
/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
@@ -3288,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
-
- for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
- E = Worklist.end(); I != E; ++I) {
- Formula F = Base;
- F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
- if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
- LU.AccessTy, F)) {
- // Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
- // If it cancelled out, drop the base register, otherwise update it.
- if (NewG->isZero()) {
- std::swap(F.BaseRegs[i], F.BaseRegs.back());
- F.BaseRegs.pop_back();
- } else
- F.BaseRegs[i] = NewG;
-
- (void)InsertFormula(LU, LUIdx, F);
- }
- }
-
- int64_t Imm = ExtractImmediate(G, SE);
- if (G->isZero() || Imm == 0)
- continue;
- Formula F = Base;
- F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
- }
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
+ if (Base.Scale == 1)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
+ /* IsScaledReg */ true);
}
/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
@@ -3419,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
- if (Base.Scale != 0) return;
+ // Try to unscale the formula to generate a better scale.
+ if (Base.Scale != 0 && !Base.Unscale())
+ return;
+
+ assert(Base.Scale == 0 && "Unscale did not do its job!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
@@ -3460,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Formula F = Base;
F.ScaledReg = Quotient;
F.DeleteBaseReg(F.BaseRegs[i]);
+ // The canonical representation of 1*reg is reg, which is already in
+ // Base. In that case, do not try to insert the formula; it would be
+ // rejected anyway.
+ if (F.Scale == 1 && F.BaseRegs.empty())
+ continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -3624,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// TODO: Use a more targeted data structure.
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
- const Formula &F = LU.Formulae[L];
+ Formula F = LU.Formulae[L];
+ // FIXME: The code for the scaled and unscaled registers looks
+ // very similar but slightly different. Investigate if they
+ // could be merged. That way, we would not have to unscale the
+ // Formula.
+ F.Unscale();
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
@@ -3650,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
// OK, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3683,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
goto skip_formula;
// Ok, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -3936,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- if (F.BaseOffset == 0 || F.Scale != 0)
+ if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
continue;
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
@@ -4033,7 +4216,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
- const SCEV *Best = 0;
+ const SCEV *Best = nullptr;
unsigned BestNum = 0;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
@@ -4130,19 +4313,22 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- // Ignore formulae which do not use any of the required registers.
- bool SatisfiedReqReg = true;
+ // Ignore formulae which may not be ideal with respect to reusing the
+ // required registers (ReqRegs). The formula should use all required
+ // registers before introducing new ones.
+ int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
- if ((!F.ScaledReg || F.ScaledReg != Reg) &&
- std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ if ((F.ScaledReg && F.ScaledReg == Reg) ||
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
F.BaseRegs.end()) {
- SatisfiedReqReg = false;
- break;
+ --NumReqRegsToFind;
+ if (NumReqRegsToFind == 0)
+ break;
}
}
- if (!SatisfiedReqReg) {
+ if (NumReqRegsToFind != 0) {
// If none of the formulae satisfied the required registers, then we could
// clear ReqRegs and try again. Currently, we simply give up in this case.
continue;
@@ -4240,7 +4426,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
}
bool AllDominate = true;
- Instruction *BetterPos = 0;
+ Instruction *BetterPos = nullptr;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
@@ -4379,11 +4565,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
}
// Expand the ScaledReg portion.
- Value *ICmpScaledV = 0;
+ Value *ICmpScaledV = nullptr;
if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
@@ -4394,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
- // An interesting way of "folding" with an icmp is to use a negated
- // scale, which we'll implement by inserting it into the other operand
- // of the icmp.
- assert(F.Scale == -1 &&
- "The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ // Expand ScaleReg as if it was part of the base regs.
+ if (F.Scale == 1)
+ Ops.push_back(
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
+ else {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+ }
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
// Flush the operand list to suppress SCEVExpander hoisting address modes.
- if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ // This is skipped when the addressing mode will not be folded.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address &&
+ isAMCompletelyFolded(TTI, LU, F)) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
- ScaledS = SE.getMulExpr(ScaledS,
- SE.getConstant(ScaledS->getType(), F.Scale));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
+ if (F.Scale != 1)
+ ScaledS =
+ SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
@@ -4490,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
CI->setOperand(1, ICmpScaledV);
} else {
- assert(F.Scale == 0 &&
+ // A scale of 1 means that the scale has been expanded as part of the
+ // base regs.
+ assert((F.Scale == 0 || F.Scale == 1) &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
@@ -4531,7 +4728,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
Loop *PNLoop = LI.getLoopFor(Parent);
if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
- BasicBlock *NewBB = 0;
+ BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
NewBB = SplitCriticalEdge(BB, Parent, P,
/*MergeIdenticalEdges=*/true,
@@ -4560,7 +4757,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
- Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
@@ -4670,7 +4867,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
LI(P->getAnalysis<LoopInfo>()),
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
- IVIncInsertPos(0) {
+ IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index ecd350b..fc28fd2 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -12,7 +12,6 @@
// counts of loops easily.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopPass.h"
@@ -29,6 +28,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-unroll"
+
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
@@ -237,9 +238,12 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > Threshold) {
- DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << Threshold << "\n");
+ if (TripCount != 1 &&
+ (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) {
+ if (Size > Threshold)
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << Threshold << "\n");
+
bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
if (!AllowPartial && !(Runtime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 5954f4a..977c53a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -26,7 +26,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unswitch"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -53,6 +52,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "loop-unswitch"
+
STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
STATISTIC(NumSelects , "Number of selects unswitched");
@@ -96,7 +97,7 @@ namespace {
public:
LUAnalysisCache() :
- CurLoopInstructions(0), CurrentLoopProperties(0),
+ CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),
MaxSize(Threshold)
{}
@@ -151,8 +152,8 @@ namespace {
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
- currentLoop(0), DT(0), loopHeader(0),
- loopPreheader(0) {
+ currentLoop(nullptr), DT(nullptr), loopHeader(nullptr),
+ loopPreheader(nullptr) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
@@ -180,15 +181,6 @@ namespace {
BranchesInfo.forgetLoop(currentLoop);
}
- /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
- /// remove it.
- void RemoveLoopFromWorklist(Loop *L) {
- std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
- LoopProcessWorklist.end(), L);
- if (I != LoopProcessWorklist.end())
- LoopProcessWorklist.erase(I);
- }
-
void initLoopData() {
loopHeader = currentLoop->getHeader();
loopPreheader = currentLoop->getLoopPreheader();
@@ -212,9 +204,8 @@ namespace {
Instruction *InsertPt);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- void RemoveLoopFromHierarchy(Loop *L);
- bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
- BasicBlock **LoopExit = 0);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr,
+ BasicBlock **LoopExit = nullptr);
};
}
@@ -283,8 +274,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) {
LoopsProperties.erase(LIt);
}
- CurrentLoopProperties = 0;
- CurLoopInstructions = 0;
+ CurrentLoopProperties = nullptr;
+ CurLoopInstructions = nullptr;
}
// Mark case value as unswitched.
@@ -355,10 +346,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// We can never unswitch on vector conditions.
if (Cond->getType()->isVectorTy())
- return 0;
+ return nullptr;
// Constants should be folded, not unswitched on!
- if (isa<Constant>(Cond)) return 0;
+ if (isa<Constant>(Cond)) return nullptr;
// TODO: Handle: br (VARIANT|INVARIANT).
@@ -378,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
return RHS;
}
- return 0;
+ return nullptr;
}
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
@@ -389,7 +380,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
LPM = &LPM_Ref;
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : 0;
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
bool Changed = false;
@@ -461,7 +452,7 @@ bool LoopUnswitch::processCurrentLoop() {
// Find a value to unswitch on:
// FIXME: this should choose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = 0;
+ Constant *UnswitchVal = nullptr;
// Do not process same value again and again.
// At this point we have some cases already unswitched and
@@ -518,7 +509,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
// first exit.
- if (ExitBB != 0) return false;
+ if (ExitBB) return false;
ExitBB = BB;
return true;
}
@@ -545,10 +536,10 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
Visited.insert(L->getHeader()); // Branches to header make infinite loops.
- BasicBlock *ExitBB = 0;
+ BasicBlock *ExitBB = nullptr;
if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
return ExitBB;
- return 0;
+ return nullptr;
}
/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
@@ -569,7 +560,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
TerminatorInst *HeaderTerm = Header->getTerminator();
LLVMContext &Context = Header->getContext();
- BasicBlock *LoopExitBB = 0;
+ BasicBlock *LoopExitBB = nullptr;
if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
// If the header block doesn't end with a conditional branch on Cond, we
// can't handle it.
@@ -639,8 +630,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
Function *F = loopHeader->getParent();
- Constant *CondVal = 0;
- BasicBlock *ExitBlock = 0;
+ Constant *CondVal = nullptr;
+ BasicBlock *ExitBlock = nullptr;
if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
// If the condition is trivial, always unswitch. There is no code growth
@@ -948,17 +939,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
-/// become unwrapped, either because the backedge was deleted, or because the
-/// edge into the header was removed. If the edge into the header from the
-/// latch block was removed, the loop is unwrapped but subloops are still alive,
-/// so they just reparent loops. If the loops are actually dead, they will be
-/// removed later.
-void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
- LPM->deleteLoopFromQueue(L);
- RemoveLoopFromWorklist(L);
-}
-
// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
// the value specified by Val in the specified loop, or we know it does NOT have
// that value. Rewrite any uses of LIC or of properties correlated to it.
@@ -1020,7 +1000,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// If we know that LIC is not Val, use this info to simplify code.
SwitchInst *SI = dyn_cast<SwitchInst>(UI);
- if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+ if (!SI || !isa<ConstantInt>(Val)) continue;
SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
// Default case is live for multiple values.
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 7c0a623..4251ac4 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loweratomic"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +19,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "loweratomic"
+
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
IRBuilder<> Builder(CXI->getParent(), CXI);
Value *Ptr = CXI->getPointerOperand();
@@ -42,7 +43,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
Value *Val = RMWI->getValOperand();
LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Res = NULL;
+ Value *Res = nullptr;
switch (RMWI->getOperation()) {
default: llvm_unreachable("Unexpected RMW operation");
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2603c96..b6bc792 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -33,6 +32,8 @@
#include <list>
using namespace llvm;
+#define DEBUG_TYPE "memcpyopt"
+
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
@@ -49,7 +50,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
int64_t Offset = 0;
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (OpC == 0)
+ if (!OpC)
return VariableIdxFound = true;
if (OpC->isZero()) continue; // No offset.
@@ -89,12 +90,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
// If one pointer is a GEP and the other isn't, then see if the GEP is a
// constant offset from the base, as in "P" and "gep P, 1".
- if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
- if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
@@ -317,9 +318,9 @@ namespace {
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
- MD = 0;
- TLI = 0;
- DL = 0;
+ MD = nullptr;
+ TLI = nullptr;
+ DL = nullptr;
}
bool runOnFunction(Function &F) override;
@@ -373,7 +374,7 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
- if (DL == 0) return 0;
+ if (!DL) return nullptr;
  // Okay, so we now have a single store with a splatable value. Scan to find
  // all subsequent stores of the same value to offsets from the same pointer.
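  // (Editorial sketch:) e.g., four adjacent "store i8 0" instructions at
  // %p, %p+1, %p+2, and %p+3 can be collected here and later emitted as a
  // single memset(%p, 0, 4).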
@@ -426,7 +427,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// If we have no ranges, then we just had a single store with nothing that
// could be merged in. This is a very common case of course.
if (Ranges.empty())
- return 0;
+ return nullptr;
// If we had at least one store that could be merged in, add the starting
// store as well. We try to avoid this unless there is at least something
@@ -440,7 +441,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Now that we have full information about ranges, loop over the ranges and
  // emit memsets for anything big enough to be worthwhile.
- Instruction *AMemSet = 0;
+ Instruction *AMemSet = nullptr;
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
@@ -491,7 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
- if (DL == 0) return false;
+ if (!DL) return false;
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
@@ -500,7 +501,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
- CallInst *C = 0;
+ CallInst *C = nullptr;
if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
C = dyn_cast<CallInst>(ldep.getInst());
@@ -512,7 +513,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
E = C; I != E; --I) {
if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
- C = 0;
+ C = nullptr;
break;
}
}
@@ -603,7 +604,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
// Check that all of src is copied to dest.
- if (DL == 0) return false;
+ if (!DL) return false;
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
@@ -846,7 +847,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
// The optimizations after this point require the memcpy size.
ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (CopySize == 0) return false;
+ if (!CopySize) return false;
  // There are three possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
@@ -929,7 +930,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
- if (DL == 0) return false;
+ if (!DL) return false;
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
@@ -946,13 +947,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// a memcpy, see if we can byval from the source of the memcpy instead of the
// result.
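  // (Editorial sketch:) i.e., given "memcpy(%tmp, %src, n); call @f(byval %tmp)",
  // the call can be rewritten to "call @f(byval %src)" provided %src is not
  // modified between the memcpy and the call.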
MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
- if (MDep == 0 || MDep->isVolatile() ||
+ if (!MDep || MDep->isVolatile() ||
ByValArg->stripPointerCasts() != MDep->getDest())
return false;
  // The length of the memcpy must be greater than or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ if (!C1 || C1->getValue().getZExtValue() < ByValSize)
return false;
// Get the alignment of the byval. If the call doesn't specify the alignment,
@@ -1043,7 +1044,7 @@ bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
  // If we don't have at least memset and memcpy, there is little point in doing
@@ -1058,6 +1059,6 @@ bool MemCpyOpt::runOnFunction(Function &F) {
MadeChange = true;
}
- MD = 0;
+ MD = nullptr;
return MadeChange;
}
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 2f19935..7cce89e 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "partially-inline-libcalls"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "partially-inline-libcalls"
+
namespace {
class PartiallyInlineLibCalls : public FunctionPass {
public:
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index b6b4d97..986d6a4 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -20,7 +20,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reassociate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -42,6 +41,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "reassociate"
+
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
@@ -122,14 +123,14 @@ namespace {
public:
XorOpnd(Value *V);
- bool isInvalid() const { return SymbolicPart == 0; }
+ bool isInvalid() const { return SymbolicPart == nullptr; }
bool isOrExpr() const { return isOr; }
Value *getValue() const { return OrigVal; }
Value *getSymbolicPart() const { return SymbolicPart; }
unsigned getSymbolicRank() const { return SymbolicRank; }
const APInt &getConstPart() const { return ConstPart; }
- void Invalidate() { SymbolicPart = OrigVal = 0; }
+ void Invalidate() { SymbolicPart = OrigVal = nullptr; }
void setSymbolicRank(unsigned R) { SymbolicRank = R; }
// Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank.
@@ -236,7 +237,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
if (V->hasOneUse() && isa<Instruction>(V) &&
cast<Instruction>(V)->getOpcode() == Opcode)
return cast<BinaryOperator>(V);
- return 0;
+ return nullptr;
}
static bool isUnmovableInstruction(Instruction *I) {
@@ -284,7 +285,7 @@ void Reassociate::BuildRankMap(Function &F) {
unsigned Reassociate::getRank(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) {
+ if (!I) {
if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
return 0; // Otherwise it's a global or constant, rank 0.
}
@@ -705,7 +706,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
- BinaryOperator *ExpressionChanged = 0;
+ BinaryOperator *ExpressionChanged = nullptr;
for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
@@ -995,7 +996,7 @@ static Value *EmitAddTreeOfValues(Instruction *I,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
- if (!BO) return 0;
+ if (!BO) return nullptr;
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(BO, Tree);
@@ -1029,7 +1030,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
RewriteExprTree(BO, Factors);
- return 0;
+ return nullptr;
}
BasicBlock::iterator InsertPt = BO; ++InsertPt;
@@ -1114,7 +1115,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
++NumAnnihil;
}
}
- return 0;
+ return nullptr;
}
/// Helper function of CombineXorOpnd(). It creates a bitwise-and
@@ -1135,7 +1136,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
}
return Opnd;
}
- return 0;
+ return nullptr;
}
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
@@ -1261,7 +1262,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
return V;
if (Ops.size() == 1)
- return 0;
+ return nullptr;
SmallVector<XorOpnd, 8> Opnds;
SmallVector<XorOpnd*, 8> OpndPtrs;
@@ -1294,7 +1295,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
// Step 3: Combine adjacent operands
- XorOpnd *PrevOpnd = 0;
+ XorOpnd *PrevOpnd = nullptr;
bool Changed = false;
for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
XorOpnd *CurrOpnd = OpndPtrs[i];
@@ -1328,7 +1329,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
PrevOpnd = CurrOpnd;
} else {
CurrOpnd->Invalidate();
- PrevOpnd = 0;
+ PrevOpnd = nullptr;
}
Changed = true;
}
@@ -1358,7 +1359,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
}
}
- return 0;
+ return nullptr;
}
/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
@@ -1445,7 +1446,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
// where they are actually the same multiply.
unsigned MaxOcc = 0;
- Value *MaxOccVal = 0;
+ Value *MaxOccVal = nullptr;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
if (!BOp)
@@ -1543,7 +1544,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
}
- return 0;
+ return nullptr;
}
/// \brief Build up a vector of value/power pairs factoring a product.
@@ -1688,14 +1689,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I,
// We can only optimize the multiplies when there is a chain of more than
// three, such that a balanced tree might require fewer total multiplies.
if (Ops.size() < 4)
- return 0;
+ return nullptr;
// Try to turn linear trees of multiplies without other uses of the
// intermediate stages into minimal multiply DAGs with perfect sub-expression
// re-use.
SmallVector<Factor, 4> Factors;
if (!collectMultiplyFactors(Ops, Factors))
- return 0; // All distinct factors, so nothing left for us to do.
+ return nullptr; // All distinct factors, so nothing left for us to do.
IRBuilder<> Builder(I);
Value *V = buildMinimalMultiplyDAG(Builder, Factors);
@@ -1704,14 +1705,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I,
ValueEntry NewEntry = ValueEntry(getRank(V), V);
Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
- return 0;
+ return nullptr;
}
Value *Reassociate::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
- Constant *Cst = 0;
+ Constant *Cst = nullptr;
unsigned Opcode = I->getOpcode();
while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
Constant *C = cast<Constant>(Ops.pop_back_val().Op);
@@ -1761,7 +1762,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
if (Ops.size() != NumOps)
return OptimizeExpression(I, Ops);
- return 0;
+ return nullptr;
}
/// EraseInst - Zap the given instruction, adding interesting operands to the
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index d9809ce..b6023e2 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reg2mem"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
@@ -30,6 +29,8 @@
#include <list>
using namespace llvm;
+#define DEBUG_TYPE "reg2mem"
+
STATISTIC(NumRegsDemoted, "Number of registers demoted");
STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index b8f10e9..feeb231 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -17,7 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sccp"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -42,6 +41,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "sccp"
+
STATISTIC(NumInstRemoved, "Number of instructions removed");
STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
@@ -81,7 +82,7 @@ class LatticeVal {
}
public:
- LatticeVal() : Val(0, undefined) {}
+ LatticeVal() : Val(nullptr, undefined) {}
bool isUndefined() const { return getLatticeValue() == undefined; }
bool isConstant() const {
@@ -133,7 +134,7 @@ public:
ConstantInt *getConstantInt() const {
if (isConstant())
return dyn_cast<ConstantInt>(getConstant());
- return 0;
+ return nullptr;
}
void markForcedConstant(Constant *V) {
@@ -403,7 +404,7 @@ private:
if (Constant *C = dyn_cast<Constant>(V)) {
Constant *Elt = C->getAggregateElement(i);
- if (Elt == 0)
+ if (!Elt)
LV.markOverdefined(); // Unknown sort of constant.
else if (isa<UndefValue>(Elt))
; // Undef values remain undefined.
@@ -522,7 +523,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
LatticeVal BCValue = getValueState(BI->getCondition());
ConstantInt *CI = BCValue.getConstantInt();
- if (CI == 0) {
+ if (!CI) {
// Overdefined condition variables, and branches on unfoldable constant
// conditions, mean the branch could go either way.
if (!BCValue.isUndefined())
@@ -549,7 +550,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
- if (CI == 0) { // Overdefined or undefined condition?
+ if (!CI) { // Overdefined or undefined condition?
// All destinations are executable!
if (!SCValue.isUndefined())
Succs.assign(TI.getNumSuccessors(), true);
@@ -594,7 +595,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// Overdefined condition variables mean the branch could go either way,
// undef conditions mean that neither edge is feasible yet.
ConstantInt *CI = BCValue.getConstantInt();
- if (CI == 0)
+ if (!CI)
return !BCValue.isUndefined();
// Constant condition variables mean the branch can only go a single way.
@@ -612,7 +613,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
- if (CI == 0)
+ if (!CI)
return !SCValue.isUndefined();
return SI->findCaseValue(CI).getCaseSuccessor() == To;
@@ -626,7 +627,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
// visit Implementations - Something changed in this instruction, either an
@@ -667,7 +668,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// constant. If they are constant and don't agree, the PHI is overdefined.
// If there are no executable operands, the PHI remains undefined.
//
- Constant *OperandVal = 0;
+ Constant *OperandVal = nullptr;
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
LatticeVal IV = getValueState(PN.getIncomingValue(i));
if (IV.isUndefined()) continue; // Doesn't influence PHI node.
@@ -678,7 +679,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
if (IV.isOverdefined()) // PHI node becomes overdefined!
return markOverdefined(&PN);
- if (OperandVal == 0) { // Grab the first value.
+ if (!OperandVal) { // Grab the first value.
OperandVal = IV.getConstant();
continue;
}
@@ -774,7 +775,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
StructType *STy = dyn_cast<StructType>(IVI.getType());
- if (STy == 0)
+ if (!STy)
return markOverdefined(&IVI);
// If this has more than one index, we can't handle it, drive all results to
@@ -862,7 +863,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// If this is an AND or OR with 0 or -1, it doesn't matter that the other
// operand is overdefined.
if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
- LatticeVal *NonOverdefVal = 0;
+ LatticeVal *NonOverdefVal = nullptr;
if (!V1State.isOverdefined())
NonOverdefVal = &V1State;
else if (!V2State.isOverdefined())
@@ -1081,7 +1082,7 @@ void SCCPSolver::visitCallSite(CallSite CS) {
// The common case is that we aren't tracking the callee, either because we
// are not doing interprocedural analysis or the callee is indirect, or is
// external. Handle these cases first.
- if (F == 0 || F->isDeclaration()) {
+ if (!F || F->isDeclaration()) {
CallOverdefined:
// Void return and not tracking callee, just bail.
if (I->getType()->isVoidTy()) return;
@@ -1555,7 +1556,7 @@ bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SCCPSolver Solver(DL, TLI);
@@ -1684,7 +1685,7 @@ static bool AddressIsTaken(const GlobalValue *GV) {
bool IPSCCP::runOnModule(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SCCPSolver Solver(DL, TLI);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index ed5e618..04bf4f8 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -23,7 +23,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sroa"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -64,6 +63,8 @@
using namespace llvm;
+#define DEBUG_TYPE "sroa"
+
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
@@ -159,8 +160,8 @@ public:
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
- bool isDead() const { return getUse() == 0; }
- void kill() { UseAndIsSplittable.setPointer(0); }
+ bool isDead() const { return getUse() == nullptr; }
+ void kill() { UseAndIsSplittable.setPointer(nullptr); }
/// \brief Support for ordering ranges.
///
@@ -320,7 +321,7 @@ static Value *foldSelectInst(SelectInst &SI) {
if (SI.getOperand(1) == SI.getOperand(2))
return SI.getOperand(1);
- return 0;
+ return nullptr;
}
/// \brief Builder for the alloca slices.
@@ -642,7 +643,7 @@ private:
Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());
- return 0;
+ return nullptr;
}
void visitPHINode(PHINode &PN) {
@@ -724,7 +725,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
- PointerEscapingInstr(0) {
+ PointerEscapingInstr(nullptr) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
@@ -873,7 +874,7 @@ public:
for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = 0;
+ Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
      // may be zapped by an optimization pass in the future.
@@ -969,7 +970,7 @@ class SROA : public FunctionPass {
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree),
- C(0), DL(0), DT(0) {
+ C(nullptr), DL(nullptr), DT(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -1011,9 +1012,9 @@ INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
static Type *findCommonType(AllocaSlices::const_iterator B,
AllocaSlices::const_iterator E,
uint64_t EndOffset) {
- Type *Ty = 0;
+ Type *Ty = nullptr;
bool TyIsCommon = true;
- IntegerType *ITy = 0;
+ IntegerType *ITy = nullptr;
// Note that we need to look at *every* alloca slice's Use to ensure we
// always get consistent results regardless of the order of slices.
@@ -1024,7 +1025,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
continue;
- Type *UserTy = 0;
+ Type *UserTy = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
@@ -1074,7 +1075,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN,
- const DataLayout *DL = 0) {
+ const DataLayout *DL = nullptr) {
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1084,7 +1085,7 @@ static bool isSafePHIToSpeculate(PHINode &PN,
bool HaveLoad = false;
for (User *U : PN.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple())
+ if (!LI || !LI->isSimple())
return false;
// For now we only allow loads in the same block as the PHI. This is
@@ -1191,7 +1192,8 @@ static void speculatePHINodeLoads(PHINode &PN) {
///
/// We can do this to a select if its only uses are loads and if the operand
/// to the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) {
+static bool isSafeSelectToSpeculate(SelectInst &SI,
+ const DataLayout *DL = nullptr) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
bool TDerefable = TValue->isDereferenceablePointer();
@@ -1199,7 +1201,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) {
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple())
+ if (!LI || !LI->isSimple())
return false;
    // Both operands to the select need to be dereferenceable, either
@@ -1332,19 +1334,21 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
// We can't recurse through pointer types.
if (Ty->isPointerTy())
- return 0;
+ return nullptr;
// We try to analyze GEPs over vectors here, but note that these GEPs are
// extremely poorly defined currently. The long-term goal is to remove GEPing
// over a vector from the IR completely.
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
- if (ElementSizeInBits % 8)
- return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
+ if (ElementSizeInBits % 8 != 0) {
+        // GEPs over vector elements whose size is not a multiple of 8 bits
+        // are invalid.
+ return nullptr;
+ }
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(VecTy->getNumElements()))
- return 0;
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
@@ -1356,7 +1360,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(ArrTy->getNumElements()))
- return 0;
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
@@ -1366,17 +1370,17 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
- return 0;
+ return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
uint64_t StructOffset = Offset.getZExtValue();
if (StructOffset >= SL->getSizeInBytes())
- return 0;
+ return nullptr;
unsigned Index = SL->getElementContainingOffset(StructOffset);
Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
Type *ElementTy = STy->getElementType(Index);
if (Offset.uge(DL.getTypeAllocSize(ElementTy)))
- return 0; // The offset points into alignment padding.
+ return nullptr; // The offset points into alignment padding.
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
@@ -1402,14 +1406,14 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
// an i8.
if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
- return 0;
+ return nullptr;
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
- return 0; // We can't GEP through an unsized element.
+ return nullptr; // We can't GEP through an unsized element.
APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
if (ElementSize == 0)
- return 0; // Zero-length arrays can't help us build a natural GEP.
+ return nullptr; // Zero-length arrays can't help us build a natural GEP.
APInt NumSkippedElements = Offset.sdiv(ElementSize);
Offset -= NumSkippedElements * ElementSize;
@@ -1445,11 +1449,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
// We may end up computing an offset pointer that has the wrong type. If we
  // are never able to compute one directly that has the correct type, we'll
// fall back to it, so keep it around here.
- Value *OffsetPtr = 0;
+ Value *OffsetPtr = nullptr;
// Remember any i8 pointer we come across to re-use if we need to do a raw
// byte offset.
- Value *Int8Ptr = 0;
+ Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
Type *TargetTy = PointerTy->getPointerElementType();
@@ -2043,14 +2047,14 @@ public:
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
- VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : 0),
- ElementTy(VecTy ? VecTy->getElementType() : 0),
+ VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : nullptr),
+ ElementTy(VecTy ? VecTy->getElementType() : nullptr),
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
IntTy(IsIntegerPromotable
? Type::getIntNTy(
NewAI.getContext(),
DL.getTypeSizeInBits(NewAI.getAllocatedType()))
- : 0),
+ : nullptr),
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
@@ -2144,7 +2148,7 @@ private:
///
/// You can optionally pass a type to this routine and if that type's ABI
/// alignment is itself suitable, this will return zero.
- unsigned getSliceAlign(Type *Ty = 0) {
+ unsigned getSliceAlign(Type *Ty = nullptr) {
unsigned NewAIAlign = NewAI.getAlignment();
if (!NewAIAlign)
NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
@@ -2594,7 +2598,7 @@ private:
unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
unsigned NumElements = EndIndex - BeginIndex;
IntegerType *SubIntTy
- = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : nullptr;
// Reset the other pointer type to match the register type we're going to
// use, but using the address space of the original other pointer.
@@ -2992,22 +2996,22 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
return stripAggregateTypeWrapping(DL, Ty);
if (Offset > DL.getTypeAllocSize(Ty) ||
(DL.getTypeAllocSize(Ty) - Offset) < Size)
- return 0;
+ return nullptr;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
// We can't partition pointers...
if (SeqTy->isPointerTy())
- return 0;
+ return nullptr;
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
- return 0;
+ return nullptr;
} else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
if (NumSkippedElements >= VecTy->getNumElements())
- return 0;
+ return nullptr;
}
Offset -= NumSkippedElements * ElementSize;
@@ -3015,7 +3019,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
if (Offset > 0 || Size < ElementSize) {
// Bail if the partition ends in a different array element.
if ((Offset + Size) > ElementSize)
- return 0;
+ return nullptr;
// Recurse through the element type trying to peel off offset bytes.
return getTypePartition(DL, ElementTy, Offset, Size);
}
@@ -3026,20 +3030,20 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
- return 0;
+ return nullptr;
return ArrayType::get(ElementTy, NumElements);
}
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
- return 0;
+ return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
if (Offset >= SL->getSizeInBytes())
- return 0;
+ return nullptr;
uint64_t EndOffset = Offset + Size;
if (EndOffset > SL->getSizeInBytes())
- return 0;
+ return nullptr;
unsigned Index = SL->getElementContainingOffset(Offset);
Offset -= SL->getElementOffset(Index);
@@ -3047,12 +3051,12 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
Type *ElementTy = STy->getElementType(Index);
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
if (Offset >= ElementSize)
- return 0; // The offset points into alignment padding.
+ return nullptr; // The offset points into alignment padding.
// See if any partition must be contained by the element.
if (Offset > 0 || Size < ElementSize) {
if ((Offset + Size) > ElementSize)
- return 0;
+ return nullptr;
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
@@ -3065,14 +3069,14 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
if (EndOffset < SL->getSizeInBytes()) {
unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
if (Index == EndIndex)
- return 0; // Within a single element and its padding.
+ return nullptr; // Within a single element and its padding.
// Don't try to form "natural" types if the elements don't line up with the
// expected size.
// FIXME: We could potentially recurse down through the last element in the
// sub-struct to find a natural end point.
if (SL->getElementOffset(EndIndex) != EndOffset)
- return 0;
+ return nullptr;
assert(Index < EndIndex);
EE = STy->element_begin() + EndIndex;
@@ -3083,7 +3087,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
STy->isPacked());
const StructLayout *SubSL = DL.getStructLayout(SubTy);
if (Size != SubSL->getSizeInBytes())
- return 0; // The sub-struct doesn't have quite the size needed.
+ return nullptr; // The sub-struct doesn't have quite the size needed.
return SubTy;
}
@@ -3108,7 +3112,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
- Type *SliceTy = 0;
+ Type *SliceTy = nullptr;
if (Type *CommonUseTy = findCommonType(B, E, EndOffset))
if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize)
SliceTy = CommonUseTy;
@@ -3155,7 +3159,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// the alloca's alignment unconstrained.
if (Alignment <= DL->getABITypeAlignment(SliceTy))
Alignment = 0;
- NewAI = new AllocaInst(SliceTy, 0, Alignment,
+ NewAI = new AllocaInst(SliceTy, nullptr, Alignment,
AI.getName() + ".sroa." + Twine(B - S.begin()), &AI);
++NumNewAllocas;
}
@@ -3494,7 +3498,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
for (Use &Operand : I->operands())
if (Instruction *U = dyn_cast<Instruction>(Operand)) {
// Zero out the operand and see if it becomes trivially dead.
- Operand = 0;
+ Operand = nullptr;
if (isInstructionTriviallyDead(U))
DeadInsts.insert(U);
}
@@ -3612,7 +3616,7 @@ bool SROA::runOnFunction(Function &F) {
DL = &DLP->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : 0;
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 20d6daa..8e557aa 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -22,8 +22,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sample-profile"
-
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -54,6 +52,8 @@
using namespace llvm;
+#define DEBUG_TYPE "sample-profile"
+
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
static cl::opt<std::string> SampleProfileFile(
@@ -120,8 +120,8 @@ typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
class SampleFunctionProfile {
public:
SampleFunctionProfile()
- : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(0), PDT(0),
- LI(0), Ctx(0) {}
+ : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(nullptr),
+ PDT(nullptr), LI(nullptr), Ctx(nullptr) {}
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F, DominatorTree *DomTree,
@@ -315,7 +315,7 @@ protected:
/// \brief Name of the profile file to load.
StringRef Filename;
- /// \brief Flag indicating whether the profile input loaded succesfully.
+ /// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
};
}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index e950eba..f8f828c 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -64,6 +64,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeStructurizeCFGPass(Registry);
initializeSinkingPass(Registry);
initializeTailCallElimPass(Registry);
+ initializeSeparateConstOffsetFromGEPPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -181,6 +182,7 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createVerifierPass());
+ // FIXME: should this also add createDebugInfoVerifierPass()?
}
void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e7b5ab2..58192fc 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -19,7 +19,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -52,6 +51,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "scalarrepl"
+
STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
@@ -304,7 +305,7 @@ public:
explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
unsigned SLT)
: AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false),
- ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
+ ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false),
HadDynamicAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
@@ -332,8 +333,8 @@ private:
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// If we can't convert this scalar, or if mem2reg can trivially do it, bail
// out.
- if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
- return 0;
+ if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial)
+ return nullptr;
// If an alloca has only memset / memcpy uses, it may still have an Unknown
// ScalarKind. Treat it as an Integer below.
@@ -361,23 +362,24 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// Do not convert to scalar integer if the alloca size exceeds the
// scalar load threshold.
if (BitWidth > ScalarLoadThreshold)
- return 0;
+ return nullptr;
if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
!HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
- return 0;
+ return nullptr;
// Dynamic accesses on integers aren't yet supported. They need us to shift
// by a dynamic amount which could be difficult to work out as we might not
// know whether to use a left or right shift.
if (ScalarKind == Integer && HadDynamicAccess)
- return 0;
+ return nullptr;
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
- AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
- ConvertUsesToScalar(AI, NewAI, 0, 0);
+ AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "",
+ AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0, nullptr);
return NewAI;
}
@@ -508,7 +510,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value *GEPNonConstantIdx = 0;
+ Value *GEPNonConstantIdx = nullptr;
if (!GEP->hasAllConstantIndices()) {
if (!isa<VectorType>(PtrTy->getElementType()))
return false;
@@ -564,7 +566,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
if (NonConstantIdx)
return false;
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
- if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
+ if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
@@ -608,7 +610,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value* GEPNonConstantIdx = 0;
+ Value* GEPNonConstantIdx = nullptr;
if (!GEP->hasAllConstantIndices()) {
assert(!NonConstantIdx &&
"Dynamic GEP reading from dynamic GEP unsupported");
@@ -671,7 +673,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
- Old, Offset, 0, Builder);
+ Old, Offset, nullptr, Builder);
Builder.CreateStore(New, NewAI);
// If the load we just inserted is now dead, then the memset overwrote
@@ -809,7 +811,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
- 0, Builder);
+ nullptr, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -822,7 +824,8 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
- Offset+i*EltSize, 0, Builder);
+ Offset+i*EltSize, nullptr,
+ Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -938,7 +941,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
- 0, Builder);
+ nullptr, Builder);
}
return Old;
}
@@ -949,7 +952,8 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
- Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr,
+ Builder);
}
return Old;
}
@@ -1024,7 +1028,7 @@ bool SROA::runOnFunction(Function &F) {
return false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
bool Changed = performPromotion(F);
@@ -1054,7 +1058,7 @@ class AllocaPromoter : public LoadAndStorePromoter {
public:
AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
DIBuilder *DB)
- : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
+ : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
@@ -1100,7 +1104,7 @@ public:
for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = NULL;
+ Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
      // may be zapped by an optimization pass in the future.
@@ -1143,7 +1147,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
for (User *U : SI->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple()) return false;
+ if (!LI || !LI->isSimple()) return false;
  // Both operands to the select need to be dereferenceable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
@@ -1183,7 +1187,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
unsigned MaxAlign = 0;
for (User *U : PN->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple()) return false;
+ if (!LI || !LI->isSimple()) return false;
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
@@ -1380,7 +1384,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
LoadInst *&Load = InsertedLoads[Pred];
- if (Load == 0) {
+ if (!Load) {
Load = new LoadInst(PN->getIncomingValue(i),
PN->getName() + "." + Pred->getName(),
Pred->getTerminator());
@@ -1400,7 +1404,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
- DominatorTree *DT = 0;
+ DominatorTree *DT = nullptr;
if (HasDomTree)
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1537,7 +1541,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr,
AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
@@ -1548,7 +1552,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
ElementAllocas.reserve(AT->getNumElements());
Type *ElTy = AT->getElementType();
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
+ AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
WorkList.push_back(NA); // Add to worklist for recursive processing
@@ -1577,7 +1581,7 @@ void SROA::DeleteDeadInstructions() {
// Zero out the operand and see if it becomes trivially dead.
// (But, don't add allocas to the dead instruction list -- they are
// already on the worklist and will be deleted separately.)
- *OI = 0;
+ *OI = nullptr;
if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
DeadInsts.push_back(U);
}
@@ -1604,12 +1608,10 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
isSafeForScalarRepl(GEPI, GEPOffset, Info);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (Length == 0)
- return MarkUnsafe(Info, User);
- if (Length->isNegative())
+ if (!Length || Length->isNegative())
return MarkUnsafe(Info, User);
- isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+ isSafeMemAccess(Offset, Length->getZExtValue(), nullptr,
U.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1744,12 +1746,12 @@ static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
Type *&EltTy) {
if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
NumElts = AT->getNumElements();
- EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ EltTy = (NumElts == 0 ? nullptr : AT->getElementType());
return true;
}
if (StructType *ST = dyn_cast<StructType>(T)) {
NumElts = ST->getNumContainedTypes();
- EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0));
for (unsigned n = 1; n < NumElts; ++n) {
if (ST->getContainedType(n) != EltTy)
return false;
@@ -2038,7 +2040,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
// In this case, it must be the last GEP operand which is dynamic so keep that
// aside until we've found the constant GEP offset then add it back in at the
// end.
- Value* NonConstantIdx = 0;
+ Value* NonConstantIdx = nullptr;
if (!GEPI->hasAllConstantIndices())
NonConstantIdx = Indices.pop_back_val();
Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
@@ -2108,7 +2110,8 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
if (NewOffset) {
// Splice the first element and index 'NewOffset' bytes in. SROA will
// split the alloca again later.
- Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
+ unsigned AS = AI->getType()->getAddressSpace();
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
@@ -2155,7 +2158,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// appropriate type. The "Other" pointer is the pointer that goes to memory
// that doesn't have anything to do with the alloca that we are promoting. For
// memset, this Value* stays null.
- Value *OtherPtr = 0;
+ Value *OtherPtr = nullptr;
unsigned MemAlignment = MI->getAlignment();
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
if (Inst == MTI->getRawDest())
@@ -2207,7 +2210,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
- Value *OtherElt = 0;
+ Value *OtherElt = nullptr;
unsigned OtherEltAlign = MemAlignment;
if (OtherPtr) {
@@ -2449,7 +2452,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
- const StructLayout *Layout = 0;
+ const StructLayout *Layout = nullptr;
uint64_t ArrayEltBitOffset = 0;
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
Layout = DL->getStructLayout(EltSTy);
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 006375c..7a73f11 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scalarizer"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "scalarizer"
+
namespace {
// Used to store the scattered form of a vector.
typedef SmallVector<Value *, 8> ValueVector;
@@ -48,7 +49,7 @@ public:
// insert them before BBI in BB. If Cache is nonnull, use it to cache
// the results.
Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
- ValueVector *cachePtr = 0);
+ ValueVector *cachePtr = nullptr);
// Return component I, creating a new Value for it if necessary.
Value *operator[](unsigned I);
@@ -101,7 +102,7 @@ struct BinarySplitter {
// Information about a load or store that we're scalarizing.
struct VectorLayout {
- VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {}
+ VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}
// Return the alignment of element I.
uint64_t getElemAlign(unsigned I) {
@@ -186,9 +187,9 @@ Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
Ty = PtrTy->getElementType();
Size = Ty->getVectorNumElements();
if (!CachePtr)
- Tmp.resize(Size, 0);
+ Tmp.resize(Size, nullptr);
else if (CachePtr->empty())
- CachePtr->resize(Size, 0);
+ CachePtr->resize(Size, nullptr);
else
assert(Size == CachePtr->size() && "Inconsistent vector sizes");
}
@@ -241,7 +242,7 @@ bool Scalarizer::doInitialization(Module &M) {
bool Scalarizer::runOnFunction(Function &F) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
BasicBlock *BB = BBI;
for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
new file mode 100644
index 0000000..b8529e1
--- /dev/null
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -0,0 +1,623 @@
+//===-- SeparateConstOffsetFromGEP.cpp - Separate const offsets --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loop unrolling may create many similar GEPs for array accesses.
+// e.g., a 2-level loop
+//
+// float a[32][32]; // global variable
+//
+// for (int i = 0; i < 2; ++i) {
+// for (int j = 0; j < 2; ++j) {
+// ...
+// ... = a[x + i][y + j];
+// ...
+// }
+// }
+//
+// will probably be unrolled to:
+//
+// gep %a, 0, %x, %y; load
+// gep %a, 0, %x, %y + 1; load
+// gep %a, 0, %x + 1, %y; load
+// gep %a, 0, %x + 1, %y + 1; load
+//
+// LLVM's GVN does not use partial redundancy elimination yet, and is thus
+// unable to reuse (gep %a, 0, %x, %y). As a result, this missed optimization
+// incurs a significant slowdown on targets with limited addressing modes. For
+// instance,
+// because the PTX target does not support the reg+reg addressing mode, the
+// NVPTX backend emits PTX code that literally computes the pointer address of
+// each GEP, wasting tons of registers. It emits the following PTX for the
+// first load and similar PTX for other loads.
+//
+// mov.u32 %r1, %x;
+// mov.u32 %r2, %y;
+// mul.wide.u32 %rl2, %r1, 128;
+// mov.u64 %rl3, a;
+// add.s64 %rl4, %rl3, %rl2;
+// mul.wide.u32 %rl5, %r2, 4;
+// add.s64 %rl6, %rl4, %rl5;
+// ld.global.f32 %f1, [%rl6];
+//
+// To reduce the register pressure, the optimization implemented in this file
+// merges the common part of a group of GEPs, so we can compute each pointer
+// address by adding a simple offset to the common part, saving many registers.
+//
+// It works by splitting each GEP into a variadic base and a constant offset.
+// The variadic base can be computed once and reused by multiple GEPs, and the
+// constant offsets can be nicely folded into the reg+immediate addressing mode
+// (supported by most targets) without using any extra register.
+//
+// For instance, we transform the four GEPs and four loads in the above example
+// into:
+//
+// base = gep a, 0, x, y
+// load base
+// load base + 1 * sizeof(float)
+// load base + 32 * sizeof(float)
+// load base + 33 * sizeof(float)
+//
+// Given the transformed IR, a backend that supports the reg+immediate
+// addressing mode can easily fold the pointer arithmetic into the loads. For
+// example, the NVPTX backend can fold the pointer arithmetic into the
+// ld.global.f32 instructions, and the resulting PTX uses far fewer registers.
+//
+// mov.u32 %r1, %tid.x;
+// mov.u32 %r2, %tid.y;
+// mul.wide.u32 %rl2, %r1, 128;
+// mov.u64 %rl3, a;
+// add.s64 %rl4, %rl3, %rl2;
+// mul.wide.u32 %rl5, %r2, 4;
+// add.s64 %rl6, %rl4, %rl5;
+// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
+// ld.global.f32 %f2, [%rl6+4]; // much better
+// ld.global.f32 %f3, [%rl6+128]; // much better
+// ld.global.f32 %f4, [%rl6+132]; // much better
+//
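+// An editorial sketch (not part of the original comment; LLVM 3.5 IR syntax
+// assumed): the transformed IR for the four loads above would look roughly
+// like
+//
+//   %base = getelementptr [32 x [32 x float]]* @a, i64 0, i64 %x, i64 %y
+//   %p1   = getelementptr float* %base, i64 1    ; &a[x][y+1]
+//   %p2   = getelementptr float* %base, i64 32   ; &a[x+1][y]
+//   %p3   = getelementptr float* %base, i64 33   ; &a[x+1][y+1]
+//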
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
+ "disable-separate-const-offset-from-gep", cl::init(false),
+ cl::desc("Do not separate the constant offset from a GEP instruction"),
+ cl::Hidden);
+
+namespace {
+
+/// \brief A helper class for separating a constant offset from a GEP index.
+///
+/// In real programs, a GEP index may be more complicated than a simple addition
+/// of something and a constant integer which can be trivially split. For
+/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
+/// constant offset, so that we can separate the index into (a << 3) + b and 5.
+///
+/// Therefore, this class looks into the expression that computes a given GEP
+/// index, and tries to find a constant integer that can be hoisted to the
+/// outermost level of the expression as an addition. Not every constant in an
+/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
+/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5). In the latter case,
+/// however, -instcombine has probably already optimized (3 * (a + 5)) to
+/// (3 * a + 15).
+class ConstantOffsetExtractor {
+ public:
+ /// Extracts a constant offset from the given GEP index. It outputs the
+  /// numeric value of the extracted constant offset (0 on failure), and a
+ /// new index representing the remainder (equal to the original index minus
+ /// the constant offset).
+ /// \p Idx The given GEP index
+ /// \p NewIdx The new index to replace
+ /// \p DL The datalayout of the module
+ /// \p IP Calculating the new index requires new instructions. IP indicates
+ /// where to insert them (typically right before the GEP).
+ static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
+ Instruction *IP);
+ /// Looks for a constant offset without extracting it. The meaning of the
+ /// arguments and the return value are the same as Extract.
+ static int64_t Find(Value *Idx, const DataLayout *DL);
+
+ private:
+ ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
+ : DL(Layout), IP(InsertionPt) {}
+  /// Searches the expression that computes V for a constant offset. If the
+  /// search succeeds, updates UserChain as a path from V to the constant
+  /// offset.
+ int64_t find(Value *V);
+ /// A helper function to look into both operands of a binary operator U.
+ /// \p IsSub Whether U is a sub operator. If so, we need to negate the
+ /// constant offset at some point.
+ int64_t findInEitherOperand(User *U, bool IsSub);
+ /// After finding the constant offset and how it is reached from the GEP
+ /// index, we build a new index which is a clone of the old one except the
+  /// constant offset is removed. For example, given (a + (b + 5)) and knowing
+ /// the constant offset is 5, this function returns (a + b).
+ ///
+ /// We cannot simply change the constant to zero because the expression that
+ /// computes the index or its intermediate result may be used by others.
+ Value *rebuildWithoutConstantOffset();
+  /// A helper function for rebuildWithoutConstantOffset that rebuilds the
+  /// direct user (U) of the constant offset (C).
+ Value *rebuildLeafWithoutConstantOffset(User *U, Value *C);
+  /// Returns a clone of U with its first occurrence of From replaced by To.
+ Value *cloneAndReplace(User *U, Value *From, Value *To);
+
+  /// Returns true if LHS and RHS have no bits in common, i.e., LHS & RHS == 0.
+ bool NoCommonBits(Value *LHS, Value *RHS) const;
+ /// Computes which bits are known to be one or zero.
+ /// \p KnownOne Mask of all bits that are known to be one.
+ /// \p KnownZero Mask of all bits that are known to be zero.
+ void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
+  /// Finds the first use of Used in U. Returns (unsigned)-1 if not found.
+ static unsigned FindFirstUse(User *U, Value *Used);
+ /// Returns whether OPC (sext or zext) can be distributed to the operands of
+ /// BO. e.g., sext can be distributed to the operands of an "add nsw" because
+ /// sext (add nsw a, b) == add nsw (sext a), (sext b).
+ static bool Distributable(unsigned OPC, BinaryOperator *BO);
+
+ /// The path from the constant offset to the old GEP index. e.g., if the GEP
+ /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
+ /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
+ /// UserChain[2] will be the entire expression "a * b + (c + 5)".
+ ///
+ /// This path helps rebuildWithoutConstantOffset rebuild the new GEP index.
+ SmallVector<User *, 8> UserChain;
+ /// The data layout of the module. Used in ComputeKnownBits.
+ const DataLayout *DL;
+  /// Insertion position of cloned instructions.
+  Instruction *IP;
+};
+
+/// \brief A pass that tries to split every GEP in the function into a variadic
+/// base and a constant offset. It is a FunctionPass because searching for the
+/// constant offset may inspect other basic blocks.
+class SeparateConstOffsetFromGEP : public FunctionPass {
+ public:
+ static char ID;
+ SeparateConstOffsetFromGEP() : FunctionPass(ID) {
+ initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DataLayoutPass>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+ bool runOnFunction(Function &F) override;
+
+ private:
+ /// Tries to split the given GEP into a variadic base and a constant offset,
+ /// and returns true if the splitting succeeds.
+ bool splitGEP(GetElementPtrInst *GEP);
+ /// Finds the constant offset within each index, and accumulates them. This
+ /// function only inspects the GEP without changing it. The output
+ /// NeedsExtraction indicates whether we can extract a non-zero constant
+ /// offset from any index.
+ int64_t accumulateByteOffset(GetElementPtrInst *GEP, const DataLayout *DL,
+ bool &NeedsExtraction);
+};
+} // anonymous namespace
+
+char SeparateConstOffsetFromGEP::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ "Split GEPs to a variadic base and a constant offset for better CSE", false,
+ false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
+INITIALIZE_PASS_END(
+ SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ "Split GEPs to a variadic base and a constant offset for better CSE", false,
+ false)
+
+FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() {
+ return new SeparateConstOffsetFromGEP();
+}
+
+bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) {
+ assert(OPC == Instruction::SExt || OPC == Instruction::ZExt);
+
+ // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
+ // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub) {
+ return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) ||
+ (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap());
+ }
+
+ // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B)
+ // -instcombine also leverages this invariant to do the reverse
+ // transformation to reduce integer casts.
+ return BO->getOpcode() == Instruction::And ||
+ BO->getOpcode() == Instruction::Or ||
+ BO->getOpcode() == Instruction::Xor;
+}
+
+int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) {
+ assert(U->getNumOperands() == 2);
+ int64_t ConstantOffset = find(U->getOperand(0));
+ // If we found a constant offset in the left operand, stop and return that.
+ // This shortcut might cause us to miss opportunities of combining the
+ // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
+ // However, such cases are probably already handled by -instcombine,
+ // given this pass runs after the standard optimizations.
+ if (ConstantOffset != 0) return ConstantOffset;
+ ConstantOffset = find(U->getOperand(1));
+ // If U is a sub operator, negate the constant offset found in the right
+ // operand.
+ return IsSub ? -ConstantOffset : ConstantOffset;
+}
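+
+// A worked example of the negation above (illustrative): for U = a - (b + 5),
+// find(a) returns 0, find(b + 5) returns 5, and IsSub negates the result to
+// -5, consistent with a - (b + 5) == (a - b) + (-5).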
+
+int64_t ConstantOffsetExtractor::find(Value *V) {
+ // TODO(jingyue): We can even trace into integer/pointer casts, such as
+ // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
+ // integers because it gives good enough results for our benchmarks.
+ assert(V->getType()->isIntegerTy());
+
+ User *U = dyn_cast<User>(V);
+ // We cannot do much with Values that are not a User, such as BasicBlock and
+ // MDNode.
+ if (U == nullptr) return 0;
+
+ int64_t ConstantOffset = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U)) {
+ // Hooray, we found it!
+ ConstantOffset = CI->getSExtValue();
+ } else if (Operator *O = dyn_cast<Operator>(U)) {
+ // The GEP index may be more complicated than a simple addition of a
+    // variable and a constant. Therefore, we trace into subexpressions for more
+ // hoisting opportunities.
+ switch (O->getOpcode()) {
+ case Instruction::Add: {
+ ConstantOffset = findInEitherOperand(U, false);
+ break;
+ }
+ case Instruction::Sub: {
+ ConstantOffset = findInEitherOperand(U, true);
+ break;
+ }
+ case Instruction::Or: {
+ // If LHS and RHS don't have common bits, (LHS | RHS) is equivalent to
+ // (LHS + RHS).
+ if (NoCommonBits(U->getOperand(0), U->getOperand(1)))
+ ConstantOffset = findInEitherOperand(U, false);
+ break;
+ }
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ // We trace into sext/zext if the operator can be distributed to its
+      // operand. e.g., we can trace into "sext (add nsw a, 5)" and extract
+      // the constant 5, because
+ // sext (add nsw a, 5) == add nsw (sext a), 5
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) {
+ if (Distributable(O->getOpcode(), BO))
+ ConstantOffset = find(U->getOperand(0));
+ }
+ break;
+ }
+ }
+ }
+  // If we found a non-zero constant offset, add it to the path for future
+ // transformation (rebuildWithoutConstantOffset). Zero is a valid constant
+ // offset, but doesn't help this optimization.
+ if (ConstantOffset != 0)
+ UserChain.push_back(U);
+ return ConstantOffset;
+}
+
+unsigned ConstantOffsetExtractor::FindFirstUse(User *U, Value *Used) {
+ for (unsigned I = 0, E = U->getNumOperands(); I < E; ++I) {
+ if (U->getOperand(I) == Used)
+ return I;
+ }
+ return -1;
+}
+
+Value *ConstantOffsetExtractor::cloneAndReplace(User *U, Value *From,
+ Value *To) {
+  // Finds in U the first use of From. It is safe to ignore any later
+  // occurrences of From, because findInEitherOperand similarly stops searching
+  // the right operand once the left operand yields a non-zero constant offset.
+ unsigned OpNo = FindFirstUse(U, From);
+ assert(OpNo != (unsigned)-1 && "UserChain wasn't built correctly");
+
+ // ConstantOffsetExtractor::find only follows Operators (i.e., Instructions
+ // and ConstantExprs). Therefore, U is either an Instruction or a
+ // ConstantExpr.
+ if (Instruction *I = dyn_cast<Instruction>(U)) {
+ Instruction *Clone = I->clone();
+ Clone->setOperand(OpNo, To);
+ Clone->insertBefore(IP);
+ return Clone;
+ }
+ // cast<Constant>(To) is safe because a ConstantExpr only uses Constants.
+ return cast<ConstantExpr>(U)
+ ->getWithOperandReplaced(OpNo, cast<Constant>(To));
+}
+
+Value *ConstantOffsetExtractor::rebuildLeafWithoutConstantOffset(User *U,
+ Value *C) {
+ assert(U->getNumOperands() <= 2 &&
+ "We didn't trace into any operator with more than 2 operands");
+ // If U has only one operand which is the constant offset, removing the
+ // constant offset leaves U as a null value.
+ if (U->getNumOperands() == 1)
+ return Constant::getNullValue(U->getType());
+
+ // U->getNumOperands() == 2
+ unsigned OpNo = FindFirstUse(U, C); // U->getOperand(OpNo) == C
+ assert(OpNo < 2 && "UserChain wasn't built correctly");
+ Value *TheOther = U->getOperand(1 - OpNo); // The other operand of U
+ // If U = C - X, removing C makes U = -X; otherwise U will simply be X.
+ if (!isa<SubOperator>(U) || OpNo == 1)
+ return TheOther;
+ if (isa<ConstantExpr>(U))
+ return ConstantExpr::getNeg(cast<Constant>(TheOther));
+ return BinaryOperator::CreateNeg(TheOther, "", IP);
+}
+
+Value *ConstantOffsetExtractor::rebuildWithoutConstantOffset() {
+ assert(UserChain.size() > 0 && "you at least found a constant, right?");
+ // Start with the constant and go up through UserChain, each time building a
+ // clone of the subexpression but with the constant removed.
+  // e.g., to build a clone of (a + (b + (c + 5))) but with the 5 removed, we
+  // first build c, then (b + c), and finally (a + (b + c)).
+ //
+  // Fast path: if the GEP index is a constant, simply return 0.
+ if (UserChain.size() == 1)
+ return ConstantInt::get(UserChain[0]->getType(), 0);
+
+ Value *Remainder =
+ rebuildLeafWithoutConstantOffset(UserChain[1], UserChain[0]);
+ for (size_t I = 2; I < UserChain.size(); ++I)
+ Remainder = cloneAndReplace(UserChain[I], UserChain[I - 1], Remainder);
+ return Remainder;
+}
+
+int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
+ const DataLayout *DL,
+ Instruction *IP) {
+ ConstantOffsetExtractor Extractor(DL, IP);
+ // Find a non-zero constant offset first.
+ int64_t ConstantOffset = Extractor.find(Idx);
+ if (ConstantOffset == 0)
+ return 0;
+ // Then rebuild a new index with the constant removed.
+ NewIdx = Extractor.rebuildWithoutConstantOffset();
+ return ConstantOffset;
+}
+
+int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL) {
+ return ConstantOffsetExtractor(DL, nullptr).find(Idx);
+}
+
+void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
+ APInt &KnownZero) const {
+ IntegerType *IT = cast<IntegerType>(V->getType());
+ KnownOne = APInt(IT->getBitWidth(), 0);
+ KnownZero = APInt(IT->getBitWidth(), 0);
+ llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
+}
+
+bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
+ assert(LHS->getType() == RHS->getType() &&
+ "LHS and RHS should have the same type");
+ APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero;
+ ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero);
+ ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero);
+ return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
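+
+// For instance (hypothetical IR): if %a = shl i64 %x, 3, the low three bits of
+// %a are known zero, so (or i64 %a, 5) introduces no carries and behaves like
+// (add i64 %a, 5); find() can then extract the constant 5 from the or.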
+}
+
+int64_t SeparateConstOffsetFromGEP::accumulateByteOffset(
+ GetElementPtrInst *GEP, const DataLayout *DL, bool &NeedsExtraction) {
+ NeedsExtraction = false;
+ int64_t AccumulativeByteOffset = 0;
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ // Tries to extract a constant offset from this GEP index.
+ int64_t ConstantOffset =
+ ConstantOffsetExtractor::Find(GEP->getOperand(I), DL);
+ if (ConstantOffset != 0) {
+ NeedsExtraction = true;
+ // A GEP may have multiple indices. We accumulate the extracted
+ // constant offset to a byte offset, and later offset the remainder of
+ // the original GEP with this byte offset.
+ AccumulativeByteOffset +=
+ ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+ }
+ }
+ }
+ return AccumulativeByteOffset;
+}
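+
+// A small worked example (hypothetical types and values): for
+//   %p = getelementptr [10 x [20 x float]]* %a, i64 0, i64 %i1, i64 %j2
+// where %i1 = add i64 %i, 1 and %j2 = add i64 %j, 2, Find() extracts 1 and 2
+// from the two sequential indices, so the accumulated byte offset is
+// 1 * sizeof([20 x float]) + 2 * sizeof(float) = 80 + 8 = 88.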
+
+bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
+ // Skip vector GEPs.
+ if (GEP->getType()->isVectorTy())
+ return false;
+
+ // The backend can already nicely handle the case where all indices are
+ // constant.
+ if (GEP->hasAllConstantIndices())
+ return false;
+
+ bool Changed = false;
+
+ // Shortcuts integer casts. Eliminating these explicit casts can make
+ // subsequent optimizations more obvious: ConstantOffsetExtractor needn't
+ // trace into these casts.
+ if (GEP->isInBounds()) {
+ // Doing this to inbounds GEPs is safe because their indices are guaranteed
+ // to be non-negative and in bounds.
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ if (Operator *O = dyn_cast<Operator>(GEP->getOperand(I))) {
+ if (O->getOpcode() == Instruction::SExt ||
+ O->getOpcode() == Instruction::ZExt) {
+ GEP->setOperand(I, O->getOperand(0));
+ Changed = true;
+ }
+ }
+ }
+ }
+ }
+
+ const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ bool NeedsExtraction;
+ int64_t AccumulativeByteOffset =
+ accumulateByteOffset(GEP, DL, NeedsExtraction);
+
+ if (!NeedsExtraction)
+ return Changed;
+ // Before really splitting the GEP, check whether the backend supports the
+  // addressing mode we are about to produce. If not, this splitting probably
+ // won't be beneficial.
+ TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+ if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
+ /*BaseGV=*/nullptr, AccumulativeByteOffset,
+ /*HasBaseReg=*/true, /*Scale=*/0)) {
+ return Changed;
+ }
+
+ // Remove the constant offset in each GEP index. The resultant GEP computes
+ // the variadic base.
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ Value *NewIdx = nullptr;
+ // Tries to extract a constant offset from this GEP index.
+ int64_t ConstantOffset =
+ ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
+ if (ConstantOffset != 0) {
+ assert(NewIdx != nullptr &&
+ "ConstantOffset != 0 implies NewIdx is set");
+ GEP->setOperand(I, NewIdx);
+ // Clear the inbounds attribute because the new index may be off-bound.
+ // e.g.,
+ //
+ // b = add i64 a, 5
+ // addr = gep inbounds float* p, i64 b
+ //
+ // is transformed to:
+ //
+ // addr2 = gep float* p, i64 a
+ // addr = gep float* addr2, i64 5
+ //
+ // If a is -4, although the old index b is in bounds, the new index a is
+ // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
+ // inbounds keyword is not present, the offsets are added to the base
+ // address with silently-wrapping two's complement arithmetic".
+      // Therefore, the final code will be semantically equivalent.
+ //
+ // TODO(jingyue): do some range analysis to keep as many inbounds as
+ // possible. GEPs with inbounds are more friendly to alias analysis.
+ GEP->setIsInBounds(false);
+ Changed = true;
+ }
+ }
+ }
+
+ // Offsets the base with the accumulative byte offset.
+ //
+ // %gep ; the base
+ // ... %gep ...
+ //
+ // => add the offset
+ //
+ // %gep2 ; clone of %gep
+ // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+ // %gep ; will be removed
+ // ... %gep ...
+ //
+ // => replace all uses of %gep with %new.gep and remove %gep
+ //
+ // %gep2 ; clone of %gep
+ // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+ // ... %new.gep ...
+ //
+ // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
+ // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
+ // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
+ // type of %gep.
+ //
+ // %gep2 ; clone of %gep
+ // %0 = bitcast %gep2 to i8*
+ // %uglygep = gep %0, <offset>
+ // %new.gep = bitcast %uglygep to <type of %gep>
+ // ... %new.gep ...
+ Instruction *NewGEP = GEP->clone();
+ NewGEP->insertBefore(GEP);
+
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ uint64_t ElementTypeSizeOfGEP =
+ DL->getTypeAllocSize(GEP->getType()->getElementType());
+ if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
+    // Very likely. As long as %gep is naturally aligned, the byte offset we
+ // extracted should be a multiple of sizeof(*%gep).
+ // Per ANSI C standard, signed / unsigned = unsigned. Therefore, we
+ // cast ElementTypeSizeOfGEP to signed.
+ int64_t Index =
+ AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
+ NewGEP = GetElementPtrInst::Create(
+ NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
+ } else {
+ // Unlikely but possible. For example,
+ // #pragma pack(1)
+ // struct S {
+ // int a[3];
+ // int64 b[8];
+ // };
+ // #pragma pack()
+ //
+ // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
+ // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
+ // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
+ // sizeof(int64).
+ //
+ // Emit an uglygep in this case.
+ Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
+ GEP->getPointerAddressSpace());
+ NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
+ NewGEP = GetElementPtrInst::Create(
+ NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
+ "uglygep", GEP);
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
+ }
+
+ GEP->replaceAllUsesWith(NewGEP);
+ GEP->eraseFromParent();
+
+ return true;
+}
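+
+// End-to-end sketch on hypothetical IR (assuming the target reports reg+imm
+// addressing as legal): given
+//   %b = add i64 %i, 1
+//   %p = getelementptr inbounds float* %a, i64 %b
+// splitGEP rewrites this (dropping inbounds, as explained above) to
+//   %p2 = getelementptr float* %a, i64 %i
+//   %p  = getelementptr float* %p2, i64 1
+// because the accumulated byte offset 4 is a multiple of sizeof(float).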
+
+bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
+ if (DisableSeparateConstOffsetFromGEP)
+ return false;
+
+ bool Changed = false;
+ for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
+ Changed |= splitGEP(GEP);
+ }
+      // No need to split GEP ConstantExprs because all their indices are
+      // constant already.
+ }
+ }
+ return Changed;
+}
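+
+// A minimal invocation sketch (the pass name matches the INITIALIZE_PASS
+// registration above):
+//   opt -separate-const-offset-from-gep -S input.ll -o output.ll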
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ceae5a7..5d5606b 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -21,7 +21,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "simplifycfg"
+
STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
@@ -71,7 +72,7 @@ FunctionPass *llvm::createCFGSimplificationPass() {
static bool mergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
- BasicBlock *RetBlock = 0;
+ BasicBlock *RetBlock = nullptr;
// Scan all the blocks in the function, looking for empty return blocks.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
@@ -79,7 +80,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// Only look at return blocks.
ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
- if (Ret == 0) continue;
+ if (!Ret) continue;
// Only look at the block if it is empty or the only other thing in it is a
// single PHI node that is the operand to the return.
@@ -98,7 +99,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
}
// If this is the first returning block, remember it and keep going.
- if (RetBlock == 0) {
+ if (!RetBlock) {
RetBlock = &BB;
continue;
}
@@ -119,7 +120,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// If the canonical return block has no PHI node, create one now.
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
- if (RetBlockPHI == 0) {
+ if (!RetBlockPHI) {
Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
@@ -173,7 +174,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
EverChanged |= iterativelySimplifyCFG(F, TTI, DL);
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 4107374..482c33a 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sink"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -25,6 +24,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "sink"
+
STATISTIC(NumSunk, "Number of instructions sunk");
STATISTIC(NumSinkIter, "Number of sinking iterations");
@@ -203,7 +204,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
// Don't sink instructions into a loop.
Loop *succ = LI->getLoopFor(SuccToSinkTo);
Loop *cur = LI->getLoopFor(Inst->getParent());
- if (succ != 0 && succ != cur)
+ if (succ != nullptr && succ != cur)
return false;
}
@@ -237,14 +238,14 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
- BasicBlock *SuccToSinkTo = 0;
+ BasicBlock *SuccToSinkTo = nullptr;
// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
// Look at all the postdominators and see if we can sink it in one.
DomTreeNode *DTN = DT->getNode(Inst->getParent());
for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
- I != E && SuccToSinkTo == 0; ++I) {
+ I != E && SuccToSinkTo == nullptr; ++I) {
BasicBlock *Candidate = (*I)->getBlock();
if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
IsAcceptableTarget(Inst, Candidate))
@@ -254,13 +255,13 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// If no suitable postdominator was found, look at all the successors and
// decide which one we should sink to, if any.
for (succ_iterator I = succ_begin(Inst->getParent()),
- E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) {
+ E = succ_end(Inst->getParent()); I != E && !SuccToSinkTo; ++I) {
if (IsAcceptableTarget(Inst, *I))
SuccToSinkTo = *I;
}
// If we couldn't find a block to sink to, ignore this instruction.
- if (SuccToSinkTo == 0)
+ if (!SuccToSinkTo)
return false;
DEBUG(dbgs() << "Sink" << *Inst << " (";
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 8fd2268..7b77ae1 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "structurizecfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
@@ -21,6 +20,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "structurizecfg"
+
namespace {
// Definition of the complex types used in this pass.
@@ -64,14 +65,14 @@ public:
/// \brief Start a new query
NearestCommonDominator(DominatorTree *DomTree) {
DT = DomTree;
- Result = 0;
+ Result = nullptr;
}
/// \brief Add BB to the resulting dominator
void addBlock(BasicBlock *BB, bool Remember = true) {
DomTreeNode *Node = DT->getNode(BB);
- if (Result == 0) {
+ if (!Result) {
unsigned Numbering = 0;
for (;Node;Node = Node->getIDom())
IndexMap[Node] = ++Numbering;
@@ -279,7 +280,7 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
void StructurizeCFG::orderNodes() {
scc_iterator<Region *> I = scc_begin(ParentRegion);
for (Order.clear(); !I.isAtEnd(); ++I) {
- std::vector<RegionNode *> &Nodes = *I;
+ const std::vector<RegionNode *> &Nodes = *I;
Order.append(Nodes.begin(), Nodes.end());
}
}
@@ -453,10 +454,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
Value *Default = Loops ? BoolTrue : BoolFalse;
SSAUpdater PhiInserter;
- for (BranchVector::iterator I = Conds.begin(),
- E = Conds.end(); I != E; ++I) {
-
- BranchInst *Term = *I;
+ for (BranchInst *Term : Conds) {
assert(Term->isConditional());
BasicBlock *Parent = Term->getParent();
@@ -472,7 +470,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
NearestCommonDominator Dominator(DT);
Dominator.addBlock(Parent, false);
- Value *ParentValue = 0;
+ Value *ParentValue = nullptr;
for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
PI != PE; ++PI) {
@@ -591,7 +589,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
if (Node->isSubRegion()) {
Region *SubRegion = Node->getNodeAs<Region>();
BasicBlock *OldExit = SubRegion->getExit();
- BasicBlock *Dominator = 0;
+ BasicBlock *Dominator = nullptr;
// Find all the edges from the sub region to the exit
for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
@@ -678,7 +676,8 @@ BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
/// \brief Set the previous node
void StructurizeCFG::setPrevNode(BasicBlock *BB) {
- PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
+ PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
+ : nullptr;
}
/// \brief Does BB dominate all the predicates of Node ?
@@ -699,7 +698,7 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
bool Dominated = false;
// Regionentry is always true
- if (PrevNode == 0)
+ if (!PrevNode)
return true;
for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
@@ -806,11 +805,11 @@ void StructurizeCFG::createFlow() {
Conditions.clear();
LoopConds.clear();
- PrevNode = 0;
+ PrevNode = nullptr;
Visited.clear();
while (!Order.empty()) {
- handleLoops(EntryDominatesExit, 0);
+ handleLoops(EntryDominatesExit, nullptr);
}
if (PrevNode)
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 6d02777..05b9892 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -50,12 +50,12 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -64,6 +64,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -76,6 +77,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "tailcallelim"
+
STATISTIC(NumEliminated, "Number of tail calls removed");
STATISTIC(NumRetDuped, "Number of return duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
@@ -94,6 +97,9 @@ namespace {
bool runOnFunction(Function &F) override;
private:
+ bool runTRE(Function &F);
+ bool markTails(Function &F, bool &AllCallsAreTailCalls);
+
CallInst *FindTRECandidate(Instruction *I,
bool CannotTailCallElimCallsMarkedTail);
bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
@@ -131,55 +137,255 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfo>();
}
-/// CanTRE - Scan the specified basic block for alloca instructions.
-/// If it contains any that are variable-sized or not in the entry block,
-/// returns false.
-static bool CanTRE(AllocaInst *AI) {
- // Because of PR962, we don't TRE allocas outside the entry block.
-
- // If this alloca is in the body of the function, or if it is a variable
- // sized allocation, we cannot tail call eliminate calls marked 'tail'
- // with this mechanism.
- BasicBlock *BB = AI->getParent();
- return BB == &BB->getParent()->getEntryBlock() &&
- isa<ConstantInt>(AI->getArraySize());
+/// \brief Scan the specified function for alloca instructions.
+/// If it contains any dynamic allocas, returns false.
+static bool CanTRE(Function &F) {
+ // Because of PR962, we don't TRE dynamic allocas.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ if (!AI->isStaticAlloca())
+ return false;
+ }
+ }
+ }
+
+ return true;
}
-namespace {
-struct AllocaCaptureTracker : public CaptureTracker {
- AllocaCaptureTracker() : Captured(false) {}
+bool TailCallElim::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
- void tooManyUses() override { Captured = true; }
+ bool AllCallsAreTailCalls = false;
+ bool Modified = markTails(F, AllCallsAreTailCalls);
+ if (AllCallsAreTailCalls)
+ Modified |= runTRE(F);
+ return Modified;
+}
- bool shouldExplore(const Use *U) override {
- Value *V = U->getUser();
- if (isa<CallInst>(V) || isa<InvokeInst>(V))
- UsesAlloca.insert(V);
- return true;
+namespace {
+struct AllocaDerivedValueTracker {
+  // Start at a root value and walk its def-use chain to mark calls that use the
+  // value or a derived value in AllocaUsers, and places where it may escape in
+  // EscapePoints.
+ void walk(Value *Root) {
+ SmallVector<Use *, 32> Worklist;
+ SmallPtrSet<Use *, 32> Visited;
+
+ auto AddUsesToWorklist = [&](Value *V) {
+ for (auto &U : V->uses()) {
+ if (!Visited.insert(&U))
+ continue;
+ Worklist.push_back(&U);
+ }
+ };
+
+ AddUsesToWorklist(Root);
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ bool IsNocapture = !CS.isCallee(U) &&
+ CS.doesNotCapture(CS.getArgumentNo(U));
+ callUsesLocalStack(CS, IsNocapture);
+ if (IsNocapture) {
+ // If the alloca-derived argument is passed in as nocapture, then it
+ // can't propagate to the call's return. That would be capturing.
+ continue;
+ }
+ break;
+ }
+ case Instruction::Load: {
+ // The result of a load is not alloca-derived (unless an alloca has
+ // otherwise escaped, but this is a local analysis).
+ continue;
+ }
+ case Instruction::Store: {
+ if (U->getOperandNo() == 0)
+ EscapePoints.insert(I);
+ continue; // Stores have no users to analyze.
+ }
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ break;
+ default:
+ EscapePoints.insert(I);
+ break;
+ }
+
+ AddUsesToWorklist(I);
+ }
}
- bool captured(const Use *U) override {
- if (isa<ReturnInst>(U->getUser()))
- return false;
- Captured = true;
- return true;
+ void callUsesLocalStack(CallSite CS, bool IsNocapture) {
+ // Add it to the list of alloca users. If it's already there, skip further
+ // processing.
+ if (!AllocaUsers.insert(CS.getInstruction()))
+ return;
+
+ // If it's nocapture then it can't capture the alloca.
+ if (IsNocapture)
+ return;
+
+ // If it can write to memory, it can leak the alloca value.
+ if (!CS.onlyReadsMemory())
+ EscapePoints.insert(CS.getInstruction());
}
- bool Captured;
- SmallPtrSet<const Value *, 16> UsesAlloca;
+ SmallPtrSet<Instruction *, 32> AllocaUsers;
+ SmallPtrSet<Instruction *, 32> EscapePoints;
};
-} // end anonymous namespace
+}
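+
+// Illustrative tracker behavior on hypothetical IR: after walk(%a) for
+//   %a = alloca i32
+// a call "call void @f(i32* nocapture %a)" lands in AllocaUsers but, being
+// nocapture, not in EscapePoints, while "store i32* %a, i32** @g" is recorded
+// as an escape point because %a is the stored value (operand 0).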
-bool TailCallElim::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
+ if (F.callsFunctionThatReturnsTwice())
return false;
+ AllCallsAreTailCalls = true;
+
+ // The local stack holds all alloca instructions and all byval arguments.
+ AllocaDerivedValueTracker Tracker;
+ for (Argument &Arg : F.args()) {
+ if (Arg.hasByValAttr())
+ Tracker.walk(&Arg);
+ }
+ for (auto &BB : F) {
+ for (auto &I : BB)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+ Tracker.walk(AI);
+ }
+ bool Modified = false;
+
+ // Track whether a block is reachable after an alloca has escaped. Blocks that
+ // contain the escaping instruction will be marked as being visited without an
+ // escaped alloca, since that is how the block began.
+ enum VisitType {
+ UNVISITED,
+ UNESCAPED,
+ ESCAPED
+ };
+ DenseMap<BasicBlock *, VisitType> Visited;
+
+ // We propagate the fact that an alloca has escaped from block to successor.
+ // Visit the blocks that are propagating the escapedness first. To do this, we
+ // maintain two worklists.
+ SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
+
+ // We may enter a block and visit it thinking that no alloca has escaped yet,
+ // then see an escape point and go back around a loop edge and come back to
+ // the same block twice. Because of this, we defer setting tail on calls when
+  // we first encounter them in a block. No entry in this list statically uses
+  // an alloca via use-def chain analysis, but an entry may still reach an
+  // alloca through other means if its block turns out to be reachable after an
+  // escape point.
+ SmallVector<CallInst *, 32> DeferredTails;
+
+ BasicBlock *BB = &F.getEntryBlock();
+ VisitType Escaped = UNESCAPED;
+ do {
+ for (auto &I : *BB) {
+ if (Tracker.EscapePoints.count(&I))
+ Escaped = ESCAPED;
+
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI || CI->isTailCall())
+ continue;
+
+ if (CI->doesNotAccessMemory()) {
+ // A call to a readnone function whose arguments are all things computed
+ // outside this function can be marked tail. Even if you stored the
+ // alloca address into a global, a readnone function can't load the
+ // global anyhow.
+ //
+ // Note that this runs whether we know an alloca has escaped or not. If
+ // it has, then we can't trust Tracker.AllocaUsers to be accurate.
+ bool SafeToTail = true;
+ for (auto &Arg : CI->arg_operands()) {
+          if (isa<Constant>(Arg))
+            continue;
+          if (Argument *A = dyn_cast<Argument>(Arg))
+            if (!A->hasByValAttr())
+              continue;
+ SafeToTail = false;
+ break;
+ }
+ if (SafeToTail) {
+ emitOptimizationRemark(
+ F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
+ "marked this readnone call a tail call candidate");
+ CI->setTailCall();
+ Modified = true;
+ continue;
+ }
+ }
+
+ if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+ DeferredTails.push_back(CI);
+ } else {
+ AllCallsAreTailCalls = false;
+ }
+ }
+
+ for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
+ auto &State = Visited[SuccBB];
+ if (State < Escaped) {
+ State = Escaped;
+ if (State == ESCAPED)
+ WorklistEscaped.push_back(SuccBB);
+ else
+ WorklistUnescaped.push_back(SuccBB);
+ }
+ }
+
+ if (!WorklistEscaped.empty()) {
+ BB = WorklistEscaped.pop_back_val();
+ Escaped = ESCAPED;
+ } else {
+ BB = nullptr;
+ while (!WorklistUnescaped.empty()) {
+ auto *NextBB = WorklistUnescaped.pop_back_val();
+ if (Visited[NextBB] == UNESCAPED) {
+ BB = NextBB;
+ Escaped = UNESCAPED;
+ break;
+ }
+ }
+ }
+ } while (BB);
+
+ for (CallInst *CI : DeferredTails) {
+ if (Visited[CI->getParent()] != ESCAPED) {
+ // If the escape point was part way through the block, calls after the
+ // escape point wouldn't have been put into DeferredTails.
+ emitOptimizationRemark(F.getContext(), "tailcallelim", F,
+ CI->getDebugLoc(),
+ "marked this call a tail call candidate");
+ CI->setTailCall();
+ Modified = true;
+ } else {
+ AllCallsAreTailCalls = false;
+ }
+ }
+
+ return Modified;
+}
+
+bool TailCallElim::runTRE(Function &F) {
// If this function is a varargs function, we won't be able to PHI the args
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg()) return false;
TTI = &getAnalysis<TargetTransformInfo>();
- BasicBlock *OldEntry = 0;
+ BasicBlock *OldEntry = nullptr;
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
@@ -188,39 +394,23 @@ bool TailCallElim::runOnFunction(Function &F) {
// marked with the 'tail' attribute, because doing so would cause the stack
// size to increase (real TRE would deallocate variable sized allocas, TRE
// doesn't).
- bool CanTRETailMarkedCall = true;
-
- // Find calls that can be marked tail.
- AllocaCaptureTracker ACT;
- for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- CanTRETailMarkedCall &= CanTRE(AI);
- PointerMayBeCaptured(AI, &ACT);
- // If any allocas are captured, exit.
- if (ACT.Captured)
- return false;
- }
- }
- }
+ bool CanTRETailMarkedCall = CanTRE(F);
- // Second pass, change any tail recursive calls to loops.
+ // Change any tail recursive calls to loops.
//
// FIXME: The code generator produces really bad code when an 'escaping
// alloca' is changed from being a static alloca to being a dynamic alloca.
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
- if (ACT.UsesAlloca.empty()) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
- bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
- TailCallsAreMarkedTail, ArgumentPHIs,
- !CanTRETailMarkedCall);
- MadeChange |= Change;
- }
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs, !CanTRETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall);
+ MadeChange |= Change;
}
}
@@ -229,34 +419,13 @@ bool TailCallElim::runOnFunction(Function &F) {
// with themselves. Check to see if we did and clean up our mess if so. This
// occurs when a function passes an argument straight through to its tail
// call.
- if (!ArgumentPHIs.empty()) {
- for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
- PHINode *PN = ArgumentPHIs[i];
-
- // If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN)) {
- PN->replaceAllUsesWith(PNV);
- PN->eraseFromParent();
- }
- }
- }
+ for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+ PHINode *PN = ArgumentPHIs[i];
- // At this point, we know that the function does not have any captured
- // allocas. If additionally the function does not call setjmp, mark all calls
- // in the function that do not access stack memory with the tail keyword. This
- // implies ensuring that there does not exist any path from a call that takes
- // in an alloca but does not capture it and the call which we wish to mark
- // with "tail".
- if (!F.callsFunctionThatReturnsTwice()) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (!ACT.UsesAlloca.count(CI)) {
- CI->setTailCall();
- MadeChange = true;
- }
- }
- }
+ // If the PHI Node is a dynamic constant, replace it with the value it is.
+ if (Value *PNV = SimplifyInstruction(PN)) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
}
}
@@ -343,11 +512,11 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
//
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
- Value *ReturnedValue = 0;
+ Value *ReturnedValue = nullptr;
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
- if (RI == 0 || RI == IgnoreRI) continue;
+ if (RI == nullptr || RI == IgnoreRI) continue;
// We can only perform this transformation if the value returned is
// evaluatable at the start of the initial invocation of the function,
@@ -355,10 +524,10 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
//
Value *RetOp = RI->getOperand(0);
if (!isDynamicConstant(RetOp, CI, RI))
- return 0;
+ return nullptr;
if (ReturnedValue && RetOp != ReturnedValue)
- return 0; // Cannot transform if differing values are returned.
+ return nullptr; // Cannot transform if differing values are returned.
ReturnedValue = RetOp;
}
return ReturnedValue;
@@ -370,18 +539,18 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
///
Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
CallInst *CI) {
- if (!I->isAssociative() || !I->isCommutative()) return 0;
+ if (!I->isAssociative() || !I->isCommutative()) return nullptr;
assert(I->getNumOperands() == 2 &&
"Associative/commutative operations should have 2 args!");
// Exactly one operand should be the result of the call instruction.
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
(I->getOperand(0) != CI && I->getOperand(1) != CI))
- return 0;
+ return nullptr;
// The only user of this instruction we allow is a single return instruction.
if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
- return 0;
+ return nullptr;
// Ok, now we have to check all of the other return instructions in this
// function. If they return non-constants or differing values, then we cannot
@@ -402,11 +571,11 @@ TailCallElim::FindTRECandidate(Instruction *TI,
Function *F = BB->getParent();
if (&BB->front() == TI) // Make sure there is something before the terminator.
- return 0;
+ return nullptr;
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
- CallInst *CI = 0;
+ CallInst *CI = nullptr;
BasicBlock::iterator BBI = TI;
while (true) {
CI = dyn_cast<CallInst>(BBI);
@@ -414,14 +583,14 @@ TailCallElim::FindTRECandidate(Instruction *TI,
break;
if (BBI == BB->begin())
- return 0; // Didn't find a potential tail call.
+ return nullptr; // Didn't find a potential tail call.
--BBI;
}
// If this call is marked as a tail call, and if there are dynamic allocas in
// the function, we cannot perform this optimization.
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
- return 0;
+ return nullptr;
// As a special case, detect code like this:
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
@@ -441,7 +610,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
for (; I != E && FI != FE; ++I, ++FI)
if (*I != &*FI) break;
if (I == E && FI == FE)
- return 0;
+ return nullptr;
}
return CI;
@@ -462,8 +631,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// which is different to the constant returned by other return instructions
// (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
// special case of accumulator recursion, the operation being "return C".
- Value *AccumulatorRecursionEliminationInitVal = 0;
- Instruction *AccumulatorRecursionInstr = 0;
+ Value *AccumulatorRecursionEliminationInitVal = nullptr;
+ Instruction *AccumulatorRecursionInstr = nullptr;
// Ok, we found a potential tail call. We can currently only transform the
// tail call if all of the instructions between the call and the return are
@@ -493,8 +662,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// accumulator recursion variable eliminated.
if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
!isa<UndefValue>(Ret->getReturnValue()) &&
- AccumulatorRecursionEliminationInitVal == 0 &&
- !getCommonReturnValue(0, CI)) {
+ AccumulatorRecursionEliminationInitVal == nullptr &&
+ !getCommonReturnValue(nullptr, CI)) {
// One case remains that we are able to handle: the current return
// instruction returns a constant, and all other return instructions
// return a different constant.
@@ -510,9 +679,12 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *BB = Ret->getParent();
Function *F = BB->getParent();
+ emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(),
+ "transforming tail recursion to loop");
+
// OK! We can transform this tail call. If this is the first one found,
// create the new entry block, allowing us to branch back to the old entry.
- if (OldEntry == 0) {
+ if (!OldEntry) {
OldEntry = &F->getEntryBlock();
BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
NewEntry->takeName(OldEntry);
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index f42635e..196ac79 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -52,8 +52,6 @@
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "add-discriminators"
-
#include "llvm/Transforms/Scalar.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -69,6 +67,8 @@
using namespace llvm;
+#define DEBUG_TYPE "add-discriminators"
+
namespace {
struct AddDiscriminators : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
@@ -99,7 +99,7 @@ FunctionPass *llvm::createAddDiscriminatorsPass() {
static bool hasDebugInfo(const Function &F) {
NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu");
- return CUNodes != 0;
+ return CUNodes != nullptr;
}
/// \brief Assign DWARF discriminators.
@@ -154,10 +154,15 @@ static bool hasDebugInfo(const Function &F) {
/// file and line location as I2. This new lexical block will have a
/// different discriminator number than I1.
bool AddDiscriminators::runOnFunction(Function &F) {
- // No need to do anything if there is no debug info for this function.
// If the function has debug information, but the user has disabled
// discriminators, do nothing.
- if (!hasDebugInfo(F) || NoDiscriminators) return false;
+  // Similarly, if the function has no debug info, do nothing.
+ // Finally, if this module is built with dwarf versions earlier than 4,
+ // do nothing (discriminator support is a DWARF 4 feature).
+ if (NoDiscriminators ||
+ !hasDebugInfo(F) ||
+ F.getParent()->getDwarfVersion() < 4)
+ return false;
bool Changed = false;
Module *M = F.getParent();
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index ab4d8a8..cbd8dd0 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -11,6 +11,7 @@ transforms_utils_SRC_FILES := \
CloneModule.cpp \
CmpInstAnalysis.cpp \
CodeExtractor.cpp \
+ CtorUtils.cpp \
DemoteRegToStack.cpp \
GlobalStatus.cpp \
InlineFunction.cpp \
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index b3cd5ce..80b7e22 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -68,8 +68,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
if (!isa<PHINode>(BB->begin())) return;
- AliasAnalysis *AA = 0;
- MemoryDependenceAnalysis *MemDep = 0;
+ AliasAnalysis *AA = nullptr;
+ MemoryDependenceAnalysis *MemDep = nullptr;
if (P) {
AA = P->getAnalysisIfAvailable<AliasAnalysis>();
MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
@@ -130,7 +130,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
BasicBlock *OnlySucc = BB;
for (; SI != SE; ++SI)
if (*SI != OnlySucc) {
- OnlySucc = 0; // There are multiple distinct successors!
+ OnlySucc = nullptr; // There are multiple distinct successors!
break;
}
@@ -217,7 +217,7 @@ void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
///
void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
BasicBlock::iterator &BI, Instruction *I) {
- assert(I->getParent() == 0 &&
+ assert(I->getParent() == nullptr &&
"ReplaceInstWithInst: Instruction already inserted into basic block!");
// Insert the new instruction into the basic block...
@@ -254,7 +254,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
// If the successor only has a single pred, split the top of the successor
// block.
assert(SP == BB && "CFG broken");
- SP = NULL;
+ SP = nullptr;
return SplitBlock(Succ, Succ->begin(), P);
}
@@ -310,7 +310,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
if (!P) return;
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
- Loop *L = LI ? LI->getLoopFor(OldBB) : 0;
+ Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr;
// If we need to preserve loop analyses, collect some information about how
// this split will affect loops.
@@ -351,7 +351,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
// loop). To find this, examine each of the predecessors and determine which
// loops enclose them, and select the most-nested loop which contains the
// loop containing the block being split.
- Loop *InnermostPredLoop = 0;
+ Loop *InnermostPredLoop = nullptr;
for (ArrayRef<BasicBlock*>::iterator
i = Preds.begin(), e = Preds.end(); i != e; ++i) {
BasicBlock *Pred = *i;
@@ -384,51 +384,68 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
ArrayRef<BasicBlock*> Preds, BranchInst *BI,
Pass *P, bool HasLoopExit) {
// Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
- AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+ AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr;
+ SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I++);
// Check to see if all of the values coming in are the same. If so, we
// don't need to create a new PHI node, unless it's needed for LCSSA.
- Value *InVal = 0;
+ Value *InVal = nullptr;
if (!HasLoopExit) {
InVal = PN->getIncomingValueForBlock(Preds[0]);
- for (unsigned i = 1, e = Preds.size(); i != e; ++i)
- if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
- InVal = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (!PredSet.count(PN->getIncomingBlock(i)))
+ continue;
+ if (!InVal)
+ InVal = PN->getIncomingValue(i);
+ else if (InVal != PN->getIncomingValue(i)) {
+ InVal = nullptr;
break;
}
+ }
}
if (InVal) {
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
// PHI.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- // Explicitly check the BB index here to handle duplicates in Preds.
- int Idx = PN->getBasicBlockIndex(Preds[i]);
- if (Idx >= 0)
- PN->removeIncomingValue(Idx, false);
- }
- } else {
- // If the values coming into the block are not the same, we need a PHI.
- // Create the new PHI node, insert it into NewBB at the end of the block
- PHINode *NewPHI =
- PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
- if (AA) AA->copyValue(PN, NewPHI);
- // Move all of the PHI values for 'Preds' to the new PHI.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- Value *V = PN->removeIncomingValue(Preds[i], false);
- NewPHI->addIncoming(V, Preds[i]);
- }
+ // NOTE! This loop walks backwards for a reason! First off, this minimizes
+ // the cost of removal if we end up removing a large number of values, and
+ // second off, this ensures that the indices for the incoming values
+ // aren't invalidated when we remove one.
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i)
+ if (PredSet.count(PN->getIncomingBlock(i)))
+ PN->removeIncomingValue(i, false);
+
+ // Add an incoming value to the PHI node in the loop for the preheader
+ // edge.
+ PN->addIncoming(InVal, NewBB);
+ continue;
+ }
- InVal = NewPHI;
+ // If the values coming into the block are not the same, we need a new
+ // PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
+ if (AA)
+ AA->copyValue(PN, NewPHI);
+
+ // NOTE! This loop walks backwards for a reason! First off, this minimizes
+ // the cost of removal if we end up removing a large number of values, and
+ // second off, this ensures that the indices for the incoming values aren't
+ // invalidated when we remove one.
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
+ BasicBlock *IncomingBB = PN->getIncomingBlock(i);
+ if (PredSet.count(IncomingBB)) {
+ Value *V = PN->removeIncomingValue(i, false);
+ NewPHI->addIncoming(V, IncomingBB);
+ }
}
- // Add an incoming value to the PHI node in the loop for the preheader
- // edge.
- PN->addIncoming(InVal, NewBB);
+ PN->addIncoming(NewPHI, NewBB);
}
}
@@ -542,7 +559,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
e = pred_end(OrigBB);
}
- BasicBlock *NewBB2 = 0;
+ BasicBlock *NewBB2 = nullptr;
if (!NewBB2Preds.empty()) {
// Create another basic block for the rest of OrigBB's predecessors.
NewBB2 = BasicBlock::Create(OrigBB->getContext(),
@@ -607,7 +624,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
i != e; ++i) {
Value *V = *i;
- Instruction *NewBC = 0;
+ Instruction *NewBC = nullptr;
if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
// Return value might be bitcasted. Clone and insert it before the
// return instruction.
@@ -724,32 +741,32 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
BasicBlock *&IfFalse) {
PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
- BasicBlock *Pred1 = NULL;
- BasicBlock *Pred2 = NULL;
+ BasicBlock *Pred1 = nullptr;
+ BasicBlock *Pred2 = nullptr;
if (SomePHI) {
if (SomePHI->getNumIncomingValues() != 2)
- return NULL;
+ return nullptr;
Pred1 = SomePHI->getIncomingBlock(0);
Pred2 = SomePHI->getIncomingBlock(1);
} else {
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
if (PI == PE) // No predecessor
- return NULL;
+ return nullptr;
Pred1 = *PI++;
if (PI == PE) // Only one predecessor
- return NULL;
+ return nullptr;
Pred2 = *PI++;
if (PI != PE) // More than two predecessors
- return NULL;
+ return nullptr;
}
// We can only handle branches. Other control flow will be lowered to
// branches if possible anyway.
BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
- if (Pred1Br == 0 || Pred2Br == 0)
- return 0;
+ if (!Pred1Br || !Pred2Br)
+ return nullptr;
// Eliminate code duplication by ensuring that Pred1Br is conditional if
 // either is.
@@ -759,7 +776,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// required anyway, we stand no chance of eliminating it, so the xform is
// probably not profitable.
if (Pred1Br->isConditional())
- return 0;
+ return nullptr;
std::swap(Pred1, Pred2);
std::swap(Pred1Br, Pred2Br);
@@ -769,8 +786,8 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// The only thing we have to watch out for here is to make sure that Pred2
// doesn't have incoming edges from other blocks. If it does, the condition
// doesn't dominate BB.
- if (Pred2->getSinglePredecessor() == 0)
- return 0;
+ if (!Pred2->getSinglePredecessor())
+ return nullptr;
// If we found a conditional branch predecessor, make sure that it branches
// to BB and Pred2Br. If it doesn't, this isn't an "if statement".
@@ -785,7 +802,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
} else {
// We know that one arm of the conditional goes to BB, so the other must
// go somewhere unrelated, and this must not be an "if statement".
- return 0;
+ return nullptr;
}
return Pred1Br->getCondition();
@@ -795,12 +812,12 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// BB. Don't panic! If both blocks only have a single (identical)
// predecessor, and THAT is a conditional branch, then we're all ok!
BasicBlock *CommonPred = Pred1->getSinglePredecessor();
- if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
- return 0;
+ if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor())
+ return nullptr;
// Otherwise, if this is a conditional branch, then we can use it!
BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
- if (BI == 0) return 0;
+ if (!BI) return nullptr;
assert(BI->isConditional() && "Two successors but not conditional?");
if (BI->getSuccessor(0) == Pred1) {
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 76ebb9f..80bd516 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "break-crit-edges"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -30,6 +29,8 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
+#define DEBUG_TYPE "break-crit-edges"
+
STATISTIC(NumBroken, "Number of blocks inserted");
namespace {
@@ -141,7 +142,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
Pass *P, bool MergeIdenticalEdges,
bool DontDeleteUselessPhis,
bool SplitLandingPads) {
- if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
+ if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr;
assert(!isa<IndirectBrInst>(TI) &&
"Cannot split critical edge from IndirectBrInst");
@@ -151,7 +152,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// Splitting the critical edge to a landing pad block is non-trivial. Don't do
// it in this generic function.
- if (DestBB->isLandingPad()) return 0;
+ if (DestBB->isLandingPad()) return nullptr;
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
@@ -207,15 +208,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If we don't have a pass object, we can't update anything...
- if (P == 0) return NewBB;
+ if (!P) return NewBB;
DominatorTreeWrapperPass *DTWP =
P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
// If we have nothing to update, just return.
- if (DT == 0 && LI == 0)
+ if (!DT && !LI)
return NewBB;
// Now update analysis information. Since the only predecessor of NewBB is
@@ -251,7 +252,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
//
if (TINode) { // Don't break unreachable code!
DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
- DomTreeNode *DestBBNode = 0;
+ DomTreeNode *DestBBNode = nullptr;
// If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
if (!OtherPreds.empty()) {
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 82384a1..be00b69 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -27,7 +27,8 @@ using namespace llvm;
/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
- return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
}
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
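The CastToCStr fix above is about address spaces: getInt8PtrTy() with no argument always produces i8* in addrspace(0), and a bitcast is not allowed to change address spaces, so casting a non-zero-AS pointer produced invalid IR. A restatement of the fixed helper as a sketch, with the reasoning spelled out in comments:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Cast V to i8* in V's own address space. Propagating AS explicitly is
// required because a bitcast may not cross address spaces.
static Value *asCString(Value *V, IRBuilder<> &B) {
  unsigned AS = V->getType()->getPointerAddressSpace();
  return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
}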
@@ -35,7 +36,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strlen))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -64,7 +65,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strnlen))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -94,7 +95,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strchr))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
@@ -120,7 +121,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strncmp))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[3];
@@ -153,7 +154,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI,
StringRef Name) {
if (!TLI->has(LibFunc::strcpy))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -177,7 +178,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strncpy))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -204,7 +205,7 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcpy_chk))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS;
@@ -232,7 +233,7 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Value *Len, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memchr))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS;
@@ -260,7 +261,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
Value *Len, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcmp))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[3];
@@ -347,7 +348,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::putchar))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
@@ -369,7 +370,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::puts))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -393,7 +394,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputc))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -426,7 +427,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputs))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[3];
@@ -459,7 +460,7 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fwrite))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[3];
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 1f517d0..f2d5e07 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "bypass-slow-division"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Function.h"
@@ -24,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "bypass-slow-division"
+
namespace {
struct DivOpInfo {
bool SignedOp;
@@ -53,11 +54,11 @@ namespace llvm {
}
static DivOpInfo getEmptyKey() {
- return DivOpInfo(false, 0, 0);
+ return DivOpInfo(false, nullptr, nullptr);
}
static DivOpInfo getTombstoneKey() {
- return DivOpInfo(true, 0, 0);
+ return DivOpInfo(true, nullptr, nullptr);
}
static unsigned getHashValue(const DivOpInfo &Val) {
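For context on the hunk above: DenseMap reserves two sentinel keys per key type, an "empty" marker for never-used buckets and a "tombstone" for erased ones, so a DenseMapInfo specialization must supply two values no real key can take. DivOpInfo gets away with (false, nullptr, nullptr) and (true, nullptr, nullptr) because genuine entries always carry non-null operand pointers. A minimal sketch of the same contract for a hypothetical key type, assuming real keys are non-negative:

#include "llvm/ADT/DenseMap.h"

struct IntPairKey { int A, B; };

namespace llvm {
template <> struct DenseMapInfo<IntPairKey> {
  // Sentinels no real key can equal (this sketch assumes real keys hold
  // non-negative values only).
  static IntPairKey getEmptyKey() { return {-1, -1}; }
  static IntPairKey getTombstoneKey() { return {-2, -2}; }
  static unsigned getHashValue(const IntPairKey &K) {
    return unsigned(K.A) * 37u ^ unsigned(K.B);
  }
  static bool isEqual(const IntPairKey &K1, const IntPairKey &K2) {
    return K1.A == K2.A && K1.B == K2.B;
  }
};
} // namespace llvm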
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index dac2090..e10ca90 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMTransformUtils
BreakCriticalEdges.cpp
BuildLibCalls.cpp
BypassSlowDivision.cpp
+ CtorUtils.cpp
CloneFunction.cpp
CloneModule.cpp
CmpInstAnalysis.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index a199086..5c8f20d 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -159,7 +159,7 @@ static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) {
for (DISubprogram Subprogram : Finder.subprograms()) {
if (Subprogram.describes(F)) return Subprogram;
}
- return NULL;
+ return nullptr;
}
// Add an operand to an existing MDNode. The new operand will be added at the
@@ -359,7 +359,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If the condition was a known constant in the callee...
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
- if (Cond == 0) {
+ if (!Cond) {
Value *V = VMap[BI->getCondition()];
Cond = dyn_cast_or_null<ConstantInt>(V);
}
@@ -375,7 +375,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
} else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
// If switching on a value known constant in the caller.
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
- if (Cond == 0) { // Or known constant after constant prop in the callee...
+ if (!Cond) { // Or known constant after constant prop in the callee...
Value *V = VMap[SI->getCondition()];
Cond = dyn_cast_or_null<ConstantInt>(V);
}
@@ -454,7 +454,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
BI != BE; ++BI) {
Value *V = VMap[BI];
BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
- if (NewBB == 0) continue; // Dead block.
+ if (!NewBB) continue; // Dead block.
// Add the new block to the new function.
NewFunc->getBasicBlockList().push_back(NewBB);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 64df089..eb67db1 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -47,8 +47,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
GlobalVariable *GV = new GlobalVariable(*New,
I->getType()->getElementType(),
I->isConstant(), I->getLinkage(),
- (Constant*) 0, I->getName(),
- (GlobalVariable*) 0,
+ (Constant*) nullptr, I->getName(),
+ (GlobalVariable*) nullptr,
I->getThreadLocalMode(),
I->getType()->getAddressSpace());
GV->copyAttributesFrom(I);
@@ -67,8 +67,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
- GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(),
- I->getName(), NULL, New);
+ auto *PTy = cast<PointerType>(I->getType());
+ auto *GA =
+ GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ I->getLinkage(), I->getName(), New);
GA->copyAttributesFrom(I);
VMap[I] = GA;
}
@@ -105,8 +107,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
- if (const Constant *C = I->getAliasee())
- GA->setAliasee(MapValue(C, VMap));
+ if (const GlobalObject *C = I->getAliasee())
+ GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap)));
}
// And named metadata....
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
index 8fa412a..3b15a0a 100644
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -84,7 +84,7 @@ Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
case 7: // True.
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
}
- return NULL;
+ return nullptr;
}
/// PredicatesFoldable - Return true if both predicates match sign or if at
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index b814842..e70a7d6 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -38,6 +38,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "code-extractor"
+
// Provide a command-line option to aggregate function arguments into a struct
// for functions produced by the code extractor. This is useful when converting
// extracted functions to pthread-based code, as only one argument (void*) can
@@ -118,7 +120,7 @@ buildExtractionBlockSet(const RegionNode &RN) {
}
CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs)
- : DT(0), AggregateArgs(AggregateArgs||AggregateArgsOpt),
+ : DT(nullptr), AggregateArgs(AggregateArgs||AggregateArgsOpt),
Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {}
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
@@ -410,7 +412,7 @@ static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
return P->getIncomingBlock(U);
}
- return 0;
+ return nullptr;
}
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
@@ -438,14 +440,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
StructValues.push_back(*i);
} else {
AllocaInst *alloca =
- new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+ new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc",
codeReplacer->getParent()->begin()->begin());
ReloadOutputs.push_back(alloca);
params.push_back(alloca);
}
}
- AllocaInst *Struct = 0;
+ AllocaInst *Struct = nullptr;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
std::vector<Type*> ArgTypes;
for (ValueSet::iterator v = StructValues.begin(),
@@ -455,7 +457,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Allocate a struct at the beginning of this function
Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
Struct =
- new AllocaInst(StructArgTy, 0, "structArg",
+ new AllocaInst(StructArgTy, nullptr, "structArg",
codeReplacer->getParent()->begin()->begin());
params.push_back(Struct);
@@ -484,7 +486,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Reload the outputs passed in by reference
for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
- Value *Output = 0;
+ Value *Output = nullptr;
if (AggregateArgs) {
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
@@ -537,7 +539,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
newFunction);
unsigned SuccNum = switchVal++;
- Value *brVal = 0;
+ Value *brVal = nullptr;
switch (NumExitBlocks) {
case 0:
case 1: break; // No value needed.
@@ -633,7 +635,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Check if the function should return a value
if (OldFnRetTy->isVoidTy()) {
- ReturnInst::Create(Context, 0, TheSwitch); // Return void
+ ReturnInst::Create(Context, nullptr, TheSwitch); // Return void
} else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
// return what we have
ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
@@ -685,7 +687,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
- return 0;
+ return nullptr;
ValueSet inputs, outputs;
diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp
new file mode 100644
index 0000000..a359424
--- /dev/null
+++ b/lib/Transforms/Utils/CtorUtils.cpp
@@ -0,0 +1,183 @@
+//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that are used to process llvm.global_ctors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ctor_utils"
+
+namespace llvm {
+
+namespace {
+/// Install the specified constructor array as the initializer of the given
+/// llvm.global_ctors list.
+void installGlobalCtors(GlobalVariable *GCL,
+ const std::vector<Function *> &Ctors) {
+ // If we made a change, reassemble the initializer list.
+ Constant *CSVals[3];
+
+ StructType *StructTy =
+ cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
+
+ // Create the new init list.
+ std::vector<Constant *> CAList;
+ for (Function *F : Ctors) {
+ Type *Int32Ty = Type::getInt32Ty(GCL->getContext());
+ if (F) {
+ CSVals[0] = ConstantInt::get(Int32Ty, 65535);
+ CSVals[1] = F;
+ } else {
+ CSVals[0] = ConstantInt::get(Int32Ty, 0x7fffffff);
+ CSVals[1] = Constant::getNullValue(StructTy->getElementType(1));
+ }
+ // FIXME: Only allow the 3-field form in LLVM 4.0.
+ size_t NumElts = StructTy->getNumElements();
+ if (NumElts > 2)
+ CSVals[2] = Constant::getNullValue(StructTy->getElementType(2));
+ CAList.push_back(
+ ConstantStruct::get(StructTy, makeArrayRef(CSVals, NumElts)));
+ }
+
+ // Create the array initializer.
+ Constant *CA =
+ ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList);
+
+ // If we didn't change the number of elements, don't create a new GV.
+ if (CA->getType() == GCL->getInitializer()->getType()) {
+ GCL->setInitializer(CA);
+ return;
+ }
+
+ // Create the new global and insert it next to the existing list.
+ GlobalVariable *NGV =
+ new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
+ CA, "", GCL->getThreadLocalMode());
+ GCL->getParent()->getGlobalList().insert(GCL, NGV);
+ NGV->takeName(GCL);
+
+ // Nuke the old list, replacing any uses with the new one.
+ if (!GCL->use_empty()) {
+ Constant *V = NGV;
+ if (V->getType() != GCL->getType())
+ V = ConstantExpr::getBitCast(V, GCL->getType());
+ GCL->replaceAllUsesWith(V);
+ }
+ GCL->eraseFromParent();
+}
+
+/// Given an llvm.global_ctors list that we can understand, return a list of
+/// the functions, including any null terminator, as a vector.
+std::vector<Function*> parseGlobalCtors(GlobalVariable *GV) {
+ if (GV->getInitializer()->isNullValue())
+ return std::vector<Function *>();
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ std::vector<Function *> Result;
+ Result.reserve(CA->getNumOperands());
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+ }
+ return Result;
+}
+
+/// Find the llvm.global_ctors list, verifying that all initializers have an
+/// init priority of 65535.
+GlobalVariable *findGlobalCtors(Module &M) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return nullptr;
+
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!GV->hasUniqueInitializer())
+ return nullptr;
+
+ if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ return GV;
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ if (isa<ConstantAggregateZero>(*i))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return nullptr;
+
+ // Init priority must be standard.
+ ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
+ if (CI->getZExtValue() != 65535)
+ return nullptr;
+ }
+
+ return GV;
+}
+} // namespace
+
+/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
+/// entries for which it returns true. Return true if anything changed.
+bool optimizeGlobalCtorsList(Module &M,
+ function_ref<bool(Function *)> ShouldRemove) {
+ GlobalVariable *GlobalCtors = findGlobalCtors(M);
+ if (!GlobalCtors)
+ return false;
+
+ std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors);
+ if (Ctors.empty())
+ return false;
+
+ bool MadeChange = false;
+
+ // Loop over global ctors, optimizing them when we can.
+ for (unsigned i = 0; i != Ctors.size(); ++i) {
+ Function *F = Ctors[i];
+ // Found a null terminator in the middle of the list; prune off the rest
+ // of the list.
+ if (!F) {
+ if (i != Ctors.size() - 1) {
+ Ctors.resize(i + 1);
+ MadeChange = true;
+ }
+ break;
+ }
+ DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
+
+ // We cannot simplify external ctor functions.
+ if (F->empty())
+ continue;
+
+ // If the callback tells us this ctor can be removed, do so.
+ if (ShouldRemove(F)) {
+ Ctors.erase(Ctors.begin() + i);
+ MadeChange = true;
+ --i;
+ continue;
+ }
+ }
+
+ if (!MadeChange)
+ return false;
+
+ installGlobalCtors(GlobalCtors, Ctors);
+ return true;
+}
+
+} // End llvm namespace
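Taken together, the new file gives IPO passes a single entry point: findGlobalCtors locates llvm.global_ctors and verifies it is simple enough to edit (unique initializer, function-or-null entries, priority 65535 only), parseGlobalCtors flattens it into a Function* vector, and optimizeGlobalCtorsList rebuilds the list when the caller's predicate removes entries. A hedged usage sketch; the predicate is a hypothetical stand-in for a real pass's analysis:

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
using namespace llvm;

// Prune static constructors the (hypothetical) analysis proved removable;
// CtorUtils handles parsing and rebuilding the ctor list for us.
static bool pruneProvablyDeadCtors(Module &M) {
  return optimizeGlobalCtorsList(M, [](Function *F) {
    // Stand-in predicate: a real pass would evaluate F's body here.
    return F->onlyReadsMemory() && F->doesNotThrow();
  });
}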
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index ac6926f..9972b22 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -25,17 +25,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
Instruction *AllocaPoint) {
if (I.use_empty()) {
I.eraseFromParent();
- return 0;
+ return nullptr;
}
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(I.getType(), 0,
+ Slot = new AllocaInst(I.getType(), nullptr,
I.getName()+".reg2mem", AllocaPoint);
} else {
Function *F = I.getParent()->getParent();
- Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+ Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem",
F->getEntryBlock().begin());
}
@@ -56,7 +56,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == &I) {
Value *&V = Loads[PN->getIncomingBlock(i)];
- if (V == 0) {
+ if (!V) {
// Insert the load into the predecessor block
V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
PN->getIncomingBlock(i)->getTerminator());
@@ -110,17 +110,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
if (P->use_empty()) {
P->eraseFromParent();
- return 0;
+ return nullptr;
}
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(P->getType(), 0,
+ Slot = new AllocaInst(P->getType(), nullptr,
P->getName()+".reg2mem", AllocaPoint);
} else {
Function *F = P->getParent()->getParent();
- Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+ Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem",
F->getEntryBlock().begin());
}
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 39c80f8..51ead40 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "flattencfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -22,16 +21,19 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
+#define DEBUG_TYPE "flattencfg"
+
namespace {
class FlattenCFGOpt {
AliasAnalysis *AA;
/// \brief Use parallel-and or parallel-or to generate conditions for
/// conditional branches.
- bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
+ Pass *P = nullptr);
/// \brief If \param BB is the merge block of an if-region, attempt to merge
/// the if-region with an adjacent if-region upstream if two if-regions
/// contain identical instructions.
- bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr);
/// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
/// are from two if-regions whose entry blocks are \p Head1 and \p
/// Head2. \returns true if \p Block1 and \p Block2 contain identical
@@ -126,9 +128,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
if (PHI)
return false; // For simplicity, avoid cases containing PHI nodes.
- BasicBlock *LastCondBlock = NULL;
- BasicBlock *FirstCondBlock = NULL;
- BasicBlock *UnCondBlock = NULL;
+ BasicBlock *LastCondBlock = nullptr;
+ BasicBlock *FirstCondBlock = nullptr;
+ BasicBlock *UnCondBlock = nullptr;
int Idx = -1;
// Check predecessors of \param BB.
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index e9ebc45..12057e4 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -61,7 +61,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
} else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
if (!GS.HasMultipleAccessingFunctions) {
const Function *F = I->getParent()->getParent();
- if (GS.AccessingFunction == 0)
+ if (!GS.AccessingFunction)
GS.AccessingFunction = F;
else if (GS.AccessingFunction != F)
GS.HasMultipleAccessingFunctions = true;
@@ -176,6 +176,6 @@ bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
GlobalStatus::GlobalStatus()
: IsCompared(false), IsLoaded(false), StoredType(NotStored),
- StoredOnceValue(0), AccessingFunction(0),
+ StoredOnceValue(nullptr), AccessingFunction(nullptr),
HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
Ordering(NotAtomic) {}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 86def3e..e01d0c3 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -51,8 +52,8 @@ namespace {
public:
InvokeInliningInfo(InvokeInst *II)
- : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
- CallerLPad(0), InnerEHValuesPHI(0) {
+ : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
+ CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
// If there are PHI nodes in the unwind destination block, we need to keep
// track of which values came into them from the invoke before removing
// the edge from this block.
@@ -289,13 +290,13 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
// Only copy the edge if the call was inlined!
- if (VMI == VMap.end() || VMI->second == 0)
+ if (VMI == VMap.end() || VMI->second == nullptr)
continue;
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
- if (NewCall == 0) continue;
+ if (!NewCall) continue;
// Remember that this call site got inlined for the client of
// InlineFunction.
@@ -306,7 +307,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// happens, set the callee of the new call site to a more precise
// destination. This can also happen if the call graph node of the caller
// was just unnecessarily imprecise.
- if (I->second->getFunction() == 0)
+ if (!I->second->getFunction())
if (Function *F = CallSite(NewCall).getCalledFunction()) {
// Indirect call site resolved to direct call.
CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
@@ -322,13 +323,44 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->removeCallEdgeFor(CS);
}
+static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
+ BasicBlock *InsertBlock,
+ InlineFunctionInfo &IFI) {
+ LLVMContext &Context = Src->getContext();
+ Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+ Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
+ Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) };
+ Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+ IRBuilder<> builder(InsertBlock->begin());
+ Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp");
+ Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp");
+
+ Value *Size;
+ if (IFI.DL == nullptr)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.DL->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DstCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ builder.CreateCall(MemCpyFn, CallArgs);
+}
+
/// HandleByValArgument - When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
const Function *CalledFunc,
InlineFunctionInfo &IFI,
unsigned ByValAlignment) {
- Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+ PointerType *ArgTy = cast<PointerType>(Arg->getType());
+ Type *AggTy = ArgTy->getElementType();
// If the called function is readonly, then it could not mutate the caller's
// copy of the byval'd memory. In this case, it is safe to elide the copy and
@@ -349,11 +381,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
// for code quality, but rarely happens and is required for correctness.
}
-
- LLVMContext &Context = Arg->getContext();
- Type *VoidPtrTy = Type::getInt8PtrTy(Context);
-
// Create the alloca. If we have DataLayout, use nice alignment.
unsigned Align = 1;
if (IFI.DL)
@@ -366,32 +394,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Function *Caller = TheCall->getParent()->getParent();
- Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+ Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(),
&*Caller->begin()->begin());
- // Emit a memcpy.
- Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
- Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
- Intrinsic::memcpy,
- Tys);
- Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
- Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
-
- Value *Size;
- if (IFI.DL == 0)
- Size = ConstantExpr::getSizeOf(AggTy);
- else
- Size = ConstantInt::get(Type::getInt64Ty(Context),
- IFI.DL->getTypeStoreSize(AggTy));
-
- // Always generate a memcpy of alignment 1 here because we don't know
- // the alignment of the src pointer. Other optimizations can infer
- // better alignment.
- Value *CallArgs[] = {
- DestCast, SrcCast, Size,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- ConstantInt::getFalse(Context) // isVolatile
- };
- IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
// Uses of the argument in the function should use our new alloca
// instead.
@@ -417,8 +422,10 @@ static bool isUsedByLifetimeMarker(Value *V) {
// hasLifetimeMarkers - Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
- Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
- if (AI->getType() == Int8PtrTy)
+ Type *Ty = AI->getType();
+ Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
+ Ty->getPointerAddressSpace());
+ if (Ty == Int8PtrTy)
return isUsedByLifetimeMarker(AI);
// Do a scan to find all the casts to i8*.
@@ -472,6 +479,33 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
+/// Returns a musttail call instruction if one immediately precedes the given
+/// return instruction with an optional bitcast instruction between them.
+static CallInst *getPrecedingMustTailCall(ReturnInst *RI) {
+ Instruction *Prev = RI->getPrevNode();
+ if (!Prev)
+ return nullptr;
+
+ if (Value *RV = RI->getReturnValue()) {
+ if (RV != Prev)
+ return nullptr;
+
+ // Look through the optional bitcast.
+ if (auto *BI = dyn_cast<BitCastInst>(Prev)) {
+ RV = BI->getOperand(0);
+ Prev = BI->getPrevNode();
+ if (!Prev || RV != Prev)
+ return nullptr;
+ }
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(Prev)) {
+ if (CI->isMustTailCall())
+ return CI;
+ }
+ return nullptr;
+}
+
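The helper above accepts exactly two instruction shapes ahead of the return, since a musttail call must sit immediately before the ret (modulo one bitcast). A condensed sketch of the check; note it drops the operand-identity tests the real helper performs, which also verify the returned value really is the call (or its bitcast):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Accepted shapes (IR shown in comments):
//   %v = musttail call i8* @g()        ; shape 1: call feeds the ret directly
//   ret i8* %v
// or, with exactly one bitcast in between:
//   %v = musttail call i8* @g()        ; shape 2
//   %c = bitcast i8* %v to i32*
//   ret i32* %c
static CallInst *precedingMustTailSketch(ReturnInst *RI) {
  Instruction *Prev = RI->getPrevNode();
  if (auto *BC = dyn_cast_or_null<BitCastInst>(Prev))
    Prev = BC->getPrevNode();
  auto *CI = dyn_cast_or_null<CallInst>(Prev);
  return (CI && CI->isMustTailCall()) ? CI : nullptr;
}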
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller. This returns false if it is not possible to inline
/// this call. The program is still in a well defined state if this occurs
@@ -491,15 +525,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
IFI.reset();
const Function *CalledFunc = CS.getCalledFunction();
- if (CalledFunc == 0 || // Can't inline external function or indirect
+ if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
- // If the call to the callee is not a tail call, we must clear the 'tail'
- // flags on any calls that we inline.
- bool MustClearTailCallFlags =
- !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
-
// If the call to the callee cannot throw, set the 'nounwind' flag on any
// calls that we inline.
bool MarkNoUnwind = CS.doesNotThrow();
@@ -519,7 +548,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
// Get the personality function from the callee if it contains a landing pad.
- Value *CalleePersonality = 0;
+ Value *CalleePersonality = nullptr;
for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
I != E; ++I)
if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
@@ -562,6 +591,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
{ // Scope to destroy VMap after cloning.
ValueToValueMapTy VMap;
+ // Keep a list of pair (dst, src) to emit byval initializations.
+ SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -581,11 +612,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (CS.isByValArgument(ArgNo)) {
ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
CalledFunc->getParamAlignment(ArgNo+1));
-
- // Calls that we inline may use the new alloca, so we need to clear
- // their 'tail' flags if HandleByValArgument introduced a new alloca and
- // the callee has calls.
- MustClearTailCallFlags |= ActualArg != *AI;
+ if (ActualArg != *AI)
+ ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
}
VMap[I] = ActualArg;
@@ -602,6 +630,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
+ // Inject byval argument initializations.
+ for (std::pair<Value*, Value*> &Init : ByValInit)
+ HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
+ FirstNewBlock, IFI);
+
// Update the callgraph if requested.
if (IFI.CG)
UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
@@ -619,7 +652,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
for (BasicBlock::iterator I = FirstNewBlock->begin(),
E = FirstNewBlock->end(); I != E; ) {
AllocaInst *AI = dyn_cast<AllocaInst>(I++);
- if (AI == 0) continue;
+ if (!AI) continue;
// If the alloca is now dead, remove it. This often occurs due to code
// specialization.
@@ -651,6 +684,45 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
+ bool InlinedMustTailCalls = false;
+ if (InlinedFunctionInfo.ContainsCalls) {
+ CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
+ if (CallInst *CI = dyn_cast<CallInst>(TheCall))
+ CallSiteTailKind = CI->getTailCallKind();
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
+ ++BB) {
+ for (Instruction &I : *BB) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ continue;
+
+ // We need to reduce the strength of any inlined tail calls. For
+ // musttail, we have to avoid introducing potential unbounded stack
+ // growth. For example, if functions 'f' and 'g' are mutually recursive
+ // with musttail, we can inline 'g' into 'f' so long as we preserve
+ // musttail on the cloned call to 'f'. If either the inlined call site
+ // or the cloned call site is *not* musttail, the program already has
+ // one frame of stack growth, so it's safe to remove musttail. Here is
+ // a table of example transformations:
+ //
+ // f -> musttail g -> musttail f ==> f -> musttail f
+ // f -> musttail g -> tail f ==> f -> tail f
+ // f -> g -> musttail f ==> f -> f
+ // f -> g -> tail f ==> f -> f
+ CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
+ ChildTCK = std::min(CallSiteTailKind, ChildTCK);
+ CI->setTailCallKind(ChildTCK);
+ InlinedMustTailCalls |= CI->isMustTailCall();
+
+ // Calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+ }
+
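The std::min in this hunk works because TailCallKind's enumerators are ordered by strength, TCK_None < TCK_Tail < TCK_MustTail, so taking the minimum of the call site's kind and the cloned call's kind can only weaken a marking, exactly matching the table in the comment. A small sketch of that lattice, with the ordering assumption stated explicitly:

#include <algorithm>
#include <cassert>

// Mirrors llvm::CallInst::TailCallKind; the whole trick assumes this
// weakest-to-strongest ordering of the enumerators.
enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2 };

// Inlining may only weaken a tail marking, never strengthen it.
static TailCallKind reduceStrength(TailCallKind Site, TailCallKind Child) {
  return std::min(Site, Child);
}

int main() {
  // f -> musttail g -> tail f  ==>  f -> tail f
  assert(reduceStrength(TCK_MustTail, TCK_Tail) == TCK_Tail);
  // f -> g -> musttail f       ==>  f -> f
  assert(reduceStrength(TCK_None, TCK_MustTail) == TCK_None);
  return 0;
}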
// Leave lifetime markers for the static alloca's, scoping them to the
// function we just inlined.
if (InsertLifetime && !IFI.StaticAllocas.empty()) {
@@ -664,7 +736,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
continue;
// Try to determine the size of the allocation.
- ConstantInt *AllocaSize = 0;
+ ConstantInt *AllocaSize = nullptr;
if (ConstantInt *AIArraySize =
dyn_cast<ConstantInt>(AI->getArraySize())) {
if (IFI.DL) {
@@ -683,9 +755,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
builder.CreateLifetimeStart(AI, AllocaSize);
- for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
- IRBuilder<> builder(Returns[ri]);
- builder.CreateLifetimeEnd(AI, AllocaSize);
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.lifetime.end calls between a musttail call and a
+ // return. The return kills all local allocas.
+ if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+ continue;
+ IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
}
}
}
@@ -704,33 +779,56 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
- for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
- IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.stackrestore calls between a musttail call and a
+ // return. The return will restore the stack pointer.
+ if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+ continue;
+ IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
}
}
- // If we are inlining tail call instruction through a call site that isn't
- // marked 'tail', we must remove the tail marker for any calls in the inlined
- // code. Also, calls inlined through a 'nounwind' call site should be marked
- // 'nounwind'.
- if (InlinedFunctionInfo.ContainsCalls &&
- (MustClearTailCallFlags || MarkNoUnwind)) {
- for (Function::iterator BB = FirstNewBlock, E = Caller->end();
- BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (MustClearTailCallFlags)
- CI->setTailCall(false);
- if (MarkNoUnwind)
- CI->setDoesNotThrow();
- }
- }
-
// If we are inlining for an invoke instruction, we must make sure to rewrite
// any call instructions into invoke instructions.
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+ // Handle any inlined musttail call sites. In order for a new call site to be
+ // musttail, the source of the clone and the inlined call site must have been
+ // musttail. Therefore it's safe to return without merging control into the
+ // phi below.
+ if (InlinedMustTailCalls) {
+ // Check if we need to bitcast the result of any musttail calls.
+ Type *NewRetTy = Caller->getReturnType();
+ bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy;
+
+ // Handle the returns preceded by musttail calls separately.
+ SmallVector<ReturnInst *, 8> NormalReturns;
+ for (ReturnInst *RI : Returns) {
+ CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI);
+ if (!ReturnedMustTail) {
+ NormalReturns.push_back(RI);
+ continue;
+ }
+ if (!NeedBitCast)
+ continue;
+
+ // Delete the old return and any preceding bitcast.
+ BasicBlock *CurBB = RI->getParent();
+ auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
+ RI->eraseFromParent();
+ if (OldCast)
+ OldCast->eraseFromParent();
+
+ // Insert a new bitcast and return with the right type.
+ IRBuilder<> Builder(CurBB);
+ Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
+ }
+
+ // Leave behind the normal returns so we can merge control flow.
+ std::swap(Returns, NormalReturns);
+ }
+
// If we cloned in _exactly one_ basic block, and if that block ends in a
// return instruction, we splice the body of the inlined callee directly into
// the calling basic block.
@@ -774,7 +872,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// "starter" and "ender" blocks. How we accomplish this depends on whether
// this is an invoke instruction or a call instruction.
BasicBlock *AfterCallBB;
- BranchInst *CreatedBranchToNormalDest = NULL;
+ BranchInst *CreatedBranchToNormalDest = nullptr;
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
// Add an unconditional branch to make this look like the CallInst case...
@@ -813,7 +911,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// any users of the original call/invoke instruction.
Type *RTy = CalledFunc->getReturnType();
- PHINode *PHI = 0;
+ PHINode *PHI = nullptr;
if (Returns.size() > 1) {
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
@@ -886,6 +984,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Since we are now done with the Call/Invoke, we can delete it.
TheCall->eraseFromParent();
+ // If we inlined any musttail calls and the original return is now
+ // unreachable, delete it. It can only contain a bitcast and ret.
+ if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
+ AfterCallBB->eraseFromParent();
+
// We should always be able to fold the entry block of the function into the
// single predecessor of the block...
assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index e73a543..9f91eeb 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "integer-division"
#include "llvm/Transforms/Utils/IntegerDivision.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -24,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "integer-division"
+
/// Generate code to compute the remainder of two signed integers. Returns the
/// remainder, which will have the sign of the dividend. Builder's insert point
/// should be pointing where the caller wants code generated, e.g. at the srem
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index d538175..51a3d9c 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -27,7 +27,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "lcssa"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -44,6 +43,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "lcssa"
+
STATISTIC(NumLCSSA, "Number of live out of a loop variables");
/// Return true if the specified block is in the list.
@@ -267,8 +268,6 @@ struct LCSSA : public FunctionPass {
}
private:
- bool processLoop(Loop &L);
-
void verifyAnalysis() const override;
};
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 9d0be8b..aedd787 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -43,6 +43,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "local"
+
STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
//===----------------------------------------------------------------------===//
@@ -159,7 +161,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Otherwise, check to see if the switch only branches to one destination.
 // We do this by resetting "TheOnlyDest" to null when we find two non-equal
// destinations.
- if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0;
+ if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr;
}
if (CI && !TheOnlyDest) {
@@ -180,7 +182,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Found case matching a constant operand?
BasicBlock *Succ = SI->getSuccessor(i);
if (Succ == TheOnlyDest)
- TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+ TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
else
Succ->removePredecessor(BB);
}
@@ -233,7 +235,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
if (IBI->getDestination(i) == TheOnlyDest)
- TheOnlyDest = 0;
+ TheOnlyDest = nullptr;
else
IBI->getDestination(i)->removePredecessor(IBI->getParent());
}
@@ -331,7 +333,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
// dead as we go.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Value *OpV = I->getOperand(i);
- I->setOperand(i, 0);
+ I->setOperand(i, nullptr);
if (!OpV->use_empty()) continue;
@@ -894,24 +896,26 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
return PrefAlign;
}
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
- if (GV->isDeclaration()) return Align;
+ if (GO->isDeclaration())
+ return Align;
// If the memory we set aside for the global may not be the memory used by
// the final program then it is impossible for us to reliably enforce the
// preferred alignment.
- if (GV->isWeakForLinker()) return Align;
+ if (GO->isWeakForLinker())
+ return Align;
- if (GV->getAlignment() >= PrefAlign)
- return GV->getAlignment();
+ if (GO->getAlignment() >= PrefAlign)
+ return GO->getAlignment();
// We can only increase the alignment of the global if it has no alignment
// specified or if it is not assigned a section. If it is assigned a
// section, the global could be densely packed with other objects in the
// section, increasing the alignment could cause padding issues.
- if (!GV->hasSection() || GV->getAlignment() == 0)
- GV->setAlignment(PrefAlign);
- return GV->getAlignment();
+ if (!GO->hasSection() || GO->getAlignment() == 0)
+ GO->setAlignment(PrefAlign);
+ return GO->getAlignment();
}
return Align;
@@ -928,7 +932,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+ computeKnownBits(V, KnownZero, KnownOne, DL);
unsigned TrailZ = KnownZero.countTrailingOnes();
// Avoid trouble with ridiculously large TrailZ values, such as
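The TrailZ line above is the known-bits-to-alignment conversion: if the low k bits of a pointer are provably zero, the pointer is 2^k-aligned, so counting the trailing ones of the KnownZero mask gives log2 of the provable alignment, and the surrounding code clamps absurd counts. A plain-C++ sketch of the conversion, using a 64-bit mask in place of APInt and LLVM's 2^29 maximum-alignment cap:

#include <algorithm>
#include <cstdint>

// KnownZeroMask has bit i set when bit i of the pointer is provably 0.
// Trailing ones of the mask == trailing zeros of the pointer value, and
// 2^(that count) is the alignment we may claim. Clamp so a fully-known-zero
// value can't demand a 2^63 alignment.
static uint64_t alignmentFromKnownZero(uint64_t KnownZeroMask) {
  unsigned TrailZ = 0;
  while (TrailZ < 64 && (KnownZeroMask & (1ull << TrailZ)))
    ++TrailZ;
  TrailZ = std::min(TrailZ, 29u); // cap at 2^29, LLVM's maximum alignment
  return 1ull << TrailZ;
}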
@@ -981,10 +985,10 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (LdStHasDebugValue(DIVar, SI))
return true;
- Instruction *DbgVal = NULL;
+ Instruction *DbgVal = nullptr;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
- Argument *ExtendedArg = NULL;
+ Argument *ExtendedArg = nullptr;
if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
@@ -993,14 +997,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI);
else
DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI);
-
- // Propagate any debug metadata from the store onto the dbg.value.
- DebugLoc SIDL = SI->getDebugLoc();
- if (!SIDL.isUnknown())
- DbgVal->setDebugLoc(SIDL);
- // Otherwise propagate debug metadata from dbg.declare.
- else
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
return true;
}
@@ -1020,17 +1017,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
Instruction *DbgVal =
Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
DIVar, LI);
-
- // Propagate any debug metadata from the store onto the dbg.value.
- DebugLoc LIDL = LI->getDebugLoc();
- if (!LIDL.isUnknown())
- DbgVal->setDebugLoc(LIDL);
- // Otherwise propagate debug metadata from dbg.declare.
- else
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
return true;
}
+/// Determine whether this alloca is either a VLA or an array.
+static bool isArray(AllocaInst *AI) {
+ return AI->isArrayAllocation() ||
+ AI->getType()->getElementType()->isArrayTy();
+}
+
/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
/// of llvm.dbg.value intrinsics.
bool llvm::LowerDbgDeclare(Function &F) {
@@ -1049,20 +1045,26 @@ bool llvm::LowerDbgDeclare(Function &F) {
AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
// If this is an alloca for a scalar variable, insert a dbg.value
// at each load and store to the alloca and erase the dbg.declare.
- if (AI && !AI->isArrayAllocation()) {
-
- // We only remove the dbg.declare intrinsic if all uses are
- // converted to dbg.value intrinsics.
- bool RemoveDDI = true;
+ // The dbg.values allow tracking a variable even if it is not
+ // stored on the stack, while the dbg.declare can only describe
+ // the stack slot (and at a lexical-scope granularity). Later
+ // passes will attempt to elide the stack slot.
+ if (AI && !isArray(AI)) {
for (User *U : AI->users())
if (StoreInst *SI = dyn_cast<StoreInst>(U))
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
else if (LoadInst *LI = dyn_cast<LoadInst>(U))
ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- else
- RemoveDDI = false;
- if (RemoveDDI)
- DDI->eraseFromParent();
+ else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // This is a call by-value or some other instruction that
+ // takes a pointer to the variable. Insert a *value*
+ // intrinsic that describes the alloca.
+ auto DbgVal =
+ DIB.insertDbgValueIntrinsic(AI, 0,
+ DIVariable(DDI->getVariable()), CI);
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ }
+ DDI->eraseFromParent();
}
}
return true;
@@ -1076,7 +1078,7 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
return DDI;
- return 0;
+ return nullptr;
}
bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 47083ea..f7787da 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,7 +37,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-simplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
@@ -63,6 +62,8 @@
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-simplify"
+
STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
STATISTIC(NumNested , "Number of nested loops split out");
@@ -85,7 +86,7 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
// Figure out *which* outside block to put this after. Prefer an outside
// block that neighbors a BB actually in the loop.
- BasicBlock *FoundBB = 0;
+ BasicBlock *FoundBB = nullptr;
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
Function::iterator BBI = SplitPreds[i];
if (++BBI != NewBB->getParent()->end() &&
@@ -119,7 +120,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
// If the loop is branched to from an indirect branch, we won't
// be able to fully transform the loop, because it prohibits
// edge splitting.
- if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+ if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
// Keep track of it.
OutsideBlocks.push_back(P);
@@ -160,14 +161,14 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
BasicBlock *P = *I;
if (L->contains(P)) {
// Don't do this if the loop is exited via an indirect branch.
- if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+ if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
LoopBlocks.push_back(P);
}
}
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
- BasicBlock *NewExitBB = 0;
+ BasicBlock *NewExitBB = nullptr;
if (Exit->isLandingPad()) {
SmallVector<BasicBlock*, 2> NewBBs;
@@ -211,7 +212,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -226,7 +227,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
// We found something tasty to remove.
return PN;
}
- return 0;
+ return nullptr;
}
/// \brief If this loop has multiple backedges, try to pull one of them out into
@@ -253,14 +254,14 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
LoopInfo *LI, ScalarEvolution *SE, Pass *PP) {
// Don't try to separate loops without a preheader.
if (!Preheader)
- return 0;
+ return nullptr;
// The header is not a landing pad; preheader insertion should ensure this.
assert(!L->getHeader()->isLandingPad() &&
"Can't insert backedge to landing pad");
PHINode *PN = findPHIToPartitionLoops(L, AA, DT);
- if (PN == 0) return 0; // No known way to partition.
+ if (!PN) return nullptr; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
// handles the case when a PHI node has multiple instances of itself as
@@ -271,7 +272,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
!L->contains(PN->getIncomingBlock(i))) {
// We can't split indirectbr edges.
if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
- return 0;
+ return nullptr;
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
}
@@ -362,7 +363,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Unique backedge insertion currently depends on having a preheader.
if (!Preheader)
- return 0;
+ return nullptr;
// The header is not a landing pad; preheader insertion should ensure this.
assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
@@ -374,7 +375,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Indirectbr edges cannot be split, so we must fail if we find one.
if (isa<IndirectBrInst>(P->getTerminator()))
- return 0;
+ return nullptr;
if (P != Preheader) BackedgeBlocks.push_back(P);
}
@@ -403,7 +404,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// preheader over to the new PHI node.
unsigned PreheaderIdx = ~0U;
bool HasUniqueIncomingValue = true;
- Value *UniqueValue = 0;
+ Value *UniqueValue = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *IBB = PN->getIncomingBlock(i);
Value *IV = PN->getIncomingValue(i);
@@ -412,7 +413,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
} else {
NewPN->addIncoming(IV, IBB);
if (HasUniqueIncomingValue) {
- if (UniqueValue == 0)
+ if (!UniqueValue)
UniqueValue = IV;
else if (UniqueValue != IV)
HasUniqueIncomingValue = false;
@@ -609,7 +610,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -653,7 +654,8 @@ ReprocessLoop:
if (Inst == CI)
continue;
if (!L->makeLoopInvariant(Inst, AnyInvariant,
- Preheader ? Preheader->getTerminator() : 0)) {
+ Preheader ? Preheader->getTerminator()
+ : nullptr)) {
AllInvariant = false;
break;
}
@@ -761,12 +763,6 @@ namespace {
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
void verifyAnalysis() const override;
-
- private:
- bool ProcessLoop(Loop *L);
- BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
- Loop *SeparateNestedLoop(Loop *L, BasicBlock *Preheader);
- BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
};
}
@@ -782,7 +778,7 @@ INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
char &llvm::LoopSimplifyID = LoopSimplify::ID;
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
-/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
/// it in any convenient order) inserting preheaders...
///
bool LoopSimplify::runOnFunction(Function &F) {
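Nearly every hunk in this file belongs to the tree-wide 0/NULL -> nullptr sweep. Why nullptr is the safer spelling, in an isolated, compilable example (the overloads are hypothetical):

    #include <cstddef>

    void f(int) {}    // (1)
    void f(int *) {}  // (2)

    int main() {
      f(0);        // calls (1): the literal 0 is an int before it is a null pointer
      // f(NULL);  // ambiguous or calls (1), depending on how NULL is defined
      f(nullptr);  // calls (2): nullptr_t converts only to pointer types
    }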
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index d2dfc20..d953e30 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -25,6 +24,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -34,6 +35,8 @@
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-unroll"
+
// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
@@ -68,10 +71,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
BasicBlock *OnlyPred = BB->getSinglePredecessor();
- if (!OnlyPred) return 0;
+ if (!OnlyPred) return nullptr;
if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
- return 0;
+ return nullptr;
DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
@@ -227,20 +230,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
(unsigned)GreatestCommonDivisor64(Count, TripMultiple);
}
+ // Report the unrolling decision.
+ DebugLoc LoopLoc = L->getStartLoc();
+ Function *F = Header->getParent();
+ LLVMContext &Ctx = F->getContext();
+
if (CompletelyUnroll) {
DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
<< " with trip count " << TripCount << "!\n");
+ emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
+ Twine("completely unrolled loop with ") +
+ Twine(TripCount) + " iterations");
} else {
DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
<< " by " << Count);
+ Twine DiagMsg("unrolled loop by a factor of " + Twine(Count));
if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+ DiagMsg.concat(" with a breakout at trip " + Twine(BreakoutTrip));
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ DiagMsg.concat(" with " + Twine(TripMultiple) + " trips per branch");
} else if (RuntimeTripCount) {
DEBUG(dbgs() << " with run-time trip count");
+ DiagMsg.concat(" with run-time trip count");
}
DEBUG(dbgs() << "!\n");
+ emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg);
}
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
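The message is accumulated in a std::string rather than a Twine because a Twine neither owns its operands nor appends in place (concat returns a new Twine). The safe pattern in isolation (a sketch; the function is hypothetical):

    #include "llvm/ADT/Twine.h"
    #include <string>
    using namespace llvm;

    // Concatenate within one full expression, then materialize. Storing a
    // Twine built from temporaries leaves dangling references.
    std::string unrollMessage(unsigned Count, unsigned BreakoutTrip) {
      std::string Msg = ("unrolled loop by a factor of " + Twine(Count)).str();
      Msg += (" with a breakout at trip " + Twine(BreakoutTrip)).str();
      return Msg;
    }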
@@ -411,7 +427,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
}
}
- DominatorTree *DT = 0;
+ DominatorTree *DT = nullptr;
if (PP) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
// Incrementally updating domtree after loop unrolling would be easy.
@@ -458,7 +474,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
Loop *OuterL = L->getParentLoop();
// Remove the loop from the LoopPassManager if it's completely removed.
- if (CompletelyUnroll && LPM != NULL)
+ if (CompletelyUnroll && LPM != nullptr)
LPM->deleteLoopFromQueue(L);
// If we have a pass and a DominatorTree we should re-simplify impacted loops
@@ -470,7 +486,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
OuterL = L;
if (OuterL) {
ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ 0, SE);
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE);
formLCSSARecursively(*OuterL, *DT, SE);
}
}
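The remark emitted above becomes a user-visible diagnostic; a minimal sketch of the entry point as used here (assuming the 3.5-era free function from DiagnosticInfo.h; reportUnroll is hypothetical):

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/DiagnosticInfo.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Emit a remark attributed to the unroller at the loop's start location.
    void reportUnroll(Function &F, DebugLoc Loc, unsigned Count) {
      emitOptimizationRemark(F.getContext(), "loop-unroll", F, Loc,
                             "unrolled loop by a factor of " + Twine(Count));
    }

Front ends can then surface it, e.g. clang prints these under -Rpass=loop-unroll.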
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d801d5f..5bef091 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -21,7 +21,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -37,6 +36,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-unroll"
+
STATISTIC(NumRuntimeUnrolled,
"Number of loops unrolled with run-time trip counts");
@@ -58,7 +59,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
BasicBlock *OrigPH, BasicBlock *NewPH,
ValueToValueMapTy &LVMap, Pass *P) {
BasicBlock *Latch = L->getLoopLatch();
- assert(Latch != 0 && "Loop must have a latch");
+ assert(Latch && "Loop must have a latch");
// Create a PHI node for each outgoing value from the original loop
// (which means it is an outgoing value from the prolog code too).
@@ -110,7 +111,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
ConstantInt::get(TripCount->getType(), Count));
BasicBlock *Exit = L->getUniqueExitBlock();
- assert(Exit != 0 && "Loop must have a single exit block only");
+ assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
if (!Exit->isLandingPad()) {
@@ -232,7 +233,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// Make sure the loop is in canonical form, and there is a single
// exit block only.
- if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
return false;
// Use Scalar Evolution to compute the trip count. This allows more
@@ -240,7 +241,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (!LPM)
return false;
ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE == 0)
+ if (!SE)
return false;
// Only unroll loops with a computable trip count and the trip count needs
@@ -301,7 +302,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
ValueToValueMapTy LVMap;
Function *F = Header->getParent();
// These variables are used to update the CFG links in each iteration
- BasicBlock *CompareBB = 0;
+ BasicBlock *CompareBB = nullptr;
BasicBlock *LastLoopBB = PH;
// Get an ordered list of blocks in the loop to help with the ordering of the
// cloned blocks in the prolog code
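For readers following ConnectProlog, the source-level shape of a run-time prolog unroll with Count = 4 is roughly this (a sketch on a hypothetical loop):

    // The prolog runs n % 4 iterations; the main loop then always runs a
    // multiple of 4, so its body can be cloned four times.
    void scale(float *a, int n) {
      int r = n % 4;                  // prolog trip count
      int i = 0;
      for (; i < r; ++i) a[i] *= 2;   // prolog: peeled iterations
      for (; i < n; i += 4) {         // unrolled body, 4 copies
        a[i] *= 2; a[i+1] *= 2; a[i+2] *= 2; a[i+3] *= 2;
      }
    }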
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 3e61289..ff89e74 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "lower-expect-intrinsic"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
@@ -29,6 +28,8 @@
using namespace llvm;
+#define DEBUG_TYPE "lower-expect-intrinsic"
+
STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
static cl::opt<uint32_t>
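The only substantive change in this file is the tree-wide relocation of DEBUG_TYPE below the includes, so headers are never compiled against a stray per-file value. The intended layout, sketched:

    // Headers first, with no DEBUG_TYPE in scope; any header that needs one
    // must define and #undef its own.
    #include "llvm/Support/Debug.h"

    #define DEBUG_TYPE "lower-expect-intrinsic"

    // From here on, DEBUG(...) and STATISTIC(...) are tagged with this
    // pass's type only.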
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index b1f758e..66d57b0 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "lowerinvoke"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -25,6 +24,8 @@
#include "llvm/Support/CommandLine.h"
using namespace llvm;
+#define DEBUG_TYPE "lowerinvoke"
+
STATISTIC(NumInvokes, "Number of invokes replaced");
namespace {
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 6fb7410..9ef694c 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -27,6 +27,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "lower-switch"
+
namespace {
/// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
/// instructions.
@@ -51,7 +53,8 @@ namespace {
Constant* High;
BasicBlock* BB;
- CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
+ CaseRange(Constant *low = nullptr, Constant *high = nullptr,
+ BasicBlock *bb = nullptr) :
Low(low), High(high), BB(bb) { }
};
@@ -182,7 +185,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
F->getBasicBlockList().insert(++FI, NewLeaf);
// Emit comparison
- ICmpInst* Comp = NULL;
+ ICmpInst* Comp = nullptr;
if (Leaf.Low == Leaf.High) {
// Make the seteq instruction...
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index a188ac5..189caa7 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Dominators.h"
@@ -22,6 +21,8 @@
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
using namespace llvm;
+#define DEBUG_TYPE "mem2reg"
+
STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index ff6e6f9..d9dbbca 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -24,16 +24,16 @@ static void appendToGlobalArray(const char *Array,
Module &M, Function *F, int Priority) {
IRBuilder<> IRB(M.getContext());
FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
- StructType *Ty = StructType::get(
- IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL);
-
- Constant *RuntimeCtorInit = ConstantStruct::get(
- Ty, IRB.getInt32(Priority), F, NULL);
// Get the current set of static global constructors and add the new ctor
// to the list.
SmallVector<Constant *, 16> CurrentCtors;
- if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
+ StructType *EltTy;
+ if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
+ // If there is a global_ctors array, use the existing struct type, which can
+ // have 2 or 3 fields.
+ ArrayType *ATy = cast<ArrayType>(GVCtor->getType()->getElementType());
+ EltTy = cast<StructType>(ATy->getElementType());
if (Constant *Init = GVCtor->getInitializer()) {
unsigned n = Init->getNumOperands();
CurrentCtors.reserve(n + 1);
@@ -41,13 +41,26 @@ static void appendToGlobalArray(const char *Array,
CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
}
GVCtor->eraseFromParent();
+ } else {
+ // Use a simple two-field struct if there isn't one already.
+ EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
+ nullptr);
}
+ // Build a 2 or 3 field global_ctor entry. We don't take a comdat key.
+ Constant *CSVals[3];
+ CSVals[0] = IRB.getInt32(Priority);
+ CSVals[1] = F;
+ // FIXME: Drop support for the two element form in LLVM 4.0.
+ if (EltTy->getNumElements() >= 3)
+ CSVals[2] = llvm::Constant::getNullValue(IRB.getInt8PtrTy());
+ Constant *RuntimeCtorInit =
+ ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
+
CurrentCtors.push_back(RuntimeCtorInit);
// Create a new initializer.
- ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(),
- CurrentCtors.size());
+ ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
// Create the new global variable and replace all uses of
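This helper backs the public appendToGlobalCtors entry point used by the sanitizer passes elsewhere in this patch; a usage sketch (assuming the ModuleUtils.h declaration; registerModuleCtor is hypothetical):

    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/ModuleUtils.h"
    using namespace llvm;

    void registerModuleCtor(Module &M, Function *Ctor) {
      appendToGlobalCtors(M, Ctor, /*Priority=*/0);
      // If llvm.global_ctors already uses the 3-field layout, this appends
      //   { i32 0, void ()* @Ctor, i8* null }
      // rather than forcing the legacy 2-field form.
    }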
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 25fab89..06d73fe 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -25,7 +25,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -51,6 +50,8 @@
#include <queue>
using namespace llvm;
+#define DEBUG_TYPE "mem2reg"
+
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
@@ -59,6 +60,7 @@ STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
// FIXME: If the memory unit is of pointer or integer type, we can permit
// assignments to subsections of the memory unit.
+ unsigned AS = AI->getType()->getAddressSpace();
// Only allow direct and non-volatile loads and stores...
for (const User *U : AI->users()) {
@@ -79,12 +81,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
return false;
if (!onlyUsedByLifetimeMarkers(BCI))
return false;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
return false;
if (!GEPI->hasAllZeroIndices())
return false;
@@ -114,11 +116,11 @@ struct AllocaInfo {
void clear() {
DefiningBlocks.clear();
UsingBlocks.clear();
- OnlyStore = 0;
- OnlyBlock = 0;
+ OnlyStore = nullptr;
+ OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
- AllocaPointerVal = 0;
- DbgDeclare = 0;
+ AllocaPointerVal = nullptr;
+ DbgDeclare = nullptr;
}
/// Scan the uses of the specified alloca, filling in the AllocaInfo used
@@ -146,7 +148,7 @@ struct AllocaInfo {
}
if (OnlyUsedInOneBlock) {
- if (OnlyBlock == 0)
+ if (!OnlyBlock)
OnlyBlock = User->getParent();
else if (OnlyBlock != User->getParent())
OnlyUsedInOneBlock = false;
@@ -162,7 +164,7 @@ class RenamePassData {
public:
typedef std::vector<Value *> ValVector;
- RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+ RenamePassData() : BB(nullptr), Pred(nullptr), Values() {}
RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
: BB(B), Pred(P), Values(V) {}
BasicBlock *BB;
@@ -471,7 +473,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Find the nearest store that has a lower index than this load.
StoresByIndexTy::iterator I =
std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
- std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
+ std::make_pair(LoadIdx,
+ static_cast<StoreInst *>(nullptr)),
less_first());
if (I == StoresByIndex.begin())
@@ -632,7 +635,7 @@ void PromoteMem2Reg::run() {
// and inserting the phi nodes we marked as necessary
//
std::vector<RenamePassData> RenamePassWorkList;
- RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values));
do {
RenamePassData RPD;
RPD.swap(RenamePassWorkList.back());
@@ -682,7 +685,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
@@ -990,7 +993,7 @@ NextIteration:
// Get the next phi node.
++PNI;
APN = dyn_cast<PHINode>(PNI);
- if (APN == 0)
+ if (!APN)
break;
// Verify that it is missing entries. If not, it is not being inserted
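The static_cast<StoreInst *>(nullptr) above is only a placeholder: less_first compares the index halves of the pairs and never looks at the value. The idiom in isolation, with a stand-in comparator (a self-contained sketch):

    #include <algorithm>
    #include <utility>
    #include <vector>

    struct LessFirst { // stand-in for llvm::less_first
      template <typename T> bool operator()(const T &A, const T &B) const {
        return A.first < B.first;
      }
    };

    int main() {
      // (instruction index, store) pairs, sorted by index.
      std::vector<std::pair<unsigned, const char *>> Stores{
          {1, "s1"}, {4, "s2"}, {9, "s3"}};
      // Find the first store at or after index 5; the value half of the key
      // is never compared, so a null placeholder is fine.
      auto I = std::lower_bound(
          Stores.begin(), Stores.end(),
          std::make_pair(5u, static_cast<const char *>(nullptr)), LessFirst());
      // I now points at {9, "s3"}.
    }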
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 28f5c44..3fcb789 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ssaupdater"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
@@ -28,20 +27,22 @@
using namespace llvm;
+#define DEBUG_TYPE "ssaupdater"
+
typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
}
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
+ : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete static_cast<AvailableValsTy*>(AV);
}
void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
- if (AV == 0)
+ if (!AV)
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
@@ -54,7 +55,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
}
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
- assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType && "Need to initialize SSAUpdater");
assert(ProtoType == V->getType() &&
"All rewritten values must have the same type");
getAvailableVals(AV)[BB] = V;
@@ -90,7 +91,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// Otherwise, we have the hard case. Get the live-in values for each
// predecessor.
SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
- Value *SingularValue = 0;
+ Value *SingularValue = nullptr;
// We can get our predecessor info by walking the pred_iterator list, but it
// is relatively slow. If we already have PHI nodes in this block, walk one
@@ -105,7 +106,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
if (i == 0)
SingularValue = PredVal;
else if (PredVal != SingularValue)
- SingularValue = 0;
+ SingularValue = nullptr;
}
} else {
bool isFirstPred = true;
@@ -119,7 +120,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
SingularValue = PredVal;
isFirstPred = false;
} else if (PredVal != SingularValue)
- SingularValue = 0;
+ SingularValue = nullptr;
}
}
@@ -128,7 +129,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
return UndefValue::get(ProtoType);
// Otherwise, if all the merged values are the same, just use it.
- if (SingularValue != 0)
+ if (SingularValue)
return SingularValue;
// Otherwise, we do need a PHI: check to see if we already have one available
@@ -291,7 +292,7 @@ public:
PHINode *PHI = ValueIsPHI(Val, Updater);
if (PHI && PHI->getNumIncomingValues() == 0)
return PHI;
- return 0;
+ return nullptr;
}
/// GetPHIValue - For the specified PHI instruction, return the value
@@ -401,7 +402,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// the order of these instructions in the block. If the first use in the
// block is a load, then it uses the live in value. The last store defines
// the live out value. We handle this by doing a linear scan of the block.
- Value *StoredValue = 0;
+ Value *StoredValue = nullptr;
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
if (LoadInst *L = dyn_cast<LoadInst>(II)) {
// If this is a load from an unrelated pointer, ignore it.
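For orientation, the client-side protocol these methods implement, in a minimal sketch (API names as shown above; the blocks, values, and use are hypothetical):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    // Rewrite one use of a variable that has different definitions in two
    // blocks; the updater inserts PHI nodes on demand.
    void rewriteOneUse(Type *Ty, BasicBlock *BB1, Value *V1,
                       BasicBlock *BB2, Value *V2, Use &U) {
      SSAUpdater SSA;
      SSA.Initialize(Ty, "var");
      SSA.AddAvailableValue(BB1, V1);
      SSA.AddAvailableValue(BB2, V2);
      SSA.RewriteUse(U);
    }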
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 1e88587..150dbdd 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -50,6 +49,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "simplifycfg"
+
static cl::opt<unsigned>
PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
cl::desc("Control the amount of phi node folding to perform (default = 1)"));
@@ -212,6 +213,7 @@ static unsigned ComputeSpeculationCost(const User *I) {
if (!cast<GEPOperator>(I)->hasAllConstantIndices())
return UINT_MAX;
return 1;
+ case Instruction::ExtractValue:
case Instruction::Load:
case Instruction::Add:
case Instruction::Sub:
@@ -272,12 +274,12 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// branch to BB, then it must be in the 'conditional' part of the "if
// statement". If not, it definitely dominates the region.
BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
- if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB)
+ if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
return true;
// If we aren't allowing aggressive promotion anymore, then don't consider
// instructions in the 'if region'.
- if (AggressiveInsts == 0) return false;
+ if (!AggressiveInsts) return false;
// If we have seen this instruction before, don't count it again.
if (AggressiveInsts->count(I)) return true;
@@ -332,7 +334,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) {
return cast<ConstantInt>
(ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
}
- return 0;
+ return nullptr;
}
/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
@@ -343,7 +345,7 @@ static Value *
GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
const DataLayout *DL, bool isEQ, unsigned &UsedICmps) {
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return 0;
+ if (!I) return nullptr;
// If this is an icmp against a constant, handle this as one of the cases.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
@@ -390,19 +392,19 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
// If there are a ton of values, we don't want to make a ginormous switch.
if (Span.getSetSize().ugt(8) || Span.isEmptySet())
- return 0;
+ return nullptr;
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
UsedICmps++;
return hasAdd ? RHSVal : I->getOperand(0);
}
- return 0;
+ return nullptr;
}
// Otherwise, we can only handle an | or &, depending on isEQ.
if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
- return 0;
+ return nullptr;
unsigned NumValsBeforeLHS = Vals.size();
unsigned UsedICmpsBeforeLHS = UsedICmps;
@@ -420,19 +422,19 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
// The RHS of the or/and can't be folded in and we haven't used "Extra" yet,
// set it and return success.
- if (Extra == 0 || Extra == I->getOperand(1)) {
+ if (Extra == nullptr || Extra == I->getOperand(1)) {
Extra = I->getOperand(1);
return LHS;
}
Vals.resize(NumValsBeforeLHS);
UsedICmps = UsedICmpsBeforeLHS;
- return 0;
+ return nullptr;
}
// If the LHS can't be folded in, but Extra is available and RHS can, try to
// use LHS as Extra.
- if (Extra == 0 || Extra == I->getOperand(0)) {
+ if (Extra == nullptr || Extra == I->getOperand(0)) {
Value *OldExtra = Extra;
Extra = I->getOperand(0);
if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL,
@@ -442,11 +444,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Extra = OldExtra;
}
- return 0;
+ return nullptr;
}
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
- Instruction *Cond = 0;
+ Instruction *Cond = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -463,7 +465,7 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
/// isValueEqualityComparison - Return true if the specified terminator checks
/// to see if a value is equal to constant integer value.
Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
- Value *CV = 0;
+ Value *CV = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
// Do not permit merging of large switch instructions into their
// predecessors unless there is only one predecessor.
@@ -653,11 +655,11 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// Otherwise, TI's block must correspond to some matched value. Find out
// which value (or set of values) this is.
- ConstantInt *TIV = 0;
+ ConstantInt *TIV = nullptr;
BasicBlock *TIBB = TI->getParent();
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
if (PredCases[i].Dest == TIBB) {
- if (TIV != 0)
+ if (TIV)
return false; // Cannot handle multiple values coming to this block.
TIV = PredCases[i].Value;
}
@@ -665,7 +667,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// Okay, we found the one constant that our value can be if we get into TI's
// BB. Find out which successor will unconditionally be branched to.
- BasicBlock *TheRealDest = 0;
+ BasicBlock *TheRealDest = nullptr;
for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
if (ThisCases[i].Value == TIV) {
TheRealDest = ThisCases[i].Dest;
@@ -673,7 +675,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
}
// If not handled by any explicit cases, it is handled by the default case.
- if (TheRealDest == 0) TheRealDest = ThisDef;
+ if (!TheRealDest) TheRealDest = ThisDef;
// Remove PHI node entries for dead edges.
BasicBlock *CheckEdge = TheRealDest;
@@ -681,7 +683,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
if (*SI != CheckEdge)
(*SI)->removePredecessor(TIBB);
else
- CheckEdge = 0;
+ CheckEdge = nullptr;
// Insert the new branch.
Instruction *NI = Builder.CreateBr(TheRealDest);
@@ -950,10 +952,10 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Okay, last check. If BB is still a successor of PSI, then we must
// have an infinite loop case. If so, add an infinitely looping block
// to handle the case to preserve the behavior of the code.
- BasicBlock *InfLoopBlock = 0;
+ BasicBlock *InfLoopBlock = nullptr;
for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
if (NewSI->getSuccessor(i) == BB) {
- if (InfLoopBlock == 0) {
+ if (!InfLoopBlock) {
// Insert it at the end of the function, because it's either code,
// or it won't matter if it's hot. :)
InfLoopBlock = BasicBlock::Create(BB->getContext(),
@@ -1099,7 +1101,7 @@ HoistTerminator:
// These values do not agree. Insert a select instruction before NT
// that determines the right value.
SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (SI == 0)
+ if (!SI)
SI = cast<SelectInst>
(Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
BB1V->getName()+"."+BB2V->getName()));
@@ -1144,7 +1146,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// Gather the PHI nodes in BBEnd.
std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
- Instruction *FirstNonPhiInBBEnd = 0;
+ Instruction *FirstNonPhiInBBEnd = nullptr;
for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
I != E; ++I) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
@@ -1222,7 +1224,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// The operands should be either the same or they need to be generated
// with a PHI node after sinking. We only handle the case where there is
// a single pair of different operands.
- Value *DifferentOp1 = 0, *DifferentOp2 = 0;
+ Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr;
unsigned Op1Idx = 0;
for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
if (I1->getOperand(I) == I2->getOperand(I))
@@ -1318,11 +1320,11 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
BasicBlock *StoreBB, BasicBlock *EndBB) {
StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
if (!StoreToHoist)
- return 0;
+ return nullptr;
// Volatile or atomic.
if (!StoreToHoist->isSimple())
- return 0;
+ return nullptr;
Value *StorePtr = StoreToHoist->getPointerOperand();
@@ -1334,7 +1336,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
// Could be calling an instruction that effects memory like free().
if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
- return 0;
+ return nullptr;
StoreInst *SI = dyn_cast<StoreInst>(CurI);
// Found the previous store make sure it stores to the same location.
@@ -1342,10 +1344,10 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
// Found the previous store, return its value operand.
return SI->getValueOperand();
else if (SI)
- return 0; // Unknown store.
+ return nullptr; // Unknown store.
}
- return 0;
+ return nullptr;
}
/// \brief Speculate a conditional basic block flattening the CFG.
@@ -1411,8 +1413,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
unsigned SpeculationCost = 0;
- Value *SpeculatedStoreValue = 0;
- StoreInst *SpeculatedStore = 0;
+ Value *SpeculatedStoreValue = nullptr;
+ StoreInst *SpeculatedStore = nullptr;
for (BasicBlock::iterator BBI = ThenBB->begin(),
BBE = std::prev(ThenBB->end());
BBI != BBE; ++BBI) {
@@ -1620,7 +1622,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
- if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
+ if (!CB || !CB->getType()->isIntegerTy(1)) continue;
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
@@ -1745,7 +1747,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
// If we folded the first phi, PN dangles at this point. Refresh it. If
// we ran out of PHIs then we simplified them all.
PN = dyn_cast<PHINode>(BB->begin());
- if (PN == 0) return true;
+ if (!PN) return true;
// Don't fold i1 branches on PHIs which contain binary operators. These can
// often be turned into switches and other things.
@@ -1759,11 +1761,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
// instructions in the predecessor blocks can be promoted as well. If
// not, we won't be able to get rid of the control flow, so it's not
// worth promoting to select instructions.
- BasicBlock *DomBlock = 0;
+ BasicBlock *DomBlock = nullptr;
BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
- IfBlock1 = 0;
+ IfBlock1 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock1);
for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
@@ -1776,7 +1778,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
}
if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
- IfBlock2 = 0;
+ IfBlock2 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock2);
for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
@@ -1959,7 +1961,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
BasicBlock *BB = BI->getParent();
- Instruction *Cond = 0;
+ Instruction *Cond = nullptr;
if (BI->isConditional())
Cond = dyn_cast<Instruction>(BI->getCondition());
else {
@@ -1985,12 +1987,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
}
}
- if (Cond == 0)
+ if (!Cond)
return false;
}
- if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
- Cond->getParent() != BB || !Cond->hasOneUse())
+ if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
return false;
// Only allow this if the condition is a simple instruction that can be
@@ -2005,7 +2007,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// that feeds the branch. We later ensure that any values that _it_ uses
// were also live in the predecessor, so that we don't unnecessarily create
// register pressure or inhibit out-of-order execution.
- Instruction *BonusInst = 0;
+ Instruction *BonusInst = nullptr;
if (&*FrontIt != Cond &&
FrontIt->hasOneUse() && FrontIt->user_back() == Cond &&
isSafeToSpeculativelyExecute(FrontIt)) {
@@ -2040,7 +2042,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Finally, don't infinitely unroll conditional loops.
BasicBlock *TrueDest = BI->getSuccessor(0);
- BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0;
+ BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
if (TrueDest == BB || FalseDest == BB)
return false;
@@ -2052,7 +2054,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// the common successor, verify that the same value flows in from both
// blocks.
SmallVector<PHINode*, 4> PHIs;
- if (PBI == 0 || PBI->isUnconditional() ||
+ if (!PBI || PBI->isUnconditional() ||
(BI->isConditional() &&
!SafeToMergeTerminators(BI, PBI)) ||
(!BI->isConditional() &&
@@ -2142,7 +2144,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
}
// If we have a bonus inst, clone it into the predecessor block.
- Instruction *NewBonus = 0;
+ Instruction *NewBonus = nullptr;
if (BonusInst) {
NewBonus = BonusInst->clone();
@@ -2218,14 +2220,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
MDBuilder(BI->getContext()).
createBranchWeights(MDWeights));
} else
- PBI->setMetadata(LLVMContext::MD_prof, NULL);
+ PBI->setMetadata(LLVMContext::MD_prof, nullptr);
} else {
// Update PHI nodes in the common successors.
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
ConstantInt *PBI_C = cast<ConstantInt>(
PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
assert(PBI_C->getType()->isIntegerTy(1));
- Instruction *MergedCond = 0;
+ Instruction *MergedCond = nullptr;
if (PBI->getSuccessor(0) == TrueDest) {
// Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
// PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
@@ -2498,16 +2500,16 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
// successor.
BasicBlock *KeepEdge1 = TrueBB;
- BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0;
+ BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
// Then remove the rest.
for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
BasicBlock *Succ = OldTerm->getSuccessor(I);
// Make sure only to keep exactly one copy of each edge.
if (Succ == KeepEdge1)
- KeepEdge1 = 0;
+ KeepEdge1 = nullptr;
else if (Succ == KeepEdge2)
- KeepEdge2 = 0;
+ KeepEdge2 = nullptr;
else
Succ->removePredecessor(OldTerm->getParent());
}
@@ -2516,7 +2518,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
// Insert an appropriate new terminator.
- if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) {
+ if (!KeepEdge1 && !KeepEdge2) {
if (TrueBB == FalseBB)
// We were only looking for one successor, and it was present.
// Create an unconditional branch to it.
@@ -2538,7 +2540,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
// One of the selected values was a successor, but the other wasn't.
// Insert an unconditional branch to the one that was found;
// the edge to the one that wasn't must be unreachable.
- if (KeepEdge1 == 0)
+ if (!KeepEdge1)
// Only TrueBB was found.
Builder.CreateBr(TrueBB);
else
@@ -2639,7 +2641,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
// 'V' and this block is the default case for the switch. In this case we can
// fold the compared value into the switch to simplify things.
BasicBlock *Pred = BB->getSinglePredecessor();
- if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
+ if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) return false;
SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
if (SI->getCondition() != V)
@@ -2681,7 +2683,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
// the block.
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
- if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
+ if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
isa<PHINode>(++BasicBlock::iterator(PHIUse)))
return false;
@@ -2733,16 +2735,16 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
IRBuilder<> &Builder) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
- if (Cond == 0) return false;
+ if (!Cond) return false;
// Change br (X == 0 | X == 1), T, F into a switch instruction.
// If this is a bunch of seteq's or'd together, or if it's a bunch of
// 'setne's and'ed together, collect them.
- Value *CompVal = 0;
+ Value *CompVal = nullptr;
std::vector<ConstantInt*> Values;
bool TrueWhenEqual = true;
- Value *ExtraCase = 0;
+ Value *ExtraCase = nullptr;
unsigned UsedICmps = 0;
if (Cond->getOpcode() == Instruction::Or) {
@@ -2755,7 +2757,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
}
// If we didn't have a multiply compared value, fail.
- if (CompVal == 0) return false;
+ if (!CompVal) return false;
// Avoid turning single icmps into a switch.
if (UsedICmps <= 1)
@@ -3050,7 +3052,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// Find the most popular block.
unsigned MaxPop = 0;
unsigned MaxIndex = 0;
- BasicBlock *MaxBlock = 0;
+ BasicBlock *MaxBlock = nullptr;
for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
if (I->second.first > MaxPop ||
@@ -3188,7 +3190,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
- ComputeMaskedBits(Cond, KnownZero, KnownOne);
+ computeKnownBits(Cond, KnownZero, KnownOne);
// Gather dead cases.
SmallVector<ConstantInt*, 8> DeadCases;
@@ -3241,13 +3243,13 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
BasicBlock *BB,
int *PhiIndex) {
if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
- return NULL; // BB must be empty to be a candidate for simplification.
+ return nullptr; // BB must be empty to be a candidate for simplification.
if (!BB->getSinglePredecessor())
- return NULL; // BB must be dominated by the switch.
+ return nullptr; // BB must be dominated by the switch.
BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
if (!Branch || !Branch->isUnconditional())
- return NULL; // Terminator must be unconditional branch.
+ return nullptr; // Terminator must be unconditional branch.
BasicBlock *Succ = Branch->getSuccessor(0);
@@ -3263,7 +3265,7 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
return PHI;
}
- return NULL;
+ return nullptr;
}
/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
@@ -3336,12 +3338,12 @@ ConstantFold(Instruction *I,
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
if (!A)
- return 0;
+ return nullptr;
if (A->isAllOnesValue())
return LookupConstant(Select->getTrueValue(), ConstantPool);
if (A->isNullValue())
return LookupConstant(Select->getFalseValue(), ConstantPool);
- return 0;
+ return nullptr;
}
SmallVector<Constant *, 4> COps;
@@ -3349,7 +3351,7 @@ ConstantFold(Instruction *I,
if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
COps.push_back(A);
else
- return 0;
+ return nullptr;
}
if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
@@ -3492,7 +3494,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
const DataLayout *DL)
- : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
+ : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
+ Array(nullptr) {
assert(Values.size() && "Can't build lookup table without values!");
assert(TableSize >= Values.size() && "Can't fit values in table!");
@@ -3513,7 +3516,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
TableContents[Idx] = CaseRes;
if (CaseRes != SingleValue)
- SingleValue = 0;
+ SingleValue = nullptr;
}
// Fill in any holes in the table with the default result.
@@ -3526,7 +3529,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
}
if (DefaultValue != SingleValue)
- SingleValue = 0;
+ SingleValue = nullptr;
}
// If each element in the table contains the same value, we only need to store
@@ -3696,7 +3699,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
ConstantInt *MinCaseVal = CI.getCaseValue();
ConstantInt *MaxCaseVal = CI.getCaseValue();
- BasicBlock *CommonDest = 0;
+ BasicBlock *CommonDest = nullptr;
typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
SmallDenseMap<PHINode*, ResultListTy> ResultLists;
SmallDenseMap<PHINode*, Constant*> DefaultResults;
@@ -3741,8 +3744,8 @@ static bool SwitchToLookupTable(SwitchInst *SI,
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
bool HasDefaultResults = false;
if (TableHasHoles) {
- HasDefaultResults = GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
- DefaultResultsList, DL);
+ HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
+ &CommonDest, DefaultResultsList, DL);
}
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
@@ -4038,8 +4041,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// from BI. We know that the condbr dominates the two blocks, so see if
// there is any identical code in the "then" and "else" blocks. If so, we
// can hoist it up to the branching block.
- if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
- if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ if (BI->getSuccessor(0)->getSinglePredecessor()) {
+ if (BI->getSuccessor(1)->getSinglePredecessor()) {
if (HoistThenElseCodeToIf(BI))
return SimplifyCFG(BB, TTI, DL) | true;
} else {
@@ -4051,7 +4054,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
return SimplifyCFG(BB, TTI, DL) | true;
}
- } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
// execute Successor #1 if it branches to successor #0.
TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
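Much of the machinery touched here (GetCaseResults, SwitchLookupTable) serves SwitchToLookupTable; its net effect at source level, sketched on a hypothetical dense switch:

    // Before: a dense switch over constants.
    int classify(unsigned x) {
      switch (x) {
      case 0: return 3;
      case 1: return 7;
      case 2: return 1;
      case 3: return 9;
      default: return 0;
      }
    }

    // After (conceptually): one bounds check plus a table load.
    static const int Table[4] = {3, 7, 1, 9};
    int classifyLowered(unsigned x) { return x < 4 ? Table[x] : 0; }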
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 30f56be..b284e6f 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -13,8 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "indvars"
-
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -34,6 +32,8 @@
using namespace llvm;
+#define DEBUG_TYPE "indvars"
+
STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
@@ -56,14 +56,14 @@ namespace {
public:
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) :
+ SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) :
L(Loop),
LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
SE(SE),
DeadInsts(Dead),
Changed(false) {
DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
assert(LI && "IV simplification requires LoopInfo");
}
@@ -72,7 +72,7 @@ namespace {
/// Iteratively perform simplification on a worklist of users of the
/// specified induction variable. This is the top-level driver that applies
/// all simplicitions to users of an IV.
- void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL);
+ void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
@@ -95,25 +95,25 @@ namespace {
/// be folded (in case more folding opportunities have been exposed).
/// Otherwise return null.
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
- Value *IVSrc = 0;
+ Value *IVSrc = nullptr;
unsigned OperIdx = 0;
- const SCEV *FoldedExpr = 0;
+ const SCEV *FoldedExpr = nullptr;
switch (UseInst->getOpcode()) {
default:
- return 0;
+ return nullptr;
case Instruction::UDiv:
case Instruction::LShr:
// We're only interested in the case where we know something about
// the numerator and have a constant denominator.
if (IVOperand != UseInst->getOperand(OperIdx) ||
!isa<ConstantInt>(UseInst->getOperand(1)))
- return 0;
+ return nullptr;
// Attempt to fold a binary operator with constant operand.
// e.g. ((I + 1) >> 2) => I >> 2
if (!isa<BinaryOperator>(IVOperand)
|| !isa<ConstantInt>(IVOperand->getOperand(1)))
- return 0;
+ return nullptr;
IVSrc = IVOperand->getOperand(0);
// IVSrc must be the (SCEVable) IV, since the other operand is const.
@@ -124,7 +124,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
// Get a constant for the divisor. See createSCEV.
uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
if (D->getValue().uge(BitWidth))
- return 0;
+ return nullptr;
D = ConstantInt::get(UseInst->getContext(),
APInt::getOneBitSet(BitWidth, D->getZExtValue()));
@@ -133,11 +133,11 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
}
// We have something that might fold its operand. Compare SCEVs.
if (!SE->isSCEVable(UseInst->getType()))
- return 0;
+ return nullptr;
// Bypass the operand if SCEV can prove it has no effect.
if (SE->getSCEV(UseInst) != FoldedExpr)
- return 0;
+ return nullptr;
DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
<< " -> " << *UseInst << '\n');
@@ -283,8 +283,8 @@ Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
return IVUser;
// Find a branch guarded by the overflow check.
- BranchInst *Branch = 0;
- Instruction *AddVal = 0;
+ BranchInst *Branch = nullptr;
+ Instruction *AddVal = nullptr;
for (User *U : II->users()) {
if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(U)) {
if (ExtractInst->getNumIndices() != 1)
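The UDiv/LShr case above bypasses an IV increment when SCEV proves it irrelevant; the file's own example, ((I + 1) >> 2) => I >> 2, holds for instance whenever the IV steps in multiples of four. A source-level sketch:

    // With i stepping by 4 from 0, i + 1 differs from i only in the low two
    // bits, so SCEV proves (i + 1) >> 2 == i >> 2 and the add is bypassed
    // (and may then become dead).
    unsigned sum(unsigned n) {
      unsigned s = 0;
      for (unsigned i = 0; i < n; i += 4)
        s += (i + 1) >> 2;   // folds to i >> 2
      return s;
    }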
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index bbd65f1..33b3637 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "instsimplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -29,6 +28,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "instsimplify"
+
STATISTIC(NumSimplified, "Number of redundant instructions removed");
namespace {
@@ -47,17 +48,18 @@ namespace {
bool runOnFunction(Function &F) override {
const DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
do {
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI)
- for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) {
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
+ // Here be subtlety: the iterator must be incremented before the loop
+ // body can delete the instruction it points at, so a range-for won't work.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
Instruction *I = BI++;
// The first time through the loop ToSimplify is empty and we try to
// simplify all instructions. On later iterations ToSimplify is not
@@ -74,7 +76,15 @@ namespace {
++NumSimplified;
Changed = true;
}
- Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ if (res) {
+ // RecursivelyDeleteTriviallyDeadInstructions can remove
+ // more than one instruction, so simply incrementing the
+ // iterator does not work. When instructions get deleted,
+ // re-scan the block from the beginning instead.
+ BI = BB->begin(); BE = BB->end();
+ Changed |= res;
+ }
}
// Place the list of instructions to simplify on the next loop iteration
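The depth_first range adaptor used above replaces an explicit df_begin/df_end pair; in isolation (a sketch assuming the ADT and CFG headers; the function is hypothetical):

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    unsigned countReachableBlocks(Function &F) {
      unsigned N = 0;
      for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
        (void)BB; // visits only blocks reachable from the entry
        ++N;
      }
      return N;
    }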
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b5bc391..3b61bb5 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -75,7 +76,7 @@ public:
// We never change the calling convention.
if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C)
- return NULL;
+ return nullptr;
return callOptimizer(CI->getCalledFunction(), CI, B);
}
@@ -186,14 +187,14 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
!FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != DL->getIntPtrType(Context) ||
FT->getParamType(3) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
if (isFoldable(3, 2, false)) {
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
- return 0;
+ return nullptr;
}
};
@@ -210,14 +211,14 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
!FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != DL->getIntPtrType(Context) ||
FT->getParamType(3) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
if (isFoldable(3, 2, false)) {
B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
- return 0;
+ return nullptr;
}
};
@@ -234,7 +235,7 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
!FT->getParamType(1)->isIntegerTy() ||
FT->getParamType(2) != DL->getIntPtrType(Context) ||
FT->getParamType(3) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
if (isFoldable(3, 2, false)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
@@ -242,7 +243,7 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
- return 0;
+ return nullptr;
}
};
@@ -260,7 +261,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
FT->getParamType(2) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // __strcpy_chk(x,x) -> x
@@ -277,10 +278,10 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
} else {
// Maybe we can still fold __strcpy_chk to __memcpy_chk.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
// This optimization requires DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
Value *Ret =
EmitMemCpyChk(Dst, Src,
@@ -288,7 +289,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
CI->getArgOperand(2), B, DL, TLI);
return Ret;
}
- return 0;
+ return nullptr;
}
};
@@ -306,12 +307,12 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
- return 0;
+ return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
}
// If a) we don't have any length information, or b) we know this will
@@ -325,10 +326,10 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
} else {
// Maybe we can still fold __stpcpy_chk to __memcpy_chk.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
- if (!DL) return nullptr;
// This optimization requires DataLayout.
+ if (!DL) return nullptr;
Type *PT = FT->getParamType(0);
Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
@@ -336,10 +337,10 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
ConstantInt::get(DL->getIntPtrType(PT),
Len - 1));
if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI))
- return 0;
+ return nullptr;
return DstEnd;
}
- return 0;
+ return nullptr;
}
};
@@ -357,7 +358,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
!FT->getParamType(2)->isIntegerTy() ||
FT->getParamType(3) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
if (isFoldable(3, 2, false)) {
Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
@@ -365,7 +366,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
Name.substr(2, 7));
return Ret;
}
- return 0;
+ return nullptr;
}
};
@@ -382,7 +383,7 @@ struct StrCatOpt : public LibCallOptimization {
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType())
- return 0;
+ return nullptr;
// Extract some information from the instruction
Value *Dst = CI->getArgOperand(0);
@@ -390,7 +391,7 @@ struct StrCatOpt : public LibCallOptimization {
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
--Len; // Unbias length.
// Handle the simple, do-nothing case: strcat(x, "") -> x
@@ -398,7 +399,7 @@ struct StrCatOpt : public LibCallOptimization {
return Dst;
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
return emitStrLenMemCpy(Src, Dst, Len, B);
}
@@ -409,7 +410,7 @@ struct StrCatOpt : public LibCallOptimization {
// memory is to be moved to. We just generate a call to strlen.
Value *DstLen = EmitStrLen(Dst, B, DL, TLI);
if (!DstLen)
- return 0;
+ return nullptr;
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
@@ -434,7 +435,7 @@ struct StrNCatOpt : public StrCatOpt {
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType() ||
!FT->getParamType(2)->isIntegerTy())
- return 0;
+ return nullptr;
// Extract some information from the instruction
Value *Dst = CI->getArgOperand(0);
@@ -445,11 +446,11 @@ struct StrNCatOpt : public StrCatOpt {
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Len = LengthArg->getZExtValue();
else
- return 0;
+ return nullptr;
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return 0;
+ if (SrcLen == 0) return nullptr;
--SrcLen; // Unbias length.
// Handle the simple, do-nothing cases:
@@ -458,10 +459,10 @@ struct StrNCatOpt : public StrCatOpt {
if (SrcLen == 0 || Len == 0) return Dst;
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// We don't optimize this case
- if (Len < SrcLen) return 0;
+ if (Len < SrcLen) return nullptr;
// strncat(x, s, c) -> strcat(x, s)
// s is constant so the strcat can be optimized further
@@ -478,20 +479,20 @@ struct StrChrOpt : public LibCallOptimization {
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
!FT->getParamType(1)->isIntegerTy(32))
- return 0;
+ return nullptr;
Value *SrcStr = CI->getArgOperand(0);
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (CharC == 0) {
+ if (!CharC) {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
uint64_t Len = GetStringLength(SrcStr);
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
- return 0;
+ return nullptr;
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(DL->getIntPtrType(*Context), Len),
@@ -504,7 +505,7 @@ struct StrChrOpt : public LibCallOptimization {
if (!getConstantStringInfo(SrcStr, Str)) {
if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
- return 0;
+ return nullptr;
}
// Compute the offset, make sure to handle the case when we're searching for
@@ -528,21 +529,21 @@ struct StrRChrOpt : public LibCallOptimization {
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
!FT->getParamType(1)->isIntegerTy(32))
- return 0;
+ return nullptr;
Value *SrcStr = CI->getArgOperand(0);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
// Cannot fold anything if we're not looking for a constant.
if (!CharC)
- return 0;
+ return nullptr;
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (DL && CharC->isZero())
return EmitStrChr(SrcStr, '\0', B, DL, TLI);
- return 0;
+ return nullptr;
}
// Compute the offset.
@@ -565,7 +566,7 @@ struct StrCmpOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
+ return nullptr;
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strcmp(x,x) -> 0
@@ -591,14 +592,14 @@ struct StrCmpOpt : public LibCallOptimization {
uint64_t Len2 = GetStringLength(Str2P);
if (Len1 && Len2) {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
return EmitMemCmp(Str1P, Str2P,
ConstantInt::get(DL->getIntPtrType(*Context),
std::min(Len1, Len2)), B, DL, TLI);
}
- return 0;
+ return nullptr;
}
};
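The both-lengths-known branch above rewrites strcmp into a memcmp over min(Len1, Len2) bytes; since GetStringLength counts the terminating nul (hence the unbiasing elsewhere in this file), the shorter string's nul is always part of the comparison. A small source-level illustration:

    #include <cstdio>
    #include <cstring>

    int main() {
      const char A[] = "abc", B[] = "abd";
      // Folded form: memcmp(A, B, 4), i.e. min(4, 4) bytes including nuls.
      // The sign of the result matches strcmp's.
      std::printf("%d %d\n", strcmp(A, B) < 0, memcmp(A, B, 4) < 0); // 1 1
      return 0;
    }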
@@ -612,7 +613,7 @@ struct StrNCmpOpt : public LibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
- return 0;
+ return nullptr;
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
@@ -623,7 +624,7 @@ struct StrNCmpOpt : public LibCallOptimization {
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Length = LengthArg->getZExtValue();
else
- return 0;
+ return nullptr;
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
@@ -649,7 +650,7 @@ struct StrNCmpOpt : public LibCallOptimization {
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
- return 0;
+ return nullptr;
}
};
@@ -662,18 +663,18 @@ struct StrCpyOpt : public LibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
+ return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -692,20 +693,20 @@ struct StpCpyOpt: public LibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
+ return nullptr;
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
}
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
Type *PT = FT->getParamType(0);
Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
@@ -728,7 +729,7 @@ struct StrNCpyOpt : public LibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
- return 0;
+ return nullptr;
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
@@ -736,7 +737,7 @@ struct StrNCpyOpt : public LibCallOptimization {
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return 0;
+ if (SrcLen == 0) return nullptr;
--SrcLen;
if (SrcLen == 0) {
@@ -749,15 +750,15 @@ struct StrNCpyOpt : public LibCallOptimization {
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
Len = LengthArg->getZExtValue();
else
- return 0;
+ return nullptr;
if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// Let strncpy handle the zero padding
- if (Len > SrcLen+1) return 0;
+ if (Len > SrcLen+1) return nullptr;
Type *PT = FT->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
@@ -776,7 +777,7 @@ struct StrLenOpt : public LibCallOptimization {
if (FT->getNumParams() != 1 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
Value *Src = CI->getArgOperand(0);
@@ -784,11 +785,26 @@ struct StrLenOpt : public LibCallOptimization {
if (uint64_t Len = GetStringLength(Src))
return ConstantInt::get(CI->getType(), Len-1);
+ // strlen(x?"foo":"bars") --> x ? 3 : 4
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
+ uint64_t LenTrue = GetStringLength(SI->getTrueValue());
+ uint64_t LenFalse = GetStringLength(SI->getFalseValue());
+ if (LenTrue && LenFalse) {
+ emitOptimizationRemark(*Context, "simplify-libcalls", *Caller,
+ SI->getDebugLoc(),
+ "folded strlen(select) to select of constants");
+ return B.CreateSelect(SI->getCondition(),
+ ConstantInt::get(CI->getType(), LenTrue-1),
+ ConstantInt::get(CI->getType(), LenFalse-1));
+ }
+ }
+
// strlen(x) != 0 --> *x != 0
// strlen(x) == 0 --> *x == 0
if (isOnlyUsedInZeroEqualityComparison(CI))
return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
- return 0;
+
+ return nullptr;
}
};
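The SelectInst case is the only functional change in this struct; everything else is the nullptr sweep. What it folds, seen from C++ using the standard strlen, with the new optimization remark fired on the folded call:

    #include <cstdio>
    #include <cstring>

    // strlen(X ? "foo" : "bars") becomes a select of the two constant
    // lengths (X ? 3 : 4); no libcall survives.
    static size_t Len(bool X) { return strlen(X ? "foo" : "bars"); }

    int main() {
      std::printf("%zu %zu\n", Len(true), Len(false)); // prints: 3 4
      return 0;
    }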
@@ -800,7 +816,7 @@ struct StrPBrkOpt : public LibCallOptimization {
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
FT->getReturnType() != FT->getParamType(0))
- return 0;
+ return nullptr;
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
@@ -824,7 +840,7 @@ struct StrPBrkOpt : public LibCallOptimization {
if (DL && HasS2 && S2.size() == 1)
return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI);
- return 0;
+ return nullptr;
}
};
@@ -835,7 +851,7 @@ struct StrToOpt : public LibCallOptimization {
if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy())
- return 0;
+ return nullptr;
Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
@@ -844,7 +860,7 @@ struct StrToOpt : public LibCallOptimization {
CI->addAttribute(1, Attribute::NoCapture);
}
- return 0;
+ return nullptr;
}
};
@@ -856,7 +872,7 @@ struct StrSpnOpt : public LibCallOptimization {
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
@@ -874,7 +890,7 @@ struct StrSpnOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), Pos);
}
- return 0;
+ return nullptr;
}
};
@@ -886,7 +902,7 @@ struct StrCSpnOpt : public LibCallOptimization {
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
@@ -907,7 +923,7 @@ struct StrCSpnOpt : public LibCallOptimization {
if (DL && HasS2 && S2.empty())
return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
- return 0;
+ return nullptr;
}
};
@@ -919,7 +935,7 @@ struct StrStrOpt : public LibCallOptimization {
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isPointerTy())
- return 0;
+ return nullptr;
// fold strstr(x, x) -> x.
if (CI->getArgOperand(0) == CI->getArgOperand(1))
@@ -929,11 +945,11 @@ struct StrStrOpt : public LibCallOptimization {
if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
if (!StrLen)
- return 0;
+ return nullptr;
Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
StrLen, B, DL, TLI);
if (!StrNCmp)
- return 0;
+ return nullptr;
for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
ICmpInst *Old = cast<ICmpInst>(*UI++);
Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
@@ -969,9 +985,9 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI);
- return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
}
- return 0;
+ return nullptr;
}
};
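Two of the StrStrOpt folds above, written out at the source level: a one-character needle becomes strchr, and strstr(x, x) is the identity:

    #include <cstdio>
    #include <cstring>

    int main() {
      const char *Hay = "hello";
      std::printf("%s\n", strstr(Hay, "l")); // -> strchr(Hay, 'l'): "llo"
      std::printf("%s\n", strstr(Hay, Hay)); // -> Hay itself: "hello"
      return 0;
    }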
@@ -982,7 +998,7 @@ struct MemCmpOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy(32))
- return 0;
+ return nullptr;
Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
@@ -991,7 +1007,7 @@ struct MemCmpOpt : public LibCallOptimization {
// Make sure we have a constant length.
ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!LenC) return 0;
+ if (!LenC) return nullptr;
uint64_t Len = LenC->getZExtValue();
if (Len == 0) // memcmp(s1,s2,0) -> 0
@@ -1012,7 +1028,7 @@ struct MemCmpOpt : public LibCallOptimization {
getConstantStringInfo(RHS, RHSStr)) {
// Make sure we're not reading out-of-bounds memory.
if (Len > LHSStr.size() || Len > RHSStr.size())
- return 0;
+ return nullptr;
// Fold the memcmp and normalize the result. This way we get consistent
// results across multiple platforms.
uint64_t Ret = 0;
@@ -1024,7 +1040,7 @@ struct MemCmpOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), Ret);
}
- return 0;
+ return nullptr;
}
};
@@ -1032,14 +1048,14 @@ struct MemCpyOpt : public LibCallOptimization {
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != DL->getIntPtrType(*Context))
- return 0;
+ return nullptr;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
@@ -1052,14 +1068,14 @@ struct MemMoveOpt : public LibCallOptimization {
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != DL->getIntPtrType(*Context))
- return 0;
+ return nullptr;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
@@ -1072,14 +1088,14 @@ struct MemSetOpt : public LibCallOptimization {
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
- return 0;
+ return nullptr;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
@@ -1103,21 +1119,21 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
!FT->getParamType(0)->isDoubleTy())
- return 0;
+ return nullptr;
if (CheckRetType) {
// Check if all the uses for function like 'sin' are converted to float.
for (User *U : CI->users()) {
FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
- if (Cast == 0 || !Cast->getType()->isFloatTy())
- return 0;
+ if (!Cast || !Cast->getType()->isFloatTy())
+ return nullptr;
}
}
// If this is something like 'floor((double)floatval)', convert to floorf.
FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
- return 0;
+ if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy())
+ return nullptr;
// floor((double)floatval) -> (double)floorf(floatval)
Value *V = Cast->getOperand(0);
@@ -1138,15 +1154,15 @@ struct BinaryDoubleFPOpt : public LibCallOptimization {
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return nullptr;
if (CheckRetType) {
// Check if all the uses for function like 'fmin/fmax' are converted to
// float.
for (User *U : CI->users()) {
FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
- if (Cast == 0 || !Cast->getType()->isFloatTy())
- return 0;
+ if (!Cast || !Cast->getType()->isFloatTy())
+ return nullptr;
}
}
@@ -1154,13 +1170,13 @@ struct BinaryDoubleFPOpt : public LibCallOptimization {
// we convert it to fminf.
FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0));
FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1));
- if (Cast1 == 0 || !Cast1->getOperand(0)->getType()->isFloatTy() ||
- Cast2 == 0 || !Cast2->getOperand(0)->getType()->isFloatTy())
- return 0;
+ if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() ||
+ !Cast2 || !Cast2->getOperand(0)->getType()->isFloatTy())
+ return nullptr;
// fmin((double)floatval1, (double)floatval2)
// -> (double)fmin(floatval1, floatval2)
- Value *V = NULL;
+ Value *V = nullptr;
Value *V1 = Cast1->getOperand(0);
Value *V2 = Cast2->getOperand(0);
V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
@@ -1180,7 +1196,7 @@ struct CosOpt : public UnsafeFPLibCallOptimization {
CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
- Value *Ret = NULL;
+ Value *Ret = nullptr;
if (UnsafeFPShrink && Callee->getName() == "cos" &&
TLI->has(LibFunc::cosf)) {
UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
@@ -1208,7 +1224,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
- Value *Ret = NULL;
+ Value *Ret = nullptr;
if (UnsafeFPShrink && Callee->getName() == "pow" &&
TLI->has(LibFunc::powf)) {
UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
@@ -1242,7 +1258,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
}
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (Op2C == 0) return Ret;
+ if (!Op2C) return Ret;
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
return ConstantFP::get(CI->getType(), 1.0);
@@ -1275,7 +1291,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
Op1, "powrecip");
- return 0;
+ return nullptr;
}
};
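The constant-exponent folds visible in this hunk, checked against the library behavior they replace:

    #include <cmath>
    #include <cstdio>

    int main() {
      double X = 2.5;
      std::printf("%g\n", pow(X, 0.0));  // folded to the constant 1.0
      std::printf("%g\n", pow(X, -1.0)); // folded to 1.0 / X ("powrecip")
      return 0;
    }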
@@ -1283,7 +1299,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
- Value *Ret = NULL;
+ Value *Ret = nullptr;
if (UnsafeFPShrink && Callee->getName() == "exp2" &&
TLI->has(LibFunc::exp2f)) {
UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
@@ -1307,7 +1323,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
LdExp = LibFunc::ldexp;
if (TLI->has(LdExp)) {
- Value *LdExpArg = 0;
+ Value *LdExpArg = nullptr;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
@@ -1344,7 +1360,7 @@ struct SinCosPiOpt : public LibCallOptimization {
// Make sure the prototype is as expected, otherwise the rest of the
// function is probably invalid and likely to abort.
if (!isTrigLibCall(CI))
- return 0;
+ return nullptr;
Value *Arg = CI->getArgOperand(0);
SmallVector<CallInst *, 1> SinCalls;
@@ -1362,7 +1378,7 @@ struct SinCosPiOpt : public LibCallOptimization {
// It's only worthwhile if both sinpi and cospi are actually used.
if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
- return 0;
+ return nullptr;
Value *Sin, *Cos, *SinCos;
insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
@@ -1372,7 +1388,7 @@ struct SinCosPiOpt : public LibCallOptimization {
replaceTrigInsts(CosCalls, Cos);
replaceTrigInsts(SinCosCalls, SinCos);
- return 0;
+ return nullptr;
}
bool isTrigLibCall(CallInst *CI) {
@@ -1498,7 +1514,7 @@ struct FFSOpt : public LibCallOptimization {
if (FT->getNumParams() != 1 ||
!FT->getReturnType()->isIntegerTy(32) ||
!FT->getParamType(0)->isIntegerTy())
- return 0;
+ return nullptr;
Value *Op = CI->getArgOperand(0);
@@ -1531,7 +1547,7 @@ struct AbsOpt : public LibCallOptimization {
// We require integer(integer) where the types agree.
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
FT->getParamType(0) != FT->getReturnType())
- return 0;
+ return nullptr;
// abs(x) -> x >s -1 ? x : -x
Value *Op = CI->getArgOperand(0);
@@ -1549,7 +1565,7 @@ struct IsDigitOpt : public LibCallOptimization {
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
- return 0;
+ return nullptr;
// isdigit(c) -> (c-'0') <u 10
Value *Op = CI->getArgOperand(0);
@@ -1566,7 +1582,7 @@ struct IsAsciiOpt : public LibCallOptimization {
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
- return 0;
+ return nullptr;
// isascii(c) -> c <u 128
Value *Op = CI->getArgOperand(0);
@@ -1582,7 +1598,7 @@ struct ToAsciiOpt : public LibCallOptimization {
// We require i32(i32)
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isIntegerTy(32))
- return 0;
+ return nullptr;
// toascii(c) -> c & 0x7f
return B.CreateAnd(CI->getArgOperand(0),
@@ -1612,7 +1628,7 @@ struct ErrorReportingOpt : public LibCallOptimization {
CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
}
- return 0;
+ return nullptr;
}
protected:
@@ -1649,7 +1665,7 @@ struct PrintFOpt : public LibCallOptimization {
// Check for a fixed format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
- return 0;
+ return nullptr;
// Empty format string -> noop.
if (FormatStr.empty()) // Tolerate printf's declared void.
@@ -1660,7 +1676,7 @@ struct PrintFOpt : public LibCallOptimization {
// is used; in general the printf return value is not compatible with either
// putchar() or puts().
if (!CI->use_empty())
- return 0;
+ return nullptr;
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
@@ -1697,7 +1713,7 @@ struct PrintFOpt : public LibCallOptimization {
CI->getArgOperand(1)->getType()->isPointerTy()) {
return EmitPutS(CI->getArgOperand(1), B, DL, TLI);
}
- return 0;
+ return nullptr;
}
Value *callOptimizer(Function *Callee, CallInst *CI,
@@ -1707,7 +1723,7 @@ struct PrintFOpt : public LibCallOptimization {
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
!(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
- return 0;
+ return nullptr;
if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
return V;
@@ -1724,7 +1740,7 @@ struct PrintFOpt : public LibCallOptimization {
B.Insert(New);
return New;
}
- return 0;
+ return nullptr;
}
};
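A source-level view of the fixed-format folds above. Both are only legal when the printf return value is unused, which is what the use_empty() check enforces; the pointer-argument case matches the EmitPutS call whose guard is partly elided in this hunk:

    #include <cstdio>

    int main() {
      std::printf("x");           // single-char format -> putchar('x')
      std::printf("%s\n", "hi");  // the EmitPutS path -> puts("hi")
      return 0;
    }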
@@ -1734,7 +1750,7 @@ struct SPrintFOpt : public LibCallOptimization {
// Check for a fixed format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
- return 0;
+ return nullptr;
// If we just have a format string (nothing else crazy) transform it.
if (CI->getNumArgOperands() == 2) {
@@ -1742,10 +1758,10 @@ struct SPrintFOpt : public LibCallOptimization {
// %% -> % in the future if we cared.
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
if (FormatStr[i] == '%')
- return 0; // we found a format specifier, bail out.
+ return nullptr; // we found a format specifier, bail out.
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
@@ -1758,12 +1774,12 @@ struct SPrintFOpt : public LibCallOptimization {
// and have an extra operand.
if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
CI->getNumArgOperands() < 3)
- return 0;
+ return nullptr;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr;
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
@@ -1775,14 +1791,14 @@ struct SPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 's') {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr;
Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI);
if (!Len)
- return 0;
+ return nullptr;
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
@@ -1791,7 +1807,7 @@ struct SPrintFOpt : public LibCallOptimization {
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
}
- return 0;
+ return nullptr;
}
Value *callOptimizer(Function *Callee, CallInst *CI,
@@ -1801,7 +1817,7 @@ struct SPrintFOpt : public LibCallOptimization {
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
return V;
@@ -1818,7 +1834,7 @@ struct SPrintFOpt : public LibCallOptimization {
B.Insert(New);
return New;
}
- return 0;
+ return nullptr;
}
};
@@ -1831,22 +1847,22 @@ struct FPrintFOpt : public LibCallOptimization {
// All the optimizations depend on the format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
- return 0;
+ return nullptr;
// Do not do any of the following transformations if the fprintf return
// value is used; in general the fprintf return value is not compatible
// with fwrite(), fputc() or fputs().
if (!CI->use_empty())
- return 0;
+ return nullptr;
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
if (CI->getNumArgOperands() == 2) {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
- return 0; // We found a format specifier.
+ return nullptr; // We found a format specifier.
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
return EmitFWrite(CI->getArgOperand(1),
ConstantInt::get(DL->getIntPtrType(*Context),
@@ -1858,22 +1874,22 @@ struct FPrintFOpt : public LibCallOptimization {
// and have an extra operand.
if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
CI->getNumArgOperands() < 3)
- return 0;
+ return nullptr;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
- if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr;
return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
- return 0;
+ return nullptr;
return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
}
- return 0;
+ return nullptr;
}
Value *callOptimizer(Function *Callee, CallInst *CI,
@@ -1883,7 +1899,7 @@ struct FPrintFOpt : public LibCallOptimization {
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
return V;
@@ -1900,7 +1916,7 @@ struct FPrintFOpt : public LibCallOptimization {
B.Insert(New);
return New;
}
- return 0;
+ return nullptr;
}
};
@@ -1917,12 +1933,12 @@ struct FWriteOpt : public LibCallOptimization {
!FT->getParamType(2)->isIntegerTy() ||
!FT->getParamType(3)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
// Get the element size and count.
ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!SizeC || !CountC) return 0;
+ if (!SizeC || !CountC) return nullptr;
uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
// If this is writing zero records, remove the call (it's a noop).
@@ -1934,10 +1950,10 @@ struct FWriteOpt : public LibCallOptimization {
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
}
- return 0;
+ return nullptr;
}
};
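The fwrite folds in this hunk at the source level; the single-byte case requires the result to be unused, matching the fputc rewrite:

    #include <cstdio>

    int main() {
      fwrite("x", 1, 1, stdout); // one byte, result unused -> fputc('x', stdout)
      fwrite("x", 0, 1, stdout); // zero records -> call removed as a no-op
      return 0;
    }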
@@ -1948,18 +1964,18 @@ struct FPutsOpt : public LibCallOptimization {
(void) ER.callOptimizer(Callee, CI, B);
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// Require two pointers. Also, we can't optimize if return value is used.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!CI->use_empty())
- return 0;
+ return nullptr;
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
- if (!Len) return 0;
+ if (!Len) return nullptr;
// Known to have no uses (see above).
return EmitFWrite(CI->getArgOperand(0),
ConstantInt::get(DL->getIntPtrType(*Context), Len-1),
@@ -1975,12 +1991,12 @@ struct PutsOpt : public LibCallOptimization {
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
!(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
- return 0;
+ return nullptr;
// Check for a constant string.
StringRef Str;
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
- return 0;
+ return nullptr;
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
@@ -1989,7 +2005,7 @@ struct PutsOpt : public LibCallOptimization {
return B.CreateIntCast(Res, CI->getType(), true);
}
- return 0;
+ return nullptr;
}
};
@@ -2100,7 +2116,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case Intrinsic::exp2:
return &Exp2;
default:
- return 0;
+ return nullptr;
}
}
@@ -2210,7 +2226,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case LibFunc::trunc:
if (hasFloatVersion(FuncName))
return &UnaryDoubleFP;
- return 0;
+ return nullptr;
case LibFunc::acos:
case LibFunc::acosh:
case LibFunc::asin:
@@ -2234,16 +2250,16 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case LibFunc::tanh:
if (UnsafeFPShrink && hasFloatVersion(FuncName))
return &UnsafeUnaryDoubleFP;
- return 0;
+ return nullptr;
case LibFunc::fmin:
case LibFunc::fmax:
if (hasFloatVersion(FuncName))
return &BinaryDoubleFP;
- return 0;
+ return nullptr;
case LibFunc::memcpy_chk:
return &MemCpyChk;
default:
- return 0;
+ return nullptr;
}
}
@@ -2263,7 +2279,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
return &StrNCpyChk;
}
- return 0;
+ return nullptr;
}
@@ -2273,7 +2289,7 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
IRBuilder<> Builder(CI);
return LCO->optimizeCall(CI, DL, TLI, LCS, Builder);
}
- return 0;
+ return nullptr;
}
LibCallSimplifier::LibCallSimplifier(const DataLayout *DL,
@@ -2287,7 +2303,7 @@ LibCallSimplifier::~LibCallSimplifier() {
}
Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
- if (CI->isNoBuiltin()) return 0;
+ if (CI->isNoBuiltin()) return nullptr;
return Impl->optimizeCall(CI);
}
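Most of this file's churn is the mechanical NULL/0-to-nullptr conversion. The motivation is overload safety, not style alone; a self-contained reminder of the difference:

    #include <cstdio>

    static void Report(int N) { std::printf("int: %d\n", N); }
    static void Report(const char *S) {
      std::printf("ptr: %s\n", S ? S : "(null)");
    }

    int main() {
      Report(0);       // the literal 0 is an int first: calls Report(int)
      Report(nullptr); // unambiguously a pointer: calls Report(const char *)
      return 0;
    }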
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp
index c318560..2c6fcd1 100644
--- a/lib/Transforms/Utils/SpecialCaseList.cpp
+++ b/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -41,7 +41,7 @@ struct SpecialCaseList::Entry {
StringSet<> Strings;
Regex *RegEx;
- Entry() : RegEx(0) {}
+ Entry() : RegEx(nullptr) {}
bool match(StringRef Query) const {
return Strings.count(Query) || (RegEx && RegEx->match(Query));
@@ -57,7 +57,7 @@ SpecialCaseList *SpecialCaseList::create(
std::unique_ptr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(Path, File)) {
Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
- return 0;
+ return nullptr;
}
return create(File.get(), Error);
}
@@ -66,7 +66,7 @@ SpecialCaseList *SpecialCaseList::create(
const MemoryBuffer *MB, std::string &Error) {
std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
if (!SCL->parse(MB, Error))
- return 0;
+ return nullptr;
return SCL.release();
}
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 560f581..0c2fc0a 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -59,7 +59,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// Then unreachable blocks.
if (UnreachableBlocks.empty()) {
- UnreachableBlock = 0;
+ UnreachableBlock = nullptr;
} else if (UnreachableBlocks.size() == 1) {
UnreachableBlock = UnreachableBlocks.front();
} else {
@@ -77,7 +77,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// Now handle return blocks.
if (ReturningBlocks.empty()) {
- ReturnBlock = 0;
+ ReturnBlock = nullptr;
return false; // No blocks return
} else if (ReturningBlocks.size() == 1) {
ReturnBlock = ReturningBlocks.front(); // Already has a single return block
@@ -91,9 +91,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
"UnifiedReturnBlock", &F);
- PHINode *PN = 0;
+ PHINode *PN = nullptr;
if (F.getReturnType()->isVoidTy()) {
- ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
+ ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
} else {
// If the function doesn't return void... add a PHI node to the block...
PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 457fc80..0f20e6d 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -71,12 +71,12 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
- if (OP == 0) continue;
+ if (!OP) continue;
Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
if (Mapped_OP == OP ||
- (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries)))
+ (Mapped_OP == nullptr && (Flags & RF_IgnoreMissingEntries)))
continue;
// Ok, at least one operand needs remapping.
@@ -84,13 +84,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
Elts.reserve(MD->getNumOperands());
for (i = 0; i != e; ++i) {
Value *Op = MD->getOperand(i);
- if (Op == 0)
- Elts.push_back(0);
+ if (!Op)
+ Elts.push_back(nullptr);
else {
Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
- if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
+ if (Mapped_Op == nullptr && (Flags & RF_IgnoreMissingEntries))
Mapped_Op = Op;
Elts.push_back(Mapped_Op);
}
@@ -112,8 +112,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Okay, this either must be a constant (which may or may not be mappable) or
// is something that is not in the mapping table.
Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
- if (C == 0)
- return 0;
+ if (!C)
+ return nullptr;
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
Function *F =
@@ -126,7 +126,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Otherwise, we have some other constant to remap. Start by checking to see
// if all operands have an identity remapping.
unsigned OpNo = 0, NumOperands = C->getNumOperands();
- Value *Mapped = 0;
+ Value *Mapped = nullptr;
for (; OpNo != NumOperands; ++OpNo) {
Value *Op = C->getOperand(OpNo);
Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer);
@@ -187,7 +187,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer);
// If we aren't ignoring missing entries, assert that something happened.
- if (V != 0)
+ if (V)
*op = V;
else
assert((Flags & RF_IgnoreMissingEntries) &&
@@ -199,7 +199,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
// If we aren't ignoring missing entries, assert that something happened.
- if (V != 0)
+ if (V)
PN->setIncomingBlock(i, cast<BasicBlock>(V));
else
assert((Flags & RF_IgnoreMissingEntries) &&
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 71350e7..28ec83b 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -15,7 +15,6 @@
//===----------------------------------------------------------------------===//
#define BBV_NAME "bb-vectorize"
-#define DEBUG_TYPE BBV_NAME
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -50,6 +49,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE BBV_NAME
+
static cl::opt<bool>
IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false),
cl::Hidden, cl::desc("Ignore target information"));
@@ -122,6 +123,10 @@ NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
cl::desc("Don't try to vectorize floating-point math intrinsics"));
static cl::opt<bool>
+ NoBitManipulation("bb-vectorize-no-bitmanip", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize BitManipulation intrinsics"));
+
+static cl::opt<bool>
NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
@@ -202,8 +207,8 @@ namespace {
DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &P->getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
- TTI = IgnoreTargetInfo ? 0 : &P->getAnalysis<TargetTransformInfo>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+ TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis<TargetTransformInfo>();
}
typedef std::pair<Value *, Value *> ValuePair;
@@ -279,7 +284,7 @@ namespace {
bool trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I,
Instruction *J, bool UpdateUsers = true,
- DenseSet<ValuePair> *LoadMoveSetPairs = 0);
+ DenseSet<ValuePair> *LoadMoveSetPairs = nullptr);
void computePairsConnectedTo(
DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
@@ -292,8 +297,8 @@ namespace {
bool pairsConflict(ValuePair P, ValuePair Q,
DenseSet<ValuePair> &PairableInstUsers,
DenseMap<ValuePair, std::vector<ValuePair> >
- *PairableInstUserMap = 0,
- DenseSet<VPPair> *PairableInstUserPairSet = 0);
+ *PairableInstUserMap = nullptr,
+ DenseSet<VPPair> *PairableInstUserPairSet = nullptr);
bool pairWillFormCycle(ValuePair P,
DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
@@ -438,8 +443,8 @@ namespace {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
- TTI = IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+ TTI = IgnoreTargetInfo ? nullptr : &getAnalysis<TargetTransformInfo>();
return vectorizeBB(BB);
}
@@ -674,7 +679,20 @@ namespace {
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::pow:
+ case Intrinsic::round:
+ case Intrinsic::copysign:
+ case Intrinsic::ceil:
+ case Intrinsic::nearbyint:
+ case Intrinsic::rint:
+ case Intrinsic::trunc:
+ case Intrinsic::floor:
+ case Intrinsic::fabs:
return Config.VectorizeMath;
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ return Config.VectorizeBitManipulations;
case Intrinsic::fma:
case Intrinsic::fmuladd:
return Config.VectorizeFMA;
@@ -878,7 +896,7 @@ namespace {
}
// We can't vectorize memory operations without target data
- if (DL == 0 && IsSimpleLoadStore)
+ if (!DL && IsSimpleLoadStore)
return false;
Type *T1, *T2;
@@ -915,7 +933,7 @@ namespace {
if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
return false;
- if ((!Config.VectorizePointers || DL == 0) &&
+ if ((!Config.VectorizePointers || !DL) &&
(T1->getScalarType()->isPointerTy() ||
T2->getScalarType()->isPointerTy()))
return false;
@@ -1049,7 +1067,7 @@ namespace {
(isa<ConstantVector>(JOp) || isa<ConstantDataVector>(JOp))) {
Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
Constant *SplatValue = cast<Constant>(IOp)->getSplatValue();
- if (SplatValue != NULL &&
+ if (SplatValue != nullptr &&
SplatValue == cast<Constant>(JOp)->getSplatValue())
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
}
@@ -1079,13 +1097,14 @@ namespace {
CostSavings = ICost + JCost - VCost;
}
- // The powi intrinsic is special because only the first argument is
- // vectorized, the second arguments must be equal.
+ // The powi, ctlz and cttz intrinsics are special because only the first
+ // argument is vectorized; the second arguments must be equal.
CallInst *CI = dyn_cast<CallInst>(I);
Function *FI;
if (CI && (FI = CI->getCalledFunction())) {
Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
- if (IID == Intrinsic::powi) {
+ if (IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
+ IID == Intrinsic::cttz) {
Value *A1I = CI->getArgOperand(1),
*A1J = cast<CallInst>(J)->getArgOperand(1);
const SCEV *A1ISCEV = SE->getSCEV(A1I),
@@ -1109,7 +1128,8 @@ namespace {
assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
"Intrinsic argument counts differ");
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if (IID == Intrinsic::powi && i == 1)
+ if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
+ IID == Intrinsic::cttz) && i == 1)
Tys.push_back(CI->getArgOperand(i)->getType());
else
Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
@@ -1665,8 +1685,9 @@ namespace {
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet
+ : nullptr)) {
if (C2->second >= C->second) {
CanAdd = false;
break;
@@ -1686,8 +1707,9 @@ namespace {
T->second == C->first.first ||
T->second == C->first.second ||
pairsConflict(*T, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet
+ : nullptr)) {
CanAdd = false;
break;
}
@@ -1704,8 +1726,9 @@ namespace {
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet
+ : nullptr)) {
CanAdd = false;
break;
}
@@ -1720,8 +1743,9 @@ namespace {
ChosenPairs.begin(), E2 = ChosenPairs.end();
C2 != E2; ++C2) {
if (pairsConflict(*C2, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet
+ : nullptr)) {
CanAdd = false;
break;
}
@@ -1802,8 +1826,8 @@ namespace {
for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
E = ChosenPairs.end(); C != E; ++C) {
if (pairsConflict(*C, IJ, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet : nullptr)) {
DoesConflict = true;
break;
}
@@ -2373,7 +2397,7 @@ namespace {
} while ((LIENext =
dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
- LIENext = 0;
+ LIENext = nullptr;
Value *LIEPrev = UndefValue::get(ArgTypeH);
for (unsigned i = 0; i < numElemL; ++i) {
if (isa<UndefValue>(VectElemts[i])) continue;
@@ -2441,14 +2465,14 @@ namespace {
if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
// We can have at most two unique vector inputs.
bool CanUseInputs = true;
- Value *I1, *I2 = 0;
+ Value *I1, *I2 = nullptr;
if (LEE) {
I1 = LEE->getOperand(0);
} else {
I1 = LSV->getOperand(0);
I2 = LSV->getOperand(1);
if (I2 == I1 || isa<UndefValue>(I2))
- I2 = 0;
+ I2 = nullptr;
}
if (HEE) {
@@ -2764,10 +2788,11 @@ namespace {
ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
continue;
- } else if (IID == Intrinsic::powi && o == 1) {
- // The second argument of powi is a single integer and we've already
- // checked that both arguments are equal. As a result, we just keep
- // I's second argument.
+ } else if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
+ IID == Intrinsic::cttz) && o == 1) {
+ // The second argument of powi/ctlz/cttz is a single integer/constant
+ // and we've already checked that both arguments are equal.
+ // As a result, we just keep I's second argument.
ReplacedOperands[o] = I->getOperand(o);
continue;
}
@@ -2952,7 +2977,7 @@ namespace {
switch (Kind) {
default:
- K->setMetadata(Kind, 0); // Remove unknown metadata
+ K->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_tbaa:
K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
@@ -3123,7 +3148,7 @@ namespace {
// Instruction insertion point:
Instruction *InsertionPt = K;
- Instruction *K1 = 0, *K2 = 0;
+ Instruction *K1 = nullptr, *K2 = nullptr;
replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
// The use dag of the first original instruction must be moved to after
@@ -3213,6 +3238,7 @@ VectorizeConfig::VectorizeConfig() {
VectorizePointers = !::NoPointers;
VectorizeCasts = !::NoCasts;
VectorizeMath = !::NoMath;
+ VectorizeBitManipulations = !::NoBitManipulation;
VectorizeFMA = !::NoFMA;
VectorizeSelect = !::NoSelect;
VectorizeCmp = !::NoCmp;
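The new VectorizeBitManipulations knob is also reachable programmatically through VectorizeConfig. A minimal sketch of assumed driver code, mirroring the -bb-vectorize-no-bitmanip flag added above:

    #include "llvm/Transforms/Vectorize.h"
    using namespace llvm;

    // Pair bswap/ctpop/ctlz/cttz calls only when this field stays true;
    // clearing it has the same effect as the new command-line flag.
    bool vectorizeWithoutBitManip(Pass *P, BasicBlock &BB) {
      VectorizeConfig C;                    // defaults come from the cl::opts
      C.VectorizeBitManipulations = false;
      return vectorizeBasicBlock(P, BB, C);
    }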
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9a98c44..34d8a10 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -42,9 +42,6 @@
//
//===----------------------------------------------------------------------===//
-#define LV_NAME "loop-vectorize"
-#define DEBUG_TYPE LV_NAME
-
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
@@ -54,6 +51,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -67,7 +65,9 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -85,16 +85,23 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
+#include <tuple>
using namespace llvm;
using namespace llvm::PatternMatch;
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
+STATISTIC(LoopsVectorized, "Number of loops vectorized");
+STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
+
static cl::opt<unsigned>
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."));
@@ -223,8 +230,9 @@ public:
const TargetLibraryInfo *TLI, unsigned VecWidth,
unsigned UnrollFactor)
: OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
- VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
- OldInduction(0), WidenMap(UnrollFactor), Legal(0) {}
+ VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
+ Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
+ Legal(nullptr) {}
// Perform the actual loop widening (vectorization).
void vectorize(LoopVectorizationLegality *L) {
@@ -469,6 +477,24 @@ static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
B.SetCurrentDebugLocation(DebugLoc());
}
+#ifndef NDEBUG
+/// \return string containing a file name and a line # for the given loop.
+static std::string getDebugLocString(const Loop *L) {
+ std::string Result;
+ if (L) {
+ raw_string_ostream OS(Result);
+ const DebugLoc LoopDbgLoc = L->getStartLoc();
+ if (!LoopDbgLoc.isUnknown())
+ LoopDbgLoc.print(L->getHeader()->getContext(), OS);
+ else
+ // Just print the module name.
+ OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier();
+ OS.flush();
+ }
+ return Result;
+}
+#endif
+
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -491,8 +517,8 @@ public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
DominatorTree *DT, TargetLibraryInfo *TLI)
: NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
- MaxSafeDepDistBytes(-1U) {}
+ DT(DT), TLI(TLI), Induction(nullptr), WidestIndTy(nullptr),
+ HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -530,7 +556,7 @@ public:
/// This struct holds information about reduction variables.
struct ReductionDescriptor {
- ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
+ ReductionDescriptor() : StartValue(nullptr), LoopExitInstr(nullptr),
Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
@@ -602,7 +628,7 @@ public:
/// A struct for saving information about induction variables.
struct InductionInfo {
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
- InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
+ InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {}
/// Start value.
TrackingVH<Value> StartValue;
/// Induction kind.
@@ -789,7 +815,8 @@ public:
/// then this vectorization factor will be selected if vectorization is
/// possible.
VectorizationFactor selectVectorizationFactor(bool OptForSize,
- unsigned UserVF);
+ unsigned UserVF,
+ bool ForceVectorization);
/// \return The size (in bits) of the widest type in the code that
/// needs to be vectorized. We ignore values that remain scalar such as
@@ -856,35 +883,32 @@ private:
/// Utility class for getting and setting loop vectorizer hints in the form
/// of loop metadata.
-struct LoopVectorizeHints {
- /// Vectorization width.
- unsigned Width;
- /// Vectorization unroll factor.
- unsigned Unroll;
- /// Vectorization forced (-1 not selected, 0 force disabled, 1 force enabled)
- int Force;
+class LoopVectorizeHints {
+public:
+ enum ForceKind {
+ FK_Undefined = -1, ///< Not selected.
+ FK_Disabled = 0, ///< Forcing disabled.
+ FK_Enabled = 1, ///< Forcing enabled.
+ };
LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
- : Width(VectorizationFactor)
- , Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
- , Force(-1)
- , LoopID(L->getLoopID()) {
+ : Width(VectorizationFactor),
+ Unroll(DisableUnrolling),
+ Force(FK_Undefined),
+ LoopID(L->getLoopID()) {
getHints(L);
- // The command line options override any loop metadata except for when
- // width == 1 which is used to indicate the loop is already vectorized.
- if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1)
- Width = VectorizationFactor;
+ // force-vector-unroll overrides DisableUnrolling.
if (VectorizationUnroll.getNumOccurrences() > 0)
Unroll = VectorizationUnroll;
- DEBUG(if (DisableUnrolling && Unroll == 1)
- dbgs() << "LV: Unrolling disabled by the pass manager\n");
+ DEBUG(if (DisableUnrolling && Unroll == 1) dbgs()
+ << "LV: Unrolling disabled by the pass manager\n");
}
/// Return the loop vectorizer metadata prefix.
static StringRef Prefix() { return "llvm.vectorizer."; }
- MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) {
+ MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) const {
SmallVector<Value*, 2> Vals;
Vals.push_back(MDString::get(Context, Name));
Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V));
@@ -918,9 +942,12 @@ struct LoopVectorizeHints {
LoopID = NewLoopID;
}
-private:
- MDNode *LoopID;
+ unsigned getWidth() const { return Width; }
+ unsigned getUnroll() const { return Unroll; }
+ enum ForceKind getForce() const { return Force; }
+ MDNode *getLoopID() const { return LoopID; }
+private:
/// Find hints specified in the loop metadata.
void getHints(const Loop *L) {
if (!LoopID)
@@ -931,7 +958,7 @@ private:
assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- const MDString *S = 0;
+ const MDString *S = nullptr;
SmallVector<Value*, 4> Args;
// The expected hint is either a MDString or a MDNode with the first
@@ -980,13 +1007,23 @@ private:
DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
} else if (Hint == "enable") {
if (C->getBitWidth() == 1)
- Force = Val;
+ Force = Val == 1 ? LoopVectorizeHints::FK_Enabled
+ : LoopVectorizeHints::FK_Disabled;
else
DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n");
} else {
DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
}
}
+
+ /// Vectorization width.
+ unsigned Width;
+ /// Vectorization unroll factor.
+ unsigned Unroll;
+ /// Vectorization forced (see ForceKind).
+ enum ForceKind Force;
+
+ MDNode *LoopID;
};
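The hints above travel as loop metadata under the "llvm.vectorizer." prefix, and getHints() below maps the "enable" hint onto FK_Enabled/FK_Disabled. A minimal sketch of producing one such hint with the 3.5-era metadata API, mirroring createHint() above (the helper name makeWidthHint is ours, for illustration):

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Builds the equivalent of metadata !{!"llvm.vectorizer.width", i32 Width}.
static MDNode *makeWidthHint(LLVMContext &Ctx, unsigned Width) {
  Value *Vals[] = {MDString::get(Ctx, "llvm.vectorizer.width"),
                   ConstantInt::get(Type::getInt32Ty(Ctx), Width)};
  return MDNode::get(Ctx, Vals);
}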
static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
@@ -1024,7 +1061,7 @@ struct LoopVectorize : public FunctionPass {
bool runOnFunction(Function &F) override {
SE = &getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1041,8 +1078,9 @@ struct LoopVectorize : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- if (DL == NULL) {
- DEBUG(dbgs() << "LV: Not vectorizing: Missing data layout\n");
+ if (!DL) {
+ DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName()
+ << ": Missing data layout\n");
return false;
}
@@ -1054,6 +1092,8 @@ struct LoopVectorize : public FunctionPass {
for (Loop *L : *LI)
addInnerLoop(*L, Worklist);
+ LoopsAnalyzed += Worklist.size();
+
// Now walk the identified inner loops.
bool Changed = false;
while (!Worklist.empty())
@@ -1065,26 +1105,56 @@ struct LoopVectorize : public FunctionPass {
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
- DEBUG(dbgs() << "LV: Checking a loop in \"" <<
- L->getHeader()->getParent()->getName() << "\"\n");
+
+#ifndef NDEBUG
+ const std::string DebugLocStr = getDebugLocString(L);
+#endif /* NDEBUG */
+
+ DEBUG(dbgs() << "\nLV: Checking a loop in \""
+ << L->getHeader()->getParent()->getName() << "\" from "
+ << DebugLocStr << "\n");
LoopVectorizeHints Hints(L, DisableUnrolling);
- if (Hints.Force == 0) {
+ DEBUG(dbgs() << "LV: Loop hints:"
+ << " force="
+ << (Hints.getForce() == LoopVectorizeHints::FK_Disabled
+ ? "disabled"
+ : (Hints.getForce() == LoopVectorizeHints::FK_Enabled
+ ? "enabled"
+ : "?")) << " width=" << Hints.getWidth()
+ << " unroll=" << Hints.getUnroll() << "\n");
+
+ if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) {
DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
return false;
}
- if (!AlwaysVectorize && Hints.Force != 1) {
+ if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) {
DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
return false;
}
- if (Hints.Width == 1 && Hints.Unroll == 1) {
+ if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) {
DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
return false;
}
+ // Check the loop for a trip count threshold:
+ // do not vectorize loops with a tiny trip count.
+ BasicBlock *Latch = L->getLoopLatch();
+ const unsigned TC = SE->getSmallConstantTripCount(L, Latch);
+ if (TC > 0u && TC < TinyTripCountVectorThreshold) {
+ DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
+ << "This loop is not worth vectorizing.");
+ if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
+ DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
+ else {
+ DEBUG(dbgs() << "\n");
+ return false;
+ }
+ }
+
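With the threshold check relocated here from canVectorize() (see the deletion further down), a loop whose constant trip count is below TinyTripCountVectorThreshold is now skipped only when forcing is off. A hypothetical example of a loop this rejects, assuming the threshold exceeds 3:

// Constant trip count of 3: not worth vectorizing, unless the loop
// carries an FK_Enabled hint that overrides the check above.
void scale3(float *a) {
  for (int i = 0; i < 3; ++i)
    a[i] *= 2.0f;
}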
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
if (!LVL.canVectorize()) {
@@ -1098,8 +1168,8 @@ struct LoopVectorize : public FunctionPass {
// Check the function attributes to find out if this function should be
// optimized for size.
Function *F = L->getHeader()->getParent();
- bool OptForSize =
- Hints.Force != 1 && F->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ F->hasFnAttribute(Attribute::OptimizeForSize);
// Compute the weighted frequency of this loop being executed and see if it
// is less than 20% of the function entry baseline frequency. Note that we
@@ -1108,7 +1178,8 @@ struct LoopVectorize : public FunctionPass {
// exactly what block frequency models.
if (LoopVectorizeWithBlockFrequency) {
BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
- if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
+ if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ LoopEntryFreq < ColdEntryFreq)
OptForSize = true;
}
@@ -1123,14 +1194,17 @@ struct LoopVectorize : public FunctionPass {
}
// Select the optimal vectorization factor.
- LoopVectorizationCostModel::VectorizationFactor VF;
- VF = CM.selectVectorizationFactor(OptForSize, Hints.Width);
+ const LoopVectorizationCostModel::VectorizationFactor VF =
+ CM.selectVectorizationFactor(OptForSize, Hints.getWidth(),
+ Hints.getForce() ==
+ LoopVectorizeHints::FK_Enabled);
+
// Select the unroll factor.
- unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
- VF.Cost);
+ const unsigned UF =
+ CM.selectUnrollFactor(OptForSize, Hints.getUnroll(), VF.Width, VF.Cost);
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
- F->getParent()->getModuleIdentifier() << '\n');
+ DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
+ << DebugLocStr << '\n');
DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
if (VF.Width == 1) {
@@ -1138,6 +1212,13 @@ struct LoopVectorize : public FunctionPass {
if (UF == 1)
return false;
DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n");
+
+ // Report the unrolling decision.
+ emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ Twine("unrolled with interleaving factor " +
+ Twine(UF) +
+ " (vectorization not beneficial)"));
+
// We decided not to vectorize, but we may want to unroll.
InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
Unroller.vectorize(&LVL);
@@ -1145,6 +1226,13 @@ struct LoopVectorize : public FunctionPass {
// If we decided that it is *legal* to vectorize the loop then do it.
InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
LB.vectorize(&LVL);
+ ++LoopsVectorized;
+
+ // Report the vectorization decision.
+ emitOptimizationRemark(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) +
+ ", unrolling interleave factor: " + Twine(UF) + ")");
}
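These emitOptimizationRemark() calls feed LLVM's pass-remark machinery under DEBUG_TYPE, which front ends can surface; with clang of this vintage that is -Rpass=loop-vectorize. The output looks roughly like the following (location and factors illustrative):

// $ clang -O2 -Rpass=loop-vectorize foo.c
// foo.c:4:3: remark: vectorized loop (vectorization factor: 4,
//   unrolling interleave factor: 1) [-Rpass=loop-vectorize]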
// Mark the loop as already vectorized to avoid vectorizing again.
@@ -1188,7 +1276,7 @@ static Value *stripIntegerCast(Value *V) {
/// \p Ptr.
static const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
ValueToValueMap &PtrToStride,
- Value *Ptr, Value *OrigPtr = 0) {
+ Value *Ptr, Value *OrigPtr = nullptr) {
const SCEV *OrigSCEV = SE->getSCEV(Ptr);
@@ -1355,7 +1443,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// We can emit wide load/stores only if the last non-zero index is the
// induction variable.
- const SCEV *Last = 0;
+ const SCEV *Last = nullptr;
if (!Strides.count(Gep))
Last = SE->getSCEV(Gep->getOperand(InductionOperand));
else {
@@ -1604,17 +1692,17 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- Value *UndefVec = IsVoidRetTy ? 0 :
+ Value *UndefVec = IsVoidRetTy ? nullptr :
UndefValue::get(VectorType::get(Instr->getType(), VF));
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
Instruction *InsertPt = Builder.GetInsertPoint();
BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = 0;
+ BasicBlock *CondBlock = nullptr;
VectorParts Cond;
- Loop *VectorLp = 0;
+ Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
@@ -1630,7 +1718,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
for (unsigned Width = 0; Width < VF; ++Width) {
// Start if-block.
- Value *Cmp = 0;
+ Value *Cmp = nullptr;
if (IfPredicateStore) {
Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width));
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1));
@@ -1681,21 +1769,21 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
if (FirstInst)
return FirstInst;
if (Instruction *I = dyn_cast<Instruction>(V))
- return I->getParent() == Loc->getParent() ? I : 0;
- return 0;
+ return I->getParent() == Loc->getParent() ? I : nullptr;
+ return nullptr;
}
std::pair<Instruction *, Instruction *>
InnerLoopVectorizer::addStrideCheck(Instruction *Loc) {
- Instruction *tnullptr = 0;
+ Instruction *tnullptr = nullptr;
if (!Legal->mustCheckStrides())
return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
IRBuilder<> ChkBuilder(Loc);
// Emit checks.
- Value *Check = 0;
- Instruction *FirstInst = 0;
+ Value *Check = nullptr;
+ Instruction *FirstInst = nullptr;
for (SmallPtrSet<Value *, 8>::iterator SI = Legal->strides_begin(),
SE = Legal->strides_end();
SI != SE; ++SI) {
@@ -1727,7 +1815,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
Legal->getRuntimePointerCheck();
- Instruction *tnullptr = 0;
+ Instruction *tnullptr = nullptr;
if (!PtrRtCheck->Need)
return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
@@ -1737,7 +1825,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, "induction");
- Instruction *FirstInst = 0;
+ Instruction *FirstInst = nullptr;
for (unsigned i = 0; i < NumPointers; ++i) {
Value *Ptr = PtrRtCheck->Pointers[i];
@@ -1764,7 +1852,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
- Value *MemoryRuntimeCheck = 0;
+ Value *MemoryRuntimeCheck = nullptr;
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
// No need to check if two readonly pointers intersect.
@@ -2028,7 +2116,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// start value.
// This variable saves the new starting index for the scalar loop.
- PHINode *ResumeIndex = 0;
+ PHINode *ResumeIndex = nullptr;
LoopVectorizationLegality::InductionList::iterator I, E;
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
// Set builder to point to last bypass block.
@@ -2044,9 +2132,9 @@ void InnerLoopVectorizer::createEmptyLoop() {
// truncated version for the scalar loop.
PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
- MiddleBlock->getTerminator()) : 0;
+ MiddleBlock->getTerminator()) : nullptr;
- Value *EndValue = 0;
+ Value *EndValue = nullptr;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
@@ -2209,148 +2297,6 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
}
}
-static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
- Intrinsic::ID ValidIntrinsicID) {
- if (I.getNumArgOperands() != 1 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- !I.onlyReadsMemory())
- return Intrinsic::not_intrinsic;
-
- return ValidIntrinsicID;
-}
-
-static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
- Intrinsic::ID ValidIntrinsicID) {
- if (I.getNumArgOperands() != 2 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- I.getType() != I.getArgOperand(1)->getType() ||
- !I.onlyReadsMemory())
- return Intrinsic::not_intrinsic;
-
- return ValidIntrinsicID;
-}
-
-
-static Intrinsic::ID
-getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
- // If we have an intrinsic call, check if it is trivially vectorizable.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::sqrt:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::log:
- case Intrinsic::log10:
- case Intrinsic::log2:
- case Intrinsic::fabs:
- case Intrinsic::copysign:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::round:
- case Intrinsic::pow:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- return II->getIntrinsicID();
- default:
- return Intrinsic::not_intrinsic;
- }
- }
-
- if (!TLI)
- return Intrinsic::not_intrinsic;
-
- LibFunc::Func Func;
- Function *F = CI->getCalledFunction();
- // We're going to make assumptions on the semantics of the functions, check
- // that the target knows that it's available in this environment and it does
- // not have local linkage.
- if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
- return Intrinsic::not_intrinsic;
-
- // Otherwise check if we have a call to a function that can be turned into a
- // vector intrinsic.
- switch (Func) {
- default:
- break;
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
- return checkUnaryFloatSignature(*CI, Intrinsic::sin);
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
- return checkUnaryFloatSignature(*CI, Intrinsic::cos);
- case LibFunc::exp:
- case LibFunc::expf:
- case LibFunc::expl:
- return checkUnaryFloatSignature(*CI, Intrinsic::exp);
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
- return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
- case LibFunc::log:
- case LibFunc::logf:
- case LibFunc::logl:
- return checkUnaryFloatSignature(*CI, Intrinsic::log);
- case LibFunc::log10:
- case LibFunc::log10f:
- case LibFunc::log10l:
- return checkUnaryFloatSignature(*CI, Intrinsic::log10);
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
- return checkUnaryFloatSignature(*CI, Intrinsic::log2);
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
- return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
- return checkUnaryFloatSignature(*CI, Intrinsic::floor);
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
- return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
- return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
- return checkUnaryFloatSignature(*CI, Intrinsic::rint);
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
- return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
- return checkUnaryFloatSignature(*CI, Intrinsic::round);
- case LibFunc::pow:
- case LibFunc::powf:
- case LibFunc::powl:
- return checkBinaryFloatSignature(*CI, Intrinsic::pow);
- }
-
- return Intrinsic::not_intrinsic;
-}
-
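This block is removed rather than lost: the SLPVectorizer hunks below gain an include of llvm/Transforms/Utils/VectorUtils.h and call getIntrinsicIDForCall()/isTriviallyVectorizable(), so the helpers now live in that shared header where both vectorizers can reach them. A sketch of using the shared form (the wrapper name isVectorizableCall is ours):

#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
using namespace llvm;

// True when CI is an intrinsic call, or a libm call such as sqrtf that
// maps onto one, that the vectorizers may widen lane-by-lane.
static bool isVectorizableCall(CallInst *CI, const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
  return isTriviallyVectorizable(ID);
}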
/// This function translates the reduction kind to an LLVM binary operator.
static unsigned
getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
@@ -2651,7 +2597,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");
Value *TmpVec = ReducedPartRdx;
- SmallVector<Constant*, 32> ShuffleMask(VF, 0);
+ SmallVector<Constant*, 32> ShuffleMask(VF, nullptr);
for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i/2; ++j)
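A worked example of the halving loop for VF = 4; the shuffle and binary-op emission that follows is cut off in this hunk, so the masks below are reconstructed from the surrounding logic and should be read as a sketch:

// i = 4: mask <2, 3, undef, undef> moves the upper half down, and the
//        reduction binop combines lanes into {l0+l2, l1+l3, u, u}.
// i = 2: mask <1, undef, undef, undef>; the binop leaves the full
//        reduction in lane 0.
// The scalar result is then read out with extractelement at index 0.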
@@ -3049,7 +2995,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *C = 0;
+ Value *C = nullptr;
if (FCmp)
C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
else
@@ -3275,15 +3221,6 @@ bool LoopVectorizationLegality::canVectorize() {
return false;
}
- // Do not loop-vectorize loops with a tiny trip count.
- BasicBlock *Latch = TheLoop->getLoopLatch();
- unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
- if (TC > 0u && TC < TinyTripCountVectorThreshold) {
- DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
- "This loop is not worth vectorizing.\n");
- return false;
- }
-
// Check if we can vectorize the instructions and CFG in this loop.
if (!canVectorizeInstrs()) {
DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
@@ -3536,14 +3473,14 @@ static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE,
///\brief Look for a cast use of the passed value.
static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
- Value *UniqueCast = 0;
+ Value *UniqueCast = nullptr;
for (User *U : Ptr->users()) {
CastInst *CI = dyn_cast<CastInst>(U);
if (CI && CI->getType() == Ty) {
if (!UniqueCast)
UniqueCast = CI;
else
- return 0;
+ return nullptr;
}
}
return UniqueCast;
@@ -3556,7 +3493,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
const DataLayout *DL, Loop *Lp) {
const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
- return 0;
+ return nullptr;
// Try to remove a gep instruction to make the pointer (actually index at this
// point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing the
@@ -3576,11 +3513,11 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
if (!S)
- return 0;
+ return nullptr;
V = S->getStepRecurrence(*SE);
if (!V)
- return 0;
+ return nullptr;
// Strip off the size of access multiplication if we are still analyzing the
// pointer.
@@ -3588,24 +3525,24 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
DL->getTypeAllocSize(PtrTy->getElementType());
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
if (M->getOperand(0)->getSCEVType() != scConstant)
- return 0;
+ return nullptr;
const APInt &APStepVal =
cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
- return 0;
+ return nullptr;
int64_t StepVal = APStepVal.getSExtValue();
if (PtrAccessSize != StepVal)
- return 0;
+ return nullptr;
V = M->getOperand(1);
}
}
// Strip off casts.
- Type *StripedOffRecurrenceCast = 0;
+ Type *StripedOffRecurrenceCast = nullptr;
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
StripedOffRecurrenceCast = C->getType();
V = C->getOperand();
@@ -3614,11 +3551,11 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// Look for the loop invariant symbolic value.
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
if (!U)
- return 0;
+ return nullptr;
Value *Stride = U->getValue();
if (!Lp->isLoopInvariant(Stride))
- return 0;
+ return nullptr;
// If we have stripped off the recurrence cast we have to make sure that we
// return the value that is used in this loop so that we can replace it later.
@@ -3629,7 +3566,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
}
void LoopVectorizationLegality::collectStridedAcccess(Value *MemAccess) {
- Value *Ptr = 0;
+ Value *Ptr = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
Ptr = LI->getPointerOperand();
else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
@@ -4628,7 +4565,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// We only allow for a single reduction value to be used outside the loop.
// This includes users of the reduction variables, which form a cycle
// that ends in the phi node.
- Instruction *ExitInstruction = 0;
+ Instruction *ExitInstruction = nullptr;
// Indicates that we found a reduction operation in our scan.
bool FoundReduxOp = false;
@@ -4642,7 +4579,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// the number of instructions we saw from the recognized min/max pattern,
// to make sure we only see exactly the two instructions.
unsigned NumCmpSelectPatternInst = 0;
- ReductionInstDesc ReduxDesc(false, 0);
+ ReductionInstDesc ReduxDesc(false, nullptr);
SmallPtrSet<Instruction *, 8> VisitedInsts;
SmallVector<Instruction *, 8> Worklist;
@@ -4725,7 +4662,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// being used. In this case the user uses the value of the previous
// iteration, in which case we would lose "VF-1" iterations of the
// reduction operation if we vectorize.
- if (ExitInstruction != 0 || Cur == Phi)
+ if (ExitInstruction != nullptr || Cur == Phi)
return false;
// The instruction used by an outside user must be the last instruction
@@ -4741,7 +4678,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// Process instructions only once (termination). Each reduction cycle
// value must only be used once, except by phi nodes and min/max
// reductions which are represented as a cmp followed by a select.
- ReductionInstDesc IgnoredVal(false, 0);
+ ReductionInstDesc IgnoredVal(false, nullptr);
if (VisitedInsts.insert(UI)) {
if (isa<PHINode>(UI))
PHIs.push_back(UI);
@@ -4795,8 +4732,8 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
"Expect a select instruction");
- Instruction *Cmp = 0;
- SelectInst *Select = 0;
+ Instruction *Cmp = nullptr;
+ SelectInst *Select = nullptr;
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
@@ -4982,7 +4919,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
- unsigned UserVF) {
+ unsigned UserVF,
+ bool ForceVectorization) {
// Width 1 means no vectorize
VectorizationFactor Factor = { 1U, 0U };
if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
@@ -5052,8 +4990,18 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
}
float Cost = expectedCost(1);
+#ifndef NDEBUG
+ const float ScalarCost = Cost;
+#endif /* NDEBUG */
unsigned Width = 1;
- DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n");
+ DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n");
+
+ // Ignore scalar width, because the user explicitly wants vectorization.
+ if (ForceVectorization && VF > 1) {
+ Width = 2;
+ Cost = expectedCost(Width) / (float)Width;
+ }
+
for (unsigned i=2; i <= VF; i*=2) {
// Notice that the vector loop needs to be executed fewer times, so
// we need to divide the cost of the vector loops by the width of
@@ -5067,7 +5015,10 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
}
}
- DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
+ DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs()
+ << "LV: Vectorization seems to be not beneficial, "
+ << "but was forced by a user.\n");
+ DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n");
Factor.Width = Width;
Factor.Cost = Width * Cost;
return Factor;
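Seeding Width = 2 under ForceVectorization means the scalar candidate can no longer win: the search over widths proceeds exactly as before, but the running best is already a vector factor. Worked example with hypothetical costs: if expectedCost(1) = 4 and expectedCost(2) = 10 (per-lane cost 5), the unforced search keeps Width = 1 since 5 >= 4; forced, Width starts at 2, the scalar cost is never consulted again, and the DEBUG line above reports that vectorization was not beneficial but "was forced by a user".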
@@ -5516,7 +5467,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
- if (cast<Constant>(Op2)->getSplatValue() != NULL)
+ if (cast<Constant>(Op2)->getSplatValue() != nullptr)
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
}
@@ -5730,17 +5681,17 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- Value *UndefVec = IsVoidRetTy ? 0 :
+ Value *UndefVec = IsVoidRetTy ? nullptr :
UndefValue::get(Instr->getType());
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
Instruction *InsertPt = Builder.GetInsertPoint();
BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = 0;
+ BasicBlock *CondBlock = nullptr;
VectorParts Cond;
- Loop *VectorLp = 0;
+ Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
@@ -5755,7 +5706,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
// For each scalar that we create:
// Start an "if (pred) a[i] = ..." block.
- Value *Cmp = 0;
+ Value *Cmp = nullptr;
if (IfPredicateStore) {
if (Cond[Part]->getType()->isVectorTy())
Cond[Part] =
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ee32227..e13ba95 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15,9 +15,6 @@
// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
//
//===----------------------------------------------------------------------===//
-#define SV_NAME "slp-vectorizer"
-#define DEBUG_TYPE "SLP"
-
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -34,6 +31,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
@@ -41,11 +39,15 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
using namespace llvm;
+#define SV_NAME "slp-vectorizer"
+#define DEBUG_TYPE "SLP"
+
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
@@ -72,8 +74,6 @@ struct BlockNumbering {
BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {}
- BlockNumbering() : BB(0), Valid(false) {}
-
void numberInstructions() {
unsigned Loc = 0;
InstrIdx.clear();
@@ -120,15 +120,15 @@ private:
static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)
- return 0;
+ return nullptr;
BasicBlock *BB = I0->getParent();
for (int i = 1, e = VL.size(); i < e; i++) {
Instruction *I = dyn_cast<Instruction>(VL[i]);
if (!I)
- return 0;
+ return nullptr;
if (BB != I->getParent())
- return 0;
+ return nullptr;
}
return BB;
}
@@ -180,7 +180,7 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
switch (Kind) {
default:
- MD = 0; // Remove unknown metadata
+ MD = nullptr; // Remove unknown metadata
break;
case LLVMContext::MD_tbaa:
MD = MDNode::getMostGenericTBAA(MD, IMD);
@@ -201,7 +201,7 @@ static Type* getSameType(ArrayRef<Value *> VL) {
Type *Ty = VL[0]->getType();
for (int i = 1, e = VL.size(); i < e; i++)
if (VL[i]->getType() != Ty)
- return 0;
+ return nullptr;
return Ty;
}
@@ -345,17 +345,10 @@ public:
typedef SmallVector<StoreInst *, 8> StoreList;
BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
- TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li,
- DominatorTree *Dt) :
- F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt),
- Builder(Se->getContext()) {
- // Setup the block numbering utility for all of the blocks in the
- // function.
- for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
- BasicBlock *BB = it;
- BlocksNumbers[BB] = BlockNumbering(BB);
- }
- }
+ TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
+ LoopInfo *Li, DominatorTree *Dt)
+ : F(Func), SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+ Builder(Se->getContext()) {}
/// \brief Vectorize the tree that starts with the elements in \p VL.
/// Returns the vectorized root.
@@ -365,13 +358,13 @@ public:
/// A negative number means that this is profitable.
int getTreeCost();
- /// Construct a vectorizable tree that starts at \p Roots and is possibly
- /// used by a reduction of \p RdxOps.
- void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
+ /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
+ /// the purpose of scheduling and extraction in the \p UserIgnoreLst.
+ void buildTree(ArrayRef<Value *> Roots,
+ ArrayRef<Value *> UserIgnoreLst = None);
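Both kinds of caller appear later in this diff; gathered here for reference:

// V.buildTree(ValsToReduce, ReductionOps);  // horizontal reductions
// R.buildTree(Ops, BuildVectorSlice);       // build-vector sequences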
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
- RdxOps = 0;
VectorizableTree.clear();
ScalarToTreeEntry.clear();
MustGather.clear();
@@ -446,7 +439,7 @@ private:
bool isFullyVectorizableTinyTree();
struct TreeEntry {
- TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0),
+ TreeEntry() : Scalars(), VectorizedValue(nullptr), LastScalarIndex(0),
NeedToGather(0) {}
/// \returns true if the scalars in VL are equal to this entry.
@@ -527,14 +520,22 @@ private:
/// Numbers instructions in different blocks.
DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
- /// Reduction operators.
- ValueSet *RdxOps;
+ /// \brief Get the corresponding instruction numbering list for a given
+ /// BasicBlock. The list is allocated lazily.
+ BlockNumbering &getBlockNumbering(BasicBlock *BB) {
+ auto I = BlocksNumbers.insert(std::make_pair(BB, BlockNumbering(BB)));
+ return I.first->second;
+ }
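The lazy-initialization idiom above in isolation: DenseMap::insert() returns std::pair<iterator, bool> and leaves an existing entry untouched, so a single call both finds and default-populates. A minimal generic sketch, assuming LLVM's ADT headers:

#include "llvm/ADT/DenseMap.h"
#include <utility>

template <typename K, typename V>
V &getOrCreate(llvm::DenseMap<K, V> &Map, const K &Key, V Fresh) {
  // insert() is a no-op (bool == false) when Key is already present;
  // either way, .first points at the entry we want.
  return Map.insert(std::make_pair(Key, Fresh)).first->second;
}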
+
+ /// List of users to ignore during scheduling and that don't need extracting.
+ ArrayRef<Value *> UserIgnoreList;
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
const DataLayout *DL;
TargetTransformInfo *TTI;
+ TargetLibraryInfo *TLI;
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
@@ -542,9 +543,10 @@ private:
IRBuilder<> Builder;
};
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
+ ArrayRef<Value *> UserIgnoreLst) {
deleteTree();
- RdxOps = Rdx;
+ UserIgnoreList = UserIgnoreLst;
if (!getSameType(Roots))
return;
buildTree_rec(Roots, 0);
@@ -576,8 +578,9 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
if (!UserInst)
continue;
- // Ignore uses that are part of the reduction.
- if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
+ // Ignore users in the user ignore list.
+ if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
+ UserIgnoreList.end())
continue;
DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
@@ -708,12 +711,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
continue;
}
- // This user is part of the reduction.
- if (RdxOps && RdxOps->count(UI))
+ // Ignore users in the user ignore list.
+ if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) !=
+ UserIgnoreList.end())
continue;
// Make sure that we can schedule this unknown user.
- BlockNumbering &BN = BlocksNumbers[BB];
+ BlockNumbering &BN = getBlockNumbering(BB);
int UserIndex = BN.getIndex(UI);
if (UserIndex < MyLastIndex) {
@@ -948,32 +952,36 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
case Instruction::Call: {
// Check if the calls are all to the same vectorizable intrinsic.
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
- if (II==NULL) {
+ CallInst *CI = cast<CallInst>(VL[0]);
+ // Check if this is an Intrinsic call or something that can be
+ // represented by an intrinsic call
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ if (!isTriviallyVectorizable(ID)) {
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
- Function *Int = II->getCalledFunction();
+ Function *Int = CI->getCalledFunction();
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
- IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]);
- if (!II2 || II2->getCalledFunction() != Int) {
+ CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
+ if (!CI2 || CI2->getCalledFunction() != Int ||
+ getIntrinsicIDForCall(CI2, TLI) != ID) {
newTreeEntry(VL, false);
- DEBUG(dbgs() << "SLP: mismatched calls:" << *II << "!=" << *VL[i]
+ DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
<< "\n");
return;
}
}
newTreeEntry(VL, true);
- for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (unsigned j = 0; j < VL.size(); ++j) {
- IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[j]);
- Operands.push_back(II2->getArgOperand(i));
+ CallInst *CI2 = dyn_cast<CallInst>(VL[j]);
+ Operands.push_back(CI2->getArgOperand(i));
}
buildTree_rec(Operands, Depth + 1);
}
@@ -1090,7 +1098,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// If instead not all operands are constants, then set the operand kind
// to OK_AnyValue. If all operands are constants but not the same,
// then set the operand kind to OK_NonUniformConstantValue.
- ConstantInt *CInt = NULL;
+ ConstantInt *CInt = nullptr;
for (unsigned i = 0; i < VL.size(); ++i) {
const Instruction *I = cast<Instruction>(VL[i]);
if (!isa<ConstantInt>(I->getOperand(1))) {
@@ -1129,12 +1137,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
- IntrinsicInst *II = cast<IntrinsicInst>(CI);
- Intrinsic::ID ID = II->getIntrinsicID();
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
SmallVector<Type*, 4> ScalarTys, VecTys;
- for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) {
+ for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) {
ScalarTys.push_back(CI->getArgOperand(op)->getType());
VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
VecTy->getNumElements()));
@@ -1147,7 +1154,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
- << " for " << *II << "\n");
+ << " for " << *CI << "\n");
return VecCallCost - ScalarCallCost;
}
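The returned delta is negative exactly when the widened call is cheaper than the scalar calls it replaces. Worked example with hypothetical costs: four scalar sqrtf calls at cost 1 apiece give ScalarCallCost = 4; if the <4 x float> llvm.sqrt intrinsic costs 1, the entry cost is 1 - 4 = -3, which getTreeCost() then weighs against the SLP cost threshold.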
@@ -1244,7 +1251,7 @@ Value *BoUpSLP::getPointerOperand(Value *I) {
return LI->getPointerOperand();
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
- return 0;
+ return nullptr;
}
unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
@@ -1318,13 +1325,13 @@ Value *BoUpSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) {
if (!A.Ptr || !B.Ptr || AA->alias(A, B))
return I;
}
- return 0;
+ return nullptr;
}
int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) {
BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
- assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");
- BlockNumbering &BN = BlocksNumbers[BB];
+ assert(BB == getSameBlock(VL) && "Invalid block");
+ BlockNumbering &BN = getBlockNumbering(BB);
int MaxIdx = BN.getIndex(BB->getFirstNonPHI());
for (unsigned i = 0, e = VL.size(); i < e; ++i)
@@ -1334,8 +1341,8 @@ int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) {
Instruction *BoUpSLP::getLastInstruction(ArrayRef<Value *> VL) {
BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
- assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");
- BlockNumbering &BN = BlocksNumbers[BB];
+ assert(BB == getSameBlock(VL) && "Invalid block");
+ BlockNumbering &BN = getBlockNumbering(BB);
int MaxIdx = BN.getIndex(cast<Instruction>(VL[0]));
for (unsigned i = 1, e = VL.size(); i < e; ++i)
@@ -1394,7 +1401,7 @@ Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
if (En->isSame(VL) && En->VectorizedValue)
return En->VectorizedValue;
}
- return 0;
+ return nullptr;
}
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
@@ -1615,6 +1622,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
VecTy->getPointerTo(AS));
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
+ if (!Alignment)
+ Alignment = DL->getABITypeAlignment(LI->getPointerOperand()->getType());
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
return propagateMetadata(LI, E->Scalars);
@@ -1634,13 +1643,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
VecTy->getPointerTo(AS));
StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
+ if (!Alignment)
+ Alignment = DL->getABITypeAlignment(SI->getPointerOperand()->getType());
S->setAlignment(Alignment);
E->VectorizedValue = S;
return propagateMetadata(S, E->Scalars);
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
-
setInsertPointAfterBundle(E->Scalars);
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
@@ -1656,8 +1666,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Module *M = F->getParent();
- IntrinsicInst *II = cast<IntrinsicInst>(CI);
- Intrinsic::ID ID = II->getIntrinsicID();
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
Value *V = Builder.CreateCall(CF, OpVecs);
@@ -1667,7 +1676,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
default:
llvm_unreachable("unknown inst");
}
- return 0;
+ return nullptr;
}
Value *BoUpSLP::vectorizeTree() {
@@ -1746,8 +1755,9 @@ Value *BoUpSLP::vectorizeTree() {
DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
assert((ScalarToTreeEntry.count(U) ||
- // It is legal to replace the reduction users by undef.
- (RdxOps && RdxOps->count(U))) &&
+ // It is legal to replace users in the ignorelist by undef.
+ (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) !=
+ UserIgnoreList.end())) &&
"Replacing out-of-tree value with undef");
}
#endif
@@ -1759,9 +1769,9 @@ Value *BoUpSLP::vectorizeTree() {
}
}
- for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
- BlocksNumbers[it].forget();
- }
+ for (auto &BN : BlocksNumbers)
+ BN.second.forget();
+
Builder.ClearInsertionPoint();
return VectorizableTree[0].VectorizedValue;
@@ -1802,11 +1812,19 @@ void BoUpSLP::optimizeGatherSequence() {
Insert->moveBefore(PreHeader->getTerminator());
}
+ // Make a list of all reachable blocks in our CSE queue.
+ SmallVector<const DomTreeNode *, 8> CSEWorkList;
+ CSEWorkList.reserve(CSEBlocks.size());
+ for (BasicBlock *BB : CSEBlocks)
+ if (DomTreeNode *N = DT->getNode(BB)) {
+ assert(DT->isReachableFromEntry(N));
+ CSEWorkList.push_back(N);
+ }
+
// Sort blocks by domination. This ensures we visit a block after all blocks
// dominating it are visited.
- SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(),
- [this](const BasicBlock *A, const BasicBlock *B) {
+ [this](const DomTreeNode *A, const DomTreeNode *B) {
return DT->properlyDominates(A, B);
});
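Sorting DomTreeNodes rather than raw blocks does two things: unreachable blocks, which have no tree node and which properlyDominates() cannot order against anything, are filtered out up front, and the stable sort yields an order in which every block follows all of its dominators. Concretely, for a hypothetical CFG where A dominates B and B dominates C, the worklist sorts to [A, B, C], so a gather in C that duplicates one in A is the copy that gets erased, and the survivor dominates every use it takes over.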
@@ -1814,12 +1832,10 @@ void BoUpSLP::optimizeGatherSequence() {
// instructions. TODO: We can further optimize this scan if we split the
// instructions into different buckets based on the insert lane.
SmallVector<Instruction *, 16> Visited;
- for (SmallVectorImpl<BasicBlock *>::iterator I = CSEWorkList.begin(),
- E = CSEWorkList.end();
- I != E; ++I) {
+ for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
"Worklist not sorted properly!");
- BasicBlock *BB = *I;
+ BasicBlock *BB = (*I)->getBlock();
// For all instructions in blocks containing gather sequences:
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
Instruction *In = it++;
@@ -1835,7 +1851,7 @@ void BoUpSLP::optimizeGatherSequence() {
DT->dominates((*v)->getParent(), In->getParent())) {
In->replaceAllUsesWith(*v);
In->eraseFromParent();
- In = 0;
+ In = nullptr;
break;
}
}
@@ -1864,6 +1880,7 @@ struct SLPVectorizer : public FunctionPass {
ScalarEvolution *SE;
const DataLayout *DL;
TargetTransformInfo *TTI;
+ TargetLibraryInfo *TLI;
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
@@ -1874,8 +1891,9 @@ struct SLPVectorizer : public FunctionPass {
SE = &getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TTI = &getAnalysis<TargetTransformInfo>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1900,8 +1918,8 @@ struct SLPVectorizer : public FunctionPass {
DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
// Use the bottom up slp vectorizer to construct chains that start with
- // he store instructions.
- BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT);
+ // store instructions.
+ BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT);
// Scan the blocks in the function in post order.
for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
@@ -1951,8 +1969,11 @@ private:
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
/// \brief Try to vectorize a list of operands.
+ /// \param BuildVector A list of users to ignore for the purpose of
+ /// scheduling and that don't need extracting.
/// \returns true if a value was vectorized.
- bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+ bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
+ ArrayRef<Value *> BuildVector = None);
/// \brief Try to vectorize a chain that may start at the operands of \p V.
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
@@ -2106,7 +2127,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
// Check that the pointer points to scalars.
Type *Ty = SI->getValueOperand()->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
- return 0;
+ continue;
// Find the base pointer.
Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
@@ -2125,7 +2146,8 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
return tryToVectorizeList(VL, R);
}
-bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
+ ArrayRef<Value *> BuildVector) {
if (VL.size() < 2)
return false;
@@ -2153,7 +2175,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
bool Changed = false;
- // Keep track of values that were delete by vectorizing in the loop below.
+ // Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
@@ -2175,13 +2197,38 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
<< "\n");
ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
- R.buildTree(Ops);
+ ArrayRef<Value *> BuildVectorSlice;
+ if (!BuildVector.empty())
+ BuildVectorSlice = BuildVector.slice(i, OpsWidth);
+
+ R.buildTree(Ops, BuildVectorSlice);
int Cost = R.getTreeCost();
if (Cost < -SLPCostThreshold) {
DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
- R.vectorizeTree();
-
+ Value *VectorizedRoot = R.vectorizeTree();
+
+ // Reconstruct the build vector by extracting the vectorized root. This
+ // way we handle the case where some elements of the vector are undefined.
+ // (return (insertelt <4 x i32> (insertelt undef (opd0) 0) (opd1) 2))
+ if (!BuildVectorSlice.empty()) {
+ // The insert point is the last build vector instruction. The vectorized
+ // root will precede it. This guarantees that we get an instruction. The
+ // vectorized tree could have been constant folded.
+ Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+ unsigned VecIdx = 0;
+ for (auto &V : BuildVectorSlice) {
+ IRBuilder<true, NoFolder> Builder(
+ ++BasicBlock::iterator(InsertAfter));
+ InsertElementInst *IE = cast<InsertElementInst>(V);
+ Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
+ VectorizedRoot, Builder.getInt32(VecIdx++)));
+ IE->setOperand(1, Extract);
+ IE->removeFromParent();
+ IE->insertAfter(Extract);
+ InsertAfter = IE;
+ }
+ }
// Move to the next bundle.
i += VF - 1;
Changed = true;
@@ -2290,7 +2337,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
/// *p =
///
class HorizontalReduction {
- SmallPtrSet<Value *, 16> ReductionOps;
+ SmallVector<Value *, 16> ReductionOps;
SmallVector<Value *, 32> ReducedVals;
BinaryOperator *ReductionRoot;
@@ -2308,7 +2355,7 @@ class HorizontalReduction {
public:
HorizontalReduction()
- : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0),
+ : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
/// \brief Try to find a reduction tree.
@@ -2323,10 +2370,10 @@ public:
// In such a case start looking for a tree rooted in the first '+'.
if (Phi) {
if (B->getOperand(0) == Phi) {
- Phi = 0;
+ Phi = nullptr;
B = dyn_cast<BinaryOperator>(B->getOperand(1));
} else if (B->getOperand(1) == Phi) {
- Phi = 0;
+ Phi = nullptr;
B = dyn_cast<BinaryOperator>(B->getOperand(0));
}
}
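A source-level shape that yields the phi-rooted chain handled here once the loop is unrolled (hypothetical example; a floating-point reduction additionally requires reassociation, matching the isAssociative() check below and the unsafe-algebra fast-math flags set in tryToReduce()):

// After aggressive unrolling, Sum becomes a PHI feeding a chain of fadd
// instructions, which matchAssociativeReduction() walks.
float sumArray(const float *A, int N) {
  float Sum = 0.0f;
  for (int i = 0; i < N; ++i)
    Sum += A[i];
  return Sum;
}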
@@ -2384,7 +2431,7 @@ public:
// We need to be able to reassociate the adds.
if (!TreeN->isAssociative())
return false;
- ReductionOps.insert(TreeN);
+ ReductionOps.push_back(TreeN);
}
// Retract.
Stack.pop_back();
@@ -2412,7 +2459,7 @@ public:
if (NumReducedVals < ReduxWidth)
return false;
- Value *VectorizedTree = 0;
+ Value *VectorizedTree = nullptr;
IRBuilder<> Builder(ReductionRoot);
FastMathFlags Unsafe;
Unsafe.setUnsafeAlgebra();
@@ -2421,7 +2468,7 @@ public:
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
- V.buildTree(ValsToReduce, &ReductionOps);
+ V.buildTree(ValsToReduce, ReductionOps);
// Estimate cost.
int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
@@ -2455,13 +2502,13 @@ public:
}
// Update users.
if (ReductionPHI) {
- assert(ReductionRoot != NULL && "Need a reduction operation");
+ assert(ReductionRoot && "Need a reduction operation");
ReductionRoot->setOperand(0, VectorizedTree);
ReductionRoot->setOperand(1, ReductionPHI);
} else
ReductionRoot->replaceAllUsesWith(VectorizedTree);
}
- return VectorizedTree != 0;
+ return VectorizedTree != nullptr;
}
private:
@@ -2540,13 +2587,16 @@ private:
///
/// Returns true if it matches
///
-static bool findBuildVector(InsertElementInst *IE,
- SmallVectorImpl<Value *> &Ops) {
- if (!isa<UndefValue>(IE->getOperand(0)))
+static bool findBuildVector(InsertElementInst *FirstInsertElem,
+ SmallVectorImpl<Value *> &BuildVector,
+ SmallVectorImpl<Value *> &BuildVectorOpds) {
+ if (!isa<UndefValue>(FirstInsertElem->getOperand(0)))
return false;
+ InsertElementInst *IE = FirstInsertElem;
while (true) {
- Ops.push_back(IE->getOperand(1));
+ BuildVector.push_back(IE);
+ BuildVectorOpds.push_back(IE->getOperand(1));
if (IE->use_empty())
return false;
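The shape being matched, written out as IR for a hypothetical <4 x float> build vector (operand 0 of the first insertelement must be undef, per the check above):

// %v0 = insertelement <4 x float> undef, float %a, i32 0
// %v1 = insertelement <4 x float> %v0,   float %b, i32 1
// %v2 = insertelement <4 x float> %v1,   float %c, i32 2
// %v3 = insertelement <4 x float> %v2,   float %d, i32 3
// BuildVector collects the chain {%v0..%v3}; BuildVectorOpds collects the
// scalars {%a..%d}, which tryToVectorizeList() then treats as the bundle.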
@@ -2641,7 +2691,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
Value *Rdx =
(P->getIncomingBlock(0) == BB
? (P->getIncomingValue(0))
- : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0));
+ : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1)
+ : nullptr));
// Check if this is a Binary Operator.
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
if (!BI)
@@ -2680,7 +2731,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
HorizontalReduction HorRdx;
- if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
+ if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
HorRdx.tryToReduce(R, TTI)) ||
tryToVectorize(BinOp, R))) {
Changed = true;
@@ -2716,12 +2767,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
// Try to vectorize trees that start at insertelement instructions.
- if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
- SmallVector<Value *, 8> Ops;
- if (!findBuildVector(IE, Ops))
+ if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
+ SmallVector<Value *, 16> BuildVector;
+ SmallVector<Value *, 16> BuildVectorOpds;
+ if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
continue;
- if (tryToVectorizeList(Ops, R)) {
+ // Vectorize starting with the build vector operands ignoring the
+ // BuildVector instructions for the purpose of scheduling and user
+ // extraction.
+ if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
Changed = true;
it = BB->begin();
e = BB->end();