about | summary | refs | log | tree | commit | diff | stats
path: root/lib/Transforms
diff options
context:
space:
mode:
author Stephen Hines <srhines@google.com> 2014-04-23 16:57:46 -0700
committer Stephen Hines <srhines@google.com> 2014-04-24 15:53:16 -0700
commit 36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
tree e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /lib/Transforms
parent 69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
download external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.zip
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.gz
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.bz2
Update to LLVM 3.5a.
Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/Hello/CMakeLists.txt8
-rw-r--r--lib/Transforms/Hello/Hello.cpp6
-rw-r--r--lib/Transforms/IPO/Android.mk2
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp87
-rw-r--r--lib/Transforms/IPO/BarrierNoopPass.cpp2
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp19
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp69
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp2
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp39
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp2
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp313
-rw-r--r--lib/Transforms/IPO/IPConstantPropagation.cpp31
-rw-r--r--lib/Transforms/IPO/IPO.cpp1
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp12
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp27
-rw-r--r--lib/Transforms/IPO/Inliner.cpp40
-rw-r--r--lib/Transforms/IPO/Internalize.cpp24
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt2
-rw-r--r--lib/Transforms/IPO/LoopExtractor.cpp17
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp230
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp25
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp44
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp10
-rw-r--r--lib/Transforms/IPO/StripDeadPrototypes.cpp2
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp32
-rw-r--r--lib/Transforms/InstCombine/Android.mk2
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h33
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp186
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp106
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp114
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp257
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp218
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp109
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp231
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp38
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp37
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp148
-rw-r--r--lib/Transforms/InstCombine/InstCombineWorklist.h5
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp182
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp585
-rw-r--r--lib/Transforms/Instrumentation/Android.mk2
-rw-r--r--lib/Transforms/Instrumentation/BoundsChecking.cpp24
-rw-r--r--lib/Transforms/Instrumentation/DataFlowSanitizer.cpp116
-rw-r--r--lib/Transforms/Instrumentation/DebugIR.cpp54
-rw-r--r--lib/Transforms/Instrumentation/DebugIR.h9
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp197
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp2
-rw-r--r--lib/Transforms/Instrumentation/LLVMBuild.txt2
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp360
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp53
-rw-r--r--lib/Transforms/ObjCARC/Android.mk2
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.cpp2
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.h5
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAPElim.cpp6
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h22
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCContract.cpp30
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCExpand.cpp8
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCOpts.cpp64
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp7
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp11
-rw-r--r--lib/Transforms/Scalar/Android.mk10
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt3
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp2002
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp590
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp16
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp9
-rw-r--r--lib/Transforms/Scalar/DCE.cpp15
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp99
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp34
-rw-r--r--lib/Transforms/Scalar/FlattenCFGPass.cpp6
-rw-r--r--lib/Transforms/Scalar/GVN.cpp186
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp53
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp216
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp42
-rw-r--r--lib/Transforms/Scalar/LICM.cpp337
-rw-r--r--lib/Transforms/Scalar/LLVMBuild.txt2
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp17
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp68
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp27
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp106
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp72
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp193
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp35
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp46
-rw-r--r--lib/Transforms/Scalar/LowerAtomic.cpp4
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp130
-rw-r--r--lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp4
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp56
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp17
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp72
-rw-r--r--lib/Transforms/Scalar/SROA.cpp788
-rw-r--r--lib/Transforms/Scalar/SampleProfile.cpp1093
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp9
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp312
-rw-r--r--lib/Transforms/Scalar/Scalarizer.cpp662
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp20
-rw-r--r--lib/Transforms/Scalar/Sink.cpp35
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp56
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp25
-rw-r--r--lib/Transforms/Utils/ASanStackFrameLayout.cpp114
-rw-r--r--lib/Transforms/Utils/AddDiscriminators.cpp217
-rw-r--r--lib/Transforms/Utils/Android.mk4
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp89
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp90
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp46
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt2
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp100
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp28
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp4
-rw-r--r--lib/Transforms/Utils/FlattenCFG.cpp2
-rw-r--r--lib/Transforms/Utils/GlobalStatus.cpp20
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp84
-rw-r--r--lib/Transforms/Utils/InstructionNamer.cpp4
-rw-r--r--lib/Transforms/Utils/IntegerDivision.cpp244
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp367
-rw-r--r--lib/Transforms/Utils/Local.cpp67
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp800
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp38
-rw-r--r--lib/Transforms/Utils/LowerExpectIntrinsic.cpp25
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp518
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp9
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp12
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp4
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp42
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp4
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp310
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp94
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp20
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp446
-rw-r--r--lib/Transforms/Utils/SpecialCaseList.cpp9
-rw-r--r--lib/Transforms/Utils/Utils.cpp3
-rw-r--r--lib/Transforms/Vectorize/Android.mk2
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp171
-rw-r--r--lib/Transforms/Vectorize/LLVMBuild.txt3
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp1117
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp278
-rw-r--r--lib/Transforms/Vectorize/Vectorize.cpp2
139 files changed, 9469 insertions, 7674 deletions
diff --git a/lib/Transforms/Hello/CMakeLists.txt b/lib/Transforms/Hello/CMakeLists.txt
index 917b745..e724dbc 100644
--- a/lib/Transforms/Hello/CMakeLists.txt
+++ b/lib/Transforms/Hello/CMakeLists.txt
@@ -1,3 +1,11 @@
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the hello plugin.
+if( NOT LLVM_REQUIRES_RTTI )
+ if( NOT LLVM_REQUIRES_EH )
+ set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/Hello.exports)
+ endif()
+endif()
+
add_llvm_loadable_module( LLVMHello
Hello.cpp
)
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index 9251783..c514c49 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -27,7 +27,7 @@ namespace {
static char ID; // Pass identification, replacement for typeid
Hello() : FunctionPass(ID) {}
- virtual bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
++HelloCounter;
errs() << "Hello: ";
errs().write_escaped(F.getName()) << '\n';
@@ -45,7 +45,7 @@ namespace {
static char ID; // Pass identification, replacement for typeid
Hello2() : FunctionPass(ID) {}
- virtual bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
++HelloCounter;
errs() << "Hello: ";
errs().write_escaped(F.getName()) << '\n';
@@ -53,7 +53,7 @@ namespace {
}
// We don't modify the program, so we preserve all analyses.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
};
diff --git a/lib/Transforms/IPO/Android.mk b/lib/Transforms/IPO/Android.mk
index dcf48df..1fe7d63 100644
--- a/lib/Transforms/IPO/Android.mk
+++ b/lib/Transforms/IPO/Android.mk
@@ -38,6 +38,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
# For the device
# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_ipo_SRC_FILES)
@@ -48,3 +49,4 @@ LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index df08091..48d3fba 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -37,13 +37,13 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <set>
@@ -58,12 +58,12 @@ namespace {
/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
///
struct ArgPromotion : public CallGraphSCCPass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
- virtual bool runOnSCC(CallGraphSCC &SCC);
+ bool runOnSCC(CallGraphSCC &SCC) override;
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
: CallGraphSCCPass(ID), maxElements(maxElements) {
@@ -88,7 +88,7 @@ char ArgPromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -136,11 +136,10 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// transform functions that have indirect callers. Also see if the function
// is self-recursive.
bool isSelfRecursive = false;
- for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
- UI != E; ++UI) {
- CallSite CS(*UI);
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
// Must be a direct call.
- if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0;
+ if (CS.getInstruction() == 0 || !CS.isCallee(&U)) return 0;
if (CS.getInstruction()->getParent()->getParent() == F)
isSelfRecursive = true;
@@ -155,7 +154,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
// If this is a byval argument, and if the aggregate type is small, just
- // pass the elements, which is always safe.
+ // pass the elements, which is always safe. This does not apply to
+ // inalloca.
if (PtrArg->hasByValAttr()) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
@@ -201,7 +201,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
}
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr()))
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr()))
ArgsToPromote.insert(PtrArg);
}
@@ -221,9 +221,8 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
// Look at all call sites of the function. At this pointer we know we only
// have direct callees.
- for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
- UI != E; ++UI) {
- CallSite CS(*UI);
+ for (User *U : Callee->users()) {
+ CallSite CS(U);
assert(CS && "Should only have direct calls!");
if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
@@ -301,7 +300,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
/// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
-bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
+bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
+ bool isByValOrInAlloca) const {
typedef std::set<IndicesVector> GEPIndicesSet;
// Quick exit for unused arguments
@@ -323,6 +323,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
//
// This set will contain all sets of indices that are loaded in the entry
// block, and thus are safe to unconditionally load in the caller.
+ //
+ // This optimization is also safe for InAlloca parameters, because it verifies
+ // that the address isn't captured.
GEPIndicesSet SafeToUnconditionallyLoad;
// This set contains all the sets of indices that we are planning to promote.
@@ -330,7 +333,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
+ if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -370,17 +373,16 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
// not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
SmallVector<LoadInst*, 16> Loads;
IndicesVector Operands;
- for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
- UI != E; ++UI) {
- User *U = *UI;
+ for (Use &U : Arg->uses()) {
+ User *UR = U.getUser();
Operands.clear();
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
// Don't hack volatile/atomic loads
if (!LI->isSimple()) return false;
Loads.push_back(LI);
// Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0);
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
if (GEP->use_empty()) {
// Dead GEP's cause trouble later. Just remove them if we run into
// them.
@@ -389,7 +391,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
// TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just do increment the UI iterator earlier and erase the
// use?
- return isSafeToPromoteArgument(Arg, isByVal);
+ return isSafeToPromoteArgument(Arg, isByValOrInAlloca);
}
// Ensure that all of the indices are constants.
@@ -401,9 +403,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
return false; // Not a constant operand GEP!
// Ensure that the only users of the GEP are load instructions.
- for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
- UI != E; ++UI)
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ for (User *GEPU : GEP->users())
+ if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
// Don't hack volatile/atomic loads
if (!LI->isSimple()) return false;
Loads.push_back(LI);
@@ -549,16 +550,15 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// In this table, we will track which indices are loaded from the argument
// (where direct loads are tracked as no indices).
ScalarizeTable &ArgIndices = ScalarizedElements[I];
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
- ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ assert(isa<LoadInst>(UI) || isa<GetElementPtrInst>(UI));
IndicesVector Indices;
- Indices.reserve(User->getNumOperands() - 1);
+ Indices.reserve(UI->getNumOperands() - 1);
// Since loads will only have a single operand, and GEPs only a single
// non-index operand, this will record direct loads without any indices,
// and gep+loads with the GEP indices.
- for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
+ for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
II != IE; ++II)
Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
// GEPs with a single 0 index can be merged with direct loads
@@ -566,11 +566,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Indices.clear();
ArgIndices.insert(Indices);
LoadInst *OrigLoad;
- if (LoadInst *L = dyn_cast<LoadInst>(User))
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
OrigLoad = L;
else
// Take any load, we will use it only to update Alias Analysis
- OrigLoad = cast<LoadInst>(User->use_back());
+ OrigLoad = cast<LoadInst>(UI->user_back());
OriginalLoads[std::make_pair(I, Indices)] = OrigLoad;
}
@@ -621,8 +621,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Get the callgraph information that we need to update to reflect our
// changes.
- CallGraph &CG = getAnalysis<CallGraph>();
-
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
// Get a new callgraph node for NF.
CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
@@ -631,7 +631,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
//
SmallVector<Value*, 16> Args;
while (!F->use_empty()) {
- CallSite CS(F->use_back());
+ CallSite CS(F->user_back());
assert(CS.getCalledFunction() == F);
Instruction *Call = CS.getInstruction();
const AttributeSet &CallPAL = CS.getAttributes();
@@ -807,6 +807,15 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
I->replaceAllUsesWith(TheAlloca);
TheAlloca->takeName(I);
AA.replaceWithNewValue(I, TheAlloca);
+
+ // If the alloca is used in a call, we must clear the tail flag since
+ // the callee now uses an alloca from the caller.
+ for (User *U : TheAlloca->users()) {
+ CallInst *Call = dyn_cast<CallInst>(U);
+ if (!Call)
+ continue;
+ Call->setTailCall(false);
+ }
continue;
}
@@ -821,7 +830,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ScalarizeTable &ArgIndices = ScalarizedElements[I];
while (!I->use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
assert(ArgIndices.begin()->empty() &&
"Load element should sort to front!");
I2->setName(I->getName()+".val");
@@ -831,7 +840,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
<< "' in function '" << F->getName() << "'\n");
} else {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
IndicesVector Operands;
Operands.reserve(GEP->getNumIndices());
for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
@@ -861,7 +870,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// All of the uses must be load instructions. Replace them all with
// the argument specified by ArgNo.
while (!GEP->use_empty()) {
- LoadInst *L = cast<LoadInst>(GEP->use_back());
+ LoadInst *L = cast<LoadInst>(GEP->user_back());
L->replaceAllUsesWith(TheArg);
AA.replaceWithNewValue(L, TheArg);
L->eraseFromParent();
diff --git a/lib/Transforms/IPO/BarrierNoopPass.cpp b/lib/Transforms/IPO/BarrierNoopPass.cpp
index 2e32240..6af1043 100644
--- a/lib/Transforms/IPO/BarrierNoopPass.cpp
+++ b/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -36,7 +36,7 @@ public:
initializeBarrierNoopPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M) { return false; }
+ bool runOnModule(Module &M) override { return false; }
};
}
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index d94c0f4..5c3acea 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -42,7 +42,7 @@ namespace {
// For this pass, process all of the globals in the module, eliminating
// duplicate constants.
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
// Return true iff we can determine the alignment of this global variable.
bool hasKnownAlignment(GlobalVariable *GV) const;
@@ -51,7 +51,7 @@ namespace {
// alignment to a concrete value.
unsigned getAlignment(GlobalVariable *GV) const;
- const DataLayout *TD;
+ const DataLayout *DL;
};
}
@@ -77,8 +77,8 @@ static void FindUsedValues(GlobalVariable *LLVMUsed,
}
// True if A is better than B.
-static bool IsBetterCannonical(const GlobalVariable &A,
- const GlobalVariable &B) {
+static bool IsBetterCanonical(const GlobalVariable &A,
+ const GlobalVariable &B) {
if (!A.hasLocalLinkage() && B.hasLocalLinkage())
return true;
@@ -89,20 +89,21 @@ static bool IsBetterCannonical(const GlobalVariable &A,
}
bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
- return TD || GV->getAlignment() != 0;
+ return DL || GV->getAlignment() != 0;
}
unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
unsigned Align = GV->getAlignment();
if (Align)
return Align;
- if (TD)
- return TD->getPreferredAlignment(GV);
+ if (DL)
+ return DL->getPreferredAlignment(GV);
return 0;
}
bool ConstantMerge::runOnModule(Module &M) {
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -160,7 +161,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
// it cannot be replace, but can be the canonical constant we merge with.
- if (Slot == 0 || IsBetterCannonical(*GV, *Slot))
+ if (Slot == 0 || IsBetterCanonical(*GV, *Slot))
Slot = GV;
}
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 911c14e..1aba3df 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -23,17 +23,17 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
@@ -62,12 +62,7 @@ namespace {
/// Make RetOrArg comparable, so we can put it into a map.
bool operator<(const RetOrArg &O) const {
- if (F != O.F)
- return F < O.F;
- else if (Idx != O.Idx)
- return Idx < O.Idx;
- else
- return IsArg < O.IsArg;
+ return std::tie(F, Idx, IsArg) < std::tie(O.F, O.Idx, O.IsArg);
}
/// Make RetOrArg comparable, so we can easily iterate the multimap.
@@ -143,13 +138,13 @@ namespace {
initializeDAEPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
virtual bool ShouldHackArguments() const { return false; }
private:
Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
- Liveness SurveyUse(Value::const_use_iterator U, UseVector &MaybeLiveUses,
+ Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
unsigned RetValNum = 0);
Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
@@ -178,7 +173,7 @@ namespace {
static char ID;
DAH() : DAE(ID) {}
- virtual bool ShouldHackArguments() const { return true; }
+ bool ShouldHackArguments() const override { return true; }
};
}
@@ -265,7 +260,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// to pass in a smaller number of arguments into the new function.
//
std::vector<Value*> Args;
- for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ) {
+ for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) {
CallSite CS(*I++);
if (!CS)
continue;
@@ -378,7 +373,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
I != E; ++I) {
Argument *Arg = I;
- if (Arg->use_empty() && !Arg->hasByValAttr())
+ if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr())
UnusedArgs.push_back(Arg->getArgNo());
}
@@ -387,10 +382,9 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
bool Changed = false;
- for (Function::use_iterator I = Fn.use_begin(), E = Fn.use_end();
- I != E; ++I) {
- CallSite CS(*I);
- if (!CS || !CS.isCallee(I))
+ for (Use &U : Fn.uses()) {
+ CallSite CS(U.getUser());
+ if (!CS || !CS.isCallee(&U))
continue;
// Now go through all unused args and replace them with "undef".
@@ -441,9 +435,9 @@ DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
/// RetValNum is the return value number to use when this use is used in a
/// return instruction. This is used in the recursion, you should always leave
/// it at 0.
-DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
+DAE::Liveness DAE::SurveyUse(const Use *U,
UseVector &MaybeLiveUses, unsigned RetValNum) {
- const User *V = *U;
+ const User *V = U->getUser();
if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
// The value is returned from a function. It's only live when the
// function's return value is live. We use RetValNum here, for the case
@@ -454,7 +448,7 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
return MarkIfNotLive(Use, MaybeLiveUses);
}
if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
- if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
+ if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
&& IV->hasIndices())
// The use we are examining is inserted into an aggregate. Our liveness
// depends on all uses of that aggregate, but if it is used as a return
@@ -465,9 +459,8 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
// we don't change RetValNum, but do survey all our uses.
Liveness Result = MaybeLive;
- for (Value::const_use_iterator I = IV->use_begin(),
- E = V->use_end(); I != E; ++I) {
- Result = SurveyUse(I, MaybeLiveUses, RetValNum);
+ for (const Use &UU : IV->uses()) {
+ Result = SurveyUse(&UU, MaybeLiveUses, RetValNum);
if (Result == Live)
break;
}
@@ -490,7 +483,7 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
return Live;
assert(CS.getArgument(ArgNo)
- == CS->getOperand(U.getOperandNo())
+ == CS->getOperand(U->getOperandNo())
&& "Argument is not where we expected it");
// Value passed to a normal call. It's only live when the corresponding
@@ -513,9 +506,8 @@ DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
// Assume it's dead (which will only hold if there are no uses at all..).
Liveness Result = MaybeLive;
// Check each use.
- for (Value::const_use_iterator I = V->use_begin(),
- E = V->use_end(); I != E; ++I) {
- Result = SurveyUse(I, MaybeLiveUses);
+ for (const Use &U : V->uses()) {
+ Result = SurveyUse(&U, MaybeLiveUses);
if (Result == Live)
break;
}
@@ -531,6 +523,13 @@ DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
// well as arguments to functions which have their "address taken".
//
void DAE::SurveyFunction(const Function &F) {
+ // Functions with inalloca parameters are expecting args in a particular
+ // register and memory layout.
+ if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
+ MarkLive(F);
+ return;
+ }
+
unsigned RetCount = NumRetVals(&F);
// Assume all return values are dead
typedef SmallVector<Liveness, 5> RetVals;
@@ -562,12 +561,11 @@ void DAE::SurveyFunction(const Function &F) {
unsigned NumLiveRetVals = 0;
Type *STy = dyn_cast<StructType>(F.getReturnType());
// Loop all uses of the function.
- for (Value::const_use_iterator I = F.use_begin(), E = F.use_end();
- I != E; ++I) {
+ for (const Use &U : F.uses()) {
// If the function is PASSED IN as an argument, its address has been
// taken.
- ImmutableCallSite CS(*I);
- if (!CS || !CS.isCallee(I)) {
+ ImmutableCallSite CS(U.getUser());
+ if (!CS || !CS.isCallee(&U)) {
MarkLive(F);
return;
}
@@ -586,9 +584,8 @@ void DAE::SurveyFunction(const Function &F) {
if (NumLiveRetVals != RetCount) {
if (STy) {
// Check all uses of the return value.
- for (Value::const_use_iterator I = TheCall->use_begin(),
- E = TheCall->use_end(); I != E; ++I) {
- const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I);
+ for (const User *U : TheCall->users()) {
+ const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U);
if (Ext && Ext->hasIndices()) {
// This use uses a part of our return value, survey the uses of
// that part and store the results for this index only.
@@ -891,7 +888,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
//
std::vector<Value*> Args;
while (!F->use_empty()) {
- CallSite CS(F->use_back());
+ CallSite CS(F->user_back());
Instruction *Call = CS.getInstruction();
AttributesVec.clear();
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 50fb3e6..4211f12 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -68,7 +68,7 @@ namespace {
explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
: ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
- bool runOnModule(Module &M) {
+ bool runOnModule(Module &M) override {
// Visit the global inline asm.
if (!deleteStuff)
M.setModuleInlineAsm("");
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 60e5f06..b716718 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -29,9 +29,9 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
@@ -51,7 +51,7 @@ namespace {
}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
- bool runOnSCC(CallGraphSCC &SCC);
+ bool runOnSCC(CallGraphSCC &SCC) override;
// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
bool AddReadAttrs(const CallGraphSCC &SCC);
@@ -120,7 +120,7 @@ namespace {
// call declarations.
bool annotateLibraryCalls(const CallGraphSCC &SCC);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetLibraryInfo>();
@@ -137,7 +137,7 @@ char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -342,9 +342,9 @@ namespace {
ArgumentUsesTracker(const SmallPtrSet<Function*, 8> &SCCNodes)
: Captured(false), SCCNodes(SCCNodes) {}
- void tooManyUses() { Captured = true; }
+ void tooManyUses() override { Captured = true; }
- bool captured(Use *U) {
+ bool captured(const Use *U) override {
CallSite CS(U->getUser());
if (!CS.getInstruction()) { Captured = true; return true; }
@@ -414,17 +414,19 @@ determinePointerReadAttrs(Argument *A,
SmallSet<Use*, 32> Visited;
int Count = 0;
+ // inalloca arguments are always clobbered by the call.
+ if (A->hasInAllocaAttr())
+ return Attribute::None;
+
bool IsRead = false;
// We don't need to track IsWritten. If A is written to, return immediately.
- for (Value::use_iterator UI = A->use_begin(), UE = A->use_end();
- UI != UE; ++UI) {
+ for (Use &U : A->uses()) {
if (Count++ >= 20)
return Attribute::None;
- Use *U = &UI.getUse();
- Visited.insert(U);
- Worklist.push_back(U);
+ Visited.insert(&U);
+ Worklist.push_back(&U);
}
while (!Worklist.empty()) {
@@ -437,13 +439,11 @@ determinePointerReadAttrs(Argument *A,
case Instruction::GetElementPtr:
case Instruction::PHI:
case Instruction::Select:
+ case Instruction::AddrSpaceCast:
// The original value is not read/written via this if the new value isn't.
- for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- Use *U = &UI.getUse();
- if (Visited.insert(U))
- Worklist.push_back(U);
- }
+ for (Use &UU : I->uses())
+ if (Visited.insert(&UU))
+ Worklist.push_back(&UU);
break;
case Instruction::Call:
@@ -599,8 +599,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// made. If the definition doesn't have a 'nocapture' attribute by now, it
// captures.
- for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG), E = scc_end(&AG);
- I != E; ++I) {
+ for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
std::vector<ArgumentGraphNode*> &ArgumentSCC = *I;
if (ArgumentSCC.size() == 1) {
if (!ArgumentSCC[0]->Definition) continue; // synthetic root node
@@ -723,6 +722,7 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,
// Extend the analysis by looking upwards.
case Instruction::BitCast:
case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
FlowsToReturn.insert(RVI->getOperand(0));
continue;
case Instruction::Select: {
@@ -1649,6 +1649,7 @@ bool FunctionAttrs::inferPrototypeAttributes(Function &F) {
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
+ break;
default:
// Didn't mark any attributes.
return false;
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 901295d..0c081f1 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -38,7 +38,7 @@ namespace {
// run - Do the GlobalDCE pass on the specified module, optionally updating
// the specified callgraph to reflect the changes.
//
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
private:
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 2ea89a1..1a510cf 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -22,22 +22,22 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -63,7 +63,7 @@ STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
namespace {
struct GlobalOpt : public ModulePass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfo>();
}
static char ID; // Pass identification, replacement for typeid
@@ -71,7 +71,7 @@ namespace {
initializeGlobalOptPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
private:
GlobalVariable *FindGlobalCtors(Module &M);
@@ -84,7 +84,7 @@ namespace {
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
- DataLayout *TD;
+ const DataLayout *DL;
TargetLibraryInfo *TLI;
};
}
@@ -196,7 +196,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
// Constants can't be pointers to dynamically allocated memory.
- for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+ for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
UI != E;) {
User *U = *UI++;
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
@@ -266,13 +266,14 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- DataLayout *TD, TargetLibraryInfo *TLI) {
+ const DataLayout *DL,
+ TargetLibraryInfo *TLI) {
bool Changed = false;
// Note that we need to use a weak value handle for the worklist items. When
// we delete a constant array, we may also be holding pointer to one of its
// elements (or an element of one of its elements if we're dealing with an
// array of arrays) in the worklist.
- SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end());
+ SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end());
while (!WorkList.empty()) {
Value *UV = WorkList.pop_back_val();
if (!UV)
@@ -296,11 +297,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = 0;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
- } else if (CE->getOpcode() == Instruction::BitCast &&
- CE->getType()->isPointerTy()) {
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
+ } else if ((CE->getOpcode() == Instruction::BitCast &&
+ CE->getType()->isPointerTy()) ||
+ CE->getOpcode() == Instruction::AddrSpaceCast) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
+ Changed |= CleanupConstantGlobalUsers(CE, 0, DL, TLI);
}
if (CE->use_empty()) {
@@ -314,7 +316,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -324,7 +326,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
SubInit = Constant::getNullValue(GEP->getType()->getElementType());
}
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);
if (GEP->use_empty()) {
GEP->eraseFromParent();
@@ -341,7 +343,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// us, and if they are all dead, nuke them without remorse.
if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
- CleanupConstantGlobalUsers(V, Init, TD, TLI);
+ CleanupConstantGlobalUsers(V, Init, DL, TLI);
return true;
}
}
@@ -374,9 +376,8 @@ static bool isSafeSROAElementUse(Value *V) {
!cast<Constant>(GEPI->getOperand(1))->isNullValue())
return false;
- for (Value::use_iterator I = GEPI->use_begin(), E = GEPI->use_end();
- I != E; ++I)
- if (!isSafeSROAElementUse(*I))
+ for (User *U : GEPI->users())
+ if (!isSafeSROAElementUse(U))
return false;
return true;
}
@@ -442,9 +443,10 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
}
}
- for (Value::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I)
- if (!isSafeSROAElementUse(*I))
+ for (User *UU : U->users())
+ if (!isSafeSROAElementUse(UU))
return false;
+
return true;
}
@@ -452,11 +454,10 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
/// is safe for us to perform this transformation.
///
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
- for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
- UI != E; ++UI) {
- if (!IsUserOfGlobalSafeForSRA(*UI, GV))
+ for (User *U : GV->users())
+ if (!IsUserOfGlobalSafeForSRA(U, GV))
return false;
- }
+
return true;
}
@@ -466,7 +467,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
/// behavior of the program in a more fine-grained way. We have determined that
/// this transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
-static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
@@ -481,11 +482,11 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
// Get the alignment of the global, either explicit or target-specific.
unsigned StartAlignment = GV->getAlignment();
if (StartAlignment == 0)
- StartAlignment = TD.getABITypeAlignment(GV->getType());
+ StartAlignment = DL.getABITypeAlignment(GV->getType());
if (StructType *STy = dyn_cast<StructType>(Ty)) {
NewGlobals.reserve(STy->getNumElements());
- const StructLayout &Layout = *TD.getStructLayout(STy);
+ const StructLayout &Layout = *DL.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Constant *In = Init->getAggregateElement(i);
assert(In && "Couldn't get element of initializer?");
@@ -502,7 +503,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
// propagate info to each field.
uint64_t FieldOffset = Layout.getElementOffset(i);
unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
- if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
+ if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
NGV->setAlignment(NewAlign);
}
} else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
@@ -516,8 +517,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
return 0; // It's not worth it.
NewGlobals.reserve(NumElements);
- uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
- unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType());
+ unsigned EltAlign = DL.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = Init->getAggregateElement(i);
assert(In && "Couldn't get element of initializer?");
@@ -549,7 +550,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
// Loop over all of the uses of the global, replacing the constantexpr geps,
// with smaller constantexpr geps or direct references.
while (!GV->use_empty()) {
- User *GEP = GV->use_back();
+ User *GEP = GV->user_back();
assert(((isa<ConstantExpr>(GEP) &&
cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");
@@ -610,10 +611,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &TD) {
/// phi nodes we've seen to avoid reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSet<const PHINode*, 8> &PHIs) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
- ++UI) {
- const User *U = *UI;
-
+ for (const User *U : V->users())
if (isa<LoadInst>(U)) {
// Will trap.
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
@@ -641,13 +639,13 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
return false;
} else if (isa<ICmpInst>(U) &&
- isa<ConstantPointerNull>(UI->getOperand(1))) {
+ isa<ConstantPointerNull>(U->getOperand(1))) {
// Ignore icmp X, null
} else {
//cerr << "NONTRAPPING USE: " << *U;
return false;
}
- }
+
return true;
}
@@ -655,10 +653,7 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
/// from GV will trap if the loaded value is null. Note that this also permits
/// comparisons of the loaded value against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
- for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
- UI != E; ++UI) {
- const User *U = *UI;
-
+ for (const User *U : GV->users())
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
SmallPtrSet<const PHINode*, 8> PHIs;
if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
@@ -670,13 +665,12 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
//cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
return false;
}
- }
return true;
}
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
+ for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
Instruction *I = cast<Instruction>(*UI++);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setOperand(0, NewV);
@@ -702,7 +696,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
if (PassedAsArg) {
// Being passed as an argument also. Be careful to not invalidate UI!
- UI = V->use_begin();
+ UI = V->user_begin();
}
}
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
@@ -742,7 +736,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- DataLayout *TD,
+ const DataLayout *DL,
TargetLibraryInfo *TLI) {
bool Changed = false;
@@ -751,7 +745,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
bool AllNonStoreUsesGone = true;
// Replace all uses of loads with uses of uses of the stored value.
- for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
+ for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
User *GlobalUser = *GUI++;
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
@@ -791,7 +785,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
Changed |= CleanupPointerRootUsers(GV, TLI);
} else {
Changed = true;
- CleanupConstantGlobalUsers(GV, 0, TD, TLI);
+ CleanupConstantGlobalUsers(GV, 0, DL, TLI);
}
if (GV->use_empty()) {
DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
@@ -805,11 +799,11 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V,
- DataLayout *TD, TargetLibraryInfo *TLI) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
+static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
+ TargetLibraryInfo *TLI) {
+ for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
- if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
+ if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
I->replaceAllUsesWith(NewC);
// Advance UI to the next non-I use to avoid invalidating it!
@@ -829,7 +823,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
Type *AllocTy,
ConstantInt *NElements,
- DataLayout *TD,
+ const DataLayout *DL,
TargetLibraryInfo *TLI) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
@@ -855,7 +849,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// other users to use the global as well.
BitCastInst *TheBC = 0;
while (!CI->use_empty()) {
- Instruction *User = cast<Instruction>(CI->use_back());
+ Instruction *User = cast<Instruction>(CI->user_back());
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
if (BCI->getType() == NewGV->getType()) {
BCI->replaceAllUsesWith(NewGV);
@@ -886,7 +880,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// Loop over all uses of GV, processing them in turn.
while (!GV->use_empty()) {
- if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
// The global is initialized when the store to it occurs.
new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
SI->getOrdering(), SI->getSynchScope(), SI);
@@ -894,15 +888,15 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
continue;
}
- LoadInst *LI = cast<LoadInst>(GV->use_back());
+ LoadInst *LI = cast<LoadInst>(GV->user_back());
while (!LI->use_empty()) {
- Use &LoadUse = LI->use_begin().getUse();
- if (!isa<ICmpInst>(LoadUse.getUser())) {
+ Use &LoadUse = *LI->use_begin();
+ ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
+ if (!ICI) {
LoadUse = RepValue;
continue;
}
- ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
// Replace the cmp X, 0 with a use of the bool value.
// Sink the load to where the compare was, if atomic rules allow us to.
Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
@@ -936,7 +930,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// If the initialization boolean was used, insert it, otherwise delete it.
if (!InitBoolUsed) {
while (!InitBool->use_empty()) // Delete initializations
- cast<StoreInst>(InitBool->use_back())->eraseFromParent();
+ cast<StoreInst>(InitBool->user_back())->eraseFromParent();
delete InitBool;
} else
GV->getParent()->getGlobalList().insert(GV, InitBool);
@@ -948,9 +942,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV, TD, TLI);
+ ConstantPropUsersOf(NewGV, DL, TLI);
if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue, TD, TLI);
+ ConstantPropUsersOf(RepValue, DL, TLI);
return NewGV;
}
@@ -962,9 +956,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
const GlobalVariable *GV,
SmallPtrSet<const PHINode*, 8> &PHIs) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- const Instruction *Inst = cast<Instruction>(*UI);
+ for (const User *U : V->users()) {
+ const Instruction *Inst = cast<Instruction>(U);
if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
continue; // Fine, ignore.
@@ -1011,7 +1004,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
GlobalVariable *GV) {
while (!Alloc->use_empty()) {
- Instruction *U = cast<Instruction>(*Alloc->use_begin());
+ Instruction *U = cast<Instruction>(*Alloc->user_begin());
Instruction *InsertPt = U;
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// If this is the store of the allocation into the global, remove it.
@@ -1022,7 +1015,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
} else if (PHINode *PN = dyn_cast<PHINode>(U)) {
// Insert the load in the corresponding predecessor, not right before the
// PHI.
- InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
+ InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
} else if (isa<BitCastInst>(U)) {
// Must be bitcast between the malloc and store to initialize the global.
ReplaceUsesOfMallocWithGlobal(U, GV);
@@ -1032,7 +1025,7 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
// If this is a "GEP bitcast" and the user is a store to the global, then
// just process it as a bitcast.
if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
- if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
+ if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
if (SI->getOperand(1) == GV) {
// Must be bitcast GEP between the malloc and store to initialize
// the global.
@@ -1056,19 +1049,18 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
// We permit two users of the load: setcc comparing against the null
// pointer, and a getelementptr of a specific form.
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
- ++UI) {
- const Instruction *User = cast<Instruction>(*UI);
+ for (const User *U : V->users()) {
+ const Instruction *UI = cast<Instruction>(U);
// Comparison against null is ok.
- if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return false;
continue;
}
// getelementptr is also ok, but only a simple form.
- if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
// Must index into the array and into the struct.
if (GEPI->getNumOperands() < 3)
return false;
@@ -1077,7 +1069,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
continue;
}
- if (const PHINode *PN = dyn_cast<PHINode>(User)) {
+ if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
if (!LoadUsingPHIsPerLoad.insert(PN))
// This means some phi nodes are dependent on each other.
// Avoid infinite looping!
@@ -1108,9 +1100,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
Instruction *StoredVal) {
SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
- for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
- UI != E; ++UI)
- if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ for (const User *U : GV->users())
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
LoadUsingPHIsPerLoad))
return false;
@@ -1249,7 +1240,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// If this is the first time we've seen this PHI, recursively process all
// users.
- for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
+ for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
@@ -1262,8 +1253,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
- for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
- UI != E; ) {
+ for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
@@ -1277,7 +1267,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value *NElems, DataLayout *TD,
+ Value *NElems, const DataLayout *DL,
const TargetLibraryInfo *TLI) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
Type *MAT = getMallocAllocatedType(CI, TLI);
@@ -1306,10 +1296,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
GV->getThreadLocalMode());
FieldGlobals.push_back(NGV);
- unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
+ unsigned TypeSize = DL->getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
- TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = TD->getIntPtrType(CI->getType());
+ TypeSize = DL->getStructLayout(ST)->getSizeInBytes();
+ Type *IntPtrTy = DL->getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, 0,
@@ -1394,7 +1384,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Okay, the malloc site is completely handled. All of the uses of GV are now
// loads, and all uses of those loads are simple. Rewrite them to use loads
// of the per-field globals instead.
- for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+ for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1469,9 +1459,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
Type *AllocTy,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- DataLayout *TD,
+ const DataLayout *DL,
TargetLibraryInfo *TLI) {
- if (!TD)
+ if (!DL)
return false;
// If this is a malloc of an abstract type, don't touch it.
@@ -1501,7 +1491,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
// We cannot optimize the malloc if we cannot determine malloc array size.
- Value *NElems = getMallocArraySize(CI, TD, TLI, true);
+ Value *NElems = getMallocArraySize(CI, DL, TLI, true);
if (!NElems)
return false;
@@ -1509,8 +1499,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
- if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI);
+ if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) {
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
return true;
}
@@ -1539,8 +1529,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = TD->getIntPtrType(CI->getType());
- unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
+ Type *IntPtrTy = DL->getIntPtrType(CI->getType());
+ unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
@@ -1555,8 +1545,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CI = cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true),
- TD, TLI);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true),
+ DL, TLI);
return true;
}
@@ -1568,7 +1558,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- DataLayout *TD, TargetLibraryInfo *TLI) {
+ const DataLayout *DL,
+ TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1583,13 +1574,13 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
Type *MallocType = getMallocAllocatedType(CI, TLI);
if (MallocType &&
TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
- TD, TLI))
+ DL, TLI))
return true;
}
}
@@ -1616,11 +1607,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// Walk the use list of the global seeing if all the uses are load or store.
// If there is anything else, bail out.
- for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
- User *U = *I;
+ for (User *U : GV->users())
if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
return false;
- }
DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
@@ -1645,7 +1634,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
IsOneZero = InitVal->isNullValue() && CI->isOne();
while (!GV->use_empty()) {
- Instruction *UI = cast<Instruction>(GV->use_back());
+ Instruction *UI = cast<Instruction>(GV->user_back());
if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
// Change the store into a boolean store.
bool StoringOther = SI->getOperand(0) == OtherVal;
@@ -1746,7 +1735,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// and this function is main (which we know is not recursive), we replace
// the global with a local alloca in this function.
//
- // NOTE: It doesn't make sense to promote non single-value types since we
+ // NOTE: It doesn't make sense to promote non-single-value types since we
// are just replacing static memory to stack memory.
//
// If the global is in different address space, don't bring it to stack.
@@ -1783,7 +1772,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
+ Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
}
// If the global is dead now, delete it.
@@ -1799,7 +1788,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -1812,11 +1801,13 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
++NumMarked;
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
- if (DataLayout *TD = getAnalysisIfAvailable<DataLayout>())
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
+ if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) {
+ const DataLayout &DL = DLP->getDataLayout();
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
GVI = FirstNewGV; // Don't skip the newly produced globals!
return true;
}
+ }
} else if (GS.StoredType == GlobalStatus::StoredOnce) {
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
@@ -1828,7 +1819,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
if (GV->use_empty()) {
DEBUG(dbgs() << " *** Substituting initializer allowed us to "
@@ -1845,7 +1836,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
- TD, TLI))
+ DL, TLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -1866,11 +1857,11 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified
/// function, changing them to FastCC.
static void ChangeCalleesToFastCall(Function *F) {
- for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
- if (isa<BlockAddress>(*UI))
+ for (User *U : F->users()) {
+ if (isa<BlockAddress>(U))
continue;
- CallSite User(cast<Instruction>(*UI));
- User.setCallingConv(CallingConv::Fast);
+ CallSite CS(cast<Instruction>(U));
+ CS.setCallingConv(CallingConv::Fast);
}
}
@@ -1889,14 +1880,24 @@ static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
static void RemoveNestAttribute(Function *F) {
F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
- for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
- if (isa<BlockAddress>(*UI))
+ for (User *U : F->users()) {
+ if (isa<BlockAddress>(U))
continue;
- CallSite User(cast<Instruction>(*UI));
- User.setAttributes(StripNest(F->getContext(), User.getAttributes()));
+ CallSite CS(cast<Instruction>(U));
+ CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
}
}
+/// Return true if this is a calling convention that we'd like to change. The
+/// idea here is that we don't want to mess with the convention if the user
+/// explicitly requested something with performance implications like coldcc,
+/// GHC, or anyregcc.
+static bool isProfitableToMakeFastCC(Function *F) {
+ CallingConv::ID CC = F->getCallingConv();
+ // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
+ return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
+}
+
bool GlobalOpt::OptimizeFunctions(Module &M) {
bool Changed = false;
// Optimize functions.
@@ -1911,11 +1912,11 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
Changed = true;
++NumFnDeleted;
} else if (F->hasLocalLinkage()) {
- if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
+ if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
!F->hasAddressTaken()) {
- // If this function has C calling conventions, is not a varargs
- // function, and is only called directly, promote it to use the Fast
- // calling convention.
+ // If this function has a calling convention worth changing, is not a
+ // varargs function, and is only called directly, promote it to use the
+ // Fast calling convention.
F->setCallingConv(CallingConv::Fast);
ChangeCalleesToFastCall(F);
++NumFastCallFns;
@@ -1946,7 +1947,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
// Simplify the initializer.
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
- Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
+ Constant *New = ConstantFoldConstantExpression(CE, DL, TLI);
if (New && New != CE)
GV->setInitializer(New);
}
@@ -2069,7 +2070,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
- const DataLayout *TD);
+ const DataLayout *DL);
/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
@@ -2082,7 +2083,7 @@ isSimpleEnoughValueToCommit(Constant *C,
/// time.
static bool isSimpleEnoughValueToCommitHelper(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
- const DataLayout *TD) {
+ const DataLayout *DL) {
// Simple integer, undef, constant aggregate zero, global addresses, etc are
// all supported.
if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
@@ -2094,7 +2095,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
isa<ConstantVector>(C)) {
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
Constant *Op = cast<Constant>(C->getOperand(i));
- if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD))
+ if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, DL))
return false;
}
return true;
@@ -2107,29 +2108,29 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
switch (CE->getOpcode()) {
case Instruction::BitCast:
// Bitcast is fine if the casted value is fine.
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
case Instruction::IntToPtr:
case Instruction::PtrToInt:
// int <=> ptr is fine if the int type is the same size as the
// pointer type.
- if (!TD || TD->getTypeSizeInBits(CE->getType()) !=
- TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
+ if (!DL || DL->getTypeSizeInBits(CE->getType()) !=
+ DL->getTypeSizeInBits(CE->getOperand(0)->getType()))
return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
// GEP is fine if it is simple + constant offset.
case Instruction::GetElementPtr:
for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
if (!isa<ConstantInt>(CE->getOperand(i)))
return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
case Instruction::Add:
// We allow simple+cst.
if (!isa<ConstantInt>(CE->getOperand(1)))
return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
}
return false;
}
@@ -2137,11 +2138,11 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
- const DataLayout *TD) {
+ const DataLayout *DL) {
// If we already checked this constant, we win.
if (!SimpleConstants.insert(C)) return true;
// Check the constant.
- return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD);
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, DL);
}
@@ -2173,7 +2174,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
// The first index must be zero.
- ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
+ ConstantInt *CI = dyn_cast<ConstantInt>(*std::next(CE->op_begin()));
if (!CI || !CI->isZero()) return false;
// The remaining indices must be compile-time known integers within the
@@ -2268,8 +2269,8 @@ namespace {
/// Once an evaluation call fails, the evaluation object should not be reused.
class Evaluator {
public:
- Evaluator(const DataLayout *TD, const TargetLibraryInfo *TLI)
- : TD(TD), TLI(TLI) {
+ Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
+ : DL(DL), TLI(TLI) {
ValueStack.push_back(new DenseMap<Value*, Constant*>);
}
@@ -2349,7 +2350,7 @@ private:
/// simple enough to live in a static initializer of a global.
SmallPtrSet<Constant*, 8> SimpleConstants;
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
};
@@ -2402,7 +2403,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Constant *Ptr = getVal(SI->getOperand(1));
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
- Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+ Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
DEBUG(dbgs() << "; To: " << *Ptr << "\n");
}
if (!isSimpleEnoughPointerToCommit(Ptr)) {
@@ -2415,7 +2416,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If this might be too difficult for the backend to handle (e.g. the addr
// of one global variable divided by another) then we can't commit it.
- if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) {
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) {
DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
<< "\n");
return false;
@@ -2447,7 +2448,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+ Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
// If we can't improve the situation by introspecting NewTy,
// we have to give up.
@@ -2511,7 +2512,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Constant *Ptr = getVal(LI->getOperand(0));
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
- Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+ Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
DEBUG(dbgs() << "Found a constant pointer expression, constant "
"folding: " << *Ptr << "\n");
}
@@ -2580,7 +2581,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// We don't insert an entry into Values, as it doesn't have a
// meaningful return value.
if (!II->use_empty()) {
- DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n");
+ DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
return false;
}
ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
@@ -2588,9 +2589,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Value *Ptr = PtrArg->stripPointerCasts();
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
- if (TD && !Size->isAllOnesValue() &&
+ if (DL && !Size->isAllOnesValue() &&
Size->getValue().getLimitedValue() >=
- TD->getTypeStoreSize(ElemTy)) {
+ DL->getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
<< "\n");
@@ -2696,7 +2697,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (!CurInst->use_empty()) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
- InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
+ InstResult = ConstantFoldConstantExpression(CE, DL, TLI);
setVal(CurInst, InstResult);
}
@@ -2779,10 +2780,10 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD,
+static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
const TargetLibraryInfo *TLI) {
// Call the function.
- Evaluator Eval(TD, TLI);
+ Evaluator Eval(DL, TLI);
Constant *RetValDummy;
bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
SmallVector<Constant*, 0>());
@@ -2830,7 +2831,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (F->empty()) continue;
// If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F, TD, TLI)) {
+ if (EvaluateStaticConstructor(F, DL, TLI)) {
Ctors.erase(Ctors.begin()+i);
MadeChange = true;
--i;
@@ -2856,12 +2857,14 @@ static void setUsedInitializer(GlobalVariable &V,
return;
}
- SmallVector<llvm::Constant *, 8> UsedArray;
- PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext());
+ // Type of each element of the used array: i8* in address space 0.
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
+ SmallVector<llvm::Constant *, 8> UsedArray;
for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end();
I != E; ++I) {
- Constant *Cast = llvm::ConstantExpr::getBitCast(*I, Int8PtrTy);
+ Constant *Cast
+ = ConstantExpr::getPointerBitCastOrAddrSpaceCast(*I, Int8PtrTy);
UsedArray.push_back(Cast);
}
// Sort to get deterministic order.
@@ -3015,7 +3018,8 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
// Give the aliasee the name, linkage and other attributes of the alias.
Target->takeName(J);
Target->setLinkage(J->getLinkage());
- Target->GlobalValue::copyAttributesFrom(J);
+ Target->setVisibility(J->getVisibility());
+ Target->setDLLStorageClass(J->getDLLStorageClass());
if (Used.usedErase(J))
Used.usedInsert(Target);
@@ -3122,8 +3126,8 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
// and remove them.
bool Changed = false;
- for (Function::use_iterator I = CXAAtExitFn->use_begin(),
- E = CXAAtExitFn->use_end(); I != E;) {
+ for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
+ I != E;) {
// We're only interested in calls. Theoretically, we could handle invoke
// instructions as well, but neither llvm-gcc nor clang generate invokes
// to __cxa_atexit.
@@ -3155,7 +3159,8 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TLI = &getAnalysis<TargetLibraryInfo>();
// Try to find the llvm.globalctors list.
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 4ac1dfc..8684796 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -20,11 +20,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
using namespace llvm;
STATISTIC(NumArgumentsProped, "Number of args turned into constants");
@@ -39,7 +39,7 @@ namespace {
initializeIPCPPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
private:
bool PropagateConstantsIntoArguments(Function &F);
bool PropagateConstantReturn(Function &F);
@@ -86,18 +86,18 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
ArgumentConstants.resize(F.arg_size());
unsigned NumNonconstant = 0;
- for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
- User *U = *UI;
+ for (Use &U : F.uses()) {
+ User *UR = U.getUser();
// Ignore blockaddress uses.
- if (isa<BlockAddress>(U)) continue;
+ if (isa<BlockAddress>(UR)) continue;
// Used by a non-instruction, or not the callee of a function, do not
// transform.
- if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
return false;
- CallSite CS(cast<Instruction>(U));
- if (!CS.isCallee(UI))
+ CallSite CS(cast<Instruction>(UR));
+ if (!CS.isCallee(&U))
return false;
// Check out all of the potentially constant arguments. Note that we don't
@@ -135,7 +135,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
// Do we have a constant argument?
if (ArgumentConstants[i].second || AI->use_empty() ||
- (AI->hasByValAttr() && !F.onlyReadsMemory()))
+ AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory()))
continue;
Value *V = ArgumentConstants[i].first;
@@ -210,7 +210,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
// Different or no known return value? Don't propagate this return
// value.
RetVals[i] = 0;
- // All values non constant? Stop looking.
+ // All values non-constant? Stop looking.
if (++NumNonConstant == RetVals.size())
return false;
}
@@ -220,13 +220,13 @@ bool IPCP::PropagateConstantReturn(Function &F) {
// over all users, replacing any uses of the return value with the returned
// constant.
bool MadeChange = false;
- for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
- CallSite CS(*UI);
+ for (Use &U : F.uses()) {
+ CallSite CS(U.getUser());
Instruction* Call = CS.getInstruction();
// Not a call instruction or a call instruction that's not calling F
// directly?
- if (!Call || !CS.isCallee(UI))
+ if (!Call || !CS.isCallee(&U))
continue;
// Call result not used?
@@ -244,9 +244,8 @@ bool IPCP::PropagateConstantReturn(Function &F) {
Call->replaceAllUsesWith(New);
continue;
}
-
- for (Value::use_iterator I = Call->use_begin(), E = Call->use_end();
- I != E;) {
+
+ for (auto I = Call->user_begin(), E = Call->user_end(); I != E;) {
Instruction *Ins = cast<Instruction>(*I);
// Increment now, so we can remove the use
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 5d563d8..b4d31d8 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -44,6 +44,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeStripDebugDeclarePass(Registry);
initializeStripDeadDebugInfoPass(Registry);
initializeStripNonDebugSymbolsPass(Registry);
+ initializeBarrierNoopPass(Registry);
}
void LLVMInitializeIPO(LLVMPassRegistryRef R) {
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 437597e..6cf3040 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -17,13 +17,13 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
using namespace llvm;
@@ -47,13 +47,13 @@ public:
static char ID; // Pass identification, replacement for typeid
- virtual InlineCost getInlineCost(CallSite CS);
+ InlineCost getInlineCost(CallSite CS) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool runOnSCC(CallGraphSCC &SCC);
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnSCC(CallGraphSCC &SCC) override;
using llvm::Pass::doFinalization;
- virtual bool doFinalization(CallGraph &CG) {
+ bool doFinalization(CallGraph &CG) override {
return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
}
};
@@ -63,7 +63,7 @@ public:
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 57379a3..7141064 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -15,13 +15,13 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
using namespace llvm;
@@ -48,20 +48,31 @@ public:
static char ID; // Pass identification, replacement for typeid
- InlineCost getInlineCost(CallSite CS) {
+ InlineCost getInlineCost(CallSite CS) override {
return ICA->getInlineCost(CS, getInlineThreshold(CS));
}
- virtual bool runOnSCC(CallGraphSCC &SCC);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
};
+static int computeThresholdFromOptLevels(unsigned OptLevel,
+ unsigned SizeOptLevel) {
+ if (OptLevel > 2)
+ return 275;
+ if (SizeOptLevel == 1) // -Os
+ return 75;
+ if (SizeOptLevel == 2) // -Oz
+ return 25;
+ return 225;
+}
+
} // end anonymous namespace
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
@@ -72,6 +83,12 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
return new SimpleInliner(Threshold);
}
+Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
+ unsigned SizeOptLevel) {
+ return new SimpleInliner(
+ computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+}
+
bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
ICA = &getAnalysis<InlineCostAnalysis>();
return Inliner::runOnSCC(SCC);
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index d75d6ca..e97fb83 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,11 +19,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -50,6 +50,13 @@ static cl::opt<int>
HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
cl::desc("Threshold for inlining functions with inline hint"));
+// We introduce this threshold to help performance of instrumentation-based
+// PGO before we actually hook up inliner with analysis passes such as BPI and
+// BFI.
+static cl::opt<int>
+ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
+ cl::desc("Threshold for inlining functions with cold attribute"));
+
// Threshold to use when optsize is specified (and there is no -inline-limit).
const int OptSizeThreshold = 75;
@@ -117,7 +124,7 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
int InlineHistory, bool InsertLifetime,
- const DataLayout *TD) {
+ const DataLayout *DL) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
@@ -196,7 +203,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// If we don't have data layout information, and only one alloca is using
// the target default, then we can't safely merge them because we can't
// pick the greater alignment.
- if (!TD && (!Align1 || !Align2) && Align1 != Align2)
+ if (!DL && (!Align1 || !Align2) && Align1 != Align2)
continue;
// The available alloca has to be in the right function, not in some other
@@ -218,8 +225,8 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
if (Align1 != Align2) {
if (!Align1 || !Align2) {
- assert(TD && "DataLayout required to compare default alignments");
- unsigned TypeAlign = TD->getABITypeAlignment(AI->getAllocatedType());
+ assert(DL && "DataLayout required to compare default alignments");
+ unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType());
Align1 = Align1 ? Align1 : TypeAlign;
Align2 = Align2 ? Align2 : TypeAlign;
@@ -277,6 +284,13 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
Attribute::MinSize))
thres = HintThreshold;
+ // Listen to the cold attribute when it would decrease the threshold.
+ bool ColdCallee = Callee && !Callee->isDeclaration() &&
+ Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Cold);
+ if (ColdCallee && ColdThreshold < thres)
+ thres = ColdThreshold;
+
return thres;
}
@@ -330,9 +344,8 @@ bool Inliner::shouldInline(CallSite CS) {
bool callerWillBeRemoved = Caller->hasLocalLinkage();
// This bool tracks what happens if we DO inline C into B.
bool inliningPreventsSomeOuterInline = false;
- for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
- I != E; ++I) {
- CallSite CS2(*I);
+ for (User *U : Caller->users()) {
+ CallSite CS2(U);
// If this isn't a call to Caller (it could be some other sort
// of reference) skip it. Such references will prevent the caller
@@ -363,7 +376,7 @@ bool Inliner::shouldInline(CallSite CS) {
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
+ if (callerWillBeRemoved && !Caller->use_empty())
TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
@@ -395,8 +408,9 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
}
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
- CallGraph &CG = getAnalysis<CallGraph>();
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
SmallPtrSet<Function*, 8> SCCFunctions;
@@ -456,7 +470,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, TD);
+ InlineFunctionInfo InlineInfo(&CG, DL);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -505,7 +519,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime, TD))
+ InlineHistoryID, InsertLifetime, DL))
continue;
++NumInlined;
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 64e2ced..c1fe01c 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -59,11 +59,11 @@ namespace {
explicit InternalizePass();
explicit InternalizePass(ArrayRef<const char *> ExportList);
void LoadFile(const char *Filename);
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addPreserved<CallGraph>();
+ AU.addPreserved<CallGraphWrapperPass>();
}
};
} // end anonymous namespace
@@ -72,8 +72,7 @@ char InternalizePass::ID = 0;
INITIALIZE_PASS(InternalizePass, "internalize",
"Internalize Global Symbols", false, false)
-InternalizePass::InternalizePass()
- : ModulePass(ID) {
+InternalizePass::InternalizePass() : ModulePass(ID) {
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
@@ -81,7 +80,7 @@ InternalizePass::InternalizePass()
}
InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
- : ModulePass(ID){
+ : ModulePass(ID) {
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
for(ArrayRef<const char *>::const_iterator itr = ExportList.begin();
itr != ExportList.end(); itr++) {
@@ -115,6 +114,10 @@ static bool shouldInternalize(const GlobalValue &GV,
if (GV.hasAvailableExternallyLinkage())
return false;
+ // Assume that dllexported symbols are referenced elsewhere
+ if (GV.hasDLLExportStorageClass())
+ return false;
+
// Already has internal linkage
if (GV.hasLocalLinkage())
return false;
@@ -127,7 +130,8 @@ static bool shouldInternalize(const GlobalValue &GV,
}
bool InternalizePass::runOnModule(Module &M) {
- CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
+ CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
+ CallGraph *CG = CGPass ? &CGPass->getCallGraph() : 0;
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
bool Changed = false;
@@ -150,7 +154,6 @@ bool InternalizePass::runOnModule(Module &M) {
}
// Mark all functions not in the api as internal.
- // FIXME: maybe use private linkage?
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!shouldInternalize(*I, ExternalNames))
continue;
@@ -186,7 +189,6 @@ bool InternalizePass::runOnModule(Module &M) {
// Mark all global variables with initializers that are not in the api as
// internal as well.
- // FIXME: maybe use private linkage?
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I) {
if (!shouldInternalize(*I, ExternalNames))
@@ -213,9 +215,7 @@ bool InternalizePass::runOnModule(Module &M) {
return Changed;
}
-ModulePass *llvm::createInternalizePass() {
- return new InternalizePass();
-}
+ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
return new InternalizePass(ExportList);
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index 124cbb6..77e0b22 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils ObjCARC
+required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils Vectorize
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 8282a8e..464aa99 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -17,8 +17,8 @@
#define DEBUG_TYPE "loop-extract"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
@@ -42,12 +42,12 @@ namespace {
initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(BreakCriticalEdgesID);
AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
}
};
}
@@ -57,7 +57,7 @@ INITIALIZE_PASS_BEGIN(LoopExtractor, "loop-extract",
"Extract loops into new functions", false, false)
INITIALIZE_PASS_DEPENDENCY(BreakCriticalEdges)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(LoopExtractor, "loop-extract",
"Extract loops into new functions", false, false)
@@ -79,6 +79,9 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); }
bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
// Only visit top-level loops.
if (L->getParentLoop())
return false;
@@ -87,7 +90,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
bool Changed = false;
// If there is more than one top-level loop in this function, extract all of
@@ -177,7 +180,7 @@ namespace {
LoadFile(BlockFile.c_str());
}
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
};
}
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 3861421..8555d2c 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -50,6 +50,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
@@ -58,11 +59,10 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
using namespace llvm;
@@ -108,12 +108,12 @@ public:
static const ComparableFunction TombstoneKey;
static DataLayout * const LookupOnly;
- ComparableFunction(Function *Func, DataLayout *TD)
- : Func(Func), Hash(profileFunction(Func)), TD(TD) {}
+ ComparableFunction(Function *Func, const DataLayout *DL)
+ : Func(Func), Hash(profileFunction(Func)), DL(DL) {}
Function *getFunc() const { return Func; }
unsigned getHash() const { return Hash; }
- DataLayout *getTD() const { return TD; }
+ const DataLayout *getDataLayout() const { return DL; }
// Drops AssertingVH reference to the function. Outside of debug mode, this
// does nothing.
@@ -125,11 +125,11 @@ public:
private:
explicit ComparableFunction(unsigned Hash)
- : Func(NULL), Hash(Hash), TD(NULL) {}
+ : Func(NULL), Hash(Hash), DL(NULL) {}
AssertingVH<Function> Func;
unsigned Hash;
- DataLayout *TD;
+ const DataLayout *DL;
};
const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
@@ -164,9 +164,9 @@ namespace {
/// side of claiming that two functions are different).
class FunctionComparator {
public:
- FunctionComparator(const DataLayout *TD, const Function *F1,
+ FunctionComparator(const DataLayout *DL, const Function *F1,
const Function *F2)
- : F1(F1), F2(F2), TD(TD) {}
+ : F1(F1), F2(F2), DL(DL) {}
/// Test whether the two functions have equivalent behaviour.
bool compare();
@@ -193,13 +193,58 @@ private:
return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
}
- /// Compare two Types, treating all pointer types as equal.
- bool isEquivalentType(Type *Ty1, Type *Ty2) const;
+ /// cmpType - compares two types,
+ /// defines total ordering among the types set.
+ ///
+ /// Return values:
+ /// 0 if types are equal,
+ /// -1 if Left is less than Right,
+ /// +1 if Left is greater than Right.
+ ///
+ /// Description:
+ /// Comparison is broken into stages. As in lexicographical comparison,
+ /// the stage that comes first has higher priority.
+ /// At each stage, keep the total ordering properties in mind.
+ ///
+ /// 0. Before comparison we coerce pointer types of 0 address space to
+ /// integer.
+ /// We also don't bother with same type at left and right, so
+ /// just return 0 in this case.
+ ///
+ /// 1. If types are of different kind (different type IDs).
+ /// Return result of type IDs comparison, treating them as numbers.
+ /// 2. If types are vectors or integers, compare Type* values as numbers.
+ /// 3. Types have the same ID, so check whether they belong to this group:
+ /// * Void
+ /// * Float
+ /// * Double
+ /// * X86_FP80
+ /// * FP128
+ /// * PPC_FP128
+ /// * Label
+ /// * Metadata
+ /// If so, return 0: these types can be treated as equal simply because
+ /// their IDs are the same.
+ /// 4. If Left and Right are pointers, return result of address space
+ /// comparison (numbers comparison). We can treat pointer types of same
+ /// address space as equal.
+ /// 5. If types are complex.
+ /// Then both Left and Right are to be expanded and their element types will
+ /// be checked with the same way. If we get Res != 0 on some stage, return it.
+ /// Otherwise return 0.
+ /// 6. For all other cases put llvm_unreachable.
+ int cmpType(Type *TyL, Type *TyR) const;
+
+ bool isEquivalentType(Type *Ty1, Type *Ty2) const {
+ return cmpType(Ty1, Ty2) == 0;
+ }
+
+ int cmpNumbers(uint64_t L, uint64_t R) const;
// The two functions undergoing comparison.
const Function *F1, *F2;
- const DataLayout *TD;
+ const DataLayout *DL;
DenseMap<const Value *, const Value *> id_map;
DenseSet<const Value *> seen_values;
@@ -207,32 +252,39 @@ private:
}
-// Any two pointers in the same address space are equivalent, intptr_t and
-// pointers are equivalent. Otherwise, standard type equivalence rules apply.
-bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
+int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
+ if (L < R) return -1;
+ if (L > R) return 1;
+ return 0;
+}
- PointerType *PTy1 = dyn_cast<PointerType>(Ty1);
- PointerType *PTy2 = dyn_cast<PointerType>(Ty2);
+/// cmpType - compares two types,
+/// defines total ordering among the types set.
+/// See method declaration comments for more details.
+int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
- if (TD) {
- if (PTy1 && PTy1->getAddressSpace() == 0) Ty1 = TD->getIntPtrType(Ty1);
- if (PTy2 && PTy2->getAddressSpace() == 0) Ty2 = TD->getIntPtrType(Ty2);
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+
+ if (DL) {
+ if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL);
+ if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR);
}
- if (Ty1 == Ty2)
- return true;
+ if (TyL == TyR)
+ return 0;
- if (Ty1->getTypeID() != Ty2->getTypeID())
- return false;
+ if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
+ return Res;
- switch (Ty1->getTypeID()) {
+ switch (TyL->getTypeID()) {
default:
llvm_unreachable("Unknown type!");
// Fall through in Release mode.
case Type::IntegerTyID:
case Type::VectorTyID:
- // Ty1 == Ty2 would have returned true earlier.
- return false;
+ // TyL == TyR would have returned 0 earlier.
+ return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
case Type::VoidTyID:
case Type::FloatTyID:
@@ -242,51 +294,55 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
case Type::PPC_FP128TyID:
case Type::LabelTyID:
case Type::MetadataTyID:
- return true;
+ return 0;
case Type::PointerTyID: {
- assert(PTy1 && PTy2 && "Both types must be pointers here.");
- return PTy1->getAddressSpace() == PTy2->getAddressSpace();
+ assert(PTyL && PTyR && "Both types must be pointers here.");
+ return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
}
case Type::StructTyID: {
- StructType *STy1 = cast<StructType>(Ty1);
- StructType *STy2 = cast<StructType>(Ty2);
- if (STy1->getNumElements() != STy2->getNumElements())
- return false;
-
- if (STy1->isPacked() != STy2->isPacked())
- return false;
-
- for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) {
- if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i)))
- return false;
+ StructType *STyL = cast<StructType>(TyL);
+ StructType *STyR = cast<StructType>(TyR);
+ if (STyL->getNumElements() != STyR->getNumElements())
+ return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+
+ if (STyL->isPacked() != STyR->isPacked())
+ return cmpNumbers(STyL->isPacked(), STyR->isPacked());
+
+ for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
+ if (int Res = cmpType(STyL->getElementType(i),
+ STyR->getElementType(i)))
+ return Res;
}
- return true;
+ return 0;
}
case Type::FunctionTyID: {
- FunctionType *FTy1 = cast<FunctionType>(Ty1);
- FunctionType *FTy2 = cast<FunctionType>(Ty2);
- if (FTy1->getNumParams() != FTy2->getNumParams() ||
- FTy1->isVarArg() != FTy2->isVarArg())
- return false;
+ FunctionType *FTyL = cast<FunctionType>(TyL);
+ FunctionType *FTyR = cast<FunctionType>(TyR);
+ if (FTyL->getNumParams() != FTyR->getNumParams())
+ return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
- if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType()))
- return false;
+ if (FTyL->isVarArg() != FTyR->isVarArg())
+ return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
- for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) {
- if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i)))
- return false;
+ if (int Res = cmpType(FTyL->getReturnType(), FTyR->getReturnType()))
+ return Res;
+
+ for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
+ if (int Res = cmpType(FTyL->getParamType(i), FTyR->getParamType(i)))
+ return Res;
}
- return true;
+ return 0;
}
case Type::ArrayTyID: {
- ArrayType *ATy1 = cast<ArrayType>(Ty1);
- ArrayType *ATy2 = cast<ArrayType>(Ty2);
- return ATy1->getNumElements() == ATy2->getNumElements() &&
- isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
+ ArrayType *ATyL = cast<ArrayType>(TyL);
+ ArrayType *ATyR = cast<ArrayType>(TyR);
+ if (ATyL->getNumElements() != ATyR->getNumElements())
+ return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
+ return cmpType(ATyL->getElementType(), ATyR->getElementType());
}
}
}
@@ -341,7 +397,10 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
- CXI->getOrdering() == cast<AtomicCmpXchgInst>(I2)->getOrdering() &&
+ CXI->getSuccessOrdering() ==
+ cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() &&
+ CXI->getFailureOrdering() ==
+ cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() &&
CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
@@ -359,13 +418,13 @@ bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
if (AS != GEP2->getPointerAddressSpace())
return false;
- if (TD) {
+ if (DL) {
// When we have target data, we can reduce the GEP down to the value in bytes
// added to the address.
- unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 1;
+ unsigned BitWidth = DL ? DL->getPointerSizeInBits(AS) : 1;
APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
- if (GEP1->accumulateConstantOffset(*TD, Offset1) &&
- GEP2->accumulateConstantOffset(*TD, Offset2)) {
+ if (GEP1->accumulateConstantOffset(*DL, Offset1) &&
+ GEP2->accumulateConstantOffset(*DL, Offset2)) {
return Offset1 == Offset2;
}
}
@@ -561,7 +620,7 @@ public:
initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
private:
typedef DenseSet<ComparableFunction> FnSetType;
@@ -606,7 +665,7 @@ private:
FnSetType FnSet;
/// DataLayout for more accurate GEP comparisons. May be NULL.
- DataLayout *TD;
+ const DataLayout *DL;
/// Whether or not the target supports global aliases.
bool HasGlobalAliases;
@@ -623,7 +682,8 @@ ModulePass *llvm::createMergeFunctionsPass() {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -646,7 +706,7 @@ bool MergeFunctions::runOnModule(Module &M) {
Function *F = cast<Function>(*I);
if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
!F->mayBeOverridden()) {
- ComparableFunction CF = ComparableFunction(F, TD);
+ ComparableFunction CF = ComparableFunction(F, DL);
Changed |= insert(CF);
}
}
@@ -661,7 +721,7 @@ bool MergeFunctions::runOnModule(Module &M) {
Function *F = cast<Function>(*I);
if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
F->mayBeOverridden()) {
- ComparableFunction CF = ComparableFunction(F, TD);
+ ComparableFunction CF = ComparableFunction(F, DL);
Changed |= insert(CF);
}
}
@@ -682,28 +742,27 @@ bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
return false;
// One of these is a special "underlying pointer comparison only" object.
- if (LHS.getTD() == ComparableFunction::LookupOnly ||
- RHS.getTD() == ComparableFunction::LookupOnly)
+ if (LHS.getDataLayout() == ComparableFunction::LookupOnly ||
+ RHS.getDataLayout() == ComparableFunction::LookupOnly)
return false;
- assert(LHS.getTD() == RHS.getTD() &&
+ assert(LHS.getDataLayout() == RHS.getDataLayout() &&
"Comparing functions for different targets");
- return FunctionComparator(LHS.getTD(), LHS.getFunc(),
+ return FunctionComparator(LHS.getDataLayout(), LHS.getFunc(),
RHS.getFunc()).compare();
}
// Replace direct callers of Old with New.
void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
- for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
- UI != UE;) {
- Value::use_iterator TheIter = UI;
+ for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
+ Use *U = &*UI;
++UI;
- CallSite CS(*TheIter);
- if (CS && CS.isCallee(TheIter)) {
+ CallSite CS(U->getUser());
+ if (CS && CS.isCallee(U)) {
remove(CS.getInstruction()->getParent()->getParent());
- TheIter.getUse().set(BitcastNew);
+ U->set(BitcastNew);
}
}
}
@@ -723,7 +782,7 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
// Helper for writeThunk,
// Selects proper bitcast operation,
-// but a bit simplier then CastInst::getCastOpcode.
+// but a bit simpler than CastInst::getCastOpcode.
static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
Type *SrcTy = V->getType();
if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
@@ -894,17 +953,14 @@ void MergeFunctions::removeUsers(Value *V) {
Value *V = Worklist.back();
Worklist.pop_back();
- for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- Use &U = UI.getUse();
- if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
+ for (User *U : V->users()) {
+ if (Instruction *I = dyn_cast<Instruction>(U)) {
remove(I->getParent()->getParent());
- } else if (isa<GlobalValue>(U.getUser())) {
+ } else if (isa<GlobalValue>(U)) {
// do nothing
- } else if (Constant *C = dyn_cast<Constant>(U.getUser())) {
- for (Value::use_iterator CUI = C->use_begin(), CUE = C->use_end();
- CUI != CUE; ++CUI)
- Worklist.push_back(*CUI);
+ } else if (Constant *C = dyn_cast<Constant>(U)) {
+ for (User *UU : C->users())
+ Worklist.push_back(UU);
}
}
}
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index fa518cb..ac88aee 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -15,11 +15,11 @@
#define DEBUG_TYPE "partialinlining"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;
@@ -28,14 +28,14 @@ STATISTIC(NumPartialInlined, "Number of functions partially inlined");
namespace {
struct PartialInliner : public ModulePass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+ void getAnalysisUsage(AnalysisUsage &AU) const override { }
static char ID; // Pass identification, replacement for typeid
PartialInliner() : ModulePass(ID) {
initializePartialInlinerPass(*PassRegistry::getPassRegistry());
}
-
- bool runOnModule(Module& M);
-
+
+ bool runOnModule(Module& M) override;
+
private:
Function* unswitchFunction(Function* F);
};
@@ -119,8 +119,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
- DT.runOnFunction(*duplicateFunction);
-
+ DT.recalculate(*duplicateFunction);
+
// Extract the body of the if.
Function* extractedFunction
= CodeExtractor(toExtract, &DT).extractCodeRegion();
@@ -128,8 +128,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
InlineFunctionInfo IFI;
// Inline the top-level if test into all callers.
- std::vector<User*> Users(duplicateFunction->use_begin(),
- duplicateFunction->use_end());
+ std::vector<User *> Users(duplicateFunction->user_begin(),
+ duplicateFunction->user_end());
for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end();
UI != UE; ++UI)
if (CallInst *CI = dyn_cast<CallInst>(*UI))
@@ -162,9 +162,8 @@ bool PartialInliner::runOnModule(Module& M) {
if (currFunc->use_empty()) continue;
bool recursive = false;
- for (Function::use_iterator UI = currFunc->use_begin(),
- UE = currFunc->use_end(); UI != UE; ++UI)
- if (Instruction* I = dyn_cast<Instruction>(*UI))
+ for (User *U : currFunc->users())
+ if (Instruction* I = dyn_cast<Instruction>(U))
if (I->getParent()->getParent() == currFunc) {
recursive = true;
break;
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 24c5018..4a28b34 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -17,7 +17,7 @@
#include "llvm-c/Transforms/PassManagerBuilder.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
@@ -33,11 +33,6 @@ RunLoopVectorization("vectorize-loops", cl::Hidden,
cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
-LateVectorization("late-vectorize", cl::init(true), cl::Hidden,
- cl::desc("Run the vectorization pasess late in the pass "
- "pipeline (after the inliner)"));
-
-static cl::opt<bool>
RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
@@ -68,7 +63,6 @@ PassManagerBuilder::PassManagerBuilder() {
BBVectorize = RunBBVectorization;
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
- LateVectorize = LateVectorization;
RerollLoops = RunLoopRerolling;
}
@@ -200,11 +194,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (!LateVectorize && LoopVectorize)
- MPM.add(createLoopVectorizePass(DisableUnrollLoops));
-
if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass()); // Unroll small loops
+ MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
if (OptLevel > 1)
@@ -243,21 +234,21 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
- // As an experimental mode, run any vectorization passes in a separate
- // pipeline from the CGSCC pass manager that runs iteratively with the
- // inliner.
- if (LateVectorize && LoopVectorize) {
- // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
- // pass manager that we are specifically trying to avoid. To prevent this
- // we must insert a no-op module pass to reset the pass manager.
- MPM.add(createBarrierNoopPass());
+ // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+ // pass manager that we are specifically trying to avoid. To prevent this
+ // we must insert a no-op module pass to reset the pass manager.
+ MPM.add(createBarrierNoopPass());
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+ // FIXME: Because of #pragma vectorize enable, the passes below are always
+ // inserted in the pipeline, even when the vectorizer doesn't run (e.g. when
+ // on -O1 and no #pragma is found). Would be good to have these two passes
+ // as function calls, so that we can run them only when the vectorizer
+ // changed the code.
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createCFGSimplificationPass());
- // Add the various vectorization passes and relevant cleanup passes for
- // them since we are no longer in the middle of the main scalar pipeline.
- MPM.add(createLoopVectorizePass(DisableUnrollLoops));
- MPM.add(createInstructionCombiningPass());
- MPM.add(createCFGSimplificationPass());
- }
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass()); // Unroll small loops
if (!DisableUnitAtATime) {
// FIXME: We shouldn't bother with this anymore.
@@ -343,6 +334,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
+ // More loops are countable; try to vectorize them.
+ PM.add(createLoopVectorizePass(true, true));
+
// Cleanup and simplify the code after the scalar optimizations.
PM.add(createInstructionCombiningPass());
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index b160913..c61ec5e 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -21,12 +21,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/CFG.h"
#include <algorithm>
using namespace llvm;
@@ -41,7 +41,7 @@ namespace {
}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
- bool runOnSCC(CallGraphSCC &SCC);
+ bool runOnSCC(CallGraphSCC &SCC) override;
bool SimplifyFunction(Function *F);
void DeleteBasicBlock(BasicBlock *BB);
@@ -51,7 +51,7 @@ namespace {
char PruneEH::ID = 0;
INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
"Remove unused exception handling info", false, false)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(PruneEH, "prune-eh",
"Remove unused exception handling info", false, false)
@@ -60,7 +60,7 @@ Pass *llvm::createPruneEHPass() { return new PruneEH(); }
bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
SmallPtrSet<CallGraphNode *, 8> SCCNodes;
- CallGraph &CG = getAnalysis<CallGraph>();
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
bool MadeChange = false;
// Fill SCCNodes with the elements of the SCC. Used for quickly
@@ -234,7 +234,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
/// exist in the BB.
void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!");
- CallGraph &CG = getAnalysis<CallGraph>();
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
CallGraphNode *CGN = CG[BB->getParent()];
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index f00830a..1c6532d 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -32,7 +32,7 @@ public:
StripDeadPrototypesPass() : ModulePass(ID) {
initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
};
} // end anonymous namespace
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index c4f5cfc..6d0be8f 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -23,8 +23,8 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -44,9 +44,9 @@ namespace {
initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
};
@@ -59,9 +59,9 @@ namespace {
initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
};
@@ -74,9 +74,9 @@ namespace {
initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
};
@@ -89,9 +89,9 @@ namespace {
initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
};
@@ -132,11 +132,10 @@ ModulePass *llvm::createStripDeadDebugInfoPass() {
/// OnlyUsedBy - Return true if V is only used by Usr.
static bool OnlyUsedBy(Value *V, Value *Usr) {
- for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
- User *U = *I;
+ for (User *U : V->users())
if (U != Usr)
return false;
- }
+
return true;
}
@@ -147,7 +146,7 @@ static void RemoveDeadConstant(Constant *C) {
if (OnlyUsedBy(C->getOperand(i), C))
Operands.insert(cast<Constant>(C->getOperand(i)));
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
+ if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals.
GV->eraseFromParent();
}
else if (!isa<Function>(C))
@@ -250,7 +249,7 @@ bool StripDebugDeclare::runOnModule(Module &M) {
if (Declare) {
while (!Declare->use_empty()) {
- CallInst *CI = cast<CallInst>(Declare->use_back());
+ CallInst *CI = cast<CallInst>(Declare->user_back());
Value *Arg1 = CI->getArgOperand(0);
Value *Arg2 = CI->getArgOperand(1);
assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
@@ -307,10 +306,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
SmallVector<Value *, 64> LiveSubprograms;
DenseSet<const MDNode *> VisitedSet;
- for (DebugInfoFinder::iterator CI = F.compile_unit_begin(),
- CE = F.compile_unit_end(); CI != CE; ++CI) {
- // Create our compile unit.
- DICompileUnit DIC(*CI);
+ for (DICompileUnit DIC : F.compile_units()) {
assert(DIC.Verify() && "DIC must verify as a DICompileUnit.");
// Create our live subprogram list.
diff --git a/lib/Transforms/InstCombine/Android.mk b/lib/Transforms/InstCombine/Android.mk
index 31605d9..62dc7d9 100644
--- a/lib/Transforms/InstCombine/Android.mk
+++ b/lib/Transforms/InstCombine/Android.mk
@@ -30,6 +30,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
# For the device
# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_inst_combine_SRC_FILES)
@@ -40,3 +41,4 @@ LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index a5eddc2..822e146 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -11,13 +11,13 @@
#define INSTCOMBINE_INSTCOMBINE_H
#include "InstCombineWorklist.h"
+#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
-#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
-#include "llvm/Support/TargetFolder.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
namespace llvm {
@@ -51,6 +51,15 @@ static inline unsigned getComplexity(Value *V) {
return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
}
+/// AddOne - Add one to a Constant
+static inline Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+}
+/// SubOne - Subtract one from a Constant
+static inline Constant *SubOne(Constant *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
+}
+
/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
/// just like the normal insertion helper, but also adds any new instructions
@@ -72,7 +81,7 @@ public:
class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
- DataLayout *TD;
+ const DataLayout *DL;
TargetLibraryInfo *TLI;
bool MadeIRChange;
LibCallSimplifier *Simplifier;
@@ -87,19 +96,19 @@ public:
BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
+ InstCombiner() : FunctionPass(ID), DL(0), Builder(0) {
MinimizeSize = false;
initializeInstCombinerPass(*PassRegistry::getPassRegistry());
}
public:
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
bool DoOneIteration(Function &F, unsigned ItNum);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
- DataLayout *getDataLayout() const { return TD; }
+ const DataLayout *getDataLayout() const { return DL; }
TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
@@ -116,7 +125,7 @@ public:
Instruction *visitSub(BinaryOperator &I);
Instruction *visitFSub(BinaryOperator &I);
Instruction *visitMul(BinaryOperator &I);
- Value *foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
+ Value *foldFMulConst(Instruction *FMulOrDiv, Constant *C,
Instruction *InsertBefore);
Instruction *visitFMul(BinaryOperator &I);
Instruction *visitURem(BinaryOperator &I);
@@ -225,7 +234,7 @@ private:
Type *Ty);
Instruction *visitCallSite(CallSite CS);
- Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *TD);
+ Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL);
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS,
IntrinsicInst *Tramp);
@@ -302,15 +311,15 @@ public:
void ComputeMaskedBits(Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0) const {
- return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
+ return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, DL, Depth);
}
bool MaskedValueIsZero(Value *V, const APInt &Mask,
unsigned Depth = 0) const {
- return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ return llvm::MaskedValueIsZero(V, Mask, DL, Depth);
}
unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
- return llvm::ComputeNumSignBits(Op, TD, Depth);
+ return llvm::ComputeNumSignBits(Op, DL, Depth);
}
private:
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 534feb8..97910c7 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -15,8 +15,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -175,7 +175,7 @@ namespace {
Value *createFDiv(Value *Opnd0, Value *Opnd1);
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
- void createInstPostProc(Instruction *NewInst);
+ void createInstPostProc(Instruction *NewInst, bool NoNumber = false);
InstCombiner::BuilderTy *Builder;
Instruction *Instr;
@@ -483,6 +483,11 @@ Value *FAddCombine::performFactorization(Instruction *I) {
if (!Factor)
return 0;
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+ if (I0) Flags &= I->getFastMathFlags();
+ if (I1) Flags &= I->getFastMathFlags();
+
// Create expression "NewAddSub = AddSub0 +/- AddsSub1"
Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
createFAdd(AddSub0, AddSub1) :
@@ -491,12 +496,20 @@ Value *FAddCombine::performFactorization(Instruction *I) {
const APFloat &F = CFP->getValueAPF();
if (!F.isNormal())
return 0;
- }
+ } else if (Instruction *II = dyn_cast<Instruction>(NewAddSub))
+ II->setFastMathFlags(Flags);
- if (isMpy)
- return createFMul(Factor, NewAddSub);
+ if (isMpy) {
+ Value *RI = createFMul(Factor, NewAddSub);
+ if (Instruction *II = dyn_cast<Instruction>(RI))
+ II->setFastMathFlags(Flags);
+ return RI;
+ }
- return createFDiv(NewAddSub, Factor);
+ Value *RI = createFDiv(NewAddSub, Factor);
+ if (Instruction *II = dyn_cast<Instruction>(RI))
+ II->setFastMathFlags(Flags);
+ return RI;
}
Value *FAddCombine::simplify(Instruction *I) {
@@ -746,7 +759,10 @@ Value *FAddCombine::createFSub
Value *FAddCombine::createFNeg(Value *V) {
Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0));
- return createFSub(Zero, V);
+ Value *NewV = createFSub(Zero, V);
+ if (Instruction *I = dyn_cast<Instruction>(NewV))
+ createInstPostProc(I, true); // fneg's don't receive instruction numbers.
+ return NewV;
}
Value *FAddCombine::createFAdd
@@ -771,11 +787,13 @@ Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
return V;
}
-void FAddCombine::createInstPostProc(Instruction *NewInstr) {
+void FAddCombine::createInstPostProc(Instruction *NewInstr,
+ bool NoNumber) {
NewInstr->setDebugLoc(Instr->getDebugLoc());
// Keep track of the number of instruction created.
- incCreateInstNum();
+ if (!NoNumber)
+ incCreateInstNum();
// Propagate fast-math flags
NewInstr->setFastMathFlags(Instr->getFastMathFlags());
@@ -845,39 +863,25 @@ Value *FAddCombine::createAddendVal
return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
}
-/// AddOne - Add one to a ConstantInt.
-static Constant *AddOne(Constant *C) {
- return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
-}
-
-/// SubOne - Subtract one from a ConstantInt.
-static Constant *SubOne(ConstantInt *C) {
- return ConstantInt::get(C->getContext(), C->getValue()-1);
-}
-
-
// dyn_castFoldableMul - If this value is a multiply that can be folded into
// other computations (because it has a constant operand), return the
// non-constant operand of the multiply, and set CST to point to the multiplier.
// Otherwise, return null.
//
-static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {
- if (!V->hasOneUse() || !V->getType()->isIntegerTy())
+static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) {
+ if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy())
return 0;
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return 0;
if (I->getOpcode() == Instruction::Mul)
- if ((CST = dyn_cast<ConstantInt>(I->getOperand(1))))
+ if ((CST = dyn_cast<Constant>(I->getOperand(1))))
return I->getOperand(0);
if (I->getOpcode() == Instruction::Shl)
- if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) {
+ if ((CST = dyn_cast<Constant>(I->getOperand(1)))) {
// The multiplier is really 1 << CST.
- uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- uint32_t CSTVal = CST->getLimitedValue(BitWidth);
- CST = ConstantInt::get(V->getType()->getContext(),
- APInt::getOneBitSet(BitWidth, CSTVal));
+ CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST);
return I->getOperand(0);
}
return 0;
@@ -915,7 +919,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), TD))
+ I.hasNoUnsignedWrap(), DL))
return ReplaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
@@ -987,7 +991,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
- if (I.getType()->isIntegerTy(1))
+ if (I.getType()->getScalarType()->isIntegerTy(1))
return BinaryOperator::CreateXor(LHS, RHS);
// X + X --> X << 1
@@ -1017,21 +1021,23 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(LHS, V);
- ConstantInt *C2;
- if (Value *X = dyn_castFoldableMul(LHS, C2)) {
- if (X == RHS) // X*C + X --> X * (C+1)
- return BinaryOperator::CreateMul(RHS, AddOne(C2));
+ {
+ Constant *C2;
+ if (Value *X = dyn_castFoldableMul(LHS, C2)) {
+ if (X == RHS) // X*C + X --> X * (C+1)
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
+
+ // X*C1 + X*C2 --> X * (C1+C2)
+ Constant *C1;
+ if (X == dyn_castFoldableMul(RHS, C1))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
+ }
- // X*C1 + X*C2 --> X * (C1+C2)
- ConstantInt *C1;
- if (X == dyn_castFoldableMul(RHS, C1))
- return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
+ // X + X*C --> X * (C+1)
+ if (dyn_castFoldableMul(RHS, C2) == LHS)
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
}
- // X + X*C --> X * (C+1)
- if (dyn_castFoldableMul(RHS, C2) == LHS)
- return BinaryOperator::CreateMul(LHS, AddOne(C2));
-
// A+B --> A|B iff A and B have no bits set in common.
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
APInt LHSKnownOne(IT->getBitWidth(), 0);
@@ -1071,12 +1077,16 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
- if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
- Value *X = 0;
- if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ Value *X;
+ if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
return BinaryOperator::CreateSub(SubOne(CRHS), X);
+ }
+ if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
// (X & FF00) + xx00 -> (X+xx00) & FF00
+ Value *X;
+ ConstantInt *C2;
if (LHS->hasOneUse() &&
match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) &&
CRHS->getValue() == (CRHS->getValue() & C2->getValue())) {
@@ -1183,7 +1193,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD))
+ if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL))
return ReplaceInstUsesWith(I, V);
if (isa<Constant>(RHS)) {
@@ -1198,13 +1208,19 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// -A + B --> B - A
// -A + -B --> -(A + B)
- if (Value *LHSV = dyn_castFNegVal(LHS))
- return BinaryOperator::CreateFSub(RHS, LHSV);
+ if (Value *LHSV = dyn_castFNegVal(LHS)) {
+ Instruction *RI = BinaryOperator::CreateFSub(RHS, LHSV);
+ RI->copyFastMathFlags(&I);
+ return RI;
+ }
// A + -B --> A - B
if (!isa<Constant>(RHS))
- if (Value *V = dyn_castFNegVal(RHS))
- return BinaryOperator::CreateFSub(LHS, V);
+ if (Value *V = dyn_castFNegVal(RHS)) {
+ Instruction *RI = BinaryOperator::CreateFSub(LHS, V);
+ RI->copyFastMathFlags(&I);
+ return RI;
+ }
// Check for (fadd double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
@@ -1284,7 +1300,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty) {
- assert(TD && "Must have target data info for this");
+ assert(DL && "Must have target data info for this");
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
@@ -1353,7 +1369,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), TD))
+ I.hasNoUnsignedWrap(), DL))
return ReplaceInstUsesWith(I, V);
// (A*B)-(A*C) -> A*(B-C) etc
@@ -1375,51 +1391,53 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (match(Op0, m_AllOnes()))
return BinaryOperator::CreateNot(Op1);
- if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ if (Constant *C = dyn_cast<Constant>(Op0)) {
// C - ~X == X + (1+C)
Value *X = 0;
if (match(Op1, m_Not(m_Value(X))))
return BinaryOperator::CreateAdd(X, AddOne(C));
- // -(X >>u 31) -> (X >>s 31)
- // -(X >>s 31) -> (X >>u 31)
- if (C->isZero()) {
- Value *X; ConstantInt *CI;
- if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
- // Verify we are shifting out everything but the sign bit.
- CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
- return BinaryOperator::CreateAShr(X, CI);
-
- if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
- // Verify we are shifting out everything but the sign bit.
- CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
- return BinaryOperator::CreateLShr(X, CI);
- }
-
// Try to fold constant sub into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
// C-(X+C2) --> (C-C2)-X
- ConstantInt *C2;
- if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
+ Constant *C2;
+ if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
if (SimplifyDemandedInstructionBits(I))
return &I;
// Fold (sub 0, (zext bool to B)) --> (sext bool to B)
- if (C->isZero() && match(Op1, m_ZExt(m_Value(X))))
- if (X->getType()->isIntegerTy(1))
+ if (C->isNullValue() && match(Op1, m_ZExt(m_Value(X))))
+ if (X->getType()->getScalarType()->isIntegerTy(1))
return CastInst::CreateSExtOrBitCast(X, Op1->getType());
// Fold (sub 0, (sext bool to B)) --> (zext bool to B)
- if (C->isZero() && match(Op1, m_SExt(m_Value(X))))
- if (X->getType()->isIntegerTy(1))
+ if (C->isNullValue() && match(Op1, m_SExt(m_Value(X))))
+ if (X->getType()->getScalarType()->isIntegerTy(1))
return CastInst::CreateZExtOrBitCast(X, Op1->getType());
}
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (C->isZero()) {
+ Value *X; ConstantInt *CI;
+ if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateAShr(X, CI);
+
+ if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
+ // Verify we are shifting out everything but the sign bit.
+ CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1)
+ return BinaryOperator::CreateLShr(X, CI);
+ }
+ }
+
{ Value *Y;
// X-(X+Y) == -Y X-(Y+X) == -Y
@@ -1435,7 +1453,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Op1->hasOneUse()) {
Value *X = 0, *Y = 0, *Z = 0;
Constant *C = 0;
- ConstantInt *CI = 0;
+ Constant *CI = 0;
// (X - (Y - Z)) --> (X + (Z - Y)).
if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
@@ -1460,13 +1478,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateShl(XNeg, Y);
// X - X*C --> X * (1-C)
- if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) {
+ if (match(Op1, m_Mul(m_Specific(Op0), m_Constant(CI)))) {
Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI);
return BinaryOperator::CreateMul(Op0, CP1);
}
// X - X<<C --> X * (1-(1<<C))
- if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) {
+ if (match(Op1, m_Shl(m_Specific(Op0), m_Constant(CI)))) {
Constant *One = ConstantInt::get(I.getType(), 1);
C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
return BinaryOperator::CreateMul(Op0, C);
@@ -1481,26 +1499,26 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// X - A*CI -> X + A*-CI
// X - CI*A -> X + A*-CI
- if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) ||
- match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) {
+ if (match(Op1, m_Mul(m_Value(A), m_Constant(CI))) ||
+ match(Op1, m_Mul(m_Constant(CI), m_Value(A)))) {
Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
return BinaryOperator::CreateAdd(Op0, NewMul);
}
}
- ConstantInt *C1;
+ Constant *C1;
if (Value *X = dyn_castFoldableMul(Op0, C1)) {
if (X == Op1) // X*C - X --> X * (C-1)
return BinaryOperator::CreateMul(Op1, SubOne(C1));
- ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
+ Constant *C2; // X*C1 - X*C2 -> X * (C1-C2)
if (X == dyn_castFoldableMul(Op1, C2))
return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
- if (TD) {
+ if (DL) {
Value *LHSOp, *RHSOp;
if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
match(Op1, m_PtrToInt(m_Value(RHSOp))))
@@ -1520,7 +1538,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD))
+ if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL))
return ReplaceInstUsesWith(I, V);
if (isa<Constant>(Op0))
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 88bb69b..2c1bfc7 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -13,23 +13,13 @@
#include "InstCombine.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/ConstantRange.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
using namespace llvm;
using namespace PatternMatch;
-
-/// AddOne - Add one to a ConstantInt.
-static Constant *AddOne(ConstantInt *C) {
- return ConstantInt::get(C->getContext(), C->getValue() + 1);
-}
-/// SubOne - Subtract one from a ConstantInt.
-static Constant *SubOne(ConstantInt *C) {
- return ConstantInt::get(C->getContext(), C->getValue()-1);
-}
-
/// isFreeToInvert - Return true if the specified value is free to invert (apply
/// ~ to). This happens in cases where the ~ can be eliminated.
static inline bool isFreeToInvert(Value *V) {
@@ -513,31 +503,46 @@ static unsigned conjugateICmpMask(unsigned Mask) {
/// decomposition fails.
static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
Value *&X, Value *&Y, Value *&Z) {
- // X < 0 is equivalent to (X & SignBit) != 0.
- if (I->getPredicate() == ICmpInst::ICMP_SLT)
- if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (C->isZero()) {
- X = I->getOperand(0);
- Y = ConstantInt::get(I->getContext(),
- APInt::getSignBit(C->getBitWidth()));
- Pred = ICmpInst::ICMP_NE;
- Z = C;
- return true;
- }
+ ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!C)
+ return false;
- // X > -1 is equivalent to (X & SignBit) == 0.
- if (I->getPredicate() == ICmpInst::ICMP_SGT)
- if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
- if (C->isAllOnesValue()) {
- X = I->getOperand(0);
- Y = ConstantInt::get(I->getContext(),
- APInt::getSignBit(C->getBitWidth()));
- Pred = ICmpInst::ICMP_EQ;
- Z = ConstantInt::getNullValue(C->getType());
- return true;
- }
+ switch (I->getPredicate()) {
+ default:
+ return false;
+ case ICmpInst::ICMP_SLT:
+ // X < 0 is equivalent to (X & SignBit) != 0.
+ if (!C->isZero())
+ return false;
+ Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_SGT:
+ // X > -1 is equivalent to (X & SignBit) == 0.
+ if (!C->isAllOnesValue())
+ return false;
+ Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
+ if (!C->getValue().isPowerOf2())
+ return false;
+ Y = ConstantInt::get(I->getContext(), -C->getValue());
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
+ if (!(C->getValue() + 1).isPowerOf2())
+ return false;
+ Y = ConstantInt::get(I->getContext(), ~C->getValue());
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ }
- return false;
+ X = I->getOperand(0);
+ Z = ConstantInt::getNullValue(C->getType());
+ return true;
}
/// foldLogOpOfMaskedICmpsHelper:
@@ -1099,7 +1104,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ if (Value *V = SimplifyAndInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
// (A|B)&(A|C) -> A|(B&C) etc
@@ -1543,23 +1548,6 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return 0;
}
-/// IsOneHotValue - Returns true for "one-hot" values (values where at most
-/// one bit can be set).
-static bool IsOneHotValue(Value *V) {
- // Match 1<<K.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
- if (BO->getOpcode() == Instruction::Shl) {
- ConstantInt *One = dyn_cast<ConstantInt>(BO->getOperand(0));
- return One && One->isOne();
- }
-
- // Check for power of two integer constants.
- if (ConstantInt *K = dyn_cast<ConstantInt>(V))
- return K->getValue().isPowerOf2();
-
- return false;
-}
-
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -1581,13 +1569,13 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *Mask = 0;
Value *Masked = 0;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- IsOneHotValue(LAnd->getOperand(1)) &&
- IsOneHotValue(RAnd->getOperand(1))) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1)) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1))) {
Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
} else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- IsOneHotValue(LAnd->getOperand(0)) &&
- IsOneHotValue(RAnd->getOperand(0))) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0)) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0))) {
Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
}
@@ -1917,7 +1905,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ if (Value *V = SimplifyOrInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
// (A&B)|(A&C) -> A&(B|C) etc
@@ -2249,7 +2237,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyXorInst(Op0, Op1, TD))
+ if (Value *V = SimplifyXorInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
// (A&B)^(A&C) -> A&(B^C) etc
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0cd7b14..0bc3ac7 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -14,9 +14,9 @@
#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -56,8 +56,8 @@ static Type *reduceToSingleValueType(Type *T) {
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), TD);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -77,7 +77,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// A single load+store correctly handles overlapping memory in the memmove
// case.
uint64_t Size = MemOpLength->getLimitedValue();
- assert(Size && "0-sized memory transfering should be removed already.");
+ assert(Size && "0-sized memory transferring should be removed already.");
if (Size > 8 || (Size&(Size-1)))
return 0; // If not 1/2/4/8 bytes, exit.
@@ -103,7 +103,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
- if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+ if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
SrcETy = reduceToSingleValueType(SrcETy);
@@ -152,7 +152,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), TD);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -274,7 +274,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
default: break;
case Intrinsic::objectsize: {
uint64_t Size;
- if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
+ if (getObjectSize(II->getArgOperand(0), Size, DL, TLI))
return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
return 0;
}
@@ -504,7 +504,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
@@ -513,7 +513,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL) >= 16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
@@ -524,7 +524,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
@@ -641,7 +641,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), TD);
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -654,7 +654,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::arm_neon_vmulls:
- case Intrinsic::arm_neon_vmullu: {
+ case Intrinsic::arm_neon_vmullu:
+ case Intrinsic::arm64_neon_smull:
+ case Intrinsic::arm64_neon_umull: {
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
@@ -664,42 +666,28 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
// Check for constant LHS & RHS - in this case we just simplify.
- bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu);
+ bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
+ II->getIntrinsicID() == Intrinsic::arm64_neon_umull);
VectorType *NewVT = cast<VectorType>(II->getType());
- unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth();
- if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) {
- if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
- VectorType* VT = cast<VectorType>(CV0->getType());
- SmallVector<Constant*, 4> NewElems;
- for (unsigned i = 0; i < VT->getNumElements(); ++i) {
- APInt CV0E =
- (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue();
- CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth);
- APInt CV1E =
- (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue();
- CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth);
- NewElems.push_back(
- ConstantInt::get(NewVT->getElementType(), CV0E * CV1E));
- }
- return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems));
+ if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
+ if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
+ CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
+ CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
+
+ return ReplaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
}
- // Couldn't simplify - cannonicalize constant to the RHS.
+ // Couldn't simplify - canonicalize constant to the RHS.
std::swap(Arg0, Arg1);
}
// Handle mul by one:
- if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
+ if (Constant *CV1 = dyn_cast<Constant>(Arg1))
if (ConstantInt *Splat =
- dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) {
- if (Splat->isOne()) {
- if (Zext)
- return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
- // else
- return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
- }
- }
- }
+ dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
+ if (Splat->isOne())
+ return CastInst::CreateIntegerCast(Arg0, II->getType(),
+ /*isSigned=*/!Zext);
break;
}
@@ -762,15 +750,15 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
const CastInst * const CI,
- const DataLayout * const TD,
+ const DataLayout * const DL,
const int ix) {
if (!CI->isLosslessCast())
return false;
- // The size of ByVal arguments is derived from the type, so we
+ // The size of ByVal or InAlloca arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
- if (!CS.isByValArgument(ix))
+ if (!CS.isByValOrInAllocaArgument(ix))
return true;
Type* SrcTy =
@@ -778,7 +766,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
- if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))
+ if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
return false;
return true;
}
@@ -787,7 +775,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
if (CI->getCalledFunction() == 0) return 0;
if (Value *With = Simplifier->optimizeCall(CI)) {
@@ -803,15 +791,14 @@ static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
// is good enough in practice and simpler than handling any number of casts.
Value *Underlying = TrampMem->stripPointerCasts();
if (Underlying != TrampMem &&
- (!Underlying->hasOneUse() || *Underlying->use_begin() != TrampMem))
+ (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
return 0;
if (!isa<AllocaInst>(Underlying))
return 0;
IntrinsicInst *InitTrampoline = 0;
- for (Value::use_iterator I = TrampMem->use_begin(), E = TrampMem->use_end();
- I != E; I++) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I);
+ for (User *U : TrampMem->users()) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
if (!II)
return 0;
if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
@@ -949,7 +936,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
*I = CI->getOperand(0);
Changed = true;
}
@@ -966,7 +953,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
- Instruction *I = tryOptimizeCall(CI, TD);
+ Instruction *I = tryOptimizeCall(CI, DL);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return EraseInstFromFunction(*I);
@@ -994,11 +981,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Type *OldRetTy = Caller->getType();
Type *NewRetTy = FT->getReturnType();
- if (NewRetTy->isStructTy())
- return false; // TODO: Handle multiple return values.
-
// Check to see if we are changing the return type...
if (OldRetTy != NewRetTy) {
+
+ if (NewRetTy->isStructTy())
+ return false; // TODO: Handle multiple return values.
+
if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
if (Callee->isDeclaration())
return false; // Cannot transform this return value.
@@ -1024,9 +1012,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// the critical edge). Bail out in this case.
if (!Caller->use_empty())
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
- for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
- UI != E; ++UI)
- if (PHINode *PN = dyn_cast<PHINode>(*UI))
+ for (User *U : II->users())
+ if (PHINode *PN = dyn_cast<PHINode>(U))
if (PN->getParent() == II->getNormalDest() ||
PN->getParent() == II->getUnwindDest())
return false;
@@ -1048,18 +1035,21 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
typeIncompatible(ParamTy, i + 1), i + 1))
return false; // Attribute not compatible with transformed value.
+ if (CS.isInAllocaArgument(i))
+ return false; // Cannot transform to and from inalloca.
+
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
if (ParamTy != ActTy &&
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
+ if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || DL == 0)
return false;
Type *CurElTy = ActTy->getPointerElementType();
- if (TD->getTypeAllocSize(CurElTy) !=
- TD->getTypeAllocSize(ParamPTy->getElementType()))
+ if (DL->getTypeAllocSize(CurElTy) !=
+ DL->getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
}
@@ -1223,6 +1213,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!Caller->use_empty())
ReplaceInstUsesWith(*Caller, NV);
+ else if (Caller->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(Caller, NV);
EraseInstFromFunction(*Caller);
return true;
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 72377dc..c2b862a 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,7 +14,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -79,7 +79,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
// This requires DataLayout to get the alloca alignment and size information.
- if (!TD) return 0;
+ if (!DL) return 0;
PointerType *PTy = cast<PointerType>(CI.getType());
@@ -91,8 +91,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
Type *CastElTy = PTy->getElementType();
if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
- unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
- unsigned CastElTyAlign = TD->getABITypeAlignment(CastElTy);
+ unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy);
if (CastElTyAlign < AllocElTyAlign) return 0;
// If the allocation has multiple uses, only promote it if we are strictly
@@ -100,14 +100,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// same, we open the door to infinite loops of various kinds.
if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
- uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
- uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
+ uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return 0;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
- uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
+ uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy);
if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
// See if we can satisfy the modulus by pulling a scale out of the array
@@ -161,9 +161,9 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
- // If we got a constantexpr back, try to simplify it with TD info.
+ // If we got a constantexpr back, try to simplify it with DL info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD, TLI);
+ C = ConstantFoldConstantExpression(CE, DL, TLI);
return C;
}
@@ -235,7 +235,7 @@ isEliminableCastPair(
const CastInst *CI, ///< The first cast instruction
unsigned opcode, ///< The opcode of the second cast instruction
Type *DstTy, ///< The target type for the second cast instruction
- DataLayout *TD ///< The target data for pointer size
+ const DataLayout *DL ///< The target data for pointer size
) {
Type *SrcTy = CI->getOperand(0)->getType(); // A from above
@@ -244,12 +244,12 @@ isEliminableCastPair(
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
- Type *SrcIntPtrTy = TD && SrcTy->isPtrOrPtrVectorTy() ?
- TD->getIntPtrType(SrcTy) : 0;
- Type *MidIntPtrTy = TD && MidTy->isPtrOrPtrVectorTy() ?
- TD->getIntPtrType(MidTy) : 0;
- Type *DstIntPtrTy = TD && DstTy->isPtrOrPtrVectorTy() ?
- TD->getIntPtrType(DstTy) : 0;
+ Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ?
+ DL->getIntPtrType(SrcTy) : 0;
+ Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ?
+ DL->getIntPtrType(MidTy) : 0;
+ Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ?
+ DL->getIntPtrType(DstTy) : 0;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy, SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy);
@@ -275,7 +275,7 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
// If this is another cast that can be eliminated, we prefer to have it
// eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
- if (isEliminableCastPair(CI, opc, Ty, TD))
+ if (isEliminableCastPair(CI, opc, Ty, DL))
return false;
// If this is a vector sext from a compare, then we don't want to break the
@@ -295,7 +295,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
if (Instruction::CastOps opc =
- isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
@@ -757,7 +757,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// If this zero extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this zext.
- if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
return 0;
// If one of the common conversion will work, do it.
@@ -858,37 +858,27 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
}
}
- // zext(trunc(t) & C) -> (t & zext(C)).
- if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
- if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
- if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
- Value *TI0 = TI->getOperand(0);
- if (TI0->getType() == CI.getType())
- return
- BinaryOperator::CreateAnd(TI0,
- ConstantExpr::getZExt(C, CI.getType()));
- }
-
- // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
- if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
- if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
- if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
- if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
- And->getOperand(1) == C)
- if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
- Value *TI0 = TI->getOperand(0);
- if (TI0->getType() == CI.getType()) {
- Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
- Value *NewAnd = Builder->CreateAnd(TI0, ZC);
- return BinaryOperator::CreateXor(NewAnd, ZC);
- }
- }
+ // zext(trunc(X) & C) -> (X & zext(C)).
+ Constant *C;
+ Value *X;
+ if (SrcI &&
+ match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
+ X->getType() == CI.getType())
+ return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
+
+ // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
+ Value *And;
+ if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
+ match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
+ X->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC);
+ }
// zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
- Value *X;
- if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) &&
- match(SrcI, m_Not(m_Value(X))) &&
- (!X->hasOneUse() || !isa<CmpInst>(X))) {
+ if (SrcI && SrcI->hasOneUse() &&
+ SrcI->getType()->getScalarType()->isIntegerTy(1) &&
+ match(SrcI, m_Not(m_Value(X))) && (!X->hasOneUse() || !isa<CmpInst>(X))) {
Value *New = Builder->CreateZExt(X, CI.getType());
return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
@@ -902,10 +892,10 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
ICmpInst::Predicate Pred = ICI->getPredicate();
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
// (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
// (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
- if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) ||
+ if ((Pred == ICmpInst::ICMP_SLT && Op1C->isNullValue()) ||
(Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) {
Value *Sh = ConstantInt::get(Op0->getType(),
@@ -918,7 +908,9 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
In = Builder->CreateNot(In, In->getName()+".not");
return ReplaceInstUsesWith(CI, In);
}
+ }
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
// If we know that only one bit of the LHS of the icmp can be set and we
// have an equality comparison with zero or a power of 2, we can transform
// the icmp and sext into bitwise/integer operations.
@@ -975,19 +967,6 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
}
}
- // vector (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed.
- if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
- if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) &&
- Op0->getType() == CI.getType()) {
- Type *EltTy = VTy->getElementType();
-
- // splat the shift constant to a constant vector.
- Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
- Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit");
- return ReplaceInstUsesWith(CI, In);
- }
- }
-
return 0;
}
@@ -1059,7 +1038,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// If this sign extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this sext.
- if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
+ if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
return 0;
if (Instruction *I = commonCastTransforms(CI))
@@ -1189,43 +1168,112 @@ static Value *LookThroughFPExtensions(Value *V) {
Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
-
- // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
- // smaller than the destination type, we can eliminate the truncate by doing
- // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
- // as many builtins (sqrt, etc).
+ // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
+ // simplify this expression to avoid one or more of the trunc/extend
+ // operations if we can do so without changing the numerical results.
+ //
+ // The exact manner in which the widths of the operands interact to limit
+ // what we can and cannot do safely varies from operation to operation, and
+ // is explained below in the various case statements.
BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
if (OpI && OpI->hasOneUse()) {
+ Value *LHSOrig = LookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSOrig = LookThroughFPExtensions(OpI->getOperand(1));
+ unsigned OpWidth = OpI->getType()->getFPMantissaWidth();
+ unsigned LHSWidth = LHSOrig->getType()->getFPMantissaWidth();
+ unsigned RHSWidth = RHSOrig->getType()->getFPMantissaWidth();
+ unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
+ unsigned DstWidth = CI.getType()->getFPMantissaWidth();
switch (OpI->getOpcode()) {
- default: break;
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- Type *SrcTy = OpI->getType();
- Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
- Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
- if (LHSTrunc->getType() != SrcTy &&
- RHSTrunc->getType() != SrcTy) {
- unsigned DstSize = CI.getType()->getScalarSizeInBits();
- // If the source types were both smaller than the destination type of
- // the cast, do this xform.
- if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
- RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
- LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
- RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
- return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ default: break;
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ // For addition and subtraction, the infinitely precise result can
+ // essentially be arbitrarily wide; proving that double rounding
+ // will not occur because the result of OpI is exact (as we will for
+ // FMul, for example) is hopeless. However, we *can* nonetheless
+ // frequently know that double rounding cannot occur (or that it is
+ // innocuous) by taking advantage of the specific structure of
+ // infinitely-precise results that admit double rounding.
+ //
+ // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
+ // to represent both sources, we can guarantee that the double
+ // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
+ // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
+ // for proof of this fact).
+ //
+ // Note: Figueroa does not consider the case where DstFormat !=
+ // SrcFormat. It's possible (likely even!) that this analysis
+ // could be tightened for those cases, but they are rare (the main
+ // case of interest here is (float)((double)float + float)).
+ if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
+ if (LHSOrig->getType() != CI.getType())
+ LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ if (RHSOrig->getType() != CI.getType())
+ RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ Instruction *RI =
+ BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig);
+ RI->copyFastMathFlags(OpI);
+ return RI;
}
- }
- break;
+ break;
+ case Instruction::FMul:
+ // For multiplication, the infinitely precise result has at most
+ // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
+ // that such a value can be exactly represented, then no double
+ // rounding can possibly occur; we can safely perform the operation
+ // in the destination format if it can represent both sources.
+ if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
+ if (LHSOrig->getType() != CI.getType())
+ LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ if (RHSOrig->getType() != CI.getType())
+ RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ Instruction *RI =
+ BinaryOperator::CreateFMul(LHSOrig, RHSOrig);
+ RI->copyFastMathFlags(OpI);
+ return RI;
+ }
+ break;
+ case Instruction::FDiv:
+ // For division, we again use the bound from Figueroa's
+ // dissertation. I am entirely certain that this bound can be
+ // tightened in the unbalanced operand case by an analysis based on
+ // the diophantine rational approximation bound, but the well-known
+ // condition used here is a good conservative first pass.
+ // TODO: Tighten bound via rigorous analysis of the unbalanced case.
+ if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
+ if (LHSOrig->getType() != CI.getType())
+ LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+ if (RHSOrig->getType() != CI.getType())
+ RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+ Instruction *RI =
+ BinaryOperator::CreateFDiv(LHSOrig, RHSOrig);
+ RI->copyFastMathFlags(OpI);
+ return RI;
+ }
+ break;
+ case Instruction::FRem:
+ // Remainder is straightforward. Remainder is always exact, so the
+ // type of OpI doesn't enter into things at all. We simply evaluate
+ // in whichever source type is larger, then convert to the
+ // destination type.
+ if (LHSWidth < SrcWidth)
+ LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType());
+ else if (RHSWidth <= SrcWidth)
+ RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType());
+ Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig);
+ if (Instruction *RI = dyn_cast<Instruction>(ExactResult))
+ RI->copyFastMathFlags(OpI);
+ return CastInst::CreateFPCast(ExactResult, CI.getType());
}
// (fptrunc (fneg x)) -> (fneg (fptrunc x))
if (BinaryOperator::isFNeg(OpI)) {
Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1),
CI.getType());
- return BinaryOperator::CreateFNeg(InnerTrunc);
+ Instruction *RI = BinaryOperator::CreateFNeg(InnerTrunc);
+ RI->copyFastMathFlags(OpI);
+ return RI;
}
}
@@ -1357,11 +1405,11 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
- if (TD) {
+ if (DL) {
unsigned AS = CI.getAddressSpace();
if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- TD->getPointerSizeInBits(AS)) {
- Type *Ty = TD->getIntPtrType(CI.getContext(), AS);
+ DL->getPointerSizeInBits(AS)) {
+ Type *Ty = DL->getIntPtrType(CI.getContext(), AS);
if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
@@ -1392,7 +1440,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return &CI;
}
- if (!TD)
+ if (!DL)
return commonCastTransforms(CI);
// If the GEP has a single use, and the base pointer is a bitcast, and the
@@ -1400,12 +1448,12 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
// instructions into fewer. This typically happens with unions and other
// non-type-safe code.
unsigned AS = GEP->getPointerAddressSpace();
- unsigned OffsetBits = TD->getPointerSizeInBits(AS);
+ unsigned OffsetBits = DL->getPointerSizeInBits(AS);
APInt Offset(OffsetBits, 0);
BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
if (GEP->hasOneUse() &&
BCI &&
- GEP->accumulateConstantOffset(*TD, Offset)) {
+ GEP->accumulateConstantOffset(*DL, Offset)) {
// Get the base pointer input of the bitcast, and the type it points to.
Value *OrigBase = BCI->getOperand(0);
SmallVector<Value*, 8> NewIndices;
@@ -1436,16 +1484,16 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (!TD)
+ if (!DL)
return commonPointerCastTransforms(CI);
Type *Ty = CI.getType();
unsigned AS = CI.getPointerAddressSpace();
- if (Ty->getScalarSizeInBits() == TD->getPointerSizeInBits(AS))
+ if (Ty->getScalarSizeInBits() == DL->getPointerSizeInBits(AS))
return commonPointerCastTransforms(CI);
- Type *PtrTy = TD->getIntPtrType(CI.getContext(), AS);
+ Type *PtrTy = DL->getIntPtrType(CI.getContext(), AS);
if (Ty->isVectorTy()) // Handle vectors of pointers.
PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
@@ -1741,11 +1789,6 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Type *DstElTy = DstPTy->getElementType();
Type *SrcElTy = SrcPTy->getElementType();
- // If the address spaces don't match, don't eliminate the bitcast, which is
- // required for changing types.
- if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
- return 0;
-
// If we are casting a alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
// There is no need to modify malloc calls because it is their bitcast that
@@ -1858,5 +1901,5 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
- return commonCastTransforms(CI);
+ return commonPointerCastTransforms(CI);
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9bb65ef..8c0ad52 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -15,11 +15,11 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/ConstantRange.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -28,15 +28,6 @@ static ConstantInt *getOne(Constant *C) {
return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
}
-/// AddOne - Add one to a ConstantInt
-static Constant *AddOne(Constant *C) {
- return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
-}
-/// SubOne - Subtract one from a ConstantInt
-static Constant *SubOne(Constant *C) {
- return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
-}
-
static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
}
@@ -227,7 +218,7 @@ Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
// We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && TD == 0)
+ if (!GEP->isInBounds() && DL == 0)
return 0;
Constant *Init = GV->getInitializer();
@@ -316,7 +307,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
- CompareRHS, TD, TLI);
+ CompareRHS, DL, TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
@@ -395,7 +386,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds()) {
- Type *IntPtrTy = TD->getIntPtrType(GEP->getType());
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
Idx = Builder->CreateTrunc(Idx, IntPtrTy);
@@ -484,8 +475,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// - Default to i32
if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
Ty = Idx->getType();
- else if (TD)
- Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+ else if (DL)
+ Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
else if (ArrayElementCount <= 32)
Ty = Type::getInt32Ty(Init->getContext());
@@ -512,7 +503,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
/// If we can't emit an optimized form for this expression, this returns null.
///
static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
- DataLayout &TD = *IC.getDataLayout();
+ const DataLayout &DL = *IC.getDataLayout();
gep_type_iterator GTI = gep_type_begin(GEP);
// Check to see if this gep only has a single variable index. If so, and if
@@ -529,9 +520,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
} else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*CI->getSExtValue();
}
} else {
@@ -547,7 +538,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
Value *VariableIdx = GEP->getOperand(i);
// Determine the scale factor of the variable element. For example, this is
// 4 if the variable index is into an array of i32.
- uint64_t VariableScale = TD.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType());
// Verify that there are no other variable indices. If so, emit the hard way.
for (++i, ++GTI; i != e; ++i, ++GTI) {
@@ -559,9 +550,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
} else {
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*CI->getSExtValue();
}
}
@@ -571,7 +562,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
- Type *IntPtrTy = TD.getIntPtrType(GEP->getOperand(0)->getType());
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType());
unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
if (Offset == 0) {
// Cast to intptrty in case a truncation occurs. If an extension is needed,
@@ -624,7 +615,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
RHS = BCI->getOperand(0);
Value *PtrBase = GEPLHS->getOperand(0);
- if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {
+ if (DL && PtrBase == RHS && GEPLHS->isInBounds()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
// This transformation (ignoring the base and scales) is valid because we
// know pointers can't overflow since the gep is inbounds. See if we can
@@ -657,7 +648,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If we're comparing GEPs with two base pointers that only differ in type
// and both GEPs have only constant indices or just one use, then fold
// the compare with the adjusted indices.
- if (TD && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
(GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
(GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
PtrBase->stripPointerCasts() ==
@@ -728,7 +719,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
- if (TD &&
+ if (DL &&
GEPsInBounds &&
(isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
@@ -1078,17 +1069,17 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
break;
- case Instruction::Xor: // (icmp pred (xor X, XorCST), CI)
- if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+ case Instruction::Xor: // (icmp pred (xor X, XorCst), CI)
+ if (ConstantInt *XorCst = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
// If this is a comparison that tests the signbit (X < 0) or (x > -1),
// fold the xor.
if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
(ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
Value *CompareVal = LHSI->getOperand(0);
- // If the sign bit of the XorCST is not set, there is no change to
+ // If the sign bit of the XorCst is not set, there is no change to
// the operation, just stop using the Xor.
- if (!XorCST->isNegative()) {
+ if (!XorCst->isNegative()) {
ICI.setOperand(0, CompareVal);
Worklist.Add(LHSI);
return &ICI;
@@ -1110,8 +1101,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (LHSI->hasOneUse()) {
// (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
- if (!ICI.isEquality() && XorCST->getValue().isSignBit()) {
- const APInt &SignBit = XorCST->getValue();
+ if (!ICI.isEquality() && XorCst->getValue().isSignBit()) {
+ const APInt &SignBit = XorCst->getValue();
ICmpInst::Predicate Pred = ICI.isSigned()
? ICI.getUnsignedPredicate()
: ICI.getSignedPredicate();
@@ -1120,8 +1111,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
// (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
- if (!ICI.isEquality() && XorCST->isMaxValue(true)) {
- const APInt &NotSignBit = XorCST->getValue();
+ if (!ICI.isEquality() && XorCst->isMaxValue(true)) {
+ const APInt &NotSignBit = XorCst->getValue();
ICmpInst::Predicate Pred = ICI.isSigned()
? ICI.getUnsignedPredicate()
: ICI.getSignedPredicate();
@@ -1134,20 +1125,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
// iff -C is a power of 2
if (ICI.getPredicate() == ICmpInst::ICMP_UGT &&
- XorCST->getValue() == ~RHSV && (RHSV + 1).isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCST);
+ XorCst->getValue() == ~RHSV && (RHSV + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCst);
// (icmp ult (xor X, C), -C) -> (icmp uge X, C)
// iff -C is a power of 2
if (ICI.getPredicate() == ICmpInst::ICMP_ULT &&
- XorCST->getValue() == -RHSV && RHSV.isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCST);
+ XorCst->getValue() == -RHSV && RHSV.isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCst);
}
break;
- case Instruction::And: // (icmp pred (and X, AndCST), RHS)
+ case Instruction::And: // (icmp pred (and X, AndCst), RHS)
if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
LHSI->getOperand(0)->hasOneUse()) {
- ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+ ConstantInt *AndCst = cast<ConstantInt>(LHSI->getOperand(1));
// If the LHS is an AND of a truncating cast, we can widen the
// and/compare to be the input width without changing the value
@@ -1158,10 +1149,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// Extending a relational comparison when we're checking the sign
// bit would not work.
if (ICI.isEquality() ||
- (!AndCST->isNegative() && RHSV.isNonNegative())) {
+ (!AndCst->isNegative() && RHSV.isNonNegative())) {
Value *NewAnd =
Builder->CreateAnd(Cast->getOperand(0),
- ConstantExpr::getZExt(AndCST, Cast->getSrcTy()));
+ ConstantExpr::getZExt(AndCst, Cast->getSrcTy()));
NewAnd->takeName(LHSI);
return new ICmpInst(ICI.getPredicate(), NewAnd,
ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
@@ -1177,7 +1168,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
Value *NewAnd =
Builder->CreateAnd(Cast->getOperand(0),
- ConstantExpr::getTrunc(AndCST, Ty));
+ ConstantExpr::getTrunc(AndCst, Ty));
NewAnd->takeName(LHSI);
return new ICmpInst(ICI.getPredicate(), NewAnd,
ConstantExpr::getTrunc(RHS, Ty));
@@ -1194,45 +1185,54 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
ConstantInt *ShAmt;
ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
- Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift.
- Type *AndTy = AndCST->getType(); // Type of the and.
-
- // We can fold this as long as we can't shift unknown bits
- // into the mask. This can happen with signed shift
- // rights, as they sign-extend. With logical shifts,
- // we must still make sure the comparison is not signed
- // because we are effectively changing the
- // position of the sign bit (PR17827).
- // TODO: We can relax these constraints a bit more.
+
+ // This seemingly simple opportunity to fold away a shift turns out to
+ // be rather complicated. See PR17827
+ // ( http://llvm.org/bugs/show_bug.cgi?id=17827 ) for details.
if (ShAmt) {
bool CanFold = false;
unsigned ShiftOpcode = Shift->getOpcode();
if (ShiftOpcode == Instruction::AShr) {
- // To test for the bad case of the signed shr, see if any
- // of the bits shifted in could be tested after the mask.
- uint32_t TyBits = Ty->getPrimitiveSizeInBits();
- int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
-
- uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
- if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
- AndCST->getValue()) == 0)
+ // There may be some constraints that make this possible,
+ // but nothing simple has been discovered yet.
+ CanFold = false;
+ } else if (ShiftOpcode == Instruction::Shl) {
+ // For a left shift, we can fold if the comparison is not signed.
+ // We can also fold a signed comparison if the mask value and
+ // comparison value are not negative. These constraints may not be
+ // obvious, but we can prove that they are correct using an SMT
+ // solver.
+ if (!ICI.isSigned() || (!AndCst->isNegative() && !RHS->isNegative()))
+ CanFold = true;
+ } else if (ShiftOpcode == Instruction::LShr) {
+ // For a logical right shift, we can fold if the comparison is not
+ // signed. We can also fold a signed comparison if the shifted mask
+ // value and the shifted comparison value are not negative.
+ // These constraints may not be obvious, but we can prove that they
+ // are correct using an SMT solver.
+ if (!ICI.isSigned())
CanFold = true;
- } else if (ShiftOpcode == Instruction::Shl ||
- ShiftOpcode == Instruction::LShr) {
- CanFold = !ICI.isSigned();
+ else {
+ ConstantInt *ShiftedAndCst =
+ cast<ConstantInt>(ConstantExpr::getShl(AndCst, ShAmt));
+ ConstantInt *ShiftedRHSCst =
+ cast<ConstantInt>(ConstantExpr::getShl(RHS, ShAmt));
+
+ if (!ShiftedAndCst->isNegative() && !ShiftedRHSCst->isNegative())
+ CanFold = true;
+ }
}
if (CanFold) {
Constant *NewCst;
- if (Shift->getOpcode() == Instruction::Shl)
+ if (ShiftOpcode == Instruction::Shl)
NewCst = ConstantExpr::getLShr(RHS, ShAmt);
else
NewCst = ConstantExpr::getShl(RHS, ShAmt);
// Check to see if we are shifting out any of the bits being
// compared.
- if (ConstantExpr::get(Shift->getOpcode(),
- NewCst, ShAmt) != RHS) {
+ if (ConstantExpr::get(ShiftOpcode, NewCst, ShAmt) != RHS) {
// If we shifted bits out, the fold is not going to work out.
// As a special case, check to see if this means that the
// result is always true or false now.
@@ -1242,12 +1242,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return ReplaceInstUsesWith(ICI, Builder->getTrue());
} else {
ICI.setOperand(1, NewCst);
- Constant *NewAndCST;
- if (Shift->getOpcode() == Instruction::Shl)
- NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+ Constant *NewAndCst;
+ if (ShiftOpcode == Instruction::Shl)
+ NewAndCst = ConstantExpr::getLShr(AndCst, ShAmt);
else
- NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
- LHSI->setOperand(1, NewAndCST);
+ NewAndCst = ConstantExpr::getShl(AndCst, ShAmt);
+ LHSI->setOperand(1, NewAndCst);
LHSI->setOperand(0, Shift->getOperand(0));
Worklist.Add(Shift); // Shift is dead.
return &ICI;
@@ -1264,10 +1264,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// Compute C << Y.
Value *NS;
if (Shift->getOpcode() == Instruction::LShr) {
- NS = Builder->CreateShl(AndCST, Shift->getOperand(1));
+ NS = Builder->CreateShl(AndCst, Shift->getOperand(1));
} else {
// Insert a logical shift.
- NS = Builder->CreateLShr(AndCST, Shift->getOperand(1));
+ NS = Builder->CreateLShr(AndCst, Shift->getOperand(1));
}
// Compute X & (C << Y).
@@ -1278,12 +1278,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return &ICI;
}
- // Replace ((X & AndCST) > RHSV) with ((X & AndCST) != 0), if any
- // bit set in (X & AndCST) will produce a result greater than RHSV.
+ // Replace ((X & AndCst) > RHSV) with ((X & AndCst) != 0), if any
+ // bit set in (X & AndCst) will produce a result greater than RHSV.
if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
- unsigned NTZ = AndCST->getValue().countTrailingZeros();
- if ((NTZ < AndCST->getBitWidth()) &&
- APInt::getOneBitSet(AndCST->getBitWidth(), NTZ).ugt(RHSV))
+ unsigned NTZ = AndCst->getValue().countTrailingZeros();
+ if ((NTZ < AndCst->getBitWidth()) &&
+ APInt::getOneBitSet(AndCst->getBitWidth(), NTZ).ugt(RHSV))
return new ICmpInst(ICmpInst::ICMP_NE, LHSI,
Constant::getNullValue(RHS->getType()));
}
@@ -1792,8 +1792,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
- if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
+ if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
+ DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
@@ -1937,16 +1937,15 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// and truncates that discard the high bits of the add. Verify that this is
// the case.
Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
- for (Value::use_iterator UI = OrigAdd->use_begin(), E = OrigAdd->use_end();
- UI != E; ++UI) {
- if (*UI == AddWithCst) continue;
+ for (User *U : OrigAdd->users()) {
+ if (U == AddWithCst) continue;
// Only accept truncates for now. We would really like a nice recursive
// predicate like SimplifyDemandedBits, but which goes downwards the use-def
// chain to see which bits of a value are actually demanded. If the
// original add had another add which was then immediately truncated, we
// could still do the transformation.
- TruncInst *TI = dyn_cast<TruncInst>(*UI);
+ TruncInst *TI = dyn_cast<TruncInst>(U);
if (TI == 0 ||
TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
}
@@ -2048,7 +2047,7 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
/// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst
/// should be swapped.
-/// The descision is based on how many times these two operands are reused
+/// The decision is based on how many times these two operands are reused
/// as subtract operands and their positions in those instructions.
/// The rationale is that several architectures use the same instruction for
/// both subtract and cmp, thus it is better if the order of those operands
@@ -2064,12 +2063,12 @@ static bool swapMayExposeCSEOpportunities(const Value * Op0,
// Each time Op0 is the first operand, count -1: swapping is bad, the
// subtract has already the same layout as the compare.
// Each time Op0 is the second operand, count +1: swapping is good, the
- // subtract has a diffrent layout as the compare.
+ // subtract has a different layout than the compare.
// At the end, if the benefit is greater than 0, Op0 should come second to
// expose more CSE opportunities.
int GlobalSwapBenefits = 0;
- for (Value::const_use_iterator UI = Op0->use_begin(), UIEnd = Op0->use_end(); UI != UIEnd; ++UI) {
- const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(*UI);
+ for (const User *U : Op0->users()) {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(U);
if (!BinOp || BinOp->getOpcode() != Instruction::Sub)
continue;
// If Op0 is the first argument, this is not beneficial to swap the
@@ -2104,7 +2103,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Changed = true;
}
- if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
// comparing -val or val with non-zero is the same as just comparing val
@@ -2172,8 +2171,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (Ty->isIntOrIntVectorTy())
BitWidth = Ty->getScalarSizeInBits();
- else if (TD) // Pointers require TD info to get their size.
- BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+ else if (DL) // Pointers require DL info to get their size.
+ BitWidth = DL->getTypeSizeInBits(Ty->getScalarType());
bool isSignBit = false;
@@ -2468,7 +2467,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// operands has at least one user besides the compare (the select),
// which would often largely negate the benefit of folding anyway.
if (I.hasOneUse())
- if (SelectInst *SI = dyn_cast<SelectInst>(*I.use_begin()))
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
(SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
return 0;
@@ -2532,8 +2531,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
- if (RHSC->isNullValue() && TD &&
- TD->getIntPtrType(RHSC->getType()) ==
+ if (RHSC->isNullValue() && DL &&
+ DL->getIntPtrType(RHSC->getType()) ==
LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
@@ -3229,7 +3228,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
// Simplify 'fcmp pred X, X'
@@ -3313,31 +3312,6 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
return NV;
break;
- case Instruction::Select: {
- // If either operand of the select is a constant, we can fold the
- // comparison into the select arms, which will cause one to be
- // constant folded and the select turned into a bitwise or.
- Value *Op1 = 0, *Op2 = 0;
- if (LHSI->hasOneUse()) {
- if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
- // Fold the known value into the constant operand.
- Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
- // Insert a new FCmp of the other select operand.
- Op2 = Builder->CreateFCmp(I.getPredicate(),
- LHSI->getOperand(2), RHSC, I.getName());
- } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
- // Fold the known value into the constant operand.
- Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);
- // Insert a new FCmp of the other select operand.
- Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1),
- RHSC, I.getName());
- }
- }
-
- if (Op1)
- return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
- break;
- }
case Instruction::FSub: {
// fcmp pred (fneg x), C -> fcmp swap(pred) x, -C
Value *Op;
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4c861b3..dcc8b0f 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -51,22 +51,22 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
// ahead and replace the value with the global, this lets the caller quickly
// eliminate the markers.
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- User *U = cast<Instruction>(*UI);
+ for (Use &U : V->uses()) {
+ Instruction *I = cast<Instruction>(U.getUser());
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
// Ignore non-volatile loads, they are always ok.
if (!LI->isSimple()) return false;
continue;
}
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
// If uses of the bitcast are ok, we are ok.
if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
return false;
continue;
}
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
if (!isOnlyCopiedFromConstantGlobal(
@@ -75,16 +75,20 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
- if (CallSite CS = U) {
+ if (CallSite CS = I) {
// If this is the function being called then we treat it like a load and
// ignore it.
- if (CS.isCallee(UI))
+ if (CS.isCallee(&U))
continue;
+ // Inalloca arguments are clobbered by the call.
+ unsigned ArgNo = CS.getArgumentNo(&U);
+ if (CS.isInAllocaArgument(ArgNo))
+ return false;
+
// If this is a readonly/readnone call site, then we know it is just a
// load (but one that potentially returns the value itself), so we can
// ignore it if we know that the value isn't captured.
- unsigned ArgNo = CS.getArgumentNo(UI);
if (CS.onlyReadsMemory() &&
(CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
continue;
@@ -96,7 +100,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
}
// Lifetime intrinsics can be handled by the caller.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end) {
assert(II->use_empty() && "Lifetime markers have no result to use!");
@@ -107,13 +111,13 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
// If this isn't our memcpy/memmove, reject it as something we can't
// handle.
- MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
if (MI == 0)
return false;
// If the transfer is using the alloca as a source of the transfer, then
// ignore it since it is a load (unless the transfer is volatile).
- if (UI.getOperandNo() == 1) {
+ if (U.getOperandNo() == 1) {
if (MI->isVolatile()) return false;
continue;
}
@@ -126,7 +130,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (IsOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 0) return false;
+ if (U.getOperandNo() != 0) return false;
// If the source of the memcpy/move is not a constant global, reject it.
if (!pointsToConstantGlobal(MI->getSource()))
@@ -153,8 +157,8 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
- if (TD) {
- Type *IntPtrTy = TD->getIntPtrType(AI.getType());
+ if (DL) {
+ Type *IntPtrTy = DL->getIntPtrType(AI.getType());
if (AI.getArraySize()->getType() != IntPtrTy) {
Value *V = Builder->CreateIntCast(AI.getArraySize(),
IntPtrTy, false);
@@ -180,8 +184,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Now that I is pointing to the first non-allocation-inst in the block,
// insert our getelementptr instruction...
//
- Type *IdxTy = TD
- ? TD->getIntPtrType(AI.getType())
+ Type *IdxTy = DL
+ ? DL->getIntPtrType(AI.getType())
: Type::getInt64Ty(AI.getContext());
Value *NullIdx = Constant::getNullValue(IdxTy);
Value *Idx[2] = { NullIdx, NullIdx };
@@ -197,15 +201,15 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
}
- if (TD && AI.getAllocatedType()->isSized()) {
+ if (DL && AI.getAllocatedType()->isSized()) {
// If the alignment is 0 (unspecified), assign it the preferred alignment.
if (AI.getAlignment() == 0)
- AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
+ AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));
// Move all alloca's of zero byte objects to the entry block and merge them
// together. Note that we only do this for alloca's, because malloc should
// allocate and return a unique pointer, even for a zero byte allocation.
- if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) {
+ if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
// For a zero sized alloca there is no point in doing an array allocation.
// This is helpful if the array size is a complicated expression not used
// elsewhere.
@@ -223,7 +227,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// dominance as the array size was forced to a constant earlier already.
AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
- TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+ DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
AI.moveBefore(FirstInst);
return &AI;
}
@@ -232,7 +236,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// assign it the preferred alignment.
if (EntryAI->getAlignment() == 0)
EntryAI->setAlignment(
- TD->getPrefTypeAlignment(EntryAI->getAllocatedType()));
+ DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
// Replace this zero-sized alloca with the one at the start of the entry
// block after ensuring that the address will be aligned enough for both
// types.
@@ -256,7 +260,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
- AI.getAlignment(), TD);
+ AI.getAlignment(), DL);
if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
@@ -281,7 +285,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
- const DataLayout *TD) {
+ const DataLayout *DL) {
User *CI = cast<User>(LI.getOperand(0));
Value *CastOp = CI->getOperand(0);
@@ -303,8 +307,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
if (Constant *CSrc = dyn_cast<Constant>(CastOp))
if (ASrcTy->getNumElements() != 0) {
- Type *IdxTy = TD
- ? TD->getIntPtrType(SrcTy)
+ Type *IdxTy = DL
+ ? DL->getIntPtrType(SrcTy)
: Type::getInt64Ty(SrcTy->getContext());
Value *Idx = Constant::getNullValue(IdxTy);
Value *Idxs[2] = { Idx, Idx };
@@ -331,6 +335,13 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
NewLoad->setAlignment(LI.getAlignment());
NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
// Now cast the result of the load.
+ PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
+ PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
+ if (OldTy && NewTy &&
+ OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
+ return new AddrSpaceCastInst(NewLoad, LI.getType());
+ }
+
return new BitCastInst(NewLoad, LI.getType());
}
}
@@ -342,12 +353,12 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
// Attempt to improve the alignment.
- if (TD) {
+ if (DL) {
unsigned KnownAlign =
- getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD);
+ getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),DL);
unsigned LoadAlign = LI.getAlignment();
unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
- TD->getABITypeAlignment(LI.getType());
+ DL->getABITypeAlignment(LI.getType());
if (KnownAlign > EffectiveLoadAlign)
LI.setAlignment(KnownAlign);
@@ -357,7 +368,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// load (cast X) --> cast (load X) iff safe.
if (isa<CastInst>(Op))
- if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
return Res;
// None of the following transforms are legal for volatile/atomic loads.
@@ -401,7 +412,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// Instcombine load (constantexpr_cast global) -> cast (load global)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
if (CE->isCast())
- if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
+ if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
return Res;
if (Op->hasOneUse()) {
@@ -418,8 +429,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
unsigned Align = LI.getAlignment();
- if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
SI->getOperand(1)->getName()+".val");
LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
@@ -497,28 +508,39 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return 0;
- // If the pointers point into different address spaces or if they point to
- // values with different sizes, we can't do the transformation.
+ // If the pointers point into different address spaces don't do the
+ // transformation.
+ if (SrcTy->getAddressSpace() !=
+ cast<PointerType>(CI->getType())->getAddressSpace())
+ return 0;
+
+ // If the pointers point to values of different sizes don't do the
+ // transformation.
if (!IC.getDataLayout() ||
- SrcTy->getAddressSpace() !=
- cast<PointerType>(CI->getType())->getAddressSpace() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
return 0;
+ // If the pointers point to pointers to different address spaces don't do the
+ // transformation. It is not safe to introduce an addrspacecast instruction in
+ // this case since, depending on the target, addrspacecast may not be a no-op
+ // cast.
+ if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
+ SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
+ return 0;
+
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before
// the store, cast the value to be stored.
Value *NewCast;
- Value *SIOp0 = SI.getOperand(0);
Instruction::CastOps opcode = Instruction::BitCast;
- Type* CastSrcTy = SIOp0->getType();
+ Type* CastSrcTy = DestPTy;
Type* CastDstTy = SrcPTy;
if (CastDstTy->isPointerTy()) {
if (CastSrcTy->isIntegerTy())
opcode = Instruction::IntToPtr;
} else if (CastDstTy->isIntegerTy()) {
- if (SIOp0->getType()->isPointerTy())
+ if (CastSrcTy->isPointerTy())
opcode = Instruction::PtrToInt;
}
@@ -527,6 +549,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
if (!NewGEPIndices.empty())
CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
+ Value *SIOp0 = SI.getOperand(0);
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
SI.setOperand(0, NewCast);
@@ -568,13 +591,13 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
Value *Ptr = SI.getOperand(1);
// Attempt to improve the alignment.
- if (TD) {
+ if (DL) {
unsigned KnownAlign =
- getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
- TD);
+ getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
+ DL);
unsigned StoreAlign = SI.getAlignment();
unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
- TD->getABITypeAlignment(Val->getType());
+ DL->getABITypeAlignment(Val->getType());
if (KnownAlign > EffectiveStoreAlign)
SI.setAlignment(KnownAlign);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index a759548..71fbb6c 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -15,7 +15,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -118,7 +118,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyMulInst(Op0, Op1, TD))
+ if (Value *V = SimplifyMulInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -158,15 +158,6 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
- { Value *X; ConstantInt *C1;
- if (Op0->hasOneUse() &&
- match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
- Value *Add = Builder->CreateMul(X, CI);
- return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
- }
- }
-
// (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n
// (Y + const) * (-(2**n)) -> (-const - Y) * (2**n), for positive nonzero n
// The "* (2**n)" thus becomes a potential shifting opportunity.
@@ -201,6 +192,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (isa<PHINode>(Op0))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
+
+ // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
+ {
+ Value *X;
+ Constant *C1;
+ if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) {
+ Value *Add = Builder->CreateMul(X, Op1);
+ return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, Op1));
+ }
+ }
}
if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
@@ -247,7 +248,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
/// i1 mul -> i1 and.
- if (I.getType()->isIntegerTy(1))
+ if (I.getType()->getScalarType()->isIntegerTy(1))
return BinaryOperator::CreateAnd(Op0, Op1);
// X*(1 << Y) --> X << Y
@@ -313,16 +314,41 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
return;
- ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
- if (CFP && CFP->isExactlyValue(0.5)) {
+ if (match(I->getOperand(0), m_SpecificFP(0.5)))
Y = I->getOperand(1);
- return;
- }
- CFP = dyn_cast<ConstantFP>(I->getOperand(1));
- if (CFP && CFP->isExactlyValue(0.5))
+ else if (match(I->getOperand(1), m_SpecificFP(0.5)))
Y = I->getOperand(0);
}
+/// Return true iff \p C is a floating-point constant whose value is finite and
+/// non-zero. For a vector constant, every element must be a ConstantFP that
+/// is finite and non-zero; any other kind of constant yields false.
+static bool isFiniteNonZeroFp(Constant *C) {
+  if (C->getType()->isVectorTy()) {
+    for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
+         ++I) {
+      // getAggregateElement() may return null when the element cannot be
+      // extracted (e.g. the vector is a constant expression). dyn_cast<>
+      // asserts on a null input, so use the null-tolerant variant and let
+      // the !CFP check below reject such elements.
+      ConstantFP *CFP =
+          dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
+      if (!CFP || !CFP->getValueAPF().isFiniteNonZero())
+        return false;
+    }
+    return true;
+  }
+
+  return isa<ConstantFP>(C) &&
+         cast<ConstantFP>(C)->getValueAPF().isFiniteNonZero();
+}
+
+/// Return true iff \p C is a floating-point constant whose value is normal
+/// according to APFloat::isNormal(). For a vector constant, every element
+/// must be a ConstantFP with a normal value; any other kind of constant
+/// yields false.
+static bool isNormalFp(Constant *C) {
+  if (C->getType()->isVectorTy()) {
+    for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
+         ++I) {
+      // getAggregateElement() may return null when the element cannot be
+      // extracted (e.g. the vector is a constant expression). dyn_cast<>
+      // asserts on a null input, so use the null-tolerant variant and let
+      // the !CFP check below reject such elements.
+      ConstantFP *CFP =
+          dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
+      if (!CFP || !CFP->getValueAPF().isNormal())
+        return false;
+    }
+    return true;
+  }
+
+  return isa<ConstantFP>(C) && cast<ConstantFP>(C)->getValueAPF().isNormal();
+}
+
/// Helper function of InstCombiner::visitFMul(BinaryOperator &). It returns
/// true iff the given value is FMul or FDiv with one and only one operand
/// being a normal constant (i.e. not Zero/NaN/Infinity).
@@ -332,19 +358,13 @@ static bool isFMulOrFDivWithConstant(Value *V) {
I->getOpcode() != Instruction::FDiv))
return false;
- ConstantFP *C0 = dyn_cast<ConstantFP>(I->getOperand(0));
- ConstantFP *C1 = dyn_cast<ConstantFP>(I->getOperand(1));
+ Constant *C0 = dyn_cast<Constant>(I->getOperand(0));
+ Constant *C1 = dyn_cast<Constant>(I->getOperand(1));
if (C0 && C1)
return false;
- return (C0 && C0->getValueAPF().isFiniteNonZero()) ||
- (C1 && C1->getValueAPF().isFiniteNonZero());
-}
-
-static bool isNormalFp(const ConstantFP *C) {
- const APFloat &Flt = C->getValueAPF();
- return Flt.isNormal();
+ return (C0 && isFiniteNonZeroFp(C0)) || (C1 && isFiniteNonZeroFp(C1));
}
/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
@@ -354,41 +374,41 @@ static bool isNormalFp(const ConstantFP *C) {
/// resulting expression. Note that this function could return NULL in
/// case the constants cannot be folded into a normal floating-point.
///
-Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
+Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C,
Instruction *InsertBefore) {
assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
Value *Opnd0 = FMulOrDiv->getOperand(0);
Value *Opnd1 = FMulOrDiv->getOperand(1);
- ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0);
- ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1);
+ Constant *C0 = dyn_cast<Constant>(Opnd0);
+ Constant *C1 = dyn_cast<Constant>(Opnd1);
BinaryOperator *R = 0;
// (X * C0) * C => X * (C0*C)
if (FMulOrDiv->getOpcode() == Instruction::FMul) {
Constant *F = ConstantExpr::getFMul(C1 ? C1 : C0, C);
- if (isNormalFp(cast<ConstantFP>(F)))
+ if (isNormalFp(F))
R = BinaryOperator::CreateFMul(C1 ? Opnd0 : Opnd1, F);
} else {
if (C0) {
// (C0 / X) * C => (C0 * C) / X
if (FMulOrDiv->hasOneUse()) {
// It would otherwise introduce another div.
- ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
+ Constant *F = ConstantExpr::getFMul(C0, C);
if (isNormalFp(F))
R = BinaryOperator::CreateFDiv(F, Opnd1);
}
} else {
// (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
- ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));
+ Constant *F = ConstantExpr::getFDiv(C, C1);
if (isNormalFp(F)) {
R = BinaryOperator::CreateFMul(Opnd0, F);
} else {
// (X / C1) * C => X / (C1/C)
Constant *F = ConstantExpr::getFDiv(C1, C);
- if (isNormalFp(cast<ConstantFP>(F)))
+ if (isNormalFp(F))
R = BinaryOperator::CreateFDiv(Opnd0, F);
}
}
@@ -409,7 +429,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (isa<Constant>(Op0))
std::swap(Op0, Op1);
- if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD))
+ if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL))
return ReplaceInstUsesWith(I, V);
bool AllowReassociate = I.hasUnsafeAlgebra();
@@ -425,17 +445,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
- ConstantFP *C = dyn_cast<ConstantFP>(Op1);
- if (C && AllowReassociate && C->getValueAPF().isFiniteNonZero()) {
+ // (fmul X, -1.0) --> (fsub -0.0, X)
+ if (match(Op1, m_SpecificFP(-1.0))) {
+ Constant *NegZero = ConstantFP::getNegativeZero(Op1->getType());
+ Instruction *RI = BinaryOperator::CreateFSub(NegZero, Op0);
+ RI->copyFastMathFlags(&I);
+ return RI;
+ }
+
+ Constant *C = cast<Constant>(Op1);
+ if (AllowReassociate && isFiniteNonZeroFp(C)) {
// Let MDC denote an expression in one of these forms:
// X * C, C/X, X/C, where C is a constant.
//
// Try to simplify "MDC * Constant"
- if (isFMulOrFDivWithConstant(Op0)) {
- Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I);
- if (V)
+ if (isFMulOrFDivWithConstant(Op0))
+ if (Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I))
return ReplaceInstUsesWith(I, V);
- }
// (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
Instruction *FAddSub = dyn_cast<Instruction>(Op0);
@@ -444,8 +470,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
FAddSub->getOpcode() == Instruction::FSub)) {
Value *Opnd0 = FAddSub->getOperand(0);
Value *Opnd1 = FAddSub->getOperand(1);
- ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0);
- ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1);
+ Constant *C0 = dyn_cast<Constant>(Opnd0);
+ Constant *C1 = dyn_cast<Constant>(Opnd1);
bool Swap = false;
if (C0) {
std::swap(C0, C1);
@@ -453,10 +479,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Swap = true;
}
- if (C1 && C1->getValueAPF().isFiniteNonZero() &&
- isFMulOrFDivWithConstant(Opnd0)) {
+ if (C1 && isFiniteNonZeroFp(C1) && isFMulOrFDivWithConstant(Opnd0)) {
Value *M1 = ConstantExpr::getFMul(C1, C);
- Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
+ Value *M0 = isNormalFp(cast<Constant>(M1)) ?
foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
0;
if (M0 && M1) {
@@ -515,8 +540,11 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
// -X * -Y => X*Y
- if (N1)
- return BinaryOperator::CreateFMul(N0, N1);
+ if (N1) {
+ Value *FMul = Builder->CreateFMul(N0, N1);
+ FMul->takeName(&I);
+ return ReplaceInstUsesWith(I, FMul);
+ }
if (Opnd0->hasOneUse()) {
// -X * Y => -(X*Y) (Promote negation as high as possible)
@@ -564,7 +592,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (!match(RHS, m_UIToFP(m_Value(C))))
std::swap(LHS, RHS);
- if (match(RHS, m_UIToFP(m_Value(C))) && C->getType()->isIntegerTy(1)) {
+ if (match(RHS, m_UIToFP(m_Value(C))) &&
+ C->getType()->getScalarType()->isIntegerTy(1)) {
B = LHS;
Value *Zero = ConstantFP::getNegativeZero(B->getType());
return SelectInst::Create(C, B, Zero);
@@ -579,7 +608,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
std::swap(LHS, RHS);
if (match(RHS, m_FSub(m_FPOne(), m_UIToFP(m_Value(C)))) &&
- C->getType()->isIntegerTy(1)) {
+ C->getType()->getScalarType()->isIntegerTy(1)) {
A = LHS;
Value *Zero = ConstantFP::getNegativeZero(A->getType());
return SelectInst::Create(C, Zero, A);
@@ -846,7 +875,7 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+ if (Value *V = SimplifyUDivInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
// Handle the integer div common cases
@@ -854,13 +883,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return Common;
// (x lshr C1) udiv C2 --> x udiv (C2 << C1)
- if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
+ if (Constant *C2 = dyn_cast<Constant>(Op1)) {
Value *X;
- ConstantInt *C1;
- if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {
- APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));
- return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));
- }
+ Constant *C1;
+ if (match(Op0, m_LShr(m_Value(X), m_Constant(C1))))
+ return BinaryOperator::CreateUDiv(X, ConstantExpr::getShl(C2, C1));
}
// (zext A) udiv (zext B) --> zext (A udiv B)
@@ -907,18 +934,18 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifySDivInst(Op0, Op1, TD))
+ if (Value *V = SimplifySDivInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- // sdiv X, -1 == -X
- if (RHS->isAllOnesValue())
- return BinaryOperator::CreateNeg(Op0);
+ // sdiv X, -1 == -X
+ if (match(Op1, m_AllOnes()))
+ return BinaryOperator::CreateNeg(Op0);
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
// sdiv X, C --> ashr exact X, log2(C)
if (I.isExact() && RHS->getValue().isNonNegative() &&
RHS->getValue().isPowerOf2()) {
@@ -926,7 +953,9 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
RHS->getValue().exactLogBase2());
return BinaryOperator::CreateExactAShr(Op0, ShAmt, I.getName());
}
+ }
+ if (Constant *RHS = dyn_cast<Constant>(Op1)) {
// -X/C --> X/-C provided the negation doesn't overflow.
if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap())
@@ -965,9 +994,12 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
/// returned; otherwise, NULL is returned.
///
static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
- ConstantFP *Divisor,
+ Constant *Divisor,
bool AllowReciprocal) {
- const APFloat &FpVal = Divisor->getValueAPF();
+ if (!isa<ConstantFP>(Divisor)) // TODO: handle vectors.
+ return 0;
+
+ const APFloat &FpVal = cast<ConstantFP>(Divisor)->getValueAPF();
APFloat Reciprocal(FpVal.getSemantics());
bool Cvt = FpVal.getExactInverse(&Reciprocal);
@@ -988,7 +1020,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
+ if (Value *V = SimplifyFDivInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
if (isa<Constant>(Op0))
@@ -999,32 +1031,29 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
bool AllowReassociate = I.hasUnsafeAlgebra();
bool AllowReciprocal = I.hasAllowReciprocal();
- if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
if (AllowReassociate) {
- ConstantFP *C1 = 0;
- ConstantFP *C2 = Op1C;
+ Constant *C1 = 0;
+ Constant *C2 = Op1C;
Value *X;
Instruction *Res = 0;
- if (match(Op0, m_FMul(m_Value(X), m_ConstantFP(C1)))) {
+ if (match(Op0, m_FMul(m_Value(X), m_Constant(C1)))) {
// (X*C1)/C2 => X * (C1/C2)
//
Constant *C = ConstantExpr::getFDiv(C1, C2);
- const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
- if (F.isNormal())
+ if (isNormalFp(C))
Res = BinaryOperator::CreateFMul(X, C);
- } else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) {
+ } else if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
// (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed]
//
Constant *C = ConstantExpr::getFMul(C1, C2);
- const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
- if (F.isNormal()) {
- Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
- AllowReciprocal);
+ if (isNormalFp(C)) {
+ Res = CvtFDivConstToReciprocal(X, C, AllowReciprocal);
if (!Res)
Res = BinaryOperator::CreateFDiv(X, C);
}
@@ -1037,39 +1066,37 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
}
// X / C => X * 1/C
- if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal))
+ if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal)) {
+ T->copyFastMathFlags(&I);
return T;
+ }
return 0;
}
- if (AllowReassociate && isa<ConstantFP>(Op0)) {
- ConstantFP *C1 = cast<ConstantFP>(Op0), *C2;
+ if (AllowReassociate && isa<Constant>(Op0)) {
+ Constant *C1 = cast<Constant>(Op0), *C2;
Constant *Fold = 0;
Value *X;
bool CreateDiv = true;
// C1 / (X*C2) => (C1/C2) / X
- if (match(Op1, m_FMul(m_Value(X), m_ConstantFP(C2))))
+ if (match(Op1, m_FMul(m_Value(X), m_Constant(C2))))
Fold = ConstantExpr::getFDiv(C1, C2);
- else if (match(Op1, m_FDiv(m_Value(X), m_ConstantFP(C2)))) {
+ else if (match(Op1, m_FDiv(m_Value(X), m_Constant(C2)))) {
// C1 / (X/C2) => (C1*C2) / X
Fold = ConstantExpr::getFMul(C1, C2);
- } else if (match(Op1, m_FDiv(m_ConstantFP(C2), m_Value(X)))) {
+ } else if (match(Op1, m_FDiv(m_Constant(C2), m_Value(X)))) {
// C1 / (C2/X) => (C1/C2) * X
Fold = ConstantExpr::getFDiv(C1, C2);
CreateDiv = false;
}
- if (Fold) {
- const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
- if (FoldC.isNormal()) {
- Instruction *R = CreateDiv ?
- BinaryOperator::CreateFDiv(Fold, X) :
- BinaryOperator::CreateFMul(X, Fold);
- R->setFastMathFlags(I.getFastMathFlags());
- return R;
- }
+ if (Fold && isNormalFp(Fold)) {
+ Instruction *R = CreateDiv ? BinaryOperator::CreateFDiv(Fold, X)
+ : BinaryOperator::CreateFMul(X, Fold);
+ R->setFastMathFlags(I.getFastMathFlags());
+ return R;
}
return 0;
}
@@ -1082,15 +1109,25 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) {
// (X/Y) / Z => X / (Y*Z)
//
- if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op1)) {
+ if (!isa<Constant>(Y) || !isa<Constant>(Op1)) {
NewInst = Builder->CreateFMul(Y, Op1);
+ if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
+ FastMathFlags Flags = I.getFastMathFlags();
+ Flags &= cast<Instruction>(Op0)->getFastMathFlags();
+ RI->setFastMathFlags(Flags);
+ }
SimpR = BinaryOperator::CreateFDiv(X, NewInst);
}
} else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) {
// Z / (X/Y) => Z*Y / X
//
- if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op0)) {
+ if (!isa<Constant>(Y) || !isa<Constant>(Op0)) {
NewInst = Builder->CreateFMul(Op0, Y);
+ if (Instruction *RI = dyn_cast<Instruction>(NewInst)) {
+ FastMathFlags Flags = I.getFastMathFlags();
+ Flags &= cast<Instruction>(Op1)->getFastMathFlags();
+ RI->setFastMathFlags(Flags);
+ }
SimpR = BinaryOperator::CreateFDiv(NewInst, X);
}
}
@@ -1123,7 +1160,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
return &I;
- if (isa<ConstantInt>(Op1)) {
+ if (isa<Constant>(Op1)) {
if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -1145,7 +1182,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
Instruction *InstCombiner::visitURem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyURemInst(Op0, Op1, TD))
+ if (Value *V = SimplifyURemInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
if (Instruction *common = commonIRemTransforms(I))
@@ -1177,7 +1214,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifySRemInst(Op0, Op1, TD))
+ if (Value *V = SimplifySRemInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
// Handle the integer rem common cases
@@ -1248,7 +1285,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (Value *V = SimplifyFRemInst(Op0, Op1, TD))
+ if (Value *V = SimplifyFRemInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
// Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 4c6d0c4..0ab657a 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -255,9 +255,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
// profitable to do this xform.
if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
bool isAddressTaken = false;
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ++UI) {
- User *U = *UI;
+ for (User *U : AI->users()) {
if (isa<LoadInst>(U)) continue;
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// If storing TO the alloca, then the address isn't taken.
@@ -518,7 +516,7 @@ static bool DeadPHICycle(PHINode *PN,
if (PotentiallyDeadPHIs.size() == 16)
return false;
- if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
+ if (PHINode *PU = dyn_cast<PHINode>(PN->user_back()))
return DeadPHICycle(PU, PotentiallyDeadPHIs);
return false;
@@ -649,32 +647,30 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
return 0;
}
-
- for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (User *U : PN->users()) {
+ Instruction *UserI = cast<Instruction>(U);
// If the user is a PHI, inspect its uses recursively.
- if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHINode *UserPN = dyn_cast<PHINode>(UserI)) {
if (PHIsInspected.insert(UserPN))
PHIsToSlice.push_back(UserPN);
continue;
}
// Truncates are always ok.
- if (isa<TruncInst>(User)) {
- PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ if (isa<TruncInst>(UserI)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, UserI));
continue;
}
// Otherwise it must be a lshr which can only be used by one trunc.
- if (User->getOpcode() != Instruction::LShr ||
- !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
- !isa<ConstantInt>(User->getOperand(1)))
+ if (UserI->getOpcode() != Instruction::LShr ||
+ !UserI->hasOneUse() || !isa<TruncInst>(UserI->user_back()) ||
+ !isa<ConstantInt>(UserI->getOperand(1)))
return 0;
- unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
- PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back()));
}
}
@@ -790,7 +786,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- if (Value *V = SimplifyInstruction(&PN, TD, TLI))
+ if (Value *V = SimplifyInstruction(&PN, DL, TLI))
return ReplaceInstUsesWith(PN, V);
// If all PHI operands are the same operation, pull them through the PHI,
@@ -809,7 +805,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// this PHI only has a single use (a PHI), and if that PHI only has one use (a
// PHI)... break the cycle.
if (PN.hasOneUse()) {
- Instruction *PHIUser = cast<Instruction>(PN.use_back());
+ Instruction *PHIUser = cast<Instruction>(PN.user_back());
if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
PotentiallyDeadPHIs.insert(&PN);
@@ -825,7 +821,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// late.
if (PHIUser->hasOneUse() &&
(isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&
- PHIUser->use_back() == &PN) {
+ PHIUser->user_back() == &PN) {
return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
}
}
@@ -893,8 +889,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is
// introduced when SROA promotes an aggregate to a single large integer type.
- if (PN.getType()->isIntegerTy() && TD &&
- !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (PN.getType()->isIntegerTy() && DL &&
+ !DL->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 283bec2..e74d912 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -14,7 +14,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -554,18 +554,18 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -734,7 +734,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *TrueVal = SI.getTrueValue();
Value *FalseVal = SI.getFalseValue();
- if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, TD))
+ if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, DL))
return ReplaceInstUsesWith(SI, V);
if (SI.getType()->isIntegerTy(1)) {
@@ -901,6 +901,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *NegVal; // Compute -Z
if (SI.getType()->isFPOrFPVectorTy()) {
NegVal = Builder->CreateFNeg(SubOp->getOperand(1));
+ if (Instruction *NegInst = dyn_cast<Instruction>(NegVal)) {
+ FastMathFlags Flags = AddOp->getFastMathFlags();
+ Flags &= SubOp->getFastMathFlags();
+ NegInst->setFastMathFlags(Flags);
+ }
} else {
NegVal = Builder->CreateNeg(SubOp->getOperand(1));
}
@@ -913,9 +918,15 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Builder->CreateSelect(CondVal, NewTrueOp,
NewFalseOp, SI.getName() + ".p");
- if (SI.getType()->isFPOrFPVectorTy())
- return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
- else
+ if (SI.getType()->isFPOrFPVectorTy()) {
+ Instruction *RI =
+ BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
+
+ FastMathFlags Flags = AddOp->getFastMathFlags();
+ Flags &= SubOp->getFastMathFlags();
+ RI->setFastMathFlags(Flags);
+ return RI;
+ } else
return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 8cf76e5..8273dfd 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -15,7 +15,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -677,7 +677,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
Instruction *InstCombiner::visitShl(BinaryOperator &I) {
if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
- TD))
+ DL))
return ReplaceInstUsesWith(I, V);
if (Instruction *V = commonShiftTransforms(I))
@@ -714,7 +714,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
- I.isExact(), TD))
+ I.isExact(), DL))
return ReplaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
@@ -754,7 +754,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
- I.isExact(), TD))
+ I.isExact(), DL))
return ReplaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index c831ddd..a47b709 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -16,7 +16,7 @@
#include "InstCombine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -105,9 +105,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = V->getType();
- assert((TD || !VTy->isPointerTy()) &&
+ assert((DL || !VTy->isPointerTy()) &&
"SimplifyDemandedBits needs to know bit widths!");
- assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
(!VTy->isIntOrIntVectorTy() ||
VTy->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 1e72410..521dc9c 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -25,11 +25,13 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
if (isConstant) return true;
// If all elts are the same, we can extract it and use any of the values.
- Constant *Op0 = C->getAggregateElement(0U);
- for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
- if (C->getAggregateElement(i) != Op0)
- return false;
- return true;
+ if (Constant *Op0 = C->getAggregateElement(0U)) {
+ for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
+ ++i)
+ if (C->getAggregateElement(i) != Op0)
+ return false;
+ return true;
+ }
}
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
@@ -116,7 +118,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// If so, it's known at this point that one operand is PHI and the other is
// an extractelement node. Find the PHI user that is not the extractelement
// node.
- Value::use_iterator iu = PN->use_begin();
+ auto iu = PN->user_begin();
Instruction *PHIUser = dyn_cast<Instruction>(*iu);
if (PHIUser == cast<Instruction>(&EI))
PHIUser = cast<Instruction>(*(++iu));
@@ -124,7 +126,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// Verify that this PHI user has one use, which is the PHI itself,
// and that it is a binary operation which is cheap to scalarize.
// otherwise return NULL.
- if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
+ if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
!(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
return NULL;
@@ -324,7 +326,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
/// Otherwise, return false.
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<Constant*> &Mask) {
- assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
+ assert(LHS->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
unsigned NumElts = V->getType()->getVectorNumElements();
@@ -365,10 +367,10 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
return true;
}
} else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
- if (isa<ConstantInt>(EI->getOperand(1)) &&
- EI->getOperand(0)->getType() == V->getType()) {
+ if (isa<ConstantInt>(EI->getOperand(1))) {
unsigned ExtractedIdx =
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+ unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
// This must be extracting from either LHS or RHS.
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
@@ -384,7 +386,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
assert(EI->getOperand(0) == RHS);
Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::getInt32Ty(V->getContext()),
- ExtractedIdx+NumElts);
+ ExtractedIdx + NumLHSElts);
}
return true;
}
@@ -392,29 +394,36 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
}
}
}
- // TODO: Handle shufflevector here!
return false;
}
-/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
-/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
-/// that computes V and the LHS value of the shuffle.
-static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
- Value *&RHS) {
- assert(V->getType()->isVectorTy() &&
- (RHS == 0 || V->getType() == RHS->getType()) &&
- "Invalid shuffle!");
+
+/// We are building a shuffle to create V, which is a sequence of insertelement,
+/// extractelement pairs. If PermittedRHS is set, then we must either use it or
+/// not rely on the second vector source. Return an std::pair containing the
+/// left and right vectors of the proposed shuffle (or 0), and set the Mask
+/// parameter as required.
+///
+/// Note: we intentionally don't try to fold earlier shuffles since they have
+/// often been chosen carefully to be efficiently implementable on the target.
+typedef std::pair<Value *, Value *> ShuffleOps;
+
+static ShuffleOps CollectShuffleElements(Value *V,
+ SmallVectorImpl<Constant *> &Mask,
+ Value *PermittedRHS) {
+ assert(V->getType()->isVectorTy() && "Invalid shuffle!");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
- return V;
+ return std::make_pair(
+ PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
}
if (isa<ConstantAggregateZero>(V)) {
Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
- return V;
+ return std::make_pair(V, nullptr);
}
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
@@ -424,51 +433,59 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
Value *IdxOp = IEI->getOperand(2);
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
- if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
- EI->getOperand(0)->getType() == V->getType()) {
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
unsigned ExtractedIdx =
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
// Either the extracted from or inserted into vector must be RHSVec,
// otherwise we'd end up with a shuffle of three inputs.
- if (EI->getOperand(0) == RHS || RHS == 0) {
- RHS = EI->getOperand(0);
- Value *V = CollectShuffleElements(VecOp, Mask, RHS);
+ if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) {
+ Value *RHS = EI->getOperand(0);
+ ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
+ assert(LR.second == 0 || LR.second == RHS);
+
+ if (LR.first->getType() != RHS->getType()) {
+ // We tried our best, but we can't find anything compatible with RHS
+ // further up the chain. Return a trivial shuffle.
+ for (unsigned i = 0; i < NumElts; ++i)
+ Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
+ return std::make_pair(V, nullptr);
+ }
+
+ unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
Mask[InsertedIdx % NumElts] =
ConstantInt::get(Type::getInt32Ty(V->getContext()),
- NumElts+ExtractedIdx);
- return V;
+ NumLHSElts+ExtractedIdx);
+ return std::make_pair(LR.first, RHS);
}
- if (VecOp == RHS) {
- Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
- // Update Mask to reflect that `ScalarOp' has been inserted at
- // position `InsertedIdx' within the vector returned by IEI.
- Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
-
- // Everything but the extracted element is replaced with the RHS.
- for (unsigned i = 0; i != NumElts; ++i) {
- if (i != InsertedIdx)
- Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
- NumElts+i);
- }
- return V;
+ if (VecOp == PermittedRHS) {
+ // We've gone as far as we can: anything on the other side of the
+ // extractelement will already have been converted into a shuffle.
+ unsigned NumLHSElts =
+ EI->getOperand(0)->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(ConstantInt::get(
+ Type::getInt32Ty(V->getContext()),
+ i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
+ return std::make_pair(EI->getOperand(0), PermittedRHS);
}
// If this insertelement is a chain that comes from exactly these two
// vectors, return the vector and the effective shuffle.
- if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
- return EI->getOperand(0);
+ if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
+ CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
+ Mask))
+ return std::make_pair(EI->getOperand(0), PermittedRHS);
}
}
}
- // TODO: Handle shufflevector here!
// Otherwise, can't do anything fancy. Return an identity vector.
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
- return V;
+ return std::make_pair(V, nullptr);
}
Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
@@ -483,17 +500,18 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
// If the inserted element was extracted from some other vector, and if the
// indexes are constant, try to turn this into a shufflevector operation.
if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
- if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
- EI->getOperand(0)->getType() == IE.getType()) {
- unsigned NumVectorElts = IE.getType()->getNumElements();
+ if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
+ unsigned NumInsertVectorElts = IE.getType()->getNumElements();
+ unsigned NumExtractVectorElts =
+ EI->getOperand(0)->getType()->getVectorNumElements();
unsigned ExtractedIdx =
cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
- if (ExtractedIdx >= NumVectorElts) // Out of range extract.
+ if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
return ReplaceInstUsesWith(IE, VecOp);
- if (InsertedIdx >= NumVectorElts) // Out of range insert.
+ if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
// If we are extracting a value from a vector, then inserting it right
@@ -503,13 +521,18 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
// If this insertelement isn't used by some other insertelement, turn it
// (and any insertelements it points to), into one big shuffle.
- if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
+ if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
SmallVector<Constant*, 16> Mask;
- Value *RHS = 0;
- Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
- if (RHS == 0) RHS = UndefValue::get(LHS->getType());
- // We now have a shuffle of LHS, RHS, Mask.
- return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
+ ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0);
+
+ // The proposed shuffle may be trivial, in which case we shouldn't
+ // perform the combine.
+ if (LR.first != &IE && LR.second != &IE) {
+ // We now have a shuffle of LHS, RHS, Mask.
+ if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType());
+ return new ShuffleVectorInst(LR.first, LR.second,
+ ConstantVector::get(Mask));
+ }
}
}
}
@@ -638,6 +661,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
if (isa<PossiblyExactOperator>(BO)) {
New->setIsExact(BO->isExact());
}
+ if (isa<FPMathOperator>(BO))
+ New->copyFastMathFlags(I);
return New;
}
case Instruction::ICmp:
@@ -763,9 +788,10 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
}
}
+ // If element is not in Mask, no need to handle the operand 1 (element to
+ // be inserted). Just evaluate values in operand 0 according to Mask.
if (!Found)
- return UndefValue::get(
- VectorType::get(V->getType()->getScalarType(), Mask.size()));
+ return EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
return InsertElementInst::Create(V, I->getOperand(1),
@@ -1010,7 +1036,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
// If newRHS == newLHS, we want to remap any references from newRHS to
// newLHS so that we can properly identify splats that may occur due to
- // obfuscation accross the two vectors.
+ // obfuscation across the two vectors.
if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
eltMask += newLHSWidth;
}
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index f84db27..8c780b5 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -84,9 +84,8 @@ public:
/// now.
///
void AddUsersToWorkList(Instruction &I) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
- UI != UE; ++UI)
- Add(cast<Instruction>(*UI));
+ for (User *U : I.users())
+ Add(cast<Instruction>(U));
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 191a101..0cab81b 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -43,14 +43,14 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -103,13 +103,13 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
assert(From->isIntegerTy() && To->isIntegerTy());
- // If we don't have TD, we don't know if the source/dest are legal.
- if (!TD) return false;
+ // If we don't have DL, we don't know if the source/dest are legal.
+ if (!DL) return false;
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
- bool FromLegal = TD->isLegalInteger(FromWidth);
- bool ToLegal = TD->isLegalInteger(ToWidth);
+ bool FromLegal = DL->isLegalInteger(FromWidth);
+ bool ToLegal = DL->isLegalInteger(ToWidth);
// If this is a legal integer from type, and the result would be an illegal
// type, don't do the transformation.
@@ -221,7 +221,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = I.getOperand(1);
// Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, TD)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) {
// It simplifies to V. Form "A op V".
I.setOperand(0, A);
I.setOperand(1, V);
@@ -250,7 +250,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, B, TD)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) {
// It simplifies to V. Form "V op C".
I.setOperand(0, V);
I.setOperand(1, C);
@@ -272,7 +272,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = I.getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
// It simplifies to V. Form "V op B".
I.setOperand(0, V);
I.setOperand(1, B);
@@ -292,7 +292,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) {
// It simplifies to V. Form "B op V".
I.setOperand(0, B);
I.setOperand(1, V);
@@ -319,6 +319,12 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
BinaryOperator *New = BinaryOperator::Create(Opcode, A, B);
+ if (isa<FPMathOperator>(New)) {
+ FastMathFlags Flags = I.getFastMathFlags();
+ Flags &= Op0->getFastMathFlags();
+ Flags &= Op1->getFastMathFlags();
+ New->setFastMathFlags(Flags);
+ }
InsertNewInstWith(New, I);
New->takeName(Op1);
I.setOperand(0, New);
@@ -419,7 +425,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
std::swap(C, D);
// Consider forming "A op' (B op D)".
// If "B op D" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, B, D, TD);
+ Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
// If "B op D" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
if (!V && Op0->hasOneUse() && Op1->hasOneUse())
@@ -441,7 +447,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
std::swap(C, D);
// Consider forming "(A op C) op' B".
// If "A op C" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, A, C, TD);
+ Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
// If "A op C" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
if (!V && Op0->hasOneUse() && Op1->hasOneUse())
@@ -463,8 +469,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
// Do "A op C" and "B op C" both simplify?
- if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, TD))
- if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, TD)) {
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) {
// They do! Return "L op' R".
++NumExpand;
// If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
@@ -472,7 +478,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
(Instruction::isCommutative(InnerOpcode) && L == B && R == A))
return Op0;
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
return V;
// Otherwise, create a new instruction.
C = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -488,8 +494,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
// Do "A op B" and "A op C" both simplify?
- if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, TD))
- if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, TD)) {
+ if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL))
+ if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) {
// They do! Return "L op' R".
++NumExpand;
// If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
@@ -497,7 +503,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
(Instruction::isCommutative(InnerOpcode) && L == C && R == B))
return Op1;
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(InnerOpcode, L, R, TD))
+ if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
return V;
// Otherwise, create a new instruction.
A = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -566,9 +572,14 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
if (!ConstIsRHS)
std::swap(Op0, Op1);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) {
+ Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
SO->getName()+".op");
+ Instruction *FPInst = dyn_cast<Instruction>(RI);
+ if (FPInst && isa<FPMathOperator>(FPInst))
+ FPInst->copyFastMathFlags(BO);
+ return RI;
+ }
if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
SO->getName()+".cmp");
@@ -630,10 +641,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// uses into the PHI.
if (!PN->hasOneUse()) {
// Walk the use list for the instruction, comparing them to I.
- for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (User != &I && !I.isIdenticalTo(User))
+ for (User *U : PN->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (UI != &I && !I.isIdenticalTo(UI))
return 0;
}
// Otherwise, we can replace *all* users with the new PHI we form.
@@ -748,8 +758,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
}
}
- for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
- UI != E; ) {
+ for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
if (User == &I) continue;
ReplaceInstUsesWith(*User, NewPN);
@@ -766,7 +775,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
SmallVectorImpl<Value*> &NewIndices) {
assert(PtrTy->isPtrOrPtrVectorTy());
- if (!TD)
+ if (!DL)
return 0;
Type *Ty = PtrTy->getPointerElementType();
@@ -776,9 +785,9 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = TD->getIntPtrType(PtrTy);
+ Type *IntPtrTy = DL->getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
- if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
+ if (int64_t TySize = DL->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
Offset -= FirstIdx*TySize;
@@ -796,11 +805,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
// Index into the types. If we fail, set OrigBase to null.
while (Offset) {
// Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+ if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
return 0;
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = TD->getStructLayout(STy);
+ const StructLayout *SL = DL->getStructLayout(STy);
assert(Offset < (int64_t)SL->getSizeInBytes() &&
"Offset must stay within the indexed type");
@@ -811,7 +820,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
Offset -= SL->getElementOffset(Elt);
Ty = STy->getElementType(Elt);
} else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());
+ uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType());
assert(EltSize && "Cannot index into a zero-sized array");
NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
Offset %= EltSize;
@@ -1069,23 +1078,23 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Move up one level in the expression.
assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
- Ancestor = Ancestor->use_back();
+ Ancestor = Ancestor->user_back();
} while (1);
}
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
- if (Value *V = SimplifyGEPInst(Ops, TD))
+ if (Value *V = SimplifyGEPInst(Ops, DL))
return ReplaceInstUsesWith(GEP, V);
Value *PtrOp = GEP.getOperand(0);
// Eliminate unneeded casts for indices, and replace indices which displace
// by multiples of a zero size type with zero.
- if (TD) {
+ if (DL) {
bool MadeChange = false;
- Type *IntPtrTy = TD->getIntPtrType(GEP.getPointerOperandType());
+ Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType());
gep_type_iterator GTI = gep_type_begin(GEP);
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
@@ -1097,7 +1106,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the element type has zero size then any index over it is equivalent
// to an index of zero, so replace it with zero if it is not zero already.
if (SeqTy->getElementType()->isSized() &&
- TD->getTypeAllocSize(SeqTy->getElementType()) == 0)
+ DL->getTypeAllocSize(SeqTy->getElementType()) == 0)
if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
*I = Constant::getNullValue(IntPtrTy);
MadeChange = true;
@@ -1188,12 +1197,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y))
// The GEP pattern is emitted by the SCEV expander for certain kinds of
// pointer arithmetic.
- if (TD && GEP.getNumIndices() == 1 &&
+ if (DL && GEP.getNumIndices() == 1 &&
match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) {
unsigned AS = GEP.getPointerAddressSpace();
if (GEP.getType() == Builder->getInt8PtrTy(AS) &&
GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
- TD->getPointerSizeInBits(AS)) {
+ DL->getPointerSizeInBits(AS)) {
Operator *Index = cast<Operator>(GEP.getOperand(1));
Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
@@ -1209,9 +1218,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!StrippedPtrTy)
return 0;
- if (StrippedPtr != PtrOp &&
- StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
-
+ if (StrippedPtr != PtrOp) {
bool HasZeroPointerIndex = false;
if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
HasZeroPointerIndex = C->isZero();
@@ -1257,27 +1264,30 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
Type *ResElTy = PtrOp->getType()->getPointerElementType();
- if (TD && SrcElTy->isArrayTy() &&
- TD->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
- TD->getTypeAllocSize(ResElTy)) {
- Type *IdxType = TD->getIntPtrType(GEP.getType());
+ if (DL && SrcElTy->isArrayTy() &&
+ DL->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
+ DL->getTypeAllocSize(ResElTy)) {
+ Type *IdxType = DL->getIntPtrType(GEP.getType());
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
Value *NewGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+
// V and GEP are both pointer types --> BitCast
- return new BitCastInst(NewGEP, GEP.getType());
+ if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace())
+ return new BitCastInst(NewGEP, GEP.getType());
+ return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
// Transform things like:
// %V = mul i64 %N, 4
// %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
// into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
- if (TD && ResElTy->isSized() && SrcElTy->isSized()) {
+ if (DL && ResElTy->isSized() && SrcElTy->isSized()) {
// Check that changing the type amounts to dividing the index by a scale
// factor.
- uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
- uint64_t SrcSize = TD->getTypeAllocSize(SrcElTy);
+ uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
+ uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy);
if (ResSize && SrcSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1285,7 +1295,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1296,8 +1306,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *NewGEP = GEP.isInBounds() && NSW ?
Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+
// The NewGEP must be pointer typed, so must the old one -> BitCast
- return new BitCastInst(NewGEP, GEP.getType());
+ if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace())
+ return new BitCastInst(NewGEP, GEP.getType());
+ return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
}
@@ -1306,13 +1319,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (TD && ResElTy->isSized() && SrcElTy->isSized() &&
+ if (DL && ResElTy->isSized() && SrcElTy->isSized() &&
SrcElTy->isArrayTy()) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
- uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
+ uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
uint64_t ArrayEltSize
- = TD->getTypeAllocSize(SrcElTy->getArrayElementType());
+ = DL->getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1320,7 +1333,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1329,7 +1342,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
Value *Off[2] = {
- Constant::getNullValue(TD->getIntPtrType(GEP.getType())),
+ Constant::getNullValue(DL->getIntPtrType(GEP.getType())),
NewIdx
};
@@ -1337,14 +1350,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
- return new BitCastInst(NewGEP, GEP.getType());
+ if (StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace())
+ return new BitCastInst(NewGEP, GEP.getType());
+ return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
}
}
}
- if (!TD)
+ if (!DL)
return 0;
/// See if we can simplify:
@@ -1355,10 +1370,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
Value *Operand = BCI->getOperand(0);
PointerType *OpType = cast<PointerType>(Operand->getType());
- unsigned OffsetBits = TD->getPointerTypeSizeInBits(OpType);
+ unsigned OffsetBits = DL->getPointerTypeSizeInBits(OpType);
APInt Offset(OffsetBits, 0);
if (!isa<BitCastInst>(Operand) &&
- GEP.accumulateConstantOffset(*TD, Offset) &&
+ GEP.accumulateConstantOffset(*DL, Offset) &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
// If this GEP instruction doesn't move the pointer, just replace the GEP
@@ -1408,9 +1423,8 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
do {
Instruction *PI = Worklist.pop_back_val();
- for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); UI != UE;
- ++UI) {
- Instruction *I = cast<Instruction>(*UI);
+ for (User *U : PI->users()) {
+ Instruction *I = cast<Instruction>(U);
switch (I->getOpcode()) {
default:
// Give up the moment we see something we can't handle.
@@ -1618,7 +1632,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
- // Cannonicalize fcmp_one -> fcmp_oeq
+ // Canonicalize fcmp_one -> fcmp_oeq
FCmpInst::Predicate FPred; Value *Y;
if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
TrueDest, FalseDest)) &&
@@ -1634,7 +1648,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
- // Cannonicalize icmp_ne -> icmp_eq
+ // Canonicalize icmp_ne -> icmp_eq
ICmpInst::Predicate IPred;
if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)),
TrueDest, FalseDest)) &&
@@ -2214,7 +2228,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
static bool AddReachableCodeToWorklist(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
- const DataLayout *TD,
+ const DataLayout *DL,
const TargetLibraryInfo *TLI) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
@@ -2242,7 +2256,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -2251,7 +2265,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
continue;
}
- if (TD) {
+ if (DL) {
// See if we can constant fold its operands.
for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
i != e; ++i) {
@@ -2260,7 +2274,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
Constant*& FoldRes = FoldedConstants[CE];
if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, TD, TLI);
+ FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
if (!FoldRes)
FoldRes = CE;
@@ -2327,7 +2341,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// the reachable instructions. Ignore blocks that are not reachable. Keep
// track of which blocks we visit.
SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD,
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, DL,
TLI);
// Do a quick scan over the function. If we find any blocks that are
@@ -2373,7 +2387,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -2387,12 +2401,12 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
BasicBlock *BB = I->getParent();
- Instruction *UserInst = cast<Instruction>(I->use_back());
+ Instruction *UserInst = cast<Instruction>(*I->user_begin());
BasicBlock *UserParent;
// Get the block the use occurs in.
if (PHINode *PN = dyn_cast<PHINode>(UserInst))
- UserParent = PN->getIncomingBlock(I->use_begin().getUse());
+ UserParent = PN->getIncomingBlock(*I->use_begin());
else
UserParent = UserInst->getParent();
@@ -2482,23 +2496,27 @@ namespace {
class InstCombinerLibCallSimplifier : public LibCallSimplifier {
InstCombiner *IC;
public:
- InstCombinerLibCallSimplifier(const DataLayout *TD,
+ InstCombinerLibCallSimplifier(const DataLayout *DL,
const TargetLibraryInfo *TLI,
InstCombiner *IC)
- : LibCallSimplifier(TD, TLI, UnsafeFPShrink) {
+ : LibCallSimplifier(DL, TLI, UnsafeFPShrink) {
this->IC = IC;
}
/// replaceAllUsesWith - override so that instruction replacement
/// can be defined in terms of the instruction combiner framework.
- virtual void replaceAllUsesWith(Instruction *I, Value *With) const {
+ void replaceAllUsesWith(Instruction *I, Value *With) const override {
IC->ReplaceInstUsesWith(*I, With);
}
};
}
bool InstCombiner::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<DataLayout>();
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TLI = &getAnalysis<TargetLibraryInfo>();
// Minimizing size?
MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -2507,11 +2525,11 @@ bool InstCombiner::runOnFunction(Function &F) {
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
IRBuilder<true, TargetFolder, InstCombineIRInserter>
- TheBuilder(F.getContext(), TargetFolder(TD),
+ TheBuilder(F.getContext(), TargetFolder(DL),
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
- InstCombinerLibCallSimplifier TheSimplifier(TD, TLI, this);
+ InstCombinerLibCallSimplifier TheSimplifier(DL, TLI, this);
Simplifier = &TheSimplifier;
bool EverMadeChange = false;
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index d731ec5..bbfa4c5 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -19,30 +19,30 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/DIBuilder.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/InstVisitor.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
+#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -56,9 +56,11 @@ using namespace llvm;
static const uint64_t kDefaultShadowScale = 3;
static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
-static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G.
+static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa8000;
+static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
+static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
static const size_t kMinStackMallocSize = 1 << 6; // 64B
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
@@ -78,9 +80,9 @@ static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *const kAsanInitName = "__asan_init_v3";
static const char *const kAsanCovName = "__sanitizer_cov";
+static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
+static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
-static const char *const kAsanMappingOffsetName = "__asan_mapping_offset";
-static const char *const kAsanMappingScaleName = "__asan_mapping_scale";
static const int kMaxAsanStackMallocSizeClass = 10;
static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
@@ -93,11 +95,6 @@ static const char *const kAsanUnpoisonStackMemoryName =
static const char *const kAsanOptionDetectUAR =
"__asan_option_detect_stack_use_after_return";
-// These constants must match the definitions in the run-time library.
-static const int kAsanStackLeftRedzoneMagic = 0xf1;
-static const int kAsanStackMidRedzoneMagic = 0xf2;
-static const int kAsanStackRightRedzoneMagic = 0xf3;
-static const int kAsanStackPartialRedzoneMagic = 0xf4;
#ifndef NDEBUG
static const int kAsanStackAfterReturnMagic = 0xf5;
#endif
@@ -135,14 +132,19 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClCoverage("asan-coverage",
- cl::desc("ASan coverage"), cl::Hidden, cl::init(false));
+static cl::opt<int> ClCoverage("asan-coverage",
+ cl::desc("ASan coverage. 0: none, 1: entry block, 2: all blocks"),
+ cl::Hidden, cl::init(0)); // Integer level; 0 (none) is the default.
static cl::opt<bool> ClInitializers("asan-initialization-order",
cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClRealignStack("asan-realign-stack",
- cl::desc("Realign stack to 32"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair",
+ cl::desc("Instrument <, <=, >, >=, - with pointer operands"),
+ cl::Hidden, cl::init(false));
+static cl::opt<unsigned> ClRealignStack("asan-realign-stack",
+ cl::desc("Realign stack to the value of this flag (power of two)"),
+ cl::Hidden, cl::init(32));
static cl::opt<std::string> ClBlacklistFile("asan-blacklist",
cl::desc("File containing the list of objects to ignore "
"during instrumentation"), cl::Hidden);
@@ -165,11 +167,6 @@ static cl::opt<bool> ClKeepUninstrumented("asan-keep-uninstrumented-functions",
// Shadow = (Mem >> scale) + (1 << offset_log)
static cl::opt<int> ClMappingScale("asan-mapping-scale",
cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
-static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log",
- cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1));
-static cl::opt<bool> ClShort64BitOffset("asan-short-64bit-mapping-offset",
- cl::desc("Use short immediate constant as the mapping offset for 64bit"),
- cl::Hidden, cl::init(true));
// Optimization flags. Not user visible, used mostly for testing
// and benchmarking the tool.
@@ -238,11 +235,12 @@ struct ShadowMapping {
bool OrShadowOffset;
};
-static ShadowMapping getShadowMapping(const Module &M, int LongSize,
- bool ZeroBaseShadow) {
+static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
llvm::Triple TargetTriple(M.getTargetTriple());
bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
- bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
+ // bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
+ bool IsFreeBSD = TargetTriple.getOS() == llvm::Triple::FreeBSD;
+ bool IsLinux = TargetTriple.getOS() == llvm::Triple::Linux;
bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 ||
TargetTriple.getArch() == llvm::Triple::ppc64le;
bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
@@ -251,22 +249,24 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize,
ShadowMapping Mapping;
- // OR-ing shadow offset if more efficient (at least on x86),
- // but on ppc64 we have to use add since the shadow offset is not neccesary
- // 1/8-th of the address space.
- Mapping.OrShadowOffset = !IsPPC64 && !ClShort64BitOffset;
-
- Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 :
- (LongSize == 32 ?
- (IsMIPS32 ? kMIPS32_ShadowOffset32 : kDefaultShadowOffset32) :
- IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64);
- if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64 && !IsMacOSX) {
- assert(LongSize == 64);
- Mapping.Offset = kDefaultShort64bitShadowOffset;
- }
- if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) {
- // Zero offset log is the special case.
- Mapping.Offset = (ClMappingOffsetLog == 0) ? 0 : 1ULL << ClMappingOffsetLog;
+ if (LongSize == 32) {
+ if (IsAndroid)
+ Mapping.Offset = 0;
+ else if (IsMIPS32)
+ Mapping.Offset = kMIPS32_ShadowOffset32;
+ else if (IsFreeBSD)
+ Mapping.Offset = kFreeBSD_ShadowOffset32;
+ else
+ Mapping.Offset = kDefaultShadowOffset32;
+ } else { // LongSize == 64
+ if (IsPPC64)
+ Mapping.Offset = kPPC64_ShadowOffset64;
+ else if (IsFreeBSD)
+ Mapping.Offset = kFreeBSD_ShadowOffset64;
+ else if (IsLinux && IsX86_64)
+ Mapping.Offset = kSmallX86_64ShadowOffset;
+ else
+ Mapping.Offset = kDefaultShadowOffset64;
}
Mapping.Scale = kDefaultShadowScale;
@@ -274,6 +274,11 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize,
Mapping.Scale = ClMappingScale;
}
+ // OR-ing the shadow offset is more efficient (at least on x86) if the offset
+ // is a power of two, but on ppc64 we have to use add since the shadow
+ // offset is not necessarily 1/8-th of the address space.
+ Mapping.OrShadowOffset = !IsPPC64 && !(Mapping.Offset & (Mapping.Offset - 1));
+
return Mapping;
}
@@ -288,19 +293,18 @@ struct AddressSanitizer : public FunctionPass {
AddressSanitizer(bool CheckInitOrder = true,
bool CheckUseAfterReturn = false,
bool CheckLifetime = false,
- StringRef BlacklistFile = StringRef(),
- bool ZeroBaseShadow = false)
+ StringRef BlacklistFile = StringRef())
: FunctionPass(ID),
CheckInitOrder(CheckInitOrder || ClInitializers),
CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn),
CheckLifetime(CheckLifetime || ClCheckLifetime),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile),
- ZeroBaseShadow(ZeroBaseShadow) {}
- virtual const char *getPassName() const {
+ : BlacklistFile) {}
+ const char *getPassName() const override {
return "AddressSanitizerFunctionPass";
}
void instrumentMop(Instruction *I);
+ void instrumentPointerComparisonOrSubtraction(Instruction *I);
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite,
Value *SizeArgument);
@@ -314,29 +318,26 @@ struct AddressSanitizer : public FunctionPass {
Value *Size,
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
- void emitShadowMapping(Module &M, IRBuilder<> &IRB) const;
- virtual bool doInitialization(Module &M);
+ bool doInitialization(Module &M) override;
static char ID; // Pass identification, replacement for typeid
private:
void initializeCallbacks(Module &M);
- bool ShouldInstrumentGlobal(GlobalVariable *G);
bool LooksLikeCodeInBug11395(Instruction *I);
- void FindDynamicInitializers(Module &M);
bool GlobalIsLinkerInitialized(GlobalVariable *G);
- bool InjectCoverage(Function &F);
+ bool InjectCoverage(Function &F, const ArrayRef<BasicBlock*> AllBlocks);
+ void InjectCoverageAtBlock(Function &F, BasicBlock &BB);
bool CheckInitOrder;
bool CheckUseAfterReturn;
bool CheckLifetime;
SmallString<64> BlacklistFile;
- bool ZeroBaseShadow;
LLVMContext *C;
- DataLayout *TD;
+ const DataLayout *DL;
int LongSize;
Type *IntptrTy;
ShadowMapping Mapping;
@@ -344,7 +345,8 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanInitFunction;
Function *AsanHandleNoReturnFunc;
Function *AsanCovFunction;
- OwningPtr<SpecialCaseList> BL;
+ Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
+ std::unique_ptr<SpecialCaseList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
// This array is indexed by AccessIsWrite.
@@ -358,16 +360,14 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
AddressSanitizerModule(bool CheckInitOrder = true,
- StringRef BlacklistFile = StringRef(),
- bool ZeroBaseShadow = false)
+ StringRef BlacklistFile = StringRef())
: ModulePass(ID),
CheckInitOrder(CheckInitOrder || ClInitializers),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile),
- ZeroBaseShadow(ZeroBaseShadow) {}
- bool runOnModule(Module &M);
+ : BlacklistFile) {}
+ bool runOnModule(Module &M) override;
static char ID; // Pass identification, replacement for typeid
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "AddressSanitizerModule";
}
@@ -376,19 +376,18 @@ class AddressSanitizerModule : public ModulePass {
bool ShouldInstrumentGlobal(GlobalVariable *G);
void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
- size_t RedzoneSize() const {
+ size_t MinRedzoneSizeForGlobal() const {
return RedzoneSizeForScale(Mapping.Scale);
}
bool CheckInitOrder;
SmallString<64> BlacklistFile;
- bool ZeroBaseShadow;
- OwningPtr<SpecialCaseList> BL;
+ std::unique_ptr<SpecialCaseList> BL;
SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
Type *IntptrTy;
LLVMContext *C;
- DataLayout *TD;
+ const DataLayout *DL;
ShadowMapping Mapping;
Function *AsanPoisonGlobals;
Function *AsanUnpoisonGlobals;
@@ -416,7 +415,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
SmallVector<AllocaInst*, 16> AllocaVec;
SmallVector<Instruction*, 8> RetVec;
- uint64_t TotalStackSize;
unsigned StackAlignment;
Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
@@ -440,7 +438,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
: F(F), ASan(ASan), DIB(*F.getParent()), C(ASan.C),
IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)),
Mapping(ASan.Mapping),
- TotalStackSize(0), StackAlignment(1 << Mapping.Scale) {}
+ StackAlignment(1 << Mapping.Scale) {}
bool runOnFunction() {
if (!ClStack) return false;
@@ -479,8 +477,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
StackAlignment = std::max(StackAlignment, AI.getAlignment());
AllocaVec.push_back(&AI);
- uint64_t AlignedSize = getAlignedAllocaSize(&AI);
- TotalStackSize += AlignedSize;
}
/// \brief Collect lifetime intrinsic calls to check for use-after-scope
@@ -514,31 +510,20 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// Check if we want (and can) handle this alloca.
bool isInterestingAlloca(AllocaInst &AI) const {
- return (!AI.isArrayAllocation() &&
- AI.isStaticAlloca() &&
- AI.getAlignment() <= RedzoneSize() &&
- AI.getAllocatedType()->isSized());
+ return (!AI.isArrayAllocation() && AI.isStaticAlloca() &&
+ AI.getAllocatedType()->isSized() &&
+ // alloca() may be called with 0 size, ignore it.
+ getAllocaSizeInBytes(&AI) > 0);
}
- size_t RedzoneSize() const {
- return RedzoneSizeForScale(Mapping.Scale);
- }
uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
Type *Ty = AI->getAllocatedType();
- uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty);
+ uint64_t SizeInBytes = ASan.DL->getTypeAllocSize(Ty);
return SizeInBytes;
}
- uint64_t getAlignedSize(uint64_t SizeInBytes) const {
- size_t RZ = RedzoneSize();
- return ((SizeInBytes + RZ - 1) / RZ) * RZ;
- }
- uint64_t getAlignedAllocaSize(AllocaInst *AI) const {
- uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
- return getAlignedSize(SizeInBytes);
- }
/// Finds alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
- void poisonRedZones(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> &IRB,
+ void poisonRedZones(const ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB,
Value *ShadowBase, bool DoPoison);
void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison);
@@ -554,9 +539,9 @@ INITIALIZE_PASS(AddressSanitizer, "asan",
false, false)
FunctionPass *llvm::createAddressSanitizerFunctionPass(
bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime,
- StringRef BlacklistFile, bool ZeroBaseShadow) {
+ StringRef BlacklistFile) {
return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn,
- CheckLifetime, BlacklistFile, ZeroBaseShadow);
+ CheckLifetime, BlacklistFile);
}
char AddressSanitizerModule::ID = 0;
@@ -564,9 +549,8 @@ INITIALIZE_PASS(AddressSanitizerModule, "asan-module",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
"ModulePass", false, false)
ModulePass *llvm::createAddressSanitizerModulePass(
- bool CheckInitOrder, StringRef BlacklistFile, bool ZeroBaseShadow) {
- return new AddressSanitizerModule(CheckInitOrder, BlacklistFile,
- ZeroBaseShadow);
+ bool CheckInitOrder, StringRef BlacklistFile) {
+ return new AddressSanitizerModule(CheckInitOrder, BlacklistFile);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -576,12 +560,16 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
}
// \brief Create a constant for Str so that we can pass it to the run-time lib.
-static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
+static GlobalVariable *createPrivateGlobalForString(
+ Module &M, StringRef Str, bool AllowMerging) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true,
- GlobalValue::InternalLinkage, StrConst,
- kAsanGenPrefix);
- GV->setUnnamedAddr(true); // Ok to merge these.
+ // We use private linkage for module-local strings. If they can be merged
+ // with another one, we set the unnamed_addr attribute.
+ GlobalVariable *GV =
+ new GlobalVariable(M, StrConst->getType(), true,
+ GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix);
+ if (AllowMerging)
+ GV->setUnnamedAddr(true);
GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
return GV;
}
@@ -635,7 +623,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
Value *Cmp = IRB.CreateICmpNE(Length,
Constant::getNullValue(Length->getType()));
- InsertBefore = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ InsertBefore = SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
}
instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
@@ -670,6 +658,29 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
+static bool isPointerOperand(Value *V) {
+ return V->getType()->isPointerTy() || isa<PtrToIntInst>(V);
+}
+
+// This is a rough heuristic; it may cause both false positives and
+// false negatives. The proper implementation requires cooperation with
+// the frontend.
+static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) {
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) {
+ if (!Cmp->isRelational())
+ return false;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (BO->getOpcode() != Instruction::Sub)
+ return false;
+ } else {
+ return false;
+ }
+ if (!isPointerOperand(I->getOperand(0)) ||
+ !isPointerOperand(I->getOperand(1)))
+ return false;
+ return true;
+}
+
bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
// If a global variable does not have dynamic initialization we don't
// have to instrument it. However, if a global does not have initializer
@@ -677,6 +688,18 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G);
}
+void
+AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
+ IRBuilder<> IRB(I);
+ Function *F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
+ Value *Param[2] = {I->getOperand(0), I->getOperand(1)};
+ for (int i = 0; i < 2; i++) {
+ if (Param[i]->getType()->isPointerTy())
+ Param[i] = IRB.CreatePointerCast(Param[i], IntptrTy);
+ }
+ IRB.CreateCall2(F, Param[0], Param[1]);
+}
+
void AddressSanitizer::instrumentMop(Instruction *I) {
bool IsWrite = false;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
@@ -705,7 +728,7 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
assert(OrigTy->isSized());
- uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+ uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy);
assert((TypeSize % 8) == 0);
@@ -798,7 +821,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
TerminatorInst *CheckTerm =
- SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
@@ -809,7 +832,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
ReplaceInstWithInst(CheckTerm, NewTerm);
} else {
- CrashTerm = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), true);
+ CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true);
}
Instruction *Crash = generateCrashCode(
@@ -861,8 +884,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// - Need to poison all copies, not just the main thread's one.
if (G->isThreadLocal())
return false;
- // For now, just ignore this Alloca if the alignment is large.
- if (G->getAlignment() > RedzoneSize()) return false;
+ // For now, just ignore this Global if the alignment is large.
+ if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false;
// Ignore all the globals with the names starting with "\01L_OBJC_".
// Many of those are put into the .cstring section. The linker compresses
@@ -870,7 +893,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// our redzones get broken.
if ((G->getName().find("\01L_OBJC_") == 0) ||
(G->getName().find("\01l_OBJC_") == 0)) {
- DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+ DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G << "\n");
return false;
}
@@ -881,7 +904,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// them.
if ((Section.find("__OBJC,") == 0) ||
(Section.find("__DATA, __objc_") == 0)) {
- DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+ DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n");
return false;
}
// See http://code.google.com/p/address-sanitizer/issues/detail?id=32
@@ -893,9 +916,17 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// Therefore there's no point in placing redzones into __DATA,__cfstring.
// Moreover, it causes the linker to crash on OS X 10.7
if (Section.find("__DATA,__cfstring") == 0) {
- DEBUG(dbgs() << "Ignoring CFString: " << *G);
+ DEBUG(dbgs() << "Ignoring CFString: " << *G << "\n");
+ return false;
+ }
+ // The linker merges the contents of cstring_literals and removes the
+ // trailing zeroes.
+ if (Section.find("__TEXT,__cstring,cstring_literals") == 0) {
+ DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n");
return false;
}
+ // Globals from llvm.metadata aren't emitted, do not instrument them.
+ if (Section == "llvm.metadata") return false;
}
return true;
@@ -926,15 +957,18 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
// redzones and inserts this function into llvm.global_ctors.
bool AddressSanitizerModule::runOnModule(Module &M) {
if (!ClGlobals) return false;
- TD = getAnalysisIfAvailable<DataLayout>();
- if (!TD)
+
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP)
return false;
+ DL = &DLP->getDataLayout();
+
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
if (BL->isIn(M)) return false;
C = &(M.getContext());
- int LongSize = TD->getPointerSizeInBits();
+ int LongSize = DL->getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
- Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow);
+ Mapping = getShadowMapping(M, LongSize);
initializeCallbacks(M);
DynamicallyInitializedGlobals.Init(M);
@@ -968,19 +1002,18 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
bool HasDynamicallyInitializedGlobals = false;
- GlobalVariable *ModuleName = createPrivateGlobalForString(
- M, M.getModuleIdentifier());
// We shouldn't merge same module names, as this string serves as unique
// module ID in runtime.
- ModuleName->setUnnamedAddr(false);
+ GlobalVariable *ModuleName = createPrivateGlobalForString(
+ M, M.getModuleIdentifier(), /*AllowMerging*/false);
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
- uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
- uint64_t MinRZ = RedzoneSize();
+ uint64_t SizeInBytes = DL->getTypeAllocSize(Ty);
+ uint64_t MinRZ = MinRedzoneSizeForGlobal();
// MinRZ <= RZ <= kMaxGlobalRedzone
// and trying to make RZ to be ~ 1/4 of SizeInBytes.
uint64_t RZ = std::max(MinRZ,
@@ -1003,7 +1036,8 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
+ GlobalVariable *Name =
+ createPrivateGlobalForString(M, G->getName(), /*AllowMerging*/true);
// Create a new global variable with enough space for a redzone.
GlobalValue::LinkageTypes Linkage = G->getLinkage();
@@ -1092,42 +1126,30 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanCovName, IRB.getVoidTy(), IntptrTy, NULL));
+ kAsanCovName, IRB.getVoidTy(), NULL));
+ AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanPtrSubFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
}
-void AddressSanitizer::emitShadowMapping(Module &M, IRBuilder<> &IRB) const {
- // Tell the values of mapping offset and scale to the run-time.
- GlobalValue *asan_mapping_offset =
- new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
- ConstantInt::get(IntptrTy, Mapping.Offset),
- kAsanMappingOffsetName);
- // Read the global, otherwise it may be optimized away.
- IRB.CreateLoad(asan_mapping_offset, true);
-
- GlobalValue *asan_mapping_scale =
- new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
- ConstantInt::get(IntptrTy, Mapping.Scale),
- kAsanMappingScaleName);
- // Read the global, otherwise it may be optimized away.
- IRB.CreateLoad(asan_mapping_scale, true);
-}
-
// virtual
bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
- TD = getAnalysisIfAvailable<DataLayout>();
-
- if (!TD)
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP)
return false;
+ DL = &DLP->getDataLayout();
+
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
DynamicallyInitializedGlobals.Init(M);
C = &(M.getContext());
- LongSize = TD->getPointerSizeInBits();
+ LongSize = DL->getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
AsanCtorFunction = Function::Create(
@@ -1141,8 +1163,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
AsanInitFunction->setLinkage(Function::ExternalLinkage);
IRB.CreateCall(AsanInitFunction);
- Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow);
- emitShadowMapping(M, IRB);
+ Mapping = getShadowMapping(M, LongSize);
appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
return true;
@@ -1164,9 +1185,42 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
return false;
}
+void AddressSanitizer::InjectCoverageAtBlock(Function &F, BasicBlock &BB) {
+ BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
+ // Skip static allocas at the top of the entry block so they don't become
+ // dynamic when we split the block. If we used our optimized stack layout,
+ // then there will only be one alloca and it will come first.
+ for (; IP != BE; ++IP) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(IP);
+ if (!AI || !AI->isStaticAlloca())
+ break;
+ }
+
+ IRBuilder<> IRB(IP);
+ Type *Int8Ty = IRB.getInt8Ty();
+ GlobalVariable *Guard = new GlobalVariable(
+ *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName());
+ LoadInst *Load = IRB.CreateLoad(Guard);
+ Load->setAtomic(Monotonic);
+ Load->setAlignment(1);
+ Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load);
+ Instruction *Ins = SplitBlockAndInsertIfThen(
+ Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ IRB.SetInsertPoint(Ins);
+ // __sanitizer_cov is called with no arguments: &F is no longer passed, so
+ // the run-time presumably identifies the call site via GET_CALLER_PC.
+ Instruction *Call = IRB.CreateCall(AsanCovFunction);
+ Call->setDebugLoc(IP->getDebugLoc());
+ StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
+ Store->setAtomic(Monotonic);
+ Store->setAlignment(1);
+}
+
// Poor man's coverage that works with ASan.
// We create a Guard boolean variable with the same linkage
-// as the function and inject this code into the entry block:
+// as the function and inject this code into the entry block (-asan-coverage=1)
+// or all blocks (-asan-coverage=2):
// if (!*Guard) {
// __sanitizer_cov();
// *Guard = 1;
@@ -1175,33 +1229,24 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// in __sanitizer_cov (it's fine to call it more than once).
//
// This coverage implementation provides very limited data:
-// it only tells if a given function was ever executed.
-// No counters, no per-basic-block or per-edge data.
+// it only tells if a given function (block) was ever executed.
+// No counters, no per-edge data.
// But for many use cases this is what we need and the added slowdown
// is negligible. This simple implementation will probably be obsoleted
// by the upcoming Clang-based coverage implementation.
// By having it here and now we hope to
// a) get the functionality to users earlier and
// b) collect usage statistics to help improve Clang coverage design.
-bool AddressSanitizer::InjectCoverage(Function &F) {
+bool AddressSanitizer::InjectCoverage(Function &F,
+ const ArrayRef<BasicBlock *> AllBlocks) {
if (!ClCoverage) return false;
- IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt());
- Type *Int8Ty = IRB.getInt8Ty();
- GlobalVariable *Guard = new GlobalVariable(
- *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage,
- Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName());
- LoadInst *Load = IRB.CreateLoad(Guard);
- Load->setAtomic(Monotonic);
- Load->setAlignment(1);
- Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load);
- Instruction *Ins = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
- IRB.SetInsertPoint(Ins);
- // We pass &F to __sanitizer_cov. We could avoid this and rely on
- // GET_CALLER_PC, but having the PC of the first instruction is just nice.
- IRB.CreateCall(AsanCovFunction, IRB.CreatePointerCast(&F, IntptrTy));
- StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
- Store->setAtomic(Monotonic);
- Store->setAlignment(1);
+
+ if (ClCoverage == 1) {
+ InjectCoverageAtBlock(F, F.getEntryBlock());
+ } else {
+ for (size_t i = 0, n = AllBlocks.size(); i < n; i++)
+ InjectCoverageAtBlock(F, *AllBlocks[i]);
+ }
return true;
}
@@ -1226,12 +1271,15 @@ bool AddressSanitizer::runOnFunction(Function &F) {
SmallSet<Value*, 16> TempsToInstrument;
SmallVector<Instruction*, 16> ToInstrument;
SmallVector<Instruction*, 8> NoReturnCalls;
+ SmallVector<BasicBlock*, 16> AllBlocks;
+ SmallVector<Instruction*, 16> PointerComparisonsOrSubtracts;
int NumAllocas = 0;
bool IsWrite;
// Fill the set of memory operations to instrument.
for (Function::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI) {
+ AllBlocks.push_back(FI);
TempsToInstrument.clear();
int NumInsnsPerBB = 0;
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
@@ -1242,6 +1290,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
if (!TempsToInstrument.insert(Addr))
continue; // We've seen this temp in the current BB.
}
+ } else if (ClInvalidPointerPairs &&
+ isInterestingPointerComparisonOrSubtraction(BI)) {
+ PointerComparisonsOrSubtracts.push_back(BI);
+ continue;
} else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
// ok, take it.
} else {
@@ -1299,9 +1351,14 @@ bool AddressSanitizer::runOnFunction(Function &F) {
IRB.CreateCall(AsanHandleNoReturnFunc);
}
+ for (size_t i = 0, n = PointerComparisonsOrSubtracts.size(); i != n; i++) {
+ instrumentPointerComparisonOrSubtraction(PointerComparisonsOrSubtracts[i]);
+ NumInstrumented++;
+ }
+
bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
- if (InjectCoverage(F))
+ if (InjectCoverage(F, AllBlocks))
res = true;
DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n");
@@ -1323,32 +1380,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
return res;
}
-static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) {
- if (ShadowRedzoneSize == 1) return PoisonByte;
- if (ShadowRedzoneSize == 2) return (PoisonByte << 8) + PoisonByte;
- if (ShadowRedzoneSize == 4)
- return (PoisonByte << 24) + (PoisonByte << 16) +
- (PoisonByte << 8) + (PoisonByte);
- llvm_unreachable("ShadowRedzoneSize is either 1, 2 or 4");
-}
-
-static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
- size_t Size,
- size_t RZSize,
- size_t ShadowGranularity,
- uint8_t Magic) {
- for (size_t i = 0; i < RZSize;
- i+= ShadowGranularity, Shadow++) {
- if (i + ShadowGranularity <= Size) {
- *Shadow = 0; // fully addressable
- } else if (i >= Size) {
- *Shadow = Magic; // unaddressable
- } else {
- *Shadow = Size - i; // first Size-i bytes are addressable
- }
- }
-}
-
// Workaround for bug 11395: we don't want to instrument stack in functions
// with large assembly blobs (32-bit only), otherwise reg alloc may crash.
// FIXME: remove once the bug 11395 is fixed.
@@ -1378,65 +1409,31 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
}
-void FunctionStackPoisoner::poisonRedZones(
- const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> &IRB, Value *ShadowBase,
- bool DoPoison) {
- size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale;
- assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
- Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8);
- Type *RZPtrTy = PointerType::get(RZTy, 0);
-
- Value *PoisonLeft = ConstantInt::get(RZTy,
- ValueForPoison(DoPoison ? kAsanStackLeftRedzoneMagic : 0LL, ShadowRZSize));
- Value *PoisonMid = ConstantInt::get(RZTy,
- ValueForPoison(DoPoison ? kAsanStackMidRedzoneMagic : 0LL, ShadowRZSize));
- Value *PoisonRight = ConstantInt::get(RZTy,
- ValueForPoison(DoPoison ? kAsanStackRightRedzoneMagic : 0LL, ShadowRZSize));
-
- // poison the first red zone.
- IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy));
-
- // poison all other red zones.
- uint64_t Pos = RedzoneSize();
- for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
- AllocaInst *AI = AllocaVec[i];
- uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
- uint64_t AlignedSize = getAlignedAllocaSize(AI);
- assert(AlignedSize - SizeInBytes < RedzoneSize());
- Value *Ptr = NULL;
-
- Pos += AlignedSize;
-
- assert(ShadowBase->getType() == IntptrTy);
- if (SizeInBytes < AlignedSize) {
- // Poison the partial redzone at right
- Ptr = IRB.CreateAdd(
- ShadowBase, ConstantInt::get(IntptrTy,
- (Pos >> Mapping.Scale) - ShadowRZSize));
- size_t AddressableBytes = RedzoneSize() - (AlignedSize - SizeInBytes);
- uint32_t Poison = 0;
- if (DoPoison) {
- PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes,
- RedzoneSize(),
- 1ULL << Mapping.Scale,
- kAsanStackPartialRedzoneMagic);
- Poison =
- ASan.TD->isLittleEndian()
- ? support::endian::byte_swap<uint32_t, support::little>(Poison)
- : support::endian::byte_swap<uint32_t, support::big>(Poison);
+void
+FunctionStackPoisoner::poisonRedZones(const ArrayRef<uint8_t> ShadowBytes,
+ IRBuilder<> &IRB, Value *ShadowBase,
+ bool DoPoison) {
+ size_t n = ShadowBytes.size();
+ size_t i = 0;
+ // We need to (un)poison n bytes of stack shadow. Poison as many as we can
+ // using 64-bit stores (if we are on 64-bit arch), then poison the rest
+ // with 32-bit stores, then with 16-bit stores, then with 8-bit stores.
+ for (size_t LargeStoreSizeInBytes = ASan.LongSize / 8;
+ LargeStoreSizeInBytes != 0; LargeStoreSizeInBytes /= 2) {
+ for (; i + LargeStoreSizeInBytes - 1 < n; i += LargeStoreSizeInBytes) {
+ uint64_t Val = 0;
+ for (size_t j = 0; j < LargeStoreSizeInBytes; j++) {
+ if (ASan.DL->isLittleEndian())
+ Val |= (uint64_t)ShadowBytes[i + j] << (8 * j);
+ else
+ Val = (Val << 8) | ShadowBytes[i + j];
}
- Value *PartialPoison = ConstantInt::get(RZTy, Poison);
- IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
+ if (!Val) continue;
+ Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
+ Type *StoreTy = Type::getIntNTy(*C, LargeStoreSizeInBytes * 8);
+ Value *Poison = ConstantInt::get(StoreTy, DoPoison ? Val : 0);
+ IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, StoreTy->getPointerTo()));
}
-
- // Poison the full redzone at right.
- Ptr = IRB.CreateAdd(ShadowBase,
- ConstantInt::get(IntptrTy, Pos >> Mapping.Scale));
- bool LastAlloca = (i == AllocaVec.size() - 1);
- Value *Poison = LastAlloca ? PoisonRight : PoisonMid;
- IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
-
- Pos += RedzoneSize();
}
}
@@ -1468,24 +1465,37 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
}
void FunctionStackPoisoner::poisonStack() {
- uint64_t LocalStackSize = TotalStackSize +
- (AllocaVec.size() + 1) * RedzoneSize();
-
- bool DoStackMalloc = ASan.CheckUseAfterReturn
- && LocalStackSize <= kMaxStackMallocSize;
int StackMallocIdx = -1;
assert(AllocaVec.size() > 0);
Instruction *InsBefore = AllocaVec[0];
IRBuilder<> IRB(InsBefore);
+ SmallVector<ASanStackVariableDescription, 16> SVD;
+ SVD.reserve(AllocaVec.size());
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
+ AllocaInst *AI = AllocaVec[i];
+ ASanStackVariableDescription D = { AI->getName().data(),
+ getAllocaSizeInBytes(AI),
+ AI->getAlignment(), AI, 0};
+ SVD.push_back(D);
+ }
+ // Minimal header size (left redzone) is 4 pointers,
+ // i.e. 32 bytes on 64-bit platforms and 16 bytes on 32-bit platforms.
+ size_t MinHeaderSize = ASan.LongSize / 2;
+ ASanStackFrameLayout L;
+ ComputeASanStackFrameLayout(SVD, 1UL << Mapping.Scale, MinHeaderSize, &L);
+ DEBUG(dbgs() << L.DescriptionString << " --- " << L.FrameSize << "\n");
+ uint64_t LocalStackSize = L.FrameSize;
+ bool DoStackMalloc =
+ ASan.CheckUseAfterReturn && LocalStackSize <= kMaxStackMallocSize;
Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
AllocaInst *MyAlloca =
new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
- if (ClRealignStack && StackAlignment < RedzoneSize())
- StackAlignment = RedzoneSize();
- MyAlloca->setAlignment(StackAlignment);
+ assert((ClRealignStack & (ClRealignStack - 1)) == 0);
+ size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack);
+ MyAlloca->setAlignment(FrameAlignment);
assert(MyAlloca->isStaticAlloca());
Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy);
Value *LocalStackBase = OrigStackBase;
@@ -1500,8 +1510,7 @@ void FunctionStackPoisoner::poisonStack() {
kAsanOptionDetectUAR, IRB.getInt32Ty());
Value *Cmp = IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUAR),
Constant::getNullValue(IRB.getInt32Ty()));
- Instruction *Term =
- SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ Instruction *Term = SplitBlockAndInsertIfThen(Cmp, InsBefore, false);
BasicBlock *CmpBlock = cast<Instruction>(Cmp)->getParent();
IRBuilder<> IRBIf(Term);
LocalStackBase = IRBIf.CreateCall2(
@@ -1515,11 +1524,6 @@ void FunctionStackPoisoner::poisonStack() {
LocalStackBase = Phi;
}
- // This string will be parsed by the run-time (DescribeAddressIfStack).
- SmallString<2048> StackDescriptionStorage;
- raw_svector_ostream StackDescription(StackDescriptionStorage);
- StackDescription << AllocaVec.size() << " ";
-
// Insert poison calls for lifetime intrinsics for alloca.
bool HavePoisonedAllocas = false;
for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) {
@@ -1531,24 +1535,16 @@ void FunctionStackPoisoner::poisonStack() {
HavePoisonedAllocas |= APC.DoPoison;
}
- uint64_t Pos = RedzoneSize();
// Replace Alloca instructions with base+offset.
- for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
- AllocaInst *AI = AllocaVec[i];
- uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
- StringRef Name = AI->getName();
- StackDescription << Pos << " " << SizeInBytes << " "
- << Name.size() << " " << Name << " ";
- uint64_t AlignedSize = getAlignedAllocaSize(AI);
- assert((AlignedSize % RedzoneSize()) == 0);
+ for (size_t i = 0, n = SVD.size(); i < n; i++) {
+ AllocaInst *AI = SVD[i].AI;
Value *NewAllocaPtr = IRB.CreateIntToPtr(
- IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)),
- AI->getType());
+ IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, SVD[i].Offset)),
+ AI->getType());
replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB);
AI->replaceAllUsesWith(NewAllocaPtr);
- Pos += AlignedSize + RedzoneSize();
}
- assert(Pos == LocalStackSize);
// The left-most redzone has enough space for at least 4 pointers.
// Write the Magic value to redzone[0].
@@ -1560,7 +1556,8 @@ void FunctionStackPoisoner::poisonStack() {
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)),
IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
- createPrivateGlobalForString(*F.getParent(), StackDescription.str());
+ createPrivateGlobalForString(*F.getParent(), L.DescriptionString,
+ /*AllowMerging*/true);
Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal,
IntptrTy);
IRB.CreateStore(Description, BasePlus1);
@@ -1573,30 +1570,33 @@ void FunctionStackPoisoner::poisonStack() {
// Poison the stack redzones at the entry.
Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
- poisonRedZones(AllocaVec, IRB, ShadowBase, true);
+ poisonRedZones(L.ShadowBytes, IRB, ShadowBase, true);
- // Unpoison the stack before all ret instructions.
+ // (Un)poison the stack before all ret instructions.
for (size_t i = 0, n = RetVec.size(); i < n; i++) {
Instruction *Ret = RetVec[i];
IRBuilder<> IRBRet(Ret);
// Mark the current frame as retired.
IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
BasePlus0);
- // Unpoison the stack.
- poisonRedZones(AllocaVec, IRBRet, ShadowBase, false);
if (DoStackMalloc) {
assert(StackMallocIdx >= 0);
- // In use-after-return mode, mark the whole stack frame unaddressable.
+ // if LocalStackBase != OrigStackBase:
+ // // In use-after-return mode, poison the whole stack frame.
+ // if StackMallocIdx <= 4
+ // // For small sizes inline the whole thing:
+ // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize);
+ // **SavedFlagPtr(LocalStackBase) = 0
+ // else
+ // __asan_stack_free_N(LocalStackBase, OrigStackBase)
+ // else
+ // <This is not a fake stack; unpoison the redzones>
+ Value *Cmp = IRBRet.CreateICmpNE(LocalStackBase, OrigStackBase);
+ TerminatorInst *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm);
+
+ IRBuilder<> IRBPoison(ThenTerm);
if (StackMallocIdx <= 4) {
- // For small sizes inline the whole thing:
- // if LocalStackBase != OrigStackBase:
- // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize);
- // **SavedFlagPtr(LocalStackBase) = 0
- // FIXME: if LocalStackBase != OrigStackBase don't call poisonRedZones.
- Value *Cmp = IRBRet.CreateICmpNE(LocalStackBase, OrigStackBase);
- TerminatorInst *PoisonTerm =
- SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
- IRBuilder<> IRBPoison(PoisonTerm);
int ClassSize = kMinStackMallocSize << StackMallocIdx;
SetShadowToStackAfterReturnInlined(IRBPoison, ShadowBase,
ClassSize >> Mapping.Scale);
@@ -1610,15 +1610,20 @@ void FunctionStackPoisoner::poisonStack() {
IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
} else {
// For larger frames call __asan_stack_free_*.
- IRBRet.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase,
- ConstantInt::get(IntptrTy, LocalStackSize),
- OrigStackBase);
+ IRBPoison.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase,
+ ConstantInt::get(IntptrTy, LocalStackSize),
+ OrigStackBase);
}
+
+ IRBuilder<> IRBElse(ElseTerm);
+ poisonRedZones(L.ShadowBytes, IRBElse, ShadowBase, false);
} else if (HavePoisonedAllocas) {
// If we poisoned some allocas in llvm.lifetime analysis,
// unpoison whole stack frame now.
assert(LocalStackBase == OrigStackBase);
poisonAlloca(LocalStackBase, LocalStackSize, IRBRet, false);
+ } else {
+ poisonRedZones(L.ShadowBytes, IRBRet, ShadowBase, false);
}
}
diff --git a/lib/Transforms/Instrumentation/Android.mk b/lib/Transforms/Instrumentation/Android.mk
index cd90933..f9a55c7 100644
--- a/lib/Transforms/Instrumentation/Android.mk
+++ b/lib/Transforms/Instrumentation/Android.mk
@@ -24,6 +24,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
# For the target
# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_MODULE:= libLLVMInstrumentation
@@ -33,3 +34,4 @@ LOCAL_SRC_FILES := $(instrumentation_SRC_FILES)
include $(LLVM_DEVICE_BUILD_MK)
include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 7a9f0f6..505fb83 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -16,14 +16,14 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/TargetFolder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
@@ -45,15 +45,15 @@ namespace {
initializeBoundsCheckingPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DataLayout>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DataLayoutPass>();
AU.addRequired<TargetLibraryInfo>();
}
private:
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
ObjectSizeOffsetEvaluator *ObjSizeEval;
BuilderTy *Builder;
@@ -62,8 +62,6 @@ namespace {
BasicBlock *getTrapBB();
void emitBranchToTrap(Value *Cmp = 0);
- bool computeAllocSize(Value *Ptr, APInt &Offset, Value* &OffsetValue,
- APInt &Size, Value* &SizeValue);
bool instrument(Value *Ptr, Value *Val);
};
}
@@ -127,7 +125,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
/// size of memory block that is touched.
/// Returns true if any change was made to the IR, false otherwise.
bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
- uint64_t NeededSize = TD->getTypeStoreSize(InstVal->getType());
+ uint64_t NeededSize = DL->getTypeStoreSize(InstVal->getType());
DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
<< " bytes\n");
@@ -142,7 +140,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
Value *Offset = SizeOffset.second;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
- Type *IntTy = TD->getIntPtrType(Ptr->getType());
+ Type *IntTy = DL->getIntPtrType(Ptr->getType());
Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
// three checks are required to ensure safety:
@@ -166,13 +164,13 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
}
bool BoundsChecking::runOnFunction(Function &F) {
- TD = &getAnalysis<DataLayout>();
+ DL = &getAnalysis<DataLayoutPass>().getDataLayout();
TLI = &getAnalysis<TargetLibraryInfo>();
TrapBB = 0;
- BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
+ BuilderTy TheBuilder(F.getContext(), TargetFolder(DL));
Builder = &TheBuilder;
- ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext(),
+ ObjectSizeOffsetEvaluator TheObjSizeEval(DL, TLI, F.getContext(),
/*RoundToAlign=*/true);
ObjSizeEval = &TheObjSizeEval;
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 9b9e725..df1549d 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -50,13 +50,13 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -96,6 +96,22 @@ static cl::opt<bool> ClArgsABI(
cl::desc("Use the argument ABI rather than the TLS ABI"),
cl::Hidden);
+// Controls whether the pass includes or ignores the labels of pointers in load
+// instructions.
+static cl::opt<bool> ClCombinePointerLabelsOnLoad(
+ "dfsan-combine-pointer-labels-on-load",
+ cl::desc("Combine the label of the pointer with the label of the data when "
+ "loading from memory."),
+ cl::Hidden, cl::init(true));
+
+// Controls whether the pass includes or ignores the labels of pointers in
+// stores instructions.
+static cl::opt<bool> ClCombinePointerLabelsOnStore(
+ "dfsan-combine-pointer-labels-on-store",
+ cl::desc("Combine the label of the pointer with the label of the data when "
+ "storing in memory."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> ClDebugNonzeroLabels(
"dfsan-debug-nonzero-labels",
cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
@@ -148,7 +164,7 @@ class DataFlowSanitizer : public ModulePass {
WK_Custom
};
- DataLayout *DL;
+ const DataLayout *DL;
Module *Mod;
LLVMContext *Ctx;
IntegerType *ShadowTy;
@@ -174,7 +190,7 @@ class DataFlowSanitizer : public ModulePass {
Constant *DFSanSetLabelFn;
Constant *DFSanNonzeroLabelFn;
MDNode *ColdCallWeights;
- OwningPtr<SpecialCaseList> ABIList;
+ std::unique_ptr<SpecialCaseList> ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
AttributeSet ReadOnlyNoneAttrs;
@@ -197,8 +213,8 @@ class DataFlowSanitizer : public ModulePass {
DataFlowSanitizer(StringRef ABIListFile = StringRef(),
void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0);
static char ID;
- bool doInitialization(Module &M);
- bool runOnModule(Module &M);
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
};
struct DFSanFunction {
@@ -327,9 +343,10 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
}
bool DataFlowSanitizer::doInitialization(Module &M) {
- DL = getAnalysisIfAvailable<DataLayout>();
- if (!DL)
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP)
return false;
+ DL = &DLP->getDataLayout();
Mod = &M;
Ctx = &M.getContext();
@@ -505,6 +522,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
DFSanUnionLoadFn =
Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
+ F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly);
F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
}
DFSanUnimplementedFn =
@@ -536,8 +554,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
++i;
// Don't stop on weak. We assume people aren't playing games with the
// instrumentedness of overridden weak aliases.
- if (Function *F = dyn_cast<Function>(
- GA->resolveAliasedGlobal(/*stopOnWeak=*/false))) {
+ if (Function *F = dyn_cast<Function>(GA->getAliasedGlobal())) {
bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
if (GAInst && FInst) {
addGlobalNamePrefix(GA);
@@ -589,10 +606,10 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
}
NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
- for (Function::use_iterator ui = F.use_begin(), ue = F.use_end();
- ui != ue;) {
- BlockAddress *BA = dyn_cast<BlockAddress>(ui.getUse().getUser());
- ++ui;
+ for (Function::user_iterator UI = F.user_begin(), UE = F.user_end();
+ UI != UE;) {
+ BlockAddress *BA = dyn_cast<BlockAddress>(*UI);
+ ++UI;
if (BA) {
BA->replaceAllUsesWith(
BlockAddress::get(NewF, BA->getBasicBlock()));
@@ -718,10 +735,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
Pos = Pos->getNextNode();
IRBuilder<> IRB(Pos);
- Instruction *NeInst = cast<Instruction>(
- IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow));
+ Value *Ne = IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow);
BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
- NeInst, /*Unreachable=*/ false, ColdCallWeights));
+ Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
IRBuilder<> ThenIRB(BI);
ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn);
}
@@ -821,26 +837,19 @@ Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2,
IRBuilder<> IRB(Pos);
BasicBlock *Head = Pos->getParent();
Value *Ne = IRB.CreateICmpNE(V1, V2);
- Instruction *NeInst = dyn_cast<Instruction>(Ne);
- if (NeInst) {
- BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
- NeInst, /*Unreachable=*/ false, ColdCallWeights));
- IRBuilder<> ThenIRB(BI);
- CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2);
- Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
- Call->addAttribute(1, Attribute::ZExt);
- Call->addAttribute(2, Attribute::ZExt);
-
- BasicBlock *Tail = BI->getSuccessor(0);
- PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
- Phi->addIncoming(Call, Call->getParent());
- Phi->addIncoming(V1, Head);
- Pos = Phi;
- return Phi;
- } else {
- assert(0 && "todo");
- return 0;
- }
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
+ IRBuilder<> ThenIRB(BI);
+ CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2);
+ Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ Call->addAttribute(1, Attribute::ZExt);
+ Call->addAttribute(2, Attribute::ZExt);
+
+ BasicBlock *Tail = BI->getSuccessor(0);
+ PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
+ Phi->addIncoming(Call, Call->getParent());
+ Phi->addIncoming(V1, Head);
+ return Phi;
}
// A convenience function which folds the shadows of each of the operands
@@ -978,14 +987,15 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
Align = 1;
}
IRBuilder<> IRB(&LI);
- Value *LoadedShadow =
- DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
- Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
- Value *CombinedShadow = DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI);
- if (CombinedShadow != DFSF.DFS.ZeroShadow)
- DFSF.NonZeroChecks.insert(CombinedShadow);
-
- DFSF.setShadow(&LI, CombinedShadow);
+ Value *Shadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
+ if (ClCombinePointerLabelsOnLoad) {
+ Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
+ Shadow = DFSF.DFS.combineShadows(Shadow, PtrShadow, &LI);
+ }
+ if (Shadow != DFSF.DFS.ZeroShadow)
+ DFSF.NonZeroChecks.insert(Shadow);
+
+ DFSF.setShadow(&LI, Shadow);
}
void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
@@ -1050,8 +1060,13 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
} else {
Align = 1;
}
- DFSF.storeShadow(SI.getPointerOperand(), Size, Align,
- DFSF.getShadow(SI.getValueOperand()), &SI);
+
+ Value* Shadow = DFSF.getShadow(SI.getValueOperand());
+ if (ClCombinePointerLabelsOnStore) {
+ Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
+ Shadow = DFSF.DFS.combineShadows(Shadow, PtrShadow, &SI);
+ }
+ DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI);
}
void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
@@ -1088,12 +1103,11 @@ void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
bool AllLoadsStores = true;
- for (Instruction::use_iterator i = I.use_begin(), e = I.use_end(); i != e;
- ++i) {
- if (isa<LoadInst>(*i))
+ for (User *U : I.users()) {
+ if (isa<LoadInst>(U))
continue;
- if (StoreInst *SI = dyn_cast<StoreInst>(*i)) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getPointerOperand() == &I)
continue;
}
diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp
index f50a044..069886e 100644
--- a/lib/Transforms/Instrumentation/DebugIR.cpp
+++ b/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -18,25 +18,23 @@
#define DEBUG_TYPE "debug-ir"
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/Assembly/AssemblyAnnotationWriter.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/InstVisitor.h"
+#include "llvm/IR/ValueMap.h"
+#include "DebugIR.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Path.h"
-
-#include "DebugIR.h"
-
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include <string>
#define STR_HELPER(x) #x
@@ -69,11 +67,12 @@ public:
// This function is called after an Instruction, GlobalValue, or GlobalAlias
// is printed.
- void printInfoComment(const Value &V, formatted_raw_ostream &Out) {
+ void printInfoComment(const Value &V, formatted_raw_ostream &Out) override {
addEntry(&V, Out);
}
- void emitFunctionAnnot(const Function *F, formatted_raw_ostream &Out) {
+ void emitFunctionAnnot(const Function *F,
+ formatted_raw_ostream &Out) override {
addEntry(F, Out);
}
@@ -184,8 +183,8 @@ public:
if (Finder.compile_unit_count() > 1)
report_fatal_error("DebugIR pass supports only a signle compile unit per "
"Module.");
- createCompileUnit(
- Finder.compile_unit_count() == 1 ? *Finder.compile_unit_begin() : 0);
+ createCompileUnit(Finder.compile_unit_count() == 1 ?
+ (MDNode*)*Finder.compile_units().begin() : 0);
}
void visitFunction(Function &F) {
@@ -326,14 +325,11 @@ private:
<< " subprogram nodes"
<< "\n");
- for (DebugInfoFinder::iterator i = Finder.subprogram_begin(),
- e = Finder.subprogram_end();
- i != e; ++i) {
- DISubprogram S(*i);
+ for (DISubprogram S : Finder.subprograms()) {
if (S.getFunction() == F) {
- DEBUG(dbgs() << "Found DISubprogram " << *i << " for function "
+ DEBUG(dbgs() << "Found DISubprogram " << S << " for function "
<< S.getFunction() << "\n");
- return *i;
+ return S;
}
}
DEBUG(dbgs() << "unable to find DISubprogram node for function "
@@ -504,7 +500,7 @@ bool DebugIR::updateExtension(StringRef NewExtension) {
return true;
}
-void DebugIR::generateFilename(OwningPtr<int> &fd) {
+void DebugIR::generateFilename(std::unique_ptr<int> &fd) {
SmallVector<char, 16> PathVec;
fd.reset(new int);
sys::fs::createTemporaryFile("debug-ir", "ll", *fd, PathVec);
@@ -525,12 +521,12 @@ std::string DebugIR::getPath() {
}
void DebugIR::writeDebugBitcode(const Module *M, int *fd) {
- OwningPtr<raw_fd_ostream> Out;
+ std::unique_ptr<raw_fd_ostream> Out;
std::string error;
if (!fd) {
std::string Path = getPath();
- Out.reset(new raw_fd_ostream(Path.c_str(), error));
+ Out.reset(new raw_fd_ostream(Path.c_str(), error, sys::fs::F_Text));
DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to file "
<< Path << "\n");
} else {
@@ -543,12 +539,12 @@ void DebugIR::writeDebugBitcode(const Module *M, int *fd) {
Out->close();
}
-void DebugIR::createDebugInfo(Module &M, OwningPtr<Module> &DisplayM) {
+void DebugIR::createDebugInfo(Module &M, std::unique_ptr<Module> &DisplayM) {
if (M.getFunctionList().size() == 0)
// no functions -- no debug info needed
return;
- OwningPtr<ValueToValueMapTy> VMap;
+ std::unique_ptr<ValueToValueMapTy> VMap;
if (WriteSourceToDisk && (HideDebugIntrinsics || HideDebugMetadata)) {
VMap.reset(new ValueToValueMapTy);
@@ -567,7 +563,7 @@ void DebugIR::createDebugInfo(Module &M, OwningPtr<Module> &DisplayM) {
bool DebugIR::isMissingPath() { return Filename.empty() || Directory.empty(); }
bool DebugIR::runOnModule(Module &M) {
- OwningPtr<int> fd;
+ std::unique_ptr<int> fd;
if (isMissingPath() && !getSourceInfo(M)) {
if (!WriteSourceToDisk)
@@ -586,7 +582,7 @@ bool DebugIR::runOnModule(Module &M) {
// file name from the DICompileUnit descriptor.
DebugMetadataRemover::process(M, !ParsedPath);
- OwningPtr<Module> DisplayM;
+ std::unique_ptr<Module> DisplayM;
createDebugInfo(M, DisplayM);
if (WriteSourceToDisk) {
Module *OutputM = DisplayM.get() ? DisplayM.get() : &M;
diff --git a/lib/Transforms/Instrumentation/DebugIR.h b/lib/Transforms/Instrumentation/DebugIR.h
index 13774cf..3f57da5 100644
--- a/lib/Transforms/Instrumentation/DebugIR.h
+++ b/lib/Transforms/Instrumentation/DebugIR.h
@@ -16,7 +16,6 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_DEBUGIR_H
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/Pass.h"
namespace llvm {
@@ -43,7 +42,7 @@ class DebugIR : public llvm::ModulePass {
public:
static char ID;
- const char *getPassName() const { return "DebugIR"; }
+ const char *getPassName() const override { return "DebugIR"; }
/// Generate a file on disk to be displayed in a debugger. If Filename and
/// Directory are empty, a temporary path will be generated.
@@ -62,7 +61,7 @@ public:
/// Run pass on M and set Path to the source file path in the output module.
bool runOnModule(llvm::Module &M, std::string &Path);
- bool runOnModule(llvm::Module &M);
+ bool runOnModule(llvm::Module &M) override;
private:
@@ -79,11 +78,11 @@ private:
bool updateExtension(llvm::StringRef NewExtension);
/// Generate a temporary filename and open an fd
- void generateFilename(llvm::OwningPtr<int> &fd);
+ void generateFilename(std::unique_ptr<int> &fd);
/// Creates DWARF CU/Subroutine metadata
void createDebugInfo(llvm::Module &M,
- llvm::OwningPtr<llvm::Module> &DisplayM);
+ std::unique_ptr<llvm::Module> &DisplayM);
/// Returns true if either Directory or Filename is missing, false otherwise.
bool isMissingPath();
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 206bffb..bd00ec8 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -18,21 +18,23 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/UniqueVector.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -62,20 +64,28 @@ GCOVOptions GCOVOptions::getDefault() {
}
namespace {
+ class GCOVFunction;
+
class GCOVProfiler : public ModulePass {
public:
static char ID;
GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
- ReversedVersion[0] = Options.Version[3];
- ReversedVersion[1] = Options.Version[2];
- ReversedVersion[2] = Options.Version[1];
- ReversedVersion[3] = Options.Version[0];
- ReversedVersion[4] = '\0';
- initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+ init();
}
GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
assert((Options.EmitNotes || Options.EmitData) &&
"GCOVProfiler asked to do nothing?");
+ init();
+ }
+ ~GCOVProfiler() {
+ DeleteContainerPointers(Funcs);
+ }
+ const char *getPassName() const override {
+ return "GCOV Profiler";
+ }
+
+ private:
+ void init() {
ReversedVersion[0] = Options.Version[3];
ReversedVersion[1] = Options.Version[2];
ReversedVersion[2] = Options.Version[1];
@@ -83,12 +93,7 @@ namespace {
ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- virtual const char *getPassName() const {
- return "GCOV Profiler";
- }
-
- private:
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
// Create the .gcno files for the Module based on DebugInfo.
void emitProfileNotes();
@@ -130,10 +135,13 @@ namespace {
GCOVOptions Options;
// Reversed, NUL-terminated copy of Options.Version.
- char ReversedVersion[5];
+ char ReversedVersion[5];
+ // Checksum, produced by hash of EdgeDestinations
+ SmallVector<uint32_t, 4> FileChecksums;
Module *M;
LLVMContext *Ctx;
+ SmallVector<GCOVFunction *, 16> Funcs;
};
}
@@ -145,7 +153,7 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
return new GCOVProfiler(Options);
}
-static std::string getFunctionName(DISubprogram SP) {
+static StringRef getFunctionName(DISubprogram SP) {
if (!SP.getLinkageName().empty())
return SP.getLinkageName();
return SP.getName();
@@ -220,7 +228,7 @@ namespace {
write(Lines[i]);
}
- GCOVLines(StringRef F, raw_ostream *os)
+ GCOVLines(StringRef F, raw_ostream *os)
: Filename(F) {
this->os = os;
}
@@ -231,14 +239,6 @@ namespace {
};
- // Sorting function for deterministic behaviour in GCOVBlock::writeOut.
- struct StringKeySort {
- bool operator()(StringMapEntry<GCOVLines *> *LHS,
- StringMapEntry<GCOVLines *> *RHS) const {
- return LHS->getKey() < RHS->getKey();
- }
- };
-
// Represent a basic block in GCOV. Each block has a unique number in the
// function, number of lines belonging to each block, and a set of edges to
// other blocks.
@@ -269,11 +269,14 @@ namespace {
write(Len);
write(Number);
- StringKeySort Sorter;
- std::sort(SortedLinesByFile.begin(), SortedLinesByFile.end(), Sorter);
+ std::sort(SortedLinesByFile.begin(), SortedLinesByFile.end(),
+ [](StringMapEntry<GCOVLines *> *LHS,
+ StringMapEntry<GCOVLines *> *RHS) {
+ return LHS->getKey() < RHS->getKey();
+ });
for (SmallVectorImpl<StringMapEntry<GCOVLines *> *>::iterator
I = SortedLinesByFile.begin(), E = SortedLinesByFile.end();
- I != E; ++I)
+ I != E; ++I)
(*I)->getValue()->writeOut();
write(0);
write(0);
@@ -302,30 +305,23 @@ namespace {
class GCOVFunction : public GCOVRecord {
public:
GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
- bool UseCfgChecksum) {
+ bool UseCfgChecksum) :
+ SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0) {
this->os = os;
Function *F = SP.getFunction();
- DEBUG(dbgs() << "Function: " << F->getName() << "\n");
+ DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
uint32_t i = 0;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
Blocks[BB] = new GCOVBlock(i++, os);
}
ReturnBlock = new GCOVBlock(i++, os);
- writeBytes(FunctionTag, 4);
- uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
- 1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (UseCfgChecksum)
- ++BlockLen;
- write(BlockLen);
- write(Ident);
- write(0); // lineno checksum
- if (UseCfgChecksum)
- write(0); // cfg checksum
- writeGCOVString(getFunctionName(SP));
- writeGCOVString(SP.getFilename());
- write(SP.getLineNumber());
+ std::string FunctionNameAndLine;
+ raw_string_ostream FNLOS(FunctionNameAndLine);
+ FNLOS << getFunctionName(SP) << SP.getLineNumber();
+ FNLOS.flush();
+ FuncChecksum = hash_value(FunctionNameAndLine);
}
~GCOVFunction() {
@@ -341,7 +337,41 @@ namespace {
return *ReturnBlock;
}
+ std::string getEdgeDestinations() {
+ std::string EdgeDestinations;
+ raw_string_ostream EDOS(EdgeDestinations);
+ Function *F = Blocks.begin()->first->getParent();
+ for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ GCOVBlock &Block = *Blocks[I];
+ for (int i = 0, e = Block.OutEdges.size(); i != e; ++i)
+ EDOS << Block.OutEdges[i]->Number;
+ }
+ return EdgeDestinations;
+ }
+
+ uint32_t getFuncChecksum() {
+ return FuncChecksum;
+ }
+
+ void setCfgChecksum(uint32_t Checksum) {
+ CfgChecksum = Checksum;
+ }
+
void writeOut() {
+ writeBytes(FunctionTag, 4);
+ uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
+ 1 + lengthOfGCOVString(SP.getFilename()) + 1;
+ if (UseCfgChecksum)
+ ++BlockLen;
+ write(BlockLen);
+ write(Ident);
+ write(FuncChecksum);
+ if (UseCfgChecksum)
+ write(CfgChecksum);
+ writeGCOVString(getFunctionName(SP));
+ writeGCOVString(SP.getFilename());
+ write(SP.getLineNumber());
+
// Emit count of blocks.
writeBytes(BlockTag, 4);
write(Blocks.size() + 1);
@@ -375,6 +405,11 @@ namespace {
}
private:
+ DISubprogram SP;
+ uint32_t Ident;
+ uint32_t FuncChecksum;
+ bool UseCfgChecksum;
+ uint32_t CfgChecksum;
DenseMap<BasicBlock *, GCOVBlock *> Blocks;
GCOVBlock *ReturnBlock;
};
@@ -426,10 +461,8 @@ void GCOVProfiler::emitProfileNotes() {
DICompileUnit CU(CU_Nodes->getOperand(i));
std::string ErrorInfo;
raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
- sys::fs::F_Binary);
- out.write("oncg", 4);
- out.write(ReversedVersion, 4);
- out.write("MVLL", 4);
+ sys::fs::F_None);
+ std::string EdgeDestinations;
DIArray SPs = CU.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
@@ -441,17 +474,28 @@ void GCOVProfiler::emitProfileNotes() {
Function *F = SP.getFunction();
if (!F) continue;
- GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum);
+
+ // gcov expects every function to start with an entry block that has a
+ // single successor, so split the entry block to make sure of that.
+ BasicBlock &EntryBlock = F->getEntryBlock();
+ BasicBlock::iterator It = EntryBlock.begin();
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+ ++It;
+ EntryBlock.splitBasicBlock(It);
+
+ GCOVFunction *Func =
+ new GCOVFunction(SP, &out, i, Options.UseCfgChecksum);
+ Funcs.push_back(Func);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
+ GCOVBlock &Block = Func->getBlock(BB);
TerminatorInst *TI = BB->getTerminator();
if (int successors = TI->getNumSuccessors()) {
for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ Block.addEdge(Func->getBlock(TI->getSuccessor(i)));
}
} else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func.getReturnBlock());
+ Block.addEdge(Func->getReturnBlock());
}
uint32_t Line = 0;
@@ -467,8 +511,21 @@ void GCOVProfiler::emitProfileNotes() {
Lines.addLine(Loc.getLine());
}
}
- Func.writeOut();
+ EdgeDestinations += Func->getEdgeDestinations();
}
+
+ FileChecksums.push_back(hash_value(EdgeDestinations));
+ out.write("oncg", 4);
+ out.write(ReversedVersion, 4);
+ out.write(reinterpret_cast<char*>(&FileChecksums.back()), 4);
+
+ for (SmallVectorImpl<GCOVFunction *>::iterator I = Funcs.begin(),
+ E = Funcs.end(); I != E; ++I) {
+ GCOVFunction *Func = *I;
+ Func->setCfgChecksum(FileChecksums.back());
+ Func->writeOut();
+ }
+
out.write("\0\0\0\0\0\0\0\0", 8); // EOF
out.close();
}
@@ -478,7 +535,7 @@ bool GCOVProfiler::emitProfileArcs() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return false;
- bool Result = false;
+ bool Result = false;
bool InsertIndCounterIncrCode = false;
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CU(CU_Nodes->getOperand(i));
@@ -501,7 +558,7 @@ bool GCOVProfiler::emitProfileArcs() {
else
Edges += TI->getNumSuccessors();
}
-
+
ArrayType *CounterTy =
ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
GlobalVariable *Counters =
@@ -510,10 +567,10 @@ bool GCOVProfiler::emitProfileArcs() {
Constant::getNullValue(CounterTy),
"__llvm_gcov_ctr");
CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
-
+
UniqueVector<BasicBlock *> ComplexEdgePreds;
UniqueVector<BasicBlock *> ComplexEdgeSuccs;
-
+
unsigned Edge = 0;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
TerminatorInst *TI = BB->getTerminator();
@@ -547,13 +604,13 @@ bool GCOVProfiler::emitProfileArcs() {
Edge += Successors;
}
}
-
+
if (!ComplexEdgePreds.empty()) {
GlobalVariable *EdgeTable =
buildEdgeLookupTable(F, Counters,
ComplexEdgePreds, ComplexEdgeSuccs);
GlobalVariable *EdgeState = getEdgeStateValue();
-
+
for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt());
Builder.CreateStore(Builder.getInt32(i), EdgeState);
@@ -630,7 +687,7 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
ArrayType *EdgeTableTy = ArrayType::get(Int64PtrTy, TableSize);
- OwningArrayPtr<Constant *> EdgeTable(new Constant*[TableSize]);
+ std::unique_ptr<Constant * []> EdgeTable(new Constant *[TableSize]);
Constant *NullValue = Constant::getNullValue(Int64PtrTy);
for (size_t i = 0; i != TableSize; ++i)
EdgeTable[i] = NullValue;
@@ -666,6 +723,7 @@ Constant *GCOVProfiler::getStartFileFunc() {
Type *Args[] = {
Type::getInt8PtrTy(*Ctx), // const char *orig_filename
Type::getInt8PtrTy(*Ctx), // const char version[4]
+ Type::getInt32Ty(*Ctx), // uint32_t checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
@@ -683,10 +741,12 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
}
Constant *GCOVProfiler::getEmitFunctionFunc() {
- Type *Args[3] = {
+ Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
+ Type::getInt32Ty(*Ctx), // uint32_t func_checksum
Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum
+ Type::getInt32Ty(*Ctx), // uint32_t cfg_checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
@@ -760,17 +820,22 @@ Function *GCOVProfiler::insertCounterWriteout(
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CU(CU_Nodes->getOperand(i));
std::string FilenameGcda = mangleName(CU, "gcda");
- Builder.CreateCall2(StartFile,
+ uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
+ Builder.CreateCall3(StartFile,
Builder.CreateGlobalStringPtr(FilenameGcda),
- Builder.CreateGlobalStringPtr(ReversedVersion));
+ Builder.CreateGlobalStringPtr(ReversedVersion),
+ Builder.getInt32(CfgChecksum));
for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
DISubprogram SP(CountersBySP[j].second);
- Builder.CreateCall3(
+ uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
+ Builder.CreateCall5(
EmitFunction, Builder.getInt32(j),
Options.FunctionNamesInData ?
Builder.CreateGlobalStringPtr(getFunctionName(SP)) :
Constant::getNullValue(Builder.getInt8PtrTy()),
- Builder.getInt8(Options.UseCfgChecksum));
+ Builder.getInt32(FuncChecksum),
+ Builder.getInt8(Options.UseCfgChecksum),
+ Builder.getInt32(CfgChecksum));
GlobalVariable *GV = CountersBySP[j].first;
unsigned Arcs =
@@ -818,7 +883,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
// uint64_t *counter = counters[pred];
// if (!counter) return;
Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
- Arg = llvm::next(Fn->arg_begin());
+ Arg = std::next(Fn->arg_begin());
Arg->setName("counters");
Value *GEP = Builder.CreateGEP(Arg, ZExtPred);
Value *Counter = Builder.CreateLoad(GEP, "counter");
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index b1bea38..ac1dd43 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt
index d36ad54..99e95df 100644
--- a/lib/Transforms/Instrumentation/LLVMBuild.txt
+++ b/lib/Transforms/Instrumentation/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = Instrumentation
parent = Transforms
-required_libraries = Analysis Core Support TransformUtils
+required_libraries = Analysis Core Support Target TransformUtils
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index d547adc..ec1a195 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -100,17 +100,17 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/ValueMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/InstVisitor.h"
+#include "llvm/IR/ValueMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -133,9 +133,9 @@ static const unsigned kShadowTLSAlignment = 8;
///
/// Adds a section to MemorySanitizer report that points to the allocation
/// (stack or heap) the uninitialized bits came from originally.
-static cl::opt<bool> ClTrackOrigins("msan-track-origins",
+static cl::opt<int> ClTrackOrigins("msan-track-origins",
cl::desc("Track origins (allocation sites) of poisoned memory"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(0));
static cl::opt<bool> ClKeepGoing("msan-keep-going",
cl::desc("keep going after reporting a UMR"),
cl::Hidden, cl::init(false));
@@ -160,10 +160,6 @@ static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
cl::desc("exact handling of relational integer ICmp"),
cl::Hidden, cl::init(false));
-static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin",
- cl::desc("store origin for clean (fully initialized) values"),
- cl::Hidden, cl::init(false));
-
// This flag controls whether we check the shadow of the address
// operand of load or store. Such bugs are very rare, since load from
// a garbage address typically results in SEGV, but still happen
@@ -203,26 +199,26 @@ namespace {
/// uninitialized reads.
class MemorySanitizer : public FunctionPass {
public:
- MemorySanitizer(bool TrackOrigins = false,
+ MemorySanitizer(int TrackOrigins = 0,
StringRef BlacklistFile = StringRef())
: FunctionPass(ID),
- TrackOrigins(TrackOrigins || ClTrackOrigins),
- TD(0),
+ TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
+ DL(0),
WarningFn(0),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile),
WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {}
- const char *getPassName() const { return "MemorySanitizer"; }
- bool runOnFunction(Function &F);
- bool doInitialization(Module &M);
+ const char *getPassName() const override { return "MemorySanitizer"; }
+ bool runOnFunction(Function &F) override;
+ bool doInitialization(Module &M) override;
static char ID; // Pass identification, replacement for typeid.
private:
void initializeCallbacks(Module &M);
/// \brief Track origins (allocation points) of uninitialized values.
- bool TrackOrigins;
+ int TrackOrigins;
- DataLayout *TD;
+ const DataLayout *DL;
LLVMContext *C;
Type *IntptrTy;
Type *OriginTy;
@@ -249,13 +245,14 @@ class MemorySanitizer : public FunctionPass {
/// \brief The run-time callback to print a warning.
Value *WarningFn;
- /// \brief Run-time helper that copies origin info for a memory range.
- Value *MsanCopyOriginFn;
/// \brief Run-time helper that generates a new origin value for a stack
/// allocation.
Value *MsanSetAllocaOrigin4Fn;
/// \brief Run-time helper that poisons stack on function entry.
Value *MsanPoisonStackFn;
+ /// \brief Run-time helper that records a store (or any event) of an
+ /// uninitialized value and returns an updated origin id encoding this info.
+ Value *MsanChainOriginFn;
/// \brief MSan runtime replacements for memmove, memcpy and memset.
Value *MemmoveFn, *MemcpyFn, *MemsetFn;
@@ -272,7 +269,7 @@ class MemorySanitizer : public FunctionPass {
/// \brief Path to blacklist file.
SmallString<64> BlacklistFile;
/// \brief The blacklist.
- OwningPtr<SpecialCaseList> BL;
+ std::unique_ptr<SpecialCaseList> BL;
/// \brief An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
@@ -292,7 +289,7 @@ INITIALIZE_PASS(MemorySanitizer, "msan",
"MemorySanitizer: detects uninitialized reads.",
false, false)
-FunctionPass *llvm::createMemorySanitizerPass(bool TrackOrigins,
+FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins,
StringRef BlacklistFile) {
return new MemorySanitizer(TrackOrigins, BlacklistFile);
}
@@ -324,14 +321,13 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
: "__msan_warning_noreturn";
WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL);
- MsanCopyOriginFn = M.getOrInsertFunction(
- "__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, NULL);
MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
"__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
IRB.getInt8PtrTy(), IntptrTy, NULL);
MsanPoisonStackFn = M.getOrInsertFunction(
"__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
+ MsanChainOriginFn = M.getOrInsertFunction(
+ "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty(), NULL);
MemmoveFn = M.getOrInsertFunction(
"__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IntptrTy, NULL);
@@ -399,12 +395,14 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
///
/// inserts a call to __msan_init to the module's constructor list.
bool MemorySanitizer::doInitialization(Module &M) {
- TD = getAnalysisIfAvailable<DataLayout>();
- if (!TD)
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP)
return false;
+ DL = &DLP->getDataLayout();
+
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
C = &(M.getContext());
- unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0);
+ unsigned PtrSize = DL->getPointerSizeInBits(/* AddressSpace */0);
switch (PtrSize) {
case 64:
ShadowMask = kShadowMask64;
@@ -420,7 +418,7 @@ bool MemorySanitizer::doInitialization(Module &M) {
}
IRBuilder<> IRB(*C);
- IntptrTy = IRB.getIntPtrTy(TD);
+ IntptrTy = IRB.getIntPtrTy(DL);
OriginTy = IRB.getInt32Ty();
ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
@@ -487,7 +485,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MemorySanitizer &MS;
SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
ValueMap<Value*, Value*> ShadowMap, OriginMap;
- OwningPtr<VarArgHelper> VAHelper;
+ std::unique_ptr<VarArgHelper> VAHelper;
// The following flags disable parts of MSan instrumentation based on
// blacklist contents and command-line options.
@@ -503,7 +501,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Instruction *OrigIns;
ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
: Shadow(S), Origin(O), OrigIns(I) { }
- ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
};
SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
SmallVector<Instruction*, 16> StoreList;
@@ -527,6 +524,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
<< F.getName() << "'\n");
}
+ Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
+ if (MS.TrackOrigins <= 1) return V;
+ return IRB.CreateCall(MS.MsanChainOriginFn, V);
+ }
+
void materializeStores() {
for (size_t i = 0, n = StoreList.size(); i < n; i++) {
StoreInst& I = *dyn_cast<StoreInst>(StoreList[i]);
@@ -550,9 +552,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins) {
unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
- if (ClStoreCleanOrigin || isa<StructType>(Shadow->getType())) {
- IRB.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRB),
- Alignment);
+ if (isa<StructType>(Shadow->getType())) {
+ IRB.CreateAlignedStore(updateOrigin(getOrigin(Val), IRB),
+ getOriginPtr(Addr, IRB), Alignment);
} else {
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
@@ -565,11 +567,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm =
- SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false,
- MS.OriginStoreWeights);
+ SplitBlockAndInsertIfThen(Cmp, &I, false, MS.OriginStoreWeights);
IRBuilder<> IRBNew(CheckTerm);
- IRBNew.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRBNew),
- Alignment);
+ IRBNew.CreateAlignedStore(updateOrigin(getOrigin(Val), IRBNew),
+ getOriginPtr(Addr, IRBNew), Alignment);
}
}
}
@@ -588,10 +589,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
continue;
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
- Instruction *CheckTerm =
- SplitBlockAndInsertIfThen(cast<Instruction>(Cmp),
- /* Unreachable */ !ClKeepGoing,
- MS.ColdCallWeights);
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, OrigIns,
+ /* Unreachable */ !ClKeepGoing, MS.ColdCallWeights);
IRB.SetInsertPoint(CheckTerm);
if (MS.TrackOrigins) {
@@ -599,8 +599,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
MS.OriginTLS);
}
- CallInst *Call = IRB.CreateCall(MS.WarningFn);
- Call->setDebugLoc(OrigIns->getDebugLoc());
+ IRB.CreateCall(MS.WarningFn);
IRB.CreateCall(MS.EmptyAsm);
DEBUG(dbgs() << " CHECK: " << *Cmp << "\n");
}
@@ -629,7 +628,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreatePHI(Fn0->getType(), 2, "msandr.indirect_target");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
- cast<Instruction>(NotInThisModule),
+ NotInThisModule, NewFnPhi,
/* Unreachable */ false, MS.ColdCallWeights);
IRB.SetInsertPoint(CheckTerm);
@@ -652,7 +651,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
MS.initializeCallbacks(*F.getParent());
- if (!MS.TD) return false;
+ if (!MS.DL) return false;
// In the presence of unreachable blocks, we may see Phi nodes with
// incoming nodes from such blocks. Since InstVisitor skips unreachable
@@ -712,7 +711,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
return IT;
if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
- uint32_t EltSize = MS.TD->getTypeSizeInBits(VT->getElementType());
+ uint32_t EltSize = MS.DL->getTypeSizeInBits(VT->getElementType());
return VectorType::get(IntegerType::get(*MS.C, EltSize),
VT->getNumElements());
}
@@ -724,7 +723,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
return Res;
}
- uint32_t TypeSize = MS.TD->getTypeSizeInBits(OrigTy);
+ uint32_t TypeSize = MS.DL->getTypeSizeInBits(OrigTy);
return IntegerType::get(*MS.C, TypeSize);
}
@@ -891,8 +890,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
continue;
}
unsigned Size = AI->hasByValAttr()
- ? MS.TD->getTypeAllocSize(AI->getType()->getPointerElementType())
- : MS.TD->getTypeAllocSize(AI->getType());
+ ? MS.DL->getTypeAllocSize(AI->getType()->getPointerElementType())
+ : MS.DL->getTypeAllocSize(AI->getType());
if (A == AI) {
Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset);
if (AI->hasByValAttr()) {
@@ -902,7 +901,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned ArgAlign = AI->getParamAlignment();
if (ArgAlign == 0) {
Type *EltType = A->getType()->getPointerElementType();
- ArgAlign = MS.TD->getABITypeAlignment(EltType);
+ ArgAlign = MS.DL->getABITypeAlignment(EltType);
}
unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
Value *Cpy = EntryIRB.CreateMemCpy(
@@ -1088,7 +1087,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
handleCASOrRMW(I);
- I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
}
// Vector manipulation.
@@ -1325,6 +1324,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// TODO: handle struct types.
}
+ /// \brief Cast an application value to the type of its own shadow.
+ Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (V->getType() == ShadowTy)
+ return V;
+ if (V->getType()->isPtrOrPtrVectorTy())
+ return IRB.CreatePtrToInt(V, ShadowTy);
+ else
+ return IRB.CreateBitCast(V, ShadowTy);
+ }
+
/// \brief Propagate shadow for arbitrary operation.
void handleShadowOr(Instruction &I) {
IRBuilder<> IRB(&I);
@@ -1827,6 +1837,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
+ // Given a scalar or vector, extract lower 64 bits (or less), and return all
+ // zeroes if it is zero, and all ones otherwise.
+ Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
+ if (S->getType()->isVectorTy())
+ S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
+ assert(S->getType()->getPrimitiveSizeInBits() <= 64);
+ Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
+ return CreateShadowCast(IRB, S2, T, /* Signed */ true);
+ }
+
+ Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
+ Type *T = S->getType();
+ assert(T->isVectorTy());
+ Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
+ return IRB.CreateSExt(S2, T);
+ }
+
+ // \brief Instrument vector shift intrinsic.
+ //
+ // This function instruments intrinsics like int_x86_avx2_psll_w.
+ // Intrinsic shifts %In by %ShiftSize bits.
+ // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
+ // size, and the rest is ignored. Behavior is defined even if shift size is
+ // greater than register (or field) width.
+ void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
+ assert(I.getNumArgOperands() == 2);
+ IRBuilder<> IRB(&I);
+ // If any of the S2 bits are poisoned, the whole thing is poisoned.
+ // Otherwise perform the same shift on S1.
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
+ : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
+ Value *V1 = I.getOperand(0);
+ Value *V2 = I.getOperand(1);
+ Value *Shift = IRB.CreateCall2(I.getCalledValue(),
+ IRB.CreateBitCast(S1, V1->getType()), V2);
+ Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
+ setShadow(&I, IRB.CreateOr(Shift, S2Conv));
+ setOriginForNaryOp(I);
+ }
+
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case llvm::Intrinsic::bswap:
@@ -1866,6 +1918,83 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_sse_cvttps2pi:
handleVectorConvertIntrinsic(I, 2);
break;
+ case llvm::Intrinsic::x86_avx512_psll_dq:
+ case llvm::Intrinsic::x86_avx512_psrl_dq:
+ case llvm::Intrinsic::x86_avx2_psll_w:
+ case llvm::Intrinsic::x86_avx2_psll_d:
+ case llvm::Intrinsic::x86_avx2_psll_q:
+ case llvm::Intrinsic::x86_avx2_pslli_w:
+ case llvm::Intrinsic::x86_avx2_pslli_d:
+ case llvm::Intrinsic::x86_avx2_pslli_q:
+ case llvm::Intrinsic::x86_avx2_psll_dq:
+ case llvm::Intrinsic::x86_avx2_psrl_w:
+ case llvm::Intrinsic::x86_avx2_psrl_d:
+ case llvm::Intrinsic::x86_avx2_psrl_q:
+ case llvm::Intrinsic::x86_avx2_psra_w:
+ case llvm::Intrinsic::x86_avx2_psra_d:
+ case llvm::Intrinsic::x86_avx2_psrli_w:
+ case llvm::Intrinsic::x86_avx2_psrli_d:
+ case llvm::Intrinsic::x86_avx2_psrli_q:
+ case llvm::Intrinsic::x86_avx2_psrai_w:
+ case llvm::Intrinsic::x86_avx2_psrai_d:
+ case llvm::Intrinsic::x86_avx2_psrl_dq:
+ case llvm::Intrinsic::x86_sse2_psll_w:
+ case llvm::Intrinsic::x86_sse2_psll_d:
+ case llvm::Intrinsic::x86_sse2_psll_q:
+ case llvm::Intrinsic::x86_sse2_pslli_w:
+ case llvm::Intrinsic::x86_sse2_pslli_d:
+ case llvm::Intrinsic::x86_sse2_pslli_q:
+ case llvm::Intrinsic::x86_sse2_psll_dq:
+ case llvm::Intrinsic::x86_sse2_psrl_w:
+ case llvm::Intrinsic::x86_sse2_psrl_d:
+ case llvm::Intrinsic::x86_sse2_psrl_q:
+ case llvm::Intrinsic::x86_sse2_psra_w:
+ case llvm::Intrinsic::x86_sse2_psra_d:
+ case llvm::Intrinsic::x86_sse2_psrli_w:
+ case llvm::Intrinsic::x86_sse2_psrli_d:
+ case llvm::Intrinsic::x86_sse2_psrli_q:
+ case llvm::Intrinsic::x86_sse2_psrai_w:
+ case llvm::Intrinsic::x86_sse2_psrai_d:
+ case llvm::Intrinsic::x86_sse2_psrl_dq:
+ case llvm::Intrinsic::x86_mmx_psll_w:
+ case llvm::Intrinsic::x86_mmx_psll_d:
+ case llvm::Intrinsic::x86_mmx_psll_q:
+ case llvm::Intrinsic::x86_mmx_pslli_w:
+ case llvm::Intrinsic::x86_mmx_pslli_d:
+ case llvm::Intrinsic::x86_mmx_pslli_q:
+ case llvm::Intrinsic::x86_mmx_psrl_w:
+ case llvm::Intrinsic::x86_mmx_psrl_d:
+ case llvm::Intrinsic::x86_mmx_psrl_q:
+ case llvm::Intrinsic::x86_mmx_psra_w:
+ case llvm::Intrinsic::x86_mmx_psra_d:
+ case llvm::Intrinsic::x86_mmx_psrli_w:
+ case llvm::Intrinsic::x86_mmx_psrli_d:
+ case llvm::Intrinsic::x86_mmx_psrli_q:
+ case llvm::Intrinsic::x86_mmx_psrai_w:
+ case llvm::Intrinsic::x86_mmx_psrai_d:
+ handleVectorShiftIntrinsic(I, /* Variable */ false);
+ break;
+ case llvm::Intrinsic::x86_avx2_psllv_d:
+ case llvm::Intrinsic::x86_avx2_psllv_d_256:
+ case llvm::Intrinsic::x86_avx2_psllv_q:
+ case llvm::Intrinsic::x86_avx2_psllv_q_256:
+ case llvm::Intrinsic::x86_avx2_psrlv_d:
+ case llvm::Intrinsic::x86_avx2_psrlv_d_256:
+ case llvm::Intrinsic::x86_avx2_psrlv_q:
+ case llvm::Intrinsic::x86_avx2_psrlv_q_256:
+ case llvm::Intrinsic::x86_avx2_psrav_d:
+ case llvm::Intrinsic::x86_avx2_psrav_d_256:
+ handleVectorShiftIntrinsic(I, /* Variable */ true);
+ break;
+
+ // Byte shifts are not implemented.
+ // case llvm::Intrinsic::x86_avx512_psll_dq_bs:
+ // case llvm::Intrinsic::x86_avx512_psrl_dq_bs:
+ // case llvm::Intrinsic::x86_avx2_psll_dq_bs:
+ // case llvm::Intrinsic::x86_avx2_psrl_dq_bs:
+ // case llvm::Intrinsic::x86_sse2_psll_dq_bs:
+ // case llvm::Intrinsic::x86_sse2_psrl_dq_bs:
+
default:
if (!handleUnknownIntrinsic(I))
visitInstruction(I);
@@ -1937,13 +2066,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (CS.paramHasAttr(i + 1, Attribute::ByVal)) {
assert(A->getType()->isPointerTy() &&
"ByVal argument is not a pointer!");
- Size = MS.TD->getTypeAllocSize(A->getType()->getPointerElementType());
+ Size = MS.DL->getTypeAllocSize(A->getType()->getPointerElementType());
unsigned Alignment = CS.getParamAlignment(i + 1);
Store = IRB.CreateMemCpy(ArgShadowBase,
getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
Size, Alignment);
} else {
- Size = MS.TD->getTypeAllocSize(A->getType());
+ Size = MS.DL->getTypeAllocSize(A->getType());
Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
kShadowTLSAlignment);
}
@@ -1966,7 +2095,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Now, get the shadow for the RetVal.
if (!I.getType()->isSized()) return;
IRBuilder<> IRBBefore(&I);
- // Untill we have full dynamic coverage, make sure the retval shadow is 0.
+ // Until we have full dynamic coverage, make sure the retval shadow is 0.
Value *Base = getShadowPtrForRetval(&I, IRBBefore);
IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
Instruction *NextInsn = 0;
@@ -2026,7 +2155,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitAllocaInst(AllocaInst &I) {
setShadow(&I, getCleanShadow(&I));
IRBuilder<> IRB(I.getNextNode());
- uint64_t Size = MS.TD->getTypeAllocSize(I.getAllocatedType());
+ uint64_t Size = MS.DL->getTypeAllocSize(I.getAllocatedType());
if (PoisonStack && ClPoisonStackWithCall) {
IRB.CreateCall2(MS.MsanPoisonStackFn,
IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
@@ -2062,33 +2191,51 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitSelectInst(SelectInst& I) {
IRBuilder<> IRB(&I);
// a = select b, c, d
- Value *S = IRB.CreateSelect(I.getCondition(), getShadow(I.getTrueValue()),
- getShadow(I.getFalseValue()));
+ Value *B = I.getCondition();
+ Value *C = I.getTrueValue();
+ Value *D = I.getFalseValue();
+ Value *Sb = getShadow(B);
+ Value *Sc = getShadow(C);
+ Value *Sd = getShadow(D);
+
+ // Result shadow if condition shadow is 0.
+ Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
+ Value *Sa1;
if (I.getType()->isAggregateType()) {
// To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
// an extra "select". This results in much more compact IR.
// Sa = select Sb, poisoned, (select b, Sc, Sd)
- S = IRB.CreateSelect(getShadow(I.getCondition()),
- getPoisonedShadow(getShadowTy(I.getType())), S,
- "_msprop_select_agg");
+ Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
} else {
- // Sa = (sext Sb) | (select b, Sc, Sd)
- S = IRB.CreateOr(S, CreateShadowCast(IRB, getShadow(I.getCondition()),
- S->getType(), true),
- "_msprop_select");
+ // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
+ // If Sb (condition is poisoned), look for bits in c and d that are equal
+ // and both unpoisoned.
+ // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
+
+ // Cast arguments to shadow-compatible type.
+ C = CreateAppToShadowCast(IRB, C);
+ D = CreateAppToShadowCast(IRB, D);
+
+ // Result shadow if condition shadow is 1.
+ Sa1 = IRB.CreateOr(IRB.CreateXor(C, D), IRB.CreateOr(Sc, Sd));
}
- setShadow(&I, S);
+ Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
+ setShadow(&I, Sa);
if (MS.TrackOrigins) {
// Origins are always i32, so any vector conditions must be flattened.
// FIXME: consider tracking vector origins for app vectors?
- Value *Cond = I.getCondition();
- if (Cond->getType()->isVectorTy()) {
- Value *ConvertedShadow = convertToShadowTyNoVec(Cond, IRB);
- Cond = IRB.CreateICmpNE(ConvertedShadow,
- getCleanShadow(ConvertedShadow), "_mso_select");
+ if (B->getType()->isVectorTy()) {
+ Type *FlatTy = getShadowTyNoVec(B->getType());
+ B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
+ ConstantInt::getNullValue(FlatTy));
+ Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
+ ConstantInt::getNullValue(FlatTy));
}
- setOrigin(&I, IRB.CreateSelect(Cond,
- getOrigin(I.getTrueValue()), getOrigin(I.getFalseValue())));
+ // a = select b, c, d
+ // Oa = Sb ? Ob : (b ? Oc : Od)
+ setOrigin(&I, IRB.CreateSelect(
+ Sb, getOrigin(I.getCondition()),
+ IRB.CreateSelect(B, getOrigin(C), getOrigin(D))));
}
}
@@ -2195,34 +2342,47 @@ struct VarArgAMD64Helper : public VarArgHelper {
// would have been to associate each live instance of va_list with a copy of
// MSanParamTLS, and extract shadow on va_arg() call in the argument list
// order.
- void visitCallSite(CallSite &CS, IRBuilder<> &IRB) {
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
unsigned GpOffset = 0;
unsigned FpOffset = AMD64GpEndOffset;
unsigned OverflowOffset = AMD64FpEndOffset;
for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
ArgIt != End; ++ArgIt) {
Value *A = *ArgIt;
- ArgKind AK = classifyArgument(A);
- if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
- AK = AK_Memory;
- if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
- AK = AK_Memory;
- Value *Base;
- switch (AK) {
- case AK_GeneralPurpose:
- Base = getShadowPtrForVAArgument(A, IRB, GpOffset);
- GpOffset += 8;
- break;
- case AK_FloatingPoint:
- Base = getShadowPtrForVAArgument(A, IRB, FpOffset);
- FpOffset += 16;
- break;
- case AK_Memory:
- uint64_t ArgSize = MS.TD->getTypeAllocSize(A->getType());
- Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset);
+ unsigned ArgNo = CS.getArgumentNo(ArgIt);
+ bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal);
+ if (IsByVal) {
+ // ByVal arguments always go to the overflow area.
+ assert(A->getType()->isPointerTy());
+ Type *RealTy = A->getType()->getPointerElementType();
+ uint64_t ArgSize = MS.DL->getTypeAllocSize(RealTy);
+ Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
+ IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB),
+ ArgSize, kShadowTLSAlignment);
+ } else {
+ ArgKind AK = classifyArgument(A);
+ if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
+ AK = AK_Memory;
+ if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
+ AK = AK_Memory;
+ Value *Base;
+ switch (AK) {
+ case AK_GeneralPurpose:
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset);
+ GpOffset += 8;
+ break;
+ case AK_FloatingPoint:
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset);
+ FpOffset += 16;
+ break;
+ case AK_Memory:
+ uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType());
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+ OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
+ }
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
- IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
}
Constant *OverflowSize =
ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
@@ -2230,15 +2390,15 @@ struct VarArgAMD64Helper : public VarArgHelper {
}
/// \brief Compute the shadow address for a given va_arg.
- Value *getShadowPtrForVAArgument(Value *A, IRBuilder<> &IRB,
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
int ArgOffset) {
Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(A), 0),
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
"_msarg");
}
- void visitVAStartInst(VAStartInst &I) {
+ void visitVAStartInst(VAStartInst &I) override {
IRBuilder<> IRB(&I);
VAStartInstrumentationList.push_back(&I);
Value *VAListTag = I.getArgOperand(0);
@@ -2250,7 +2410,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
/* size */24, /* alignment */8, false);
}
- void visitVACopyInst(VACopyInst &I) {
+ void visitVACopyInst(VACopyInst &I) override {
IRBuilder<> IRB(&I);
Value *VAListTag = I.getArgOperand(0);
Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
@@ -2261,7 +2421,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
/* size */24, /* alignment */8, false);
}
- void finalizeInstrumentation() {
+ void finalizeInstrumentation() override {
assert(!VAArgOverflowSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
if (!VAStartInstrumentationList.empty()) {
@@ -2313,13 +2473,13 @@ struct VarArgNoOpHelper : public VarArgHelper {
VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
MemorySanitizerVisitor &MSV) {}
- void visitCallSite(CallSite &CS, IRBuilder<> &IRB) {}
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {}
- void visitVAStartInst(VAStartInst &I) {}
+ void visitVAStartInst(VAStartInst &I) override {}
- void visitVACopyInst(VACopyInst &I) {}
+ void visitVACopyInst(VACopyInst &I) override {}
- void finalizeInstrumentation() {}
+ void finalizeInstrumentation() override {}
};
VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 89fb746..5ffb17c 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -78,12 +78,12 @@ namespace {
struct ThreadSanitizer : public FunctionPass {
ThreadSanitizer(StringRef BlacklistFile = StringRef())
: FunctionPass(ID),
- TD(0),
+ DL(0),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
: BlacklistFile) { }
- const char *getPassName() const;
- bool runOnFunction(Function &F);
- bool doInitialization(Module &M);
+ const char *getPassName() const override;
+ bool runOnFunction(Function &F) override;
+ bool doInitialization(Module &M) override;
static char ID; // Pass identification, replacement for typeid.
private:
@@ -96,10 +96,10 @@ struct ThreadSanitizer : public FunctionPass {
bool addrPointsToConstantData(Value *Addr);
int getMemoryAccessFuncIndex(Value *Addr);
- DataLayout *TD;
+ const DataLayout *DL;
Type *IntptrTy;
SmallString<64> BlacklistFile;
- OwningPtr<SpecialCaseList> BL;
+ std::unique_ptr<SpecialCaseList> BL;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
Function *TsanFuncEntry;
@@ -224,14 +224,15 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
}
bool ThreadSanitizer::doInitialization(Module &M) {
- TD = getAnalysisIfAvailable<DataLayout>();
- if (!TD)
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP)
return false;
+ DL = &DLP->getDataLayout();
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
- IntptrTy = IRB.getIntPtrTy(TD);
+ IntptrTy = IRB.getIntPtrTy(DL);
Value *TsanInit = M.getOrInsertFunction("__tsan_init",
IRB.getVoidTy(), NULL);
appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
@@ -320,7 +321,7 @@ static bool isAtomic(Instruction *I) {
}
bool ThreadSanitizer::runOnFunction(Function &F) {
- if (!TD) return false;
+ if (!DL) return false;
if (BL->isIn(F)) return false;
initializeCallbacks(*F.getParent());
SmallVector<Instruction*, 8> RetVec;
@@ -402,8 +403,13 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
if (IsWrite && isVtableAccess(I)) {
DEBUG(dbgs() << " VPTR : " << *I << "\n");
Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
- // StoredValue does not necessary have a pointer type.
- if (isa<IntegerType>(StoredValue->getType()))
+ // StoredValue may be a vector type if we are storing several vptrs at once.
+ // In this case, just take the first element of the vector since this is
+ // enough to find vptr races.
+ if (isa<VectorType>(StoredValue->getType()))
+ StoredValue = IRB.CreateExtractElement(
+ StoredValue, ConstantInt::get(IRB.getInt32Ty(), 0));
+ if (StoredValue->getType()->isIntegerTy())
StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
// Call TsanVptrUpdate.
IRB.CreateCall2(TsanVptrUpdate,
@@ -440,21 +446,6 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
return IRB->getInt32(v);
}
-static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
- uint32_t v = 0;
- switch (ord) {
- case NotAtomic: assert(false);
- case Unordered: // Fall-through.
- case Monotonic: v = 0; break;
- // case Consume: v = 1; break; // Not specified yet.
- case Acquire: v = 2; break;
- case Release: v = 0; break;
- case AcquireRelease: v = 2; break;
- case SequentiallyConsistent: v = 5; break;
- }
- return IRB->getInt32(v);
-}
-
// If a memset intrinsic gets inlined by the code gen, we will miss races on it.
// So, we either need to ensure the intrinsic is not inlined, or instrument it.
// We do not instrument memset/memmove/memcpy intrinsics (too complicated),
@@ -482,7 +473,7 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
}
// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
-// standards. For background see C++11 standard. A slightly older, publically
+// standards. For background see C++11 standard. A slightly older, publicly
// available draft of the standard (not entirely up-to-date, but close enough
// for casual browsing) is available here:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
@@ -550,8 +541,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
IRB.CreateIntCast(CASI->getCompareOperand(), Ty, false),
IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
- createOrdering(&IRB, CASI->getOrdering()),
- createFailOrdering(&IRB, CASI->getOrdering())};
+ createOrdering(&IRB, CASI->getSuccessOrdering()),
+ createOrdering(&IRB, CASI->getFailureOrdering())};
CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args));
ReplaceInstWithInst(I, C);
} else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
@@ -568,7 +559,7 @@ int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) {
Type *OrigPtrTy = Addr->getType();
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
assert(OrigTy->isSized());
- uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+ uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy);
if (TypeSize != 8 && TypeSize != 16 &&
TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
NumAccessesWithBadSize++;
diff --git a/lib/Transforms/ObjCARC/Android.mk b/lib/Transforms/ObjCARC/Android.mk
index 126d7ee..226e9e1 100644
--- a/lib/Transforms/ObjCARC/Android.mk
+++ b/lib/Transforms/ObjCARC/Android.mk
@@ -26,6 +26,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
# For the device
# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_objcarc_SRC_FILES)
@@ -36,3 +37,4 @@ LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 8f917ae..8780359 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -24,7 +24,7 @@
#include "ObjCARC.h"
#include "DependencyAnalysis.h"
#include "ProvenanceAnalysis.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/CFG.h"
using namespace llvm;
using namespace llvm::objcarc;
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index 8044494..f71cf2b 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -27,10 +27,10 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -308,6 +308,7 @@ static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
// Special arguments can not be a valid retainable object pointer.
if (const Argument *Arg = dyn_cast<Argument>(Op))
if (Arg->hasByValAttr() ||
+ Arg->hasInAllocaAttr() ||
Arg->hasNestAttr() ||
Arg->hasStructRetAttr())
return false;
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 00d9864..cb7e4da 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -37,8 +37,8 @@ using namespace llvm::objcarc;
namespace {
/// \brief Autorelease pool elimination.
class ObjCARCAPElim : public ModulePass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool runOnModule(Module &M);
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnModule(Module &M) override;
static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0);
static bool OptimizeBB(BasicBlock *BB);
@@ -165,7 +165,7 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
if (F->isDeclaration())
continue;
// Only look at functions with one basic block.
- if (llvm::next(F->begin()) != F->end())
+ if (std::next(F->begin()) != F->end())
continue;
// Ok, a single-block constructor function definition. Try to optimize it.
Changed |= OptimizeBB(F->begin());
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
index 41ccfe2..97b565b 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -44,28 +44,28 @@ namespace objcarc {
}
private:
- virtual void initializePass() {
+ void initializePass() override {
InitializeAliasAnalysis(this);
}
/// This method is used when a pass implements an analysis interface through
/// multiple inheritance. If needed, it should override this to adjust the
/// this pointer as needed for the specified pass info.
- virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ void *getAdjustedAnalysisPointer(const void *PI) override {
if (PI == &AliasAnalysis::ID)
return static_cast<AliasAnalysis *>(this);
return this;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual AliasResult alias(const Location &LocA, const Location &LocB);
- virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
- virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
- virtual ModRefBehavior getModRefBehavior(const Function *F);
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc);
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2);
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ AliasResult alias(const Location &LocA, const Location &LocB) override;
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
+ ModRefBehavior getModRefBehavior(const Function *F) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) override;
};
} // namespace objcarc
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 9d80037..3da5a0e 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -32,7 +32,7 @@
#include "DependencyAnalysis.h"
#include "ProvenanceAnalysis.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
@@ -79,9 +79,9 @@ namespace {
void ContractRelease(Instruction *Release,
inst_iterator &Iter);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
public:
static char ID;
@@ -95,7 +95,7 @@ char ObjCARCContract::ID = 0;
INITIALIZE_PASS_BEGIN(ObjCARCContract,
"objc-arc-contract", "ObjC ARC contraction", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ObjCARCContract,
"objc-arc-contract", "ObjC ARC contraction", false, false)
@@ -105,7 +105,7 @@ Pass *llvm::createObjCARCContractPass() {
void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
@@ -323,7 +323,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
Changed = false;
AA = &getAnalysis<AliasAnalysis>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
PA.setAA(&getAnalysis<AliasAnalysis>());
@@ -440,17 +440,17 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// Don't use GetObjCArg because we don't want to look through bitcasts
// and such; to do the replacement, the argument must have type i8*.
- const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+ Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
for (;;) {
// If we're compiling bugpointed code, don't get in trouble.
if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
break;
// Look through the uses of the pointer.
- for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
UI != UE; ) {
- Use &U = UI.getUse();
- unsigned OperandNo = UI.getOperandNo();
- ++UI; // Increment UI now, because we may unlink its element.
+ // Increment UI now, because we may unlink its element.
+ Use &U = *UI++;
+ unsigned OperandNo = U.getOperandNo();
// If the call's return value dominates a use of the call's argument
// value, rewrite the use to use the return value. We check for
@@ -475,9 +475,9 @@ bool ObjCARCContract::runOnFunction(Function &F) {
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
if (PHI->getIncomingBlock(i) == BB) {
// Keep the UI iterator valid.
- if (&PHI->getOperandUse(
- PHINode::getOperandNumForIncomingValue(i)) ==
- &UI.getUse())
+ if (UI != UE &&
+ &PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) == &*UI)
++UI;
PHI->setIncomingValue(i, Replacement);
}
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 39bf8f3..8bec699 100644
--- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -28,6 +28,7 @@
#include "ObjCARC.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
@@ -37,7 +38,6 @@
#include "llvm/PassSupport.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -50,9 +50,9 @@ using namespace llvm::objcarc;
namespace {
/// \brief Early ARC transformations.
class ObjCARCExpand : public FunctionPass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
/// A flag indicating whether this optimization pass should run.
bool Run;
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 2976df6..eed3cb2 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -35,9 +35,9 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -163,12 +163,9 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
// If we found an identifiable object but it has multiple uses, but they are
// trivial uses, we can still consider this to be a single-use value.
if (IsObjCIdentifiedObject(Arg)) {
- for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
- UI != UE; ++UI) {
- const User *U = *UI;
+ for (const User *U : Arg->users())
if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
return 0;
- }
return Arg;
}
@@ -382,7 +379,7 @@ namespace {
void clear();
/// Conservatively merge the two RRInfo. Returns true if a partial merge has
- /// occured, false otherwise.
+ /// occurred, false otherwise.
bool Merge(const RRInfo &Other);
};
@@ -538,8 +535,7 @@ namespace {
void
PtrState::Merge(const PtrState &Other, bool TopDown) {
- Seq = MergeSeqs(static_cast<Sequence>(Seq), static_cast<Sequence>(Other.Seq),
- TopDown);
+ Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown);
KnownPositiveRefCount &= Other.KnownPositiveRefCount;
// If we're not in a sequence (anymore), drop all associated state.
@@ -660,7 +656,7 @@ namespace {
/// which pass through this block. This is only valid after both the
/// top-down and bottom-up traversals are complete.
///
- /// Returns true if overflow occured. Returns false if overflow did not
+ /// Returns true if overflow occurred. Returns false if overflow did not
/// occur.
bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
if (TopDownPathCount == OverflowOccurredValue ||
@@ -668,7 +664,7 @@ namespace {
return true;
unsigned long long Product =
(unsigned long long)TopDownPathCount*BottomUpPathCount;
- // Overflow occured if any of the upper bits of Product are set or if all
+ // Overflow occurred if any of the upper bits of Product are set or if all
// the lower bits of Product are all set.
return (Product >> 32) ||
((PathCount = Product) == OverflowOccurredValue);
@@ -712,7 +708,7 @@ void BBState::MergePred(const BBState &Other) {
// In order to be consistent, we clear the top down pointers when by adding
// TopDownPathCount becomes OverflowOccurredValue even though "true" overflow
- // has not occured.
+ // has not occurred.
if (TopDownPathCount == OverflowOccurredValue) {
clearTopDownPointers();
return;
@@ -756,7 +752,7 @@ void BBState::MergeSucc(const BBState &Other) {
// In order to be consistent, we clear the top down pointers when by adding
// BottomUpPathCount becomes OverflowOccurredValue even though "true" overflow
- // has not occured.
+ // has not occurred.
if (BottomUpPathCount == OverflowOccurredValue) {
clearBottomUpPointers();
return;
@@ -959,7 +955,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
/*isVarArg=*/false);
Constant *Callee = M->getOrInsertFunction(Name, FTy);
- IRBuilder<> Builder(BB, llvm::prior(BB->end()));
+ IRBuilder<> Builder(BB, std::prev(BB->end()));
Value *PtrName;
StringRef Tmp = Ptr->getName();
@@ -1006,7 +1002,7 @@ static void GenerateARCAnnotation(unsigned InstMDId,
// llvm-arc-annotation-processor tool to cross reference where the source
// pointer is in the LLVM IR since the LLVM IR parser does not submit such
// information via debug info for backends to use (since why would anyone
- // need such a thing from LLVM IR besides in non standard cases
+ // need such a thing from LLVM IR besides in non-standard cases
// [i.e. this]).
MDString *SourcePtrMDNode =
AppendMDNodeToSourcePtr(PtrMDId, Ptr);
@@ -1164,10 +1160,10 @@ namespace {
void GatherStatistics(Function &F, bool AfterOptimization = false);
#endif
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
- virtual void releaseMemory();
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ void releaseMemory() override;
public:
static char ID;
@@ -1267,13 +1263,11 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
Users.push_back(Ptr);
do {
Ptr = Users.pop_back_val();
- for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
- UI != UE; ++UI) {
- const User *I = *UI;
- if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ for (const User *U : Ptr->users()) {
+ if (isa<ReturnInst>(U) || GetBasicInstructionClass(U) == IC_RetainRV)
return;
- if (isa<BitCastInst>(I))
- Users.push_back(I);
+ if (isa<BitCastInst>(U))
+ Users.push_back(U);
}
} while (!Users.empty());
@@ -1809,13 +1803,13 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// pointer has multiple owners implying that we must be more conservative.
//
// This comes up in the context of a pointer being ``KnownSafe''. In the
- // presense of a block being initialized, the frontend will emit the
+ // presence of a block being initialized, the frontend will emit the
// objc_retain on the original pointer and the release on the pointer loaded
// from the alloca. The optimizer will through the provenance analysis
// realize that the two are related, but since we only require KnownSafe in
// one direction, will match the inner retain on the original pointer with
// the guard release on the original pointer. This is fixed by ensuring that
- // in the presense of allocas we only unconditionally remove pointers if
+ // in the presence of allocas we only unconditionally remove pointers if
// both our retain and our release are KnownSafe.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) {
@@ -1875,7 +1869,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
if (isa<InvokeInst>(Inst))
S.InsertReverseInsertPt(BB->getFirstInsertionPt());
else
- S.InsertReverseInsertPt(llvm::next(BasicBlock::iterator(Inst)));
+ S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
S.SetSeq(S_Use);
ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
} else if (Seq == S_Release && IsUser(Class)) {
@@ -1889,7 +1883,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
if (isa<InvokeInst>(Inst))
S.InsertReverseInsertPt(BB->getFirstInsertionPt());
else
- S.InsertReverseInsertPt(llvm::next(BasicBlock::iterator(Inst)));
+ S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
}
break;
case S_Stop:
@@ -1946,7 +1940,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
- Instruction *Inst = llvm::prior(I);
+ Instruction *Inst = std::prev(I);
// Invoke instructions are visited as part of their successors (below).
if (isa<InvokeInst>(Inst))
@@ -2692,12 +2686,12 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
// within the same block. Theoretically, we could do memdep-style non-local
// analysis too, but that would want caching. A better approach would be to
// use the technique that EarlyCSE uses.
- inst_iterator Current = llvm::prior(I);
+ inst_iterator Current = std::prev(I);
BasicBlock *CurrentBB = Current.getBasicBlockIterator();
for (BasicBlock::iterator B = CurrentBB->begin(),
J = Current.getInstructionIterator();
J != B; --J) {
- Instruction *EarlierInst = &*llvm::prior(J);
+ Instruction *EarlierInst = &*std::prev(J);
InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
switch (EarlierClass) {
case IC_LoadWeak:
@@ -2788,9 +2782,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
CallInst *Call = cast<CallInst>(Inst);
Value *Arg = Call->getArgOperand(0);
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ++UI) {
- const Instruction *UserInst = cast<Instruction>(*UI);
+ for (User *U : Alloca->users()) {
+ const Instruction *UserInst = cast<Instruction>(U);
switch (GetBasicInstructionClass(UserInst)) {
case IC_InitWeak:
case IC_StoreWeak:
@@ -2801,8 +2794,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
}
}
Changed = true;
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ) {
+ for (auto UI = Alloca->user_begin(), UE = Alloca->user_end(); UI != UE;) {
CallInst *UserInst = cast<CallInst>(*UI++);
switch (GetBasicInstructionClass(UserInst)) {
case IC_InitWeak:
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index ae3c628..22be6fd 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -79,11 +79,10 @@ static bool IsStoredObjCPointer(const Value *P) {
Visited.insert(P);
do {
P = Worklist.pop_back_val();
- for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
- UI != UE; ++UI) {
- const User *Ur = *UI;
+ for (const Use &U : P->uses()) {
+ const User *Ur = U.getUser();
if (isa<StoreInst>(Ur)) {
- if (UI.getOperandNo() == 0)
+ if (U.getOperandNo() == 0)
// The pointer is stored.
return true;
// The pointed is stored through.
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index a3eb07a..fa8b598 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -21,11 +21,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/InstIterator.h"
using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed");
@@ -37,9 +37,9 @@ namespace {
initializeADCEPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function& F);
+ bool runOnFunction(Function& F) override;
- virtual void getAnalysisUsage(AnalysisUsage& AU) const {
+ void getAnalysisUsage(AnalysisUsage& AU) const override {
AU.setPreservesCFG();
}
@@ -50,6 +50,9 @@ char ADCE::ID = 0;
INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false)
bool ADCE::runOnFunction(Function& F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
SmallPtrSet<Instruction*, 128> alive;
SmallVector<Instruction*, 128> worklist;
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
index 60308c6..3894f93 100644
--- a/lib/Transforms/Scalar/Android.mk
+++ b/lib/Transforms/Scalar/Android.mk
@@ -2,8 +2,8 @@ LOCAL_PATH:= $(call my-dir)
transforms_scalar_SRC_FILES := \
ADCE.cpp \
- CodeGenPrepare.cpp \
ConstantProp.cpp \
+ ConstantHoisting.cpp \
CorrelatedValuePropagation.cpp \
DCE.cpp \
DeadStoreElimination.cpp \
@@ -30,6 +30,7 @@ transforms_scalar_SRC_FILES := \
SROA.cpp \
SampleProfile.cpp \
Scalar.cpp \
+ Scalarizer.cpp \
ScalarReplAggregates.cpp \
SimplifyCFGPass.cpp \
Sink.cpp \
@@ -53,13 +54,20 @@ include $(BUILD_HOST_STATIC_LIBRARY)
# For the device
# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES)
LOCAL_MODULE:= libLLVMScalarOpts
+# Override the default optimization level to work around a SIGSEGV
+# on x86 target builds for SROA.cpp.
+# Bug: 8047767
+LOCAL_CFLAGS_x86 += -O1
+
LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 626c810..27434c1 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,10 +1,11 @@
add_llvm_library(LLVMScalarOpts
ADCE.cpp
- CodeGenPrepare.cpp
+ ConstantHoisting.cpp
ConstantProp.cpp
CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
+ Scalarizer.cpp
EarlyCSE.cpp
GlobalMerge.cpp
GVN.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
deleted file mode 100644
index 007e9b7..0000000
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ /dev/null
@@ -1,2002 +0,0 @@
-//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass munges the code in the input function to better prepare it for
-// SelectionDAG-based code generation. This works around limitations in it's
-// basic-block-at-a-time approach. It should eventually be removed.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "codegenprepare"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/Transforms/Utils/BypassSlowDivision.h"
-#include "llvm/Transforms/Utils/Local.h"
-using namespace llvm;
-using namespace llvm::PatternMatch;
-
-STATISTIC(NumBlocksElim, "Number of blocks eliminated");
-STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
-STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
-STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
- "sunken Cmps");
-STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
- "of sunken Casts");
-STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
- "computations were sunk");
-STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
-STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
-STATISTIC(NumRetsDup, "Number of return instructions duplicated");
-STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
-STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
-
-static cl::opt<bool> DisableBranchOpts(
- "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
- cl::desc("Disable branch optimizations in CodeGenPrepare"));
-
-static cl::opt<bool> DisableSelectToBranch(
- "disable-cgp-select2branch", cl::Hidden, cl::init(false),
- cl::desc("Disable select to branch conversion."));
-
-namespace {
- class CodeGenPrepare : public FunctionPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// transformation profitability.
- const TargetMachine *TM;
- const TargetLowering *TLI;
- const TargetLibraryInfo *TLInfo;
- DominatorTree *DT;
-
- /// CurInstIterator - As we scan instructions optimizing them, this is the
- /// next instruction to optimize. Xforms that can invalidate this should
- /// update it.
- BasicBlock::iterator CurInstIterator;
-
- /// Keeps track of non-local addresses that have been sunk into a block.
- /// This allows us to avoid inserting duplicate code for blocks with
- /// multiple load/stores of the same address.
- ValueMap<Value*, Value*> SunkAddrs;
-
- /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
- /// be updated.
- bool ModifiedDT;
-
- /// OptSize - True if optimizing for size.
- bool OptSize;
-
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit CodeGenPrepare(const TargetMachine *TM = 0)
- : FunctionPass(ID), TM(TM), TLI(0) {
- initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F);
-
- const char *getPassName() const { return "CodeGen Prepare"; }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
- AU.addRequired<TargetLibraryInfo>();
- }
-
- private:
- bool EliminateFallThrough(Function &F);
- bool EliminateMostlyEmptyBlocks(Function &F);
- bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
- void EliminateMostlyEmptyBlock(BasicBlock *BB);
- bool OptimizeBlock(BasicBlock &BB);
- bool OptimizeInst(Instruction *I);
- bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
- bool OptimizeInlineAsmInst(CallInst *CS);
- bool OptimizeCallInst(CallInst *CI);
- bool MoveExtToFormExtLoad(Instruction *I);
- bool OptimizeExtUses(Instruction *I);
- bool OptimizeSelectInst(SelectInst *SI);
- bool DupRetToEnableTailCallOpts(BasicBlock *BB);
- bool PlaceDbgValues(Function &F);
- };
-}
-
-char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
-
-FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
- return new CodeGenPrepare(TM);
-}
-
-bool CodeGenPrepare::runOnFunction(Function &F) {
- bool EverMadeChange = false;
-
- ModifiedDT = false;
- if (TM) TLI = TM->getTargetLowering();
- TLInfo = &getAnalysis<TargetLibraryInfo>();
- DT = getAnalysisIfAvailable<DominatorTree>();
- OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
-
- /// This optimization identifies DIV instructions that can be
- /// profitably bypassed and carried out with a shorter, faster divide.
- if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
- const DenseMap<unsigned int, unsigned int> &BypassWidths =
- TLI->getBypassSlowDivWidths();
- for (Function::iterator I = F.begin(); I != F.end(); I++)
- EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
- }
-
- // Eliminate blocks that contain only PHI nodes and an
- // unconditional branch.
- EverMadeChange |= EliminateMostlyEmptyBlocks(F);
-
- // llvm.dbg.value is far away from the value then iSel may not be able
- // handle it properly. iSel will drop llvm.dbg.value if it can not
- // find a node corresponding to the value.
- EverMadeChange |= PlaceDbgValues(F);
-
- bool MadeChange = true;
- while (MadeChange) {
- MadeChange = false;
- for (Function::iterator I = F.begin(); I != F.end(); ) {
- BasicBlock *BB = I++;
- MadeChange |= OptimizeBlock(*BB);
- }
- EverMadeChange |= MadeChange;
- }
-
- SunkAddrs.clear();
-
- if (!DisableBranchOpts) {
- MadeChange = false;
- SmallPtrSet<BasicBlock*, 8> WorkList;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
- MadeChange |= ConstantFoldTerminator(BB, true);
- if (!MadeChange) continue;
-
- for (SmallVectorImpl<BasicBlock*>::iterator
- II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_begin(*II) == pred_end(*II))
- WorkList.insert(*II);
- }
-
- // Delete the dead blocks and any of their dead successors.
- MadeChange |= !WorkList.empty();
- while (!WorkList.empty()) {
- BasicBlock *BB = *WorkList.begin();
- WorkList.erase(BB);
- SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
-
- DeleteDeadBlock(BB);
-
- for (SmallVectorImpl<BasicBlock*>::iterator
- II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_begin(*II) == pred_end(*II))
- WorkList.insert(*II);
- }
-
- // Merge pairs of basic blocks with unconditional branches, connected by
- // a single edge.
- if (EverMadeChange || MadeChange)
- MadeChange |= EliminateFallThrough(F);
-
- if (MadeChange)
- ModifiedDT = true;
- EverMadeChange |= MadeChange;
- }
-
- if (ModifiedDT && DT)
- DT->DT->recalculate(F);
-
- return EverMadeChange;
-}
-
-/// EliminateFallThrough - Merge basic blocks which are connected
-/// by a single edge, where one of the basic blocks has a single successor
-/// pointing to the other basic block, which has a single predecessor.
-bool CodeGenPrepare::EliminateFallThrough(Function &F) {
- bool Changed = false;
- // Scan all of the blocks in the function, except for the entry block.
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = I++;
- // If the destination block has a single pred, then this is a trivial
- // edge, just collapse it.
- BasicBlock *SinglePred = BB->getSinglePredecessor();
-
- // Don't merge if BB's address is taken.
- if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
-
- BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
- if (Term && !Term->isConditional()) {
- Changed = true;
- DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
- // Remember if SinglePred was the entry block of the function.
- // If so, we will need to move BB back to the entry position.
- bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, this);
-
- if (isEntry && BB != &BB->getParent()->getEntryBlock())
- BB->moveBefore(&BB->getParent()->getEntryBlock());
-
- // We have erased a block. Update the iterator.
- I = BB;
- }
- }
- return Changed;
-}
-
-/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
-/// debug info directives, and an unconditional branch. Passes before isel
-/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
-/// isel. Start by eliminating these blocks so we can split them the way we
-/// want them.
-bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
- bool MadeChange = false;
- // Note that this intentionally skips the entry block.
- for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = I++;
-
- // If this block doesn't end with an uncond branch, ignore it.
- BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || !BI->isUnconditional())
- continue;
-
- // If the instruction before the branch (skipping debug info) isn't a phi
- // node, then other stuff is happening here.
- BasicBlock::iterator BBI = BI;
- if (BBI != BB->begin()) {
- --BBI;
- while (isa<DbgInfoIntrinsic>(BBI)) {
- if (BBI == BB->begin())
- break;
- --BBI;
- }
- if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
- continue;
- }
-
- // Do not break infinite loops.
- BasicBlock *DestBB = BI->getSuccessor(0);
- if (DestBB == BB)
- continue;
-
- if (!CanMergeBlocks(BB, DestBB))
- continue;
-
- EliminateMostlyEmptyBlock(BB);
- MadeChange = true;
- }
- return MadeChange;
-}
-
-/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
-/// single uncond branch between them, and BB contains no other non-phi
-/// instructions.
-bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
- const BasicBlock *DestBB) const {
- // We only want to eliminate blocks whose phi nodes are used by phi nodes in
- // the successor. If there are more complex condition (e.g. preheaders),
- // don't mess around with them.
- BasicBlock::const_iterator BBI = BB->begin();
- while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
- for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
- UI != E; ++UI) {
- const Instruction *User = cast<Instruction>(*UI);
- if (User->getParent() != DestBB || !isa<PHINode>(User))
- return false;
- // If User is inside DestBB block and it is a PHINode then check
- // incoming value. If incoming value is not from BB then this is
- // a complex condition (e.g. preheaders) we want to avoid here.
- if (User->getParent() == DestBB) {
- if (const PHINode *UPN = dyn_cast<PHINode>(User))
- for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
- Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
- if (Insn && Insn->getParent() == BB &&
- Insn->getParent() != UPN->getIncomingBlock(I))
- return false;
- }
- }
- }
- }
-
- // If BB and DestBB contain any common predecessors, then the phi nodes in BB
- // and DestBB may have conflicting incoming values for the block. If so, we
- // can't merge the block.
- const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
- if (!DestBBPN) return true; // no conflict.
-
- // Collect the preds of BB.
- SmallPtrSet<const BasicBlock*, 16> BBPreds;
- if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
- // It is faster to get preds from a PHI than with pred_iterator.
- for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
- BBPreds.insert(BBPN->getIncomingBlock(i));
- } else {
- BBPreds.insert(pred_begin(BB), pred_end(BB));
- }
-
- // Walk the preds of DestBB.
- for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
- if (BBPreds.count(Pred)) { // Common predecessor?
- BBI = DestBB->begin();
- while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
- const Value *V1 = PN->getIncomingValueForBlock(Pred);
- const Value *V2 = PN->getIncomingValueForBlock(BB);
-
- // If V2 is a phi node in BB, look up what the mapped value will be.
- if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
- if (V2PN->getParent() == BB)
- V2 = V2PN->getIncomingValueForBlock(Pred);
-
- // If there is a conflict, bail out.
- if (V1 != V2) return false;
- }
- }
- }
-
- return true;
-}
-
-
-/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
-/// an unconditional branch in it.
-void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
- BranchInst *BI = cast<BranchInst>(BB->getTerminator());
- BasicBlock *DestBB = BI->getSuccessor(0);
-
- DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
-
- // If the destination block has a single pred, then this is a trivial edge,
- // just collapse it.
- if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
- if (SinglePred != DestBB) {
- // Remember if SinglePred was the entry block of the function. If so, we
- // will need to move BB back to the entry position.
- bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, this);
-
- if (isEntry && BB != &BB->getParent()->getEntryBlock())
- BB->moveBefore(&BB->getParent()->getEntryBlock());
-
- DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
- return;
- }
- }
-
- // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
- // to handle the new incoming edges it is about to have.
- PHINode *PN;
- for (BasicBlock::iterator BBI = DestBB->begin();
- (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
- // Remove the incoming value for BB, and remember it.
- Value *InVal = PN->removeIncomingValue(BB, false);
-
- // Two options: either the InVal is a phi node defined in BB or it is some
- // value that dominates BB.
- PHINode *InValPhi = dyn_cast<PHINode>(InVal);
- if (InValPhi && InValPhi->getParent() == BB) {
- // Add all of the input values of the input PHI as inputs of this phi.
- for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
- PN->addIncoming(InValPhi->getIncomingValue(i),
- InValPhi->getIncomingBlock(i));
- } else {
- // Otherwise, add one instance of the dominating value for each edge that
- // we will be adding.
- if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
- PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
- } else {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- PN->addIncoming(InVal, *PI);
- }
- }
- }
-
- // The PHIs are now updated, change everything that refers to BB to use
- // DestBB and remove BB.
- BB->replaceAllUsesWith(DestBB);
- if (DT && !ModifiedDT) {
- BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
- BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
- DT->changeImmediateDominator(DestBB, NewIDom);
- DT->eraseNode(BB);
- }
- BB->eraseFromParent();
- ++NumBlocksElim;
-
- DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
-}
-
-/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
-/// sink it into user blocks to reduce the number of virtual
-/// registers that must be created and coalesced.
-///
-/// Return true if any changes are made.
-///
-static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
- // If this is a noop copy,
- EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(CI->getType());
-
- // This is an fp<->int conversion?
- if (SrcVT.isInteger() != DstVT.isInteger())
- return false;
-
- // If this is an extension, it will be a zero or sign extension, which
- // isn't a noop.
- if (SrcVT.bitsLT(DstVT)) return false;
-
- // If these values will be promoted, find out what they will be promoted
- // to. This helps us consider truncates on PPC as noop copies when they
- // are.
- if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
- TargetLowering::TypePromoteInteger)
- SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
- if (TLI.getTypeAction(CI->getContext(), DstVT) ==
- TargetLowering::TypePromoteInteger)
- DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
-
- // If, after promotion, these are the same types, this is a noop copy.
- if (SrcVT != DstVT)
- return false;
-
- BasicBlock *DefBB = CI->getParent();
-
- /// InsertedCasts - Only insert a cast in each block once.
- DenseMap<BasicBlock*, CastInst*> InsertedCasts;
-
- bool MadeChange = false;
- for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
- UI != E; ) {
- Use &TheUse = UI.getUse();
- Instruction *User = cast<Instruction>(*UI);
-
- // Figure out which BB this cast is used in. For PHI's this is the
- // appropriate predecessor block.
- BasicBlock *UserBB = User->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(User)) {
- UserBB = PN->getIncomingBlock(UI);
- }
-
- // Preincrement use iterator so we don't invalidate it.
- ++UI;
-
- // If this user is in the same block as the cast, don't change the cast.
- if (UserBB == DefBB) continue;
-
- // If we have already inserted a cast into this block, use it.
- CastInst *&InsertedCast = InsertedCasts[UserBB];
-
- if (!InsertedCast) {
- BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedCast =
- CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
- InsertPt);
- MadeChange = true;
- }
-
- // Replace a use of the cast with a use of the new cast.
- TheUse = InsertedCast;
- ++NumCastUses;
- }
-
- // If we removed all uses, nuke the cast.
- if (CI->use_empty()) {
- CI->eraseFromParent();
- MadeChange = true;
- }
-
- return MadeChange;
-}
-
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
-/// the number of virtual registers that must be created and coalesced. This is
-/// a clear win except on targets with multiple condition code registers
-/// (PowerPC), where it might lose; some adjustment may be wanted there.
-///
-/// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
- BasicBlock *DefBB = CI->getParent();
-
- /// InsertedCmp - Only insert a cmp in each block once.
- DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
-
- bool MadeChange = false;
- for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
- UI != E; ) {
- Use &TheUse = UI.getUse();
- Instruction *User = cast<Instruction>(*UI);
-
- // Preincrement use iterator so we don't invalidate it.
- ++UI;
-
- // Don't bother for PHI nodes.
- if (isa<PHINode>(User))
- continue;
-
- // Figure out which BB this cmp is used in.
- BasicBlock *UserBB = User->getParent();
-
- // If this user is in the same block as the cmp, don't change the cmp.
- if (UserBB == DefBB) continue;
-
- // If we have already inserted a cmp into this block, use it.
- CmpInst *&InsertedCmp = InsertedCmps[UserBB];
-
- if (!InsertedCmp) {
- BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedCmp =
- CmpInst::Create(CI->getOpcode(),
- CI->getPredicate(), CI->getOperand(0),
- CI->getOperand(1), "", InsertPt);
- MadeChange = true;
- }
-
- // Replace a use of the cmp with a use of the new cmp.
- TheUse = InsertedCmp;
- ++NumCmpUses;
- }
-
- // If we removed all uses, nuke the cmp.
- if (CI->use_empty())
- CI->eraseFromParent();
-
- return MadeChange;
-}
-
-namespace {
-class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
-protected:
- void replaceCall(Value *With) {
- CI->replaceAllUsesWith(With);
- CI->eraseFromParent();
- }
- bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
- if (ConstantInt *SizeCI =
- dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
- return SizeCI->isAllOnesValue();
- return false;
- }
-};
-} // end anonymous namespace
-
-bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
- BasicBlock *BB = CI->getParent();
-
- // Lower inline assembly if we can.
- // If we found an inline asm expession, and if the target knows how to
- // lower it to normal LLVM code, do so now.
- if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
- if (TLI->ExpandInlineAsm(CI)) {
- // Avoid invalidating the iterator.
- CurInstIterator = BB->begin();
- // Avoid processing instructions out of order, which could cause
- // reuse before a value is defined.
- SunkAddrs.clear();
- return true;
- }
- // Sink address computing for memory operands into the block.
- if (OptimizeInlineAsmInst(CI))
- return true;
- }
-
- // Lower all uses of llvm.objectsize.*
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
- if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
- bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
- Type *ReturnTy = CI->getType();
- Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-
- // Substituting this can cause recursive simplifications, which can
- // invalidate our iterator. Use a WeakVH to hold onto it in case this
- // happens.
- WeakVH IterHandle(CurInstIterator);
-
- replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
- TLInfo, ModifiedDT ? 0 : DT);
-
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- if (IterHandle != CurInstIterator) {
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
- return true;
- }
-
- if (II && TLI) {
- SmallVector<Value*, 2> PtrOps;
- Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
- while (!PtrOps.empty())
- if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
- return true;
- }
-
- // From here on out we're working with named functions.
- if (CI->getCalledFunction() == 0) return false;
-
- // We'll need DataLayout from here on out.
- const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
- if (!TD) return false;
-
- // Lower all default uses of _chk calls. This is very similar
- // to what InstCombineCalls does, but here we are only lowering calls
- // that have the default "don't know" as the objectsize. Anything else
- // should be left alone.
- CodeGenPrepareFortifiedLibCalls Simplifier;
- return Simplifier.fold(CI, TD, TLInfo);
-}
-
-/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
-/// instructions to the predecessor to enable tail call optimizations. The
-/// case it is currently looking for is:
-/// @code
-/// bb0:
-/// %tmp0 = tail call i32 @f0()
-/// br label %return
-/// bb1:
-/// %tmp1 = tail call i32 @f1()
-/// br label %return
-/// bb2:
-/// %tmp2 = tail call i32 @f2()
-/// br label %return
-/// return:
-/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
-/// ret i32 %retval
-/// @endcode
-///
-/// =>
-///
-/// @code
-/// bb0:
-/// %tmp0 = tail call i32 @f0()
-/// ret i32 %tmp0
-/// bb1:
-/// %tmp1 = tail call i32 @f1()
-/// ret i32 %tmp1
-/// bb2:
-/// %tmp2 = tail call i32 @f2()
-/// ret i32 %tmp2
-/// @endcode
-bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
- if (!TLI)
- return false;
-
- ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
- if (!RI)
- return false;
-
- PHINode *PN = 0;
- BitCastInst *BCI = 0;
- Value *V = RI->getReturnValue();
- if (V) {
- BCI = dyn_cast<BitCastInst>(V);
- if (BCI)
- V = BCI->getOperand(0);
-
- PN = dyn_cast<PHINode>(V);
- if (!PN)
- return false;
- }
-
- if (PN && PN->getParent() != BB)
- return false;
-
- // It's not safe to eliminate the sign / zero extension of the return value.
- // See llvm::isInTailCallPosition().
- const Function *F = BB->getParent();
- AttributeSet CallerAttrs = F->getAttributes();
- if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
- return false;
-
- // Make sure there are no instructions between the PHI and return, or that the
- // return is the first instruction in the block.
- if (PN) {
- BasicBlock::iterator BI = BB->begin();
- do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
- if (&*BI == BCI)
- // Also skip over the bitcast.
- ++BI;
- if (&*BI != RI)
- return false;
- } else {
- BasicBlock::iterator BI = BB->begin();
- while (isa<DbgInfoIntrinsic>(BI)) ++BI;
- if (&*BI != RI)
- return false;
- }
-
- /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
- /// call.
- SmallVector<CallInst*, 4> TailCalls;
- if (PN) {
- for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
- CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
- // Make sure the phi value is indeed produced by the tail call.
- if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
- TLI->mayBeEmittedAsTailCall(CI))
- TailCalls.push_back(CI);
- }
- } else {
- SmallPtrSet<BasicBlock*, 4> VisitedBBs;
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (!VisitedBBs.insert(*PI))
- continue;
-
- BasicBlock::InstListType &InstList = (*PI)->getInstList();
- BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
- BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
- do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
- if (RI == RE)
- continue;
-
- CallInst *CI = dyn_cast<CallInst>(&*RI);
- if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
- TailCalls.push_back(CI);
- }
- }
-
- bool Changed = false;
- for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
- CallInst *CI = TailCalls[i];
- CallSite CS(CI);
-
- // Conservatively require the attributes of the call to match those of the
- // return. Ignore noalias because it doesn't affect the call sequence.
- AttributeSet CalleeAttrs = CS.getAttributes();
- if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias))
- continue;
-
- // Make sure the call instruction is followed by an unconditional branch to
- // the return block.
- BasicBlock *CallBB = CI->getParent();
- BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
- if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
- continue;
-
- // Duplicate the return into CallBB.
- (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
- ModifiedDT = Changed = true;
- ++NumRetsDup;
- }
-
- // If we eliminated all predecessors of the block, delete the block now.
- if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
- BB->eraseFromParent();
-
- return Changed;
-}
-
-//===----------------------------------------------------------------------===//
-// Memory Optimization
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
-/// which holds actual Value*'s for register values.
-struct ExtAddrMode : public TargetLowering::AddrMode {
- Value *BaseReg;
- Value *ScaledReg;
- ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
- void print(raw_ostream &OS) const;
- void dump() const;
-
- bool operator==(const ExtAddrMode& O) const {
- return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
- (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
- (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
- }
-};
-
-#ifndef NDEBUG
-static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
- AM.print(OS);
- return OS;
-}
-#endif
-
-void ExtAddrMode::print(raw_ostream &OS) const {
- bool NeedPlus = false;
- OS << "[";
- if (BaseGV) {
- OS << (NeedPlus ? " + " : "")
- << "GV:";
- WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
- NeedPlus = true;
- }
-
- if (BaseOffs)
- OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
-
- if (BaseReg) {
- OS << (NeedPlus ? " + " : "")
- << "Base:";
- WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
- NeedPlus = true;
- }
- if (Scale) {
- OS << (NeedPlus ? " + " : "")
- << Scale << "*";
- WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
- }
-
- OS << ']';
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ExtAddrMode::dump() const {
- print(dbgs());
- dbgs() << '\n';
-}
-#endif
-
-
-/// \brief A helper class for matching addressing modes.
-///
-/// This encapsulates the logic for matching the target-legal addressing modes.
-class AddressingModeMatcher {
- SmallVectorImpl<Instruction*> &AddrModeInsts;
- const TargetLowering &TLI;
-
- /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
- /// the memory instruction that we're computing this address for.
- Type *AccessTy;
- Instruction *MemoryInst;
-
- /// AddrMode - This is the addressing mode that we're building up. This is
- /// part of the return value of this addressing mode matching stuff.
- ExtAddrMode &AddrMode;
-
- /// IgnoreProfitability - This is set to true when we should not do
- /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
- /// always returns true.
- bool IgnoreProfitability;
-
- AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
- const TargetLowering &T, Type *AT,
- Instruction *MI, ExtAddrMode &AM)
- : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
- IgnoreProfitability = false;
- }
-public:
-
- /// Match - Find the maximal addressing mode that a load/store of V can fold,
- /// give an access type of AccessTy. This returns a list of involved
- /// instructions in AddrModeInsts.
- static ExtAddrMode Match(Value *V, Type *AccessTy,
- Instruction *MemoryInst,
- SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetLowering &TLI) {
- ExtAddrMode Result;
-
- bool Success =
- AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
- MemoryInst, Result).MatchAddr(V, 0);
- (void)Success; assert(Success && "Couldn't select *anything*?");
- return Result;
- }
-private:
- bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
- bool MatchAddr(Value *V, unsigned Depth);
- bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
- bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
- ExtAddrMode &AMBefore,
- ExtAddrMode &AMAfter);
- bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
-};
-
-/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
-/// Return true and update AddrMode if this addr mode is legal for the target,
-/// false if not.
-bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
- unsigned Depth) {
- // If Scale is 1, then this is the same as adding ScaleReg to the addressing
- // mode. Just process that directly.
- if (Scale == 1)
- return MatchAddr(ScaleReg, Depth);
-
- // If the scale is 0, it takes nothing to add this.
- if (Scale == 0)
- return true;
-
- // If we already have a scale of this value, we can add to it, otherwise, we
- // need an available scale field.
- if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
- return false;
-
- ExtAddrMode TestAddrMode = AddrMode;
-
- // Add scale to turn X*4+X*3 -> X*7. This could also do things like
- // [A+B + A*7] -> [B+A*8].
- TestAddrMode.Scale += Scale;
- TestAddrMode.ScaledReg = ScaleReg;
-
- // If the new address isn't legal, bail out.
- if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
- return false;
-
- // It was legal, so commit it.
- AddrMode = TestAddrMode;
-
- // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
- // to see if ScaleReg is actually X+C. If so, we can turn this into adding
- // X*Scale + C*Scale to addr mode.
- ConstantInt *CI = 0; Value *AddLHS = 0;
- if (isa<Instruction>(ScaleReg) && // not a constant expr.
- match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
- TestAddrMode.ScaledReg = AddLHS;
- TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
-
- // If this addressing mode is legal, commit it and remember that we folded
- // this instruction.
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
- AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
- AddrMode = TestAddrMode;
- return true;
- }
- }
-
- // Otherwise, not (x+c)*scale, just return what we have.
- return true;
-}
-
-/// MightBeFoldableInst - This is a little filter, which returns true if an
-/// addressing computation involving I might be folded into a load/store
-/// accessing it. This doesn't need to be perfect, but needs to accept at least
-/// the set of instructions that MatchOperationAddr can.
-static bool MightBeFoldableInst(Instruction *I) {
- switch (I->getOpcode()) {
- case Instruction::BitCast:
- // Don't touch identity bitcasts.
- if (I->getType() == I->getOperand(0)->getType())
- return false;
- return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
- case Instruction::PtrToInt:
- // PtrToInt is always a noop, as we know that the int type is pointer sized.
- return true;
- case Instruction::IntToPtr:
- // We know the input is intptr_t, so this is foldable.
- return true;
- case Instruction::Add:
- return true;
- case Instruction::Mul:
- case Instruction::Shl:
- // Can only handle X*C and X << C.
- return isa<ConstantInt>(I->getOperand(1));
- case Instruction::GetElementPtr:
- return true;
- default:
- return false;
- }
-}
-
-/// MatchOperationAddr - Given an instruction or constant expr, see if we can
-/// fold the operation into the addressing mode. If so, update the addressing
-/// mode and return true, otherwise return false without modifying AddrMode.
-bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
- unsigned Depth) {
- // Avoid exponential behavior on extremely deep expression trees.
- if (Depth >= 5) return false;
-
- switch (Opcode) {
- case Instruction::PtrToInt:
- // PtrToInt is always a noop, as we know that the int type is pointer sized.
- return MatchAddr(AddrInst->getOperand(0), Depth);
- case Instruction::IntToPtr:
- // This inttoptr is a no-op if the integer type is pointer sized.
- if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
- return MatchAddr(AddrInst->getOperand(0), Depth);
- return false;
- case Instruction::BitCast:
- // BitCast is always a noop, and we can handle it as long as it is
- // int->int or pointer->pointer (we don't want int<->fp or something).
- if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
- AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
- // Don't touch identity bitcasts. These were probably put here by LSR,
- // and we don't want to mess around with them. Assume it knows what it
- // is doing.
- AddrInst->getOperand(0)->getType() != AddrInst->getType())
- return MatchAddr(AddrInst->getOperand(0), Depth);
- return false;
- case Instruction::Add: {
- // Check to see if we can merge in the RHS then the LHS. If so, we win.
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
- if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
- MatchAddr(AddrInst->getOperand(0), Depth+1))
- return true;
-
- // Restore the old addr mode info.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
-
- // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
- MatchAddr(AddrInst->getOperand(1), Depth+1))
- return true;
-
- // Otherwise we definitely can't merge the ADD in.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- break;
- }
- //case Instruction::Or:
- // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
- //break;
- case Instruction::Mul:
- case Instruction::Shl: {
- // Can only handle X*C and X << C.
- ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
- if (!RHS) return false;
- int64_t Scale = RHS->getSExtValue();
- if (Opcode == Instruction::Shl)
- Scale = 1LL << Scale;
-
- return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
- }
- case Instruction::GetElementPtr: {
- // Scan the GEP. We check it if it contains constant offsets and at most
- // one variable offset.
- int VariableOperand = -1;
- unsigned VariableScale = 0;
-
- int64_t ConstantOffset = 0;
- const DataLayout *TD = TLI.getDataLayout();
- gep_type_iterator GTI = gep_type_begin(AddrInst);
- for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- const StructLayout *SL = TD->getStructLayout(STy);
- unsigned Idx =
- cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
- ConstantOffset += SL->getElementOffset(Idx);
- } else {
- uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
- if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
- ConstantOffset += CI->getSExtValue()*TypeSize;
- } else if (TypeSize) { // Scales of zero don't do anything.
- // We only allow one variable index at the moment.
- if (VariableOperand != -1)
- return false;
-
- // Remember the variable index.
- VariableOperand = i;
- VariableScale = TypeSize;
- }
- }
- }
-
- // A common case is for the GEP to only do a constant offset. In this case,
- // just add it to the disp field and check validity.
- if (VariableOperand == -1) {
- AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
- // Check to see if we can fold the base pointer in too.
- if (MatchAddr(AddrInst->getOperand(0), Depth+1))
- return true;
- }
- AddrMode.BaseOffs -= ConstantOffset;
- return false;
- }
-
- // Save the valid addressing mode in case we can't match.
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
-
- // See if the scale and offset amount is valid for this target.
- AddrMode.BaseOffs += ConstantOffset;
-
- // Match the base operand of the GEP.
- if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
- // If it couldn't be matched, just stuff the value in a register.
- if (AddrMode.HasBaseReg) {
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- return false;
- }
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = AddrInst->getOperand(0);
- }
-
- // Match the remaining variable portion of the GEP.
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
- Depth)) {
- // If it couldn't be matched, try stuffing the base into a register
- // instead of matching it, and retrying the match of the scale.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- if (AddrMode.HasBaseReg)
- return false;
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = AddrInst->getOperand(0);
- AddrMode.BaseOffs += ConstantOffset;
- if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
- VariableScale, Depth)) {
- // If even that didn't work, bail.
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- return false;
- }
- }
-
- return true;
- }
- }
- return false;
-}
-
-/// MatchAddr - If we can, try to add the value of 'Addr' into the current
-/// addressing mode. If Addr can't be added to AddrMode this returns false and
-/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
-/// or intptr_t for the target.
-///
-bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
- // Fold in immediates if legal for the target.
- AddrMode.BaseOffs += CI->getSExtValue();
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseOffs -= CI->getSExtValue();
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
- // If this is a global variable, try to fold it into the addressing mode.
- if (AddrMode.BaseGV == 0) {
- AddrMode.BaseGV = GV;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.BaseGV = 0;
- }
- } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
- ExtAddrMode BackupAddrMode = AddrMode;
- unsigned OldSize = AddrModeInsts.size();
-
- // Check to see if it is possible to fold this operation.
- if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
- // Okay, it's possible to fold this. Check to see if it is actually
- // *profitable* to do so. We use a simple cost model to avoid increasing
- // register pressure too much.
- if (I->hasOneUse() ||
- IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
- AddrModeInsts.push_back(I);
- return true;
- }
-
- // It isn't profitable to do this, roll back.
- //cerr << "NOT FOLDING: " << *I;
- AddrMode = BackupAddrMode;
- AddrModeInsts.resize(OldSize);
- }
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
- if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
- return true;
- } else if (isa<ConstantPointerNull>(Addr)) {
- // Null pointer gets folded without affecting the addressing mode.
- return true;
- }
-
- // Worse case, the target should support [reg] addressing modes. :)
- if (!AddrMode.HasBaseReg) {
- AddrMode.HasBaseReg = true;
- AddrMode.BaseReg = Addr;
- // Still check for legality in case the target supports [imm] but not [i+r].
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.HasBaseReg = false;
- AddrMode.BaseReg = 0;
- }
-
- // If the base register is already taken, see if we can do [r+r].
- if (AddrMode.Scale == 0) {
- AddrMode.Scale = 1;
- AddrMode.ScaledReg = Addr;
- if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
- return true;
- AddrMode.Scale = 0;
- AddrMode.ScaledReg = 0;
- }
- // Couldn't match.
- return false;
-}
-
-/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
-/// inline asm call are due to memory operands. If so, return true, otherwise
-/// return false.
-static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetLowering &TLI) {
- TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI));
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
- // Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue());
-
- // If this asm operand is our Value*, and if it isn't an indirect memory
- // operand, we can't fold it!
- if (OpInfo.CallOperandVal == OpVal &&
- (OpInfo.ConstraintType != TargetLowering::C_Memory ||
- !OpInfo.isIndirect))
- return false;
- }
-
- return true;
-}
-
-/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
-/// memory use. If we find an obviously non-foldable instruction, return true.
-/// Add the ultimately found memory instructions to MemoryUses.
-static bool FindAllMemoryUses(Instruction *I,
- SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
- SmallPtrSet<Instruction*, 16> &ConsideredInsts,
- const TargetLowering &TLI) {
- // If we already considered this instruction, we're done.
- if (!ConsideredInsts.insert(I))
- return false;
-
- // If this is an obviously unfoldable instruction, bail out.
- if (!MightBeFoldableInst(I))
- return true;
-
- // Loop over all the uses, recursively processing them.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- User *U = *UI;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
- continue;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- unsigned opNo = UI.getOperandNo();
- if (opNo == 0) return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(SI, opNo));
- continue;
- }
-
- if (CallInst *CI = dyn_cast<CallInst>(U)) {
- InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
- if (!IA) return true;
-
- // If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
- return true;
- continue;
- }
-
- if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
- TLI))
- return true;
- }
-
- return false;
-}
-
-/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
-/// the use site that we're folding it into. If so, there is no cost to
-/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
-/// that we know are live at the instruction already.
-bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
- Value *KnownLive2) {
- // If Val is either of the known-live values, we know it is live!
- if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
- return true;
-
- // All values other than instructions and arguments (e.g. constants) are live.
- if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
-
- // If Val is a constant sized alloca in the entry block, it is live, this is
- // true because it is just a reference to the stack/frame pointer, which is
- // live for the whole function.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
- if (AI->isStaticAlloca())
- return true;
-
- // Check to see if this value is already used in the memory instruction's
- // block. If so, it's already live into the block at the very least, so we
- // can reasonably fold it.
- return Val->isUsedInBasicBlock(MemoryInst->getParent());
-}
-
-/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
-/// mode of the machine to fold the specified instruction into a load or store
-/// that ultimately uses it. However, the specified instruction has multiple
-/// uses. Given this, it may actually increase register pressure to fold it
-/// into the load. For example, consider this code:
-///
-/// X = ...
-/// Y = X+1
-/// use(Y) -> nonload/store
-/// Z = Y+1
-/// load Z
-///
-/// In this case, Y has multiple uses, and can be folded into the load of Z
-/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
-/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
-/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
-/// number of computations either.
-///
-/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
-/// X was live across 'load Z' for other reasons, we actually *would* want to
-/// fold the addressing mode in the Z case. This would make Y die earlier.
-bool AddressingModeMatcher::
-IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
- ExtAddrMode &AMAfter) {
- if (IgnoreProfitability) return true;
-
- // AMBefore is the addressing mode before this instruction was folded into it,
- // and AMAfter is the addressing mode after the instruction was folded. Get
- // the set of registers referenced by AMAfter and subtract out those
- // referenced by AMBefore: this is the set of values which folding in this
- // address extends the lifetime of.
- //
- // Note that there are only two potential values being referenced here,
- // BaseReg and ScaleReg (global addresses are always available, as are any
- // folded immediates).
- Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
-
- // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
- // lifetime wasn't extended by adding this instruction.
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- BaseReg = 0;
- if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- ScaledReg = 0;
-
- // If folding this instruction (and it's subexprs) didn't extend any live
- // ranges, we're ok with it.
- if (BaseReg == 0 && ScaledReg == 0)
- return true;
-
- // If all uses of this instruction are ultimately load/store/inlineasm's,
- // check to see if their addressing modes will include this instruction. If
- // so, we can fold it into all uses, so it doesn't matter if it has multiple
- // uses.
- SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
- SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
- return false; // Has a non-memory, non-foldable use!
-
- // Now that we know that all uses of this instruction are part of a chain of
- // computation involving only operations that could theoretically be folded
- // into a memory use, loop over each of these uses and see if they could
- // *actually* fold the instruction.
- SmallVector<Instruction*, 32> MatchedAddrModeInsts;
- for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
- Instruction *User = MemoryUses[i].first;
- unsigned OpNo = MemoryUses[i].second;
-
- // Get the access type of this use. If the use isn't a pointer, we don't
- // know what it accesses.
- Value *Address = User->getOperand(OpNo);
- if (!Address->getType()->isPointerTy())
- return false;
- Type *AddressAccessTy = Address->getType()->getPointerElementType();
-
- // Do a match against the root of this address, ignoring profitability. This
- // will tell us if the addressing mode for the memory operation will
- // *actually* cover the shared instruction.
- ExtAddrMode Result;
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
- MemoryInst, Result);
- Matcher.IgnoreProfitability = true;
- bool Success = Matcher.MatchAddr(Address, 0);
- (void)Success; assert(Success && "Couldn't select *anything*?");
-
- // If the match didn't cover I, then it won't be shared by it.
- if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
- I) == MatchedAddrModeInsts.end())
- return false;
-
- MatchedAddrModeInsts.clear();
- }
-
- return true;
-}
-
-} // end anonymous namespace
-
-/// IsNonLocalValue - Return true if the specified values are defined in a
-/// different basic block than BB.
-static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- return I->getParent() != BB;
- return false;
-}
-
-/// OptimizeMemoryInst - Load and Store Instructions often have
-/// addressing modes that can do significant amounts of computation. As such,
-/// instruction selection will try to get the load or store to do as much
-/// computation as possible for the program. The problem is that isel can only
-/// see within a single block. As such, we sink as much legal addressing mode
-/// stuff into the block as possible.
-///
-/// This method is used to optimize both load/store and inline asms with memory
-/// operands.
-bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
- Type *AccessTy) {
- Value *Repl = Addr;
-
- // Try to collapse single-value PHI nodes. This is necessary to undo
- // unprofitable PRE transformations.
- SmallVector<Value*, 8> worklist;
- SmallPtrSet<Value*, 16> Visited;
- worklist.push_back(Addr);
-
- // Use a worklist to iteratively look through PHI nodes, and ensure that
- // the addressing mode obtained from the non-PHI roots of the graph
- // are equivalent.
- Value *Consensus = 0;
- unsigned NumUsesConsensus = 0;
- bool IsNumUsesConsensusValid = false;
- SmallVector<Instruction*, 16> AddrModeInsts;
- ExtAddrMode AddrMode;
- while (!worklist.empty()) {
- Value *V = worklist.back();
- worklist.pop_back();
-
- // Break use-def graph loops.
- if (!Visited.insert(V)) {
- Consensus = 0;
- break;
- }
-
- // For a PHI node, push all of its incoming values.
- if (PHINode *P = dyn_cast<PHINode>(V)) {
- for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
- worklist.push_back(P->getIncomingValue(i));
- continue;
- }
-
- // For non-PHIs, determine the addressing mode being computed.
- SmallVector<Instruction*, 16> NewAddrModeInsts;
- ExtAddrMode NewAddrMode =
- AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
- NewAddrModeInsts, *TLI);
-
- // This check is broken into two cases with very similar code to avoid using
- // getNumUses() as much as possible. Some values have a lot of uses, so
- // calling getNumUses() unconditionally caused a significant compile-time
- // regression.
- if (!Consensus) {
- Consensus = V;
- AddrMode = NewAddrMode;
- AddrModeInsts = NewAddrModeInsts;
- continue;
- } else if (NewAddrMode == AddrMode) {
- if (!IsNumUsesConsensusValid) {
- NumUsesConsensus = Consensus->getNumUses();
- IsNumUsesConsensusValid = true;
- }
-
- // Ensure that the obtained addressing mode is equivalent to that obtained
- // for all other roots of the PHI traversal. Also, when choosing one
- // such root as representative, select the one with the most uses in order
- // to keep the cost modeling heuristics in AddressingModeMatcher
- // applicable.
- unsigned NumUses = V->getNumUses();
- if (NumUses > NumUsesConsensus) {
- Consensus = V;
- NumUsesConsensus = NumUses;
- AddrModeInsts = NewAddrModeInsts;
- }
- continue;
- }
-
- Consensus = 0;
- break;
- }
-
- // If the addressing mode couldn't be determined, or if multiple different
- // ones were determined, bail out now.
- if (!Consensus) return false;
-
- // Check to see if any of the instructions supersumed by this addr mode are
- // non-local to I's BB.
- bool AnyNonLocal = false;
- for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
- if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
- AnyNonLocal = true;
- break;
- }
- }
-
- // If all the instructions matched are already in this BB, don't do anything.
- if (!AnyNonLocal) {
- DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
- return false;
- }
-
- // Insert this computation right after this user. Since our caller is
- // scanning from the top of the BB to the bottom, reuse of the expr are
- // guaranteed to happen later.
- IRBuilder<> Builder(MemoryInst);
-
- // Now that we determined the addressing expression we want to use and know
- // that we have to sink it into this block. Check to see if we have already
- // done this for some other load/store instr in this block. If so, reuse the
- // computation.
- Value *&SunkAddr = SunkAddrs[Addr];
- if (SunkAddr) {
- DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
- if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
- } else {
- DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
- Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
- Value *Result = 0;
-
- // Start with the base register. Do this first so that subsequent address
- // matching finds it last, which will prevent it from trying to match it
- // as the scaled value in case it happens to be a mul. That would be
- // problematic if we've sunk a different mul for the scale, because then
- // we'd end up sinking both muls.
- if (AddrMode.BaseReg) {
- Value *V = AddrMode.BaseReg;
- if (V->getType()->isPointerTy())
- V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
- if (V->getType() != IntPtrTy)
- V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
- Result = V;
- }
-
- // Add the scale value.
- if (AddrMode.Scale) {
- Value *V = AddrMode.ScaledReg;
- if (V->getType() == IntPtrTy) {
- // done.
- } else if (V->getType()->isPointerTy()) {
- V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
- } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
- cast<IntegerType>(V->getType())->getBitWidth()) {
- V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
- } else {
- V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
- }
- if (AddrMode.Scale != 1)
- V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
- "sunkaddr");
- if (Result)
- Result = Builder.CreateAdd(Result, V, "sunkaddr");
- else
- Result = V;
- }
-
- // Add in the BaseGV if present.
- if (AddrMode.BaseGV) {
- Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
- if (Result)
- Result = Builder.CreateAdd(Result, V, "sunkaddr");
- else
- Result = V;
- }
-
- // Add in the Base Offset if present.
- if (AddrMode.BaseOffs) {
- Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
- if (Result)
- Result = Builder.CreateAdd(Result, V, "sunkaddr");
- else
- Result = V;
- }
-
- if (Result == 0)
- SunkAddr = Constant::getNullValue(Addr->getType());
- else
- SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
- }
-
- MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
-
- // If we have no uses, recursively delete the value and all dead instructions
- // using it.
- if (Repl->use_empty()) {
- // This can cause recursive deletion, which can invalidate our iterator.
- // Use a WeakVH to hold onto it in case this happens.
- WeakVH IterHandle(CurInstIterator);
- BasicBlock *BB = CurInstIterator->getParent();
-
- RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
-
- if (IterHandle != CurInstIterator) {
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
- }
- ++NumMemoryInsts;
- return true;
-}
-
-/// OptimizeInlineAsmInst - If there are any memory operands, use
-/// OptimizeMemoryInst to sink their address computing into the block when
-/// possible / profitable.
-bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
- bool MadeChange = false;
-
- TargetLowering::AsmOperandInfoVector
- TargetConstraints = TLI->ParseConstraints(CS);
- unsigned ArgNo = 0;
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
- // Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue());
-
- if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
- OpInfo.isIndirect) {
- Value *OpVal = CS->getArgOperand(ArgNo++);
- MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
- } else if (OpInfo.Type == InlineAsm::isInput)
- ArgNo++;
- }
-
- return MadeChange;
-}
-
-/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
-/// basic block as the load, unless conditions are unfavorable. This allows
-/// SelectionDAG to fold the extend into the load.
-///
-bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
- // Look for a load being extended.
- LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
- if (!LI) return false;
-
- // If they're already in the same block, there's nothing to do.
- if (LI->getParent() == I->getParent())
- return false;
-
- // If the load has other users and the truncate is not free, this probably
- // isn't worthwhile.
- if (!LI->hasOneUse() &&
- TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
- !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
- !TLI->isTruncateFree(I->getType(), LI->getType()))
- return false;
-
- // Check whether the target supports casts folded into loads.
- unsigned LType;
- if (isa<ZExtInst>(I))
- LType = ISD::ZEXTLOAD;
- else {
- assert(isa<SExtInst>(I) && "Unexpected ext type!");
- LType = ISD::SEXTLOAD;
- }
- if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
- return false;
-
- // Move the extend into the same block as the load, so that SelectionDAG
- // can fold it.
- I->removeFromParent();
- I->insertAfter(LI);
- ++NumExtsMoved;
- return true;
-}
-
-bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
- BasicBlock *DefBB = I->getParent();
-
- // If the result of a {s|z}ext and its source are both live out, rewrite all
- // other uses of the source with result of extension.
- Value *Src = I->getOperand(0);
- if (Src->hasOneUse())
- return false;
-
- // Only do this xform if truncating is free.
- if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
- return false;
-
- // Only safe to perform the optimization if the source is also defined in
- // this block.
- if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
- return false;
-
- bool DefIsLiveOut = false;
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
-
- // Figure out which BB this ext is used in.
- BasicBlock *UserBB = User->getParent();
- if (UserBB == DefBB) continue;
- DefIsLiveOut = true;
- break;
- }
- if (!DefIsLiveOut)
- return false;
-
- // Make sure none of the uses are PHI nodes.
- for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- BasicBlock *UserBB = User->getParent();
- if (UserBB == DefBB) continue;
- // Be conservative. We don't want this xform to end up introducing
- // reloads just before load / store instructions.
- if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
- return false;
- }
-
- // InsertedTruncs - Only insert one trunc in each block once.
- DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
-
- bool MadeChange = false;
- for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
- UI != E; ++UI) {
- Use &TheUse = UI.getUse();
- Instruction *User = cast<Instruction>(*UI);
-
- // Figure out which BB this ext is used in.
- BasicBlock *UserBB = User->getParent();
- if (UserBB == DefBB) continue;
-
- // Both src and def are live in this block. Rewrite the use.
- Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
-
- if (!InsertedTrunc) {
- BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
- InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
- }
-
- // Replace a use of the {s|z}ext source with a use of the result.
- TheUse = InsertedTrunc;
- ++NumExtUses;
- MadeChange = true;
- }
-
- return MadeChange;
-}
-
-/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
-/// turned into an explicit branch.
-static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
- // FIXME: This should use the same heuristics as IfConversion to determine
- // whether a select is better represented as a branch. This requires that
- // branch probability metadata is preserved for the select, which is not the
- // case currently.
-
- CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
-
- // If the branch is predicted right, an out of order CPU can avoid blocking on
- // the compare. Emit cmovs on compares with a memory operand as branches to
- // avoid stalls on the load from memory. If the compare has more than one use
- // there's probably another cmov or setcc around so it's not worth emitting a
- // branch.
- if (!Cmp)
- return false;
-
- Value *CmpOp0 = Cmp->getOperand(0);
- Value *CmpOp1 = Cmp->getOperand(1);
-
- // We check that the memory operand has one use to avoid uses of the loaded
- // value directly after the compare, making branches unprofitable.
- return Cmp->hasOneUse() &&
- ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
- (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
-}
-
-
-/// If we have a SelectInst that will likely profit from branch prediction,
-/// turn it into a branch.
-bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
- bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
-
- // Can we convert the 'select' to CF ?
- if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
- return false;
-
- TargetLowering::SelectSupportKind SelectKind;
- if (VectorCond)
- SelectKind = TargetLowering::VectorMaskSelect;
- else if (SI->getType()->isVectorTy())
- SelectKind = TargetLowering::ScalarCondVectorVal;
- else
- SelectKind = TargetLowering::ScalarValSelect;
-
- // Do we have efficient codegen support for this kind of 'selects' ?
- if (TLI->isSelectSupported(SelectKind)) {
- // We have efficient codegen support for the select instruction.
- // Check if it is profitable to keep this 'select'.
- if (!TLI->isPredictableSelectExpensive() ||
- !isFormingBranchFromSelectProfitable(SI))
- return false;
- }
-
- ModifiedDT = true;
-
- // First, we split the block containing the select into 2 blocks.
- BasicBlock *StartBlock = SI->getParent();
- BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
- BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
-
- // Create a new block serving as the landing pad for the branch.
- BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
- NextBlock->getParent(), NextBlock);
-
- // Move the unconditional branch from the block with the select in it into our
- // landing pad block.
- StartBlock->getTerminator()->eraseFromParent();
- BranchInst::Create(NextBlock, SmallBlock);
-
- // Insert the real conditional branch based on the original condition.
- BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
-
- // The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
- PN->takeName(SI);
- PN->addIncoming(SI->getTrueValue(), StartBlock);
- PN->addIncoming(SI->getFalseValue(), SmallBlock);
- SI->replaceAllUsesWith(PN);
- SI->eraseFromParent();
-
- // Instruct OptimizeBlock to skip to the next block.
- CurInstIterator = StartBlock->end();
- ++NumSelectsExpanded;
- return true;
-}
-
-bool CodeGenPrepare::OptimizeInst(Instruction *I) {
- if (PHINode *P = dyn_cast<PHINode>(I)) {
- // It is possible for very late stage optimizations (such as SimplifyCFG)
- // to introduce PHI nodes too late to be cleaned up. If we detect such a
- // trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
- TLInfo, DT)) {
- P->replaceAllUsesWith(V);
- P->eraseFromParent();
- ++NumPHIsElim;
- return true;
- }
- return false;
- }
-
- if (CastInst *CI = dyn_cast<CastInst>(I)) {
- // If the source of the cast is a constant, then this should have
- // already been constant folded. The only reason NOT to constant fold
- // it is if something (e.g. LSR) was careful to place the constant
- // evaluation in a block other than then one that uses it (e.g. to hoist
- // the address of globals out of a loop). If this is the case, we don't
- // want to forward-subst the cast.
- if (isa<Constant>(CI->getOperand(0)))
- return false;
-
- if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
- return true;
-
- if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
- bool MadeChange = MoveExtToFormExtLoad(I);
- return MadeChange | OptimizeExtUses(I);
- }
- return false;
- }
-
- if (CmpInst *CI = dyn_cast<CmpInst>(I))
- return OptimizeCmpExpression(CI);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (TLI)
- return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
- return false;
- }
-
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (TLI)
- return OptimizeMemoryInst(I, SI->getOperand(1),
- SI->getOperand(0)->getType());
- return false;
- }
-
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
- if (GEPI->hasAllZeroIndices()) {
- /// The GEP operand must be a pointer, so must its result -> BitCast
- Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
- GEPI->getName(), GEPI);
- GEPI->replaceAllUsesWith(NC);
- GEPI->eraseFromParent();
- ++NumGEPsElim;
- OptimizeInst(NC);
- return true;
- }
- return false;
- }
-
- if (CallInst *CI = dyn_cast<CallInst>(I))
- return OptimizeCallInst(CI);
-
- if (SelectInst *SI = dyn_cast<SelectInst>(I))
- return OptimizeSelectInst(SI);
-
- return false;
-}
-
-// In this pass we look for GEP and cast instructions that are used
-// across basic blocks and rewrite them to improve basic-block-at-a-time
-// selection.
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
- SunkAddrs.clear();
- bool MadeChange = false;
-
- CurInstIterator = BB.begin();
- while (CurInstIterator != BB.end())
- MadeChange |= OptimizeInst(CurInstIterator++);
-
- MadeChange |= DupRetToEnableTailCallOpts(&BB);
-
- return MadeChange;
-}
-
-// llvm.dbg.value is far away from the value then iSel may not be able
-// handle it properly. iSel will drop llvm.dbg.value if it can not
-// find a node corresponding to the value.
-bool CodeGenPrepare::PlaceDbgValues(Function &F) {
- bool MadeChange = false;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- Instruction *PrevNonDbgInst = NULL;
- for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
- Instruction *Insn = BI; ++BI;
- DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
- if (!DVI) {
- PrevNonDbgInst = Insn;
- continue;
- }
-
- Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
- if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
- DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
- else
- DVI->insertAfter(VI);
- MadeChange = true;
- ++NumDbgValueMoved;
- }
- }
- }
- return MadeChange;
-}
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
new file mode 100644
index 0000000..57a1521
--- /dev/null
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -0,0 +1,590 @@
+//===- ConstantHoisting.cpp - Prepare code for expensive constants --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies expensive constants to hoist and coalesces them to
+// better prepare it for SelectionDAG-based code generation. This works around
+// the limitations of the basic-block-at-a-time approach.
+//
+// First it scans all instructions for integer constants and calculates their
+// cost. If the constant can be folded into the instruction (the cost is
+// TCC_Free) or the cost is just a simple operation (TCC_Basic), then we don't
+// consider it expensive and leave it alone. This is the default behavior and
+// the default implementation of getIntImmCost will always return TCC_Free.
+//
+// If the cost is more than TCC_Basic, then the integer constant can't be folded
+// into the instruction and it might be beneficial to hoist the constant.
+// Similar constants are coalesced to reduce register pressure and
+// materialization code.
+//
+// When a constant is hoisted, it is also hidden behind a bitcast to force it to
+// be live-out of the basic block. Otherwise the constant would be just
+// duplicated and each basic block would have its own copy in the SelectionDAG.
+// The SelectionDAG recognizes such constants as opaque and doesn't perform
+// certain transformations on them, which would create a new expensive constant.
+//
+// This optimization is only applied to integer constants in instructions and
+// simple (this means not nested) constant cast expressions. For example:
+// %0 = load i64* inttoptr (i64 big_constant to i64*)
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "consthoist"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
+STATISTIC(NumConstantsRebased, "Number of constants rebased");
+
+namespace {
+struct ConstantUser;
+struct RebasedConstantInfo;
+
+typedef SmallVector<ConstantUser, 8> ConstantUseListType;
+typedef SmallVector<RebasedConstantInfo, 4> RebasedConstantListType;
+
+/// \brief Records one use of a constant: the instruction that consumes it and
+/// the index of the operand through which it does so.
+struct ConstantUser {
+  Instruction *Inst;
+  unsigned OpndIdx;
+
+  ConstantUser(Instruction *User, unsigned OperandNo)
+      : Inst(User), OpndIdx(OperandNo) {}
+};
+
+/// \brief A constant integer that is a hoisting candidate, together with every
+/// use recorded for it and the summed cost of those uses.
+struct ConstantCandidate {
+  ConstantUseListType Uses;
+  ConstantInt *ConstInt;
+  unsigned CumulativeCost;
+
+  ConstantCandidate(ConstantInt *CI) : ConstInt(CI), CumulativeCost(0) {}
+
+  /// \brief Record a further user and fold its cost into the running total.
+  void addUser(Instruction *User, unsigned OperandNo, unsigned UseCost) {
+    Uses.push_back(ConstantUser(User, OperandNo));
+    CumulativeCost += UseCost;
+  }
+};
+
+/// \brief This represents a constant that has been rebased with respect to a
+/// base constant. The difference to the base constant is recorded in Offset.
+struct RebasedConstantInfo {
+  ConstantUseListType Uses;
+  Constant *Offset;
+
+  /// \brief Take ownership of the use list and remember the offset.
+  ///
+  /// A named rvalue reference is an lvalue, so it must be moved explicitly;
+  /// without std::move the member would be copy-constructed from the argument.
+  RebasedConstantInfo(ConstantUseListType &&Uses, Constant *Offset)
+      : Uses(std::move(Uses)), Offset(Offset) {}
+};
+
+/// \brief A base constant and all its rebased constants.
+struct ConstantInfo {
+  // The constant that is hoisted; the rebased constants are expressed
+  // relative to it.
+  ConstantInt *BaseConstant;
+  // One entry per constant in the group, each carrying its own use list and
+  // its offset to BaseConstant.
+  RebasedConstantListType RebasedConstants;
+};
+
+/// \brief The constant hoisting pass.
+///
+/// A function pass that collects expensive integer constants, groups them
+/// around base constants, and emits materialization code for them. All
+/// per-function state is set up in setup() and released again in cleanup().
+class ConstantHoisting : public FunctionPass {
+  typedef DenseMap<ConstantInt *, unsigned> ConstCandMapType;
+  typedef std::vector<ConstantCandidate> ConstCandVecType;
+
+  // Analyses and entry block of the function currently being processed. They
+  // are null outside of runOnFunction.
+  const TargetTransformInfo *TTI;
+  DominatorTree *DT;
+  BasicBlock *Entry;
+
+  /// Keeps track of constant candidates found in the function.
+  ConstCandVecType ConstCandVec;
+
+  /// Keep track of cast instructions we already cloned.
+  SmallDenseMap<Instruction *, Instruction *> ClonedCastMap;
+
+  /// These are the final constants we decided to hoist.
+  SmallVector<ConstantInfo, 8> ConstantVec;
+public:
+  static char ID; // Pass identification, replacement for typeid
+  // Pointer members start out null, spelled the same way cleanup() resets them.
+  ConstantHoisting()
+      : FunctionPass(ID), TTI(nullptr), DT(nullptr), Entry(nullptr) {
+    initializeConstantHoistingPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &Fn) override;
+
+  const char *getPassName() const override { return "Constant Hoisting"; }
+
+  /// \brief Declare that the CFG is preserved and which analyses are required.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetTransformInfo>();
+  }
+
+private:
+  /// \brief Initialize the pass.
+  void setup(Function &Fn) {
+    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    TTI = &getAnalysis<TargetTransformInfo>();
+    Entry = &Fn.getEntryBlock();
+  }
+
+  /// \brief Cleanup.
+  void cleanup() {
+    ConstantVec.clear();
+    ClonedCastMap.clear();
+    ConstCandVec.clear();
+
+    TTI = nullptr;
+    DT = nullptr;
+    Entry = nullptr;
+  }
+
+  Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const;
+  Instruction *findConstantInsertionPoint(const ConstantInfo &ConstInfo) const;
+  void collectConstantCandidates(ConstCandMapType &ConstCandMap,
+                                 Instruction *Inst, unsigned Idx,
+                                 ConstantInt *ConstInt);
+  void collectConstantCandidates(ConstCandMapType &ConstCandMap,
+                                 Instruction *Inst);
+  void collectConstantCandidates(Function &Fn);
+  void findAndMakeBaseConstant(ConstCandVecType::iterator S,
+                               ConstCandVecType::iterator E);
+  void findBaseConstants();
+  void emitBaseConstants(Instruction *Base, Constant *Offset,
+                         const ConstantUser &ConstUser);
+  bool emitBaseConstants();
+  void deleteDeadCastInst() const;
+  bool optimizeConstants(Function &Fn);
+};
+}
+
+char ConstantHoisting::ID = 0;
+INITIALIZE_PASS_BEGIN(ConstantHoisting, "consthoist", "Constant Hoisting",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(ConstantHoisting, "consthoist", "Constant Hoisting",
+ false, false)
+
+/// \brief Return a newly allocated instance of the constant hoisting pass.
+FunctionPass *llvm::createConstantHoistingPass() {
+  return new ConstantHoisting();
+}
+
+/// \brief Run constant hoisting on \p Fn.
+///
+/// Per-function state is established before the optimization and torn down
+/// afterwards, whether or not anything changed.
+/// \return true if the function was modified.
+bool ConstantHoisting::runOnFunction(Function &Fn) {
+  DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");
+  DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
+
+  setup(Fn);
+  const bool Changed = optimizeConstants(Fn);
+
+  // Only dump the function when it actually differs from the input.
+  if (Changed) {
+    DEBUG(dbgs() << "********** Function after Constant Hoisting: "
+                 << Fn.getName() << '\n');
+    DEBUG(dbgs() << Fn);
+  }
+  DEBUG(dbgs() << "********** End Constant Hoisting **********\n");
+
+  cleanup();
+  return Changed;
+}
+
+
+/// \brief Find the constant materialization insertion point.
+///
+/// \param Inst the instruction that uses the constant.
+/// \param Idx  the operand index of the use, or ~0U when the query is not tied
+///             to a particular operand.
+/// \return the instruction before which materialization code is inserted.
+Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
+                                               unsigned Idx) const {
+  // If the operand is a cast instruction, the materialized value becomes the
+  // operand of a clone that is inserted right after that cast. It therefore
+  // has to be available before the cast, not merely before the user;
+  // otherwise the clone would use a value defined after it.
+  if (Idx != ~0U) {
+    Value *Opnd = Inst->getOperand(Idx);
+    if (auto OpndInst = dyn_cast<Instruction>(Opnd))
+      if (OpndInst->isCast())
+        return OpndInst;
+  }
+
+  // The simple and common case. This also covers constant expressions.
+  if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst))
+    return Inst;
+
+  // We can't insert directly before a phi node or landing pad. Insert before
+  // the terminator of the incoming or dominating block.
+  assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!");
+  if (Idx != ~0U && isa<PHINode>(Inst))
+    return cast<PHINode>(Inst)->getIncomingBlock(Idx)->getTerminator();
+
+  BasicBlock *IDom = DT->getNode(Inst->getParent())->getIDom()->getBlock();
+  return IDom->getTerminator();
+}
+
+/// \brief Pick a location for the hoisted base constant that dominates every
+/// recorded use of it.
+Instruction *ConstantHoisting::
+findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
+  assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
+  // Gather the parent block of every user.
+  SmallPtrSet<BasicBlock *, 8> UserBlocks;
+  for (auto const &Rebased : ConstInfo.RebasedConstants)
+    for (auto const &Use : Rebased.Uses)
+      UserBlocks.insert(Use.Inst->getParent());
+
+  // A use in the entry block pins the constant to the top of the function.
+  if (UserBlocks.count(Entry))
+    return &Entry->front();
+
+  // Fold the set pairwise: replace two blocks by their nearest common
+  // dominator until a single block remains.
+  while (UserBlocks.size() >= 2) {
+    BasicBlock *First = *UserBlocks.begin();
+    BasicBlock *Second = *std::next(UserBlocks.begin());
+    BasicBlock *Dom = DT->findNearestCommonDominator(First, Second);
+    if (Dom == Entry)
+      return &Entry->front();
+    UserBlocks.erase(First);
+    UserBlocks.erase(Second);
+    UserBlocks.insert(Dom);
+  }
+  assert((UserBlocks.size() == 1) && "Expected only one element.");
+  // The first instruction may be a PHI or landing pad; findMatInsertPt
+  // redirects the insertion in that case.
+  return findMatInsertPt(&(*UserBlocks.begin())->front());
+}
+
+
+/// \brief Record constant integer ConstInt for instruction Inst at operand
+/// index Idx.
+///
+/// The operand at index Idx is not necessarily the constant integer itself. It
+/// could also be a cast instruction or a constant expression that uses the
+/// constant integer.
+void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
+                                                 Instruction *Inst,
+                                                 unsigned Idx,
+                                                 ConstantInt *ConstInt) {
+  // Query the target for the cost of materializing the constant at this
+  // operand. Intrinsics are looked up by intrinsic ID, everything else by
+  // opcode.
+  unsigned Cost;
+  if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
+    Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx,
+                              ConstInt->getValue(), ConstInt->getType());
+  else
+    Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(),
+                              ConstInt->getType());
+
+  // Cheap integer constants are not candidates.
+  if (Cost <= TargetTransformInfo::TCC_Basic)
+    return;
+
+  // The map stores, for each constant, its index in the candidate vector. A
+  // first sighting creates the candidate and patches in the real index.
+  auto Lookup = ConstCandMap.insert(std::make_pair(ConstInt, 0));
+  if (Lookup.second) {
+    ConstCandVec.push_back(ConstantCandidate(ConstInt));
+    Lookup.first->second = ConstCandVec.size() - 1;
+  }
+  ConstCandVec[Lookup.first->second].addUser(Inst, Idx, Cost);
+  DEBUG(if (isa<ConstantInt>(Inst->getOperand(Idx)))
+          dbgs() << "Collect constant " << *ConstInt << " from " << *Inst
+                 << " with cost " << Cost << '\n';
+        else
+          dbgs() << "Collect constant " << *ConstInt << " indirectly from "
+                 << *Inst << " via " << *Inst->getOperand(Idx) << " with cost "
+                 << Cost << '\n';
+  );
+}
+
+/// \brief Examine every operand of \p Inst and record the integer constants it
+/// uses, either directly or through a single cast.
+void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap,
+                                                 Instruction *Inst) {
+  // Cast instructions are not scanned themselves; they are reached through
+  // the instructions that use them.
+  if (Inst->isCast())
+    return;
+
+  // Inline asm can't be handled.
+  if (auto Call = dyn_cast<CallInst>(Inst))
+    if (isa<InlineAsm>(Call->getCalledValue()))
+      return;
+
+  for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) {
+    Value *Opnd = Inst->getOperand(Idx);
+
+    // Work out which constant integer, if any, this operand stands for.
+    ConstantInt *ConstInt = nullptr;
+    if (auto Direct = dyn_cast<ConstantInt>(Opnd)) {
+      // The operand is the constant itself.
+      ConstInt = Direct;
+    } else if (auto OpndInst = dyn_cast<Instruction>(Opnd)) {
+      // Only cast instructions are looked through; they were skipped above.
+      if (OpndInst->isCast())
+        ConstInt = dyn_cast<ConstantInt>(OpndInst->getOperand(0));
+    } else if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+      // Only constant cast expressions are looked through.
+      if (ConstExpr->isCast())
+        ConstInt = dyn_cast<ConstantInt>(ConstExpr->getOperand(0));
+    }
+
+    // Pretend the constant is used directly by the instruction, ignoring any
+    // cast in between.
+    if (ConstInt)
+      collectConstantCandidates(ConstCandMap, Inst, Idx, ConstInt);
+  } // end of for all operands
+}
+
+/// \brief Collect all integer constants in the function that cannot be folded
+/// into an instruction itself.
+///
+/// The map from constant to candidate index is local to this call; only the
+/// candidate vector survives it.
+void ConstantHoisting::collectConstantCandidates(Function &Fn) {
+  ConstCandMapType ConstCandMap;
+  // Iterate by reference instead of converting each element to an iterator.
+  for (BasicBlock &BB : Fn)
+    for (Instruction &Inst : BB)
+      collectConstantCandidates(ConstCandMap, &Inst);
+}
+
+/// \brief Find the base constant within the given range and rebase all other
+/// constants with respect to the base constant.
+///
+/// \param S first candidate of the range.
+/// \param E one past the last candidate of the range.
+///
+/// The use lists of the candidates in [S, E) are moved into the resulting
+/// ConstantInfo. Nothing is recorded when the range has at most one use in
+/// total.
+void ConstantHoisting::findAndMakeBaseConstant(ConstCandVecType::iterator S,
+                                               ConstCandVecType::iterator E) {
+  auto MaxCostItr = S;
+  unsigned NumUses = 0;
+  // Use the constant that has the maximum cost as base constant.
+  for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+    NumUses += ConstCand->Uses.size();
+    if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+      MaxCostItr = ConstCand;
+  }
+
+  // Don't hoist constants that have only one use.
+  if (NumUses <= 1)
+    return;
+
+  ConstantInfo ConstInfo;
+  ConstInfo.BaseConstant = MaxCostItr->ConstInt;
+  Type *Ty = ConstInfo.BaseConstant->getType();
+
+  // Rebase the constants with respect to the base constant.
+  for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+    APInt Diff = ConstCand->ConstInt->getValue() -
+                 ConstInfo.BaseConstant->getValue();
+    // A null offset marks the base constant itself.
+    Constant *Offset = Diff == 0 ? nullptr : ConstantInt::get(Ty, Diff);
+    ConstInfo.RebasedConstants.push_back(
+      RebasedConstantInfo(std::move(ConstCand->Uses), Offset));
+  }
+  // ConstInfo is not used afterwards, so hand its contents over instead of
+  // copying every use list.
+  ConstantVec.push_back(std::move(ConstInfo));
+}
+
+/// \brief Group constant candidates that can be rematerialized from a shared
+/// base constant with a single add, and create a base constant per group.
+void ConstantHoisting::findBaseConstants() {
+  // Order the candidates by bit width and then by unsigned value. This
+  // invalidates the mapping from constants to candidate indices!
+  auto ByTypeThenValue = [](const ConstantCandidate &LHS,
+                            const ConstantCandidate &RHS) {
+    if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
+      return LHS.ConstInt->getType()->getBitWidth() <
+             RHS.ConstInt->getType()->getBitWidth();
+    return LHS.ConstInt->getValue().ult(RHS.ConstInt->getValue());
+  };
+  std::sort(ConstCandVec.begin(), ConstCandVec.end(), ByTypeThenValue);
+
+  // One linear pass over the sorted candidates. GroupBegin is the smallest
+  // member of the group currently being built.
+  auto GroupBegin = ConstCandVec.begin();
+  for (auto Cur = std::next(GroupBegin), End = ConstCandVec.end();
+       Cur != End; ++Cur) {
+    // A candidate joins the group when it has the same type and its distance
+    // to the group's smallest value is a legal add immediate.
+    bool Mergeable = false;
+    if (GroupBegin->ConstInt->getType() == Cur->ConstInt->getType()) {
+      APInt Diff = Cur->ConstInt->getValue() - GroupBegin->ConstInt->getValue();
+      Mergeable = (Diff.getBitWidth() <= 64) &&
+                  TTI->isLegalAddImmediate(Diff.getSExtValue());
+    }
+    if (Mergeable)
+      continue;
+
+    // Either the type changed or the value is out of range: close the current
+    // group and start a new one at this candidate.
+    findAndMakeBaseConstant(GroupBegin, Cur);
+    GroupBegin = Cur;
+  }
+  // Close the final group.
+  findAndMakeBaseConstant(GroupBegin, ConstCandVec.end());
+}
+
+/// \brief Replace operand \p Idx of \p Inst with \p Mat. PHI nodes need special
+/// handling when the same incoming basic block appears more than once.
+/// \return The update always succeeds; the return value indicates whether
+///         Mat was used for the update or not.
+static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat) {
+  Value *NewVal = Mat;
+  bool UsedMat = true;
+
+  if (auto PHI = dyn_cast<PHINode>(Inst)) {
+    // If an earlier operand of the PHI node comes from the same incoming
+    // block (this happens when that block ends in a switch), reuse the value
+    // of that earlier operand. The values would be equal anyway, but the
+    // verifier rejects different values for the same incoming block.
+    BasicBlock *IncomingBB = PHI->getIncomingBlock(Idx);
+    for (unsigned Prev = 0; Prev != Idx; ++Prev) {
+      if (PHI->getIncomingBlock(Prev) != IncomingBB)
+        continue;
+      NewVal = PHI->getIncomingValue(Prev);
+      UsedMat = false;
+      break;
+    }
+  }
+
+  Inst->setOperand(Idx, NewVal);
+  return UsedMat;
+}
+
+/// \brief Emit materialization code for one user of a rebased constant and
+/// update that user.
+///
+/// \param Base      the instruction that holds the hoisted base constant.
+/// \param Offset    the difference to the base constant, or null when the user
+///                  uses the base constant itself.
+/// \param ConstUser the instruction and operand index to rewrite.
+///
+/// The operand being rewritten is either the constant integer itself, a cast
+/// instruction, or a constant expression; each case is handled separately.
+void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset,
+                                         const ConstantUser &ConstUser) {
+  // Without an offset the base is used as is; otherwise an add of base and
+  // offset is created at the materialization point of this user.
+  Instruction *Mat = Base;
+  if (Offset) {
+    Instruction *InsertionPt = findMatInsertPt(ConstUser.Inst,
+                                               ConstUser.OpndIdx);
+    Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
+                                 "const_mat", InsertionPt);
+
+    DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0)
+                 << " + " << *Offset << ") in BB "
+                 << Mat->getParent()->getName() << '\n' << *Mat << '\n');
+    Mat->setDebugLoc(ConstUser.Inst->getDebugLoc());
+  }
+  Value *Opnd = ConstUser.Inst->getOperand(ConstUser.OpndIdx);
+
+  // Visit constant integer.
+  if (isa<ConstantInt>(Opnd)) {
+    DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
+    // If the operand was updated with some other value, the add created above
+    // has no user and is removed again.
+    if (!updateOperand(ConstUser.Inst, ConstUser.OpndIdx, Mat) && Offset)
+      Mat->eraseFromParent();
+    DEBUG(dbgs() << "To    : " << *ConstUser.Inst << '\n');
+    return;
+  }
+
+  // Visit cast instruction.
+  if (auto CastInst = dyn_cast<Instruction>(Opnd)) {
+    assert(CastInst->isCast() && "Expected an cast instruction!");
+    // Check if we already have visited this cast instruction before to avoid
+    // unnecessary cloning.
+    // NOTE(review): when a clone already exists, a Mat created above for a
+    // non-null Offset is not used on this path and is not erased here;
+    // confirm whether it is cleaned up elsewhere.
+    Instruction *&ClonedCastInst = ClonedCastMap[CastInst];
+    if (!ClonedCastInst) {
+      ClonedCastInst = CastInst->clone();
+      ClonedCastInst->setOperand(0, Mat);
+      ClonedCastInst->insertAfter(CastInst);
+      // Use the same debug location as the original cast instruction.
+      ClonedCastInst->setDebugLoc(CastInst->getDebugLoc());
+      DEBUG(dbgs() << "Clone instruction: " << *ClonedCastInst << '\n'
+                   << "To               : " << *CastInst << '\n');
+    }
+
+    DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
+    updateOperand(ConstUser.Inst, ConstUser.OpndIdx, ClonedCastInst);
+    DEBUG(dbgs() << "To    : " << *ConstUser.Inst << '\n');
+    return;
+  }
+
+  // Visit constant expression.
+  if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
+    // Turn the constant expression into a real instruction that takes the
+    // materialized value as its operand.
+    Instruction *ConstExprInst = ConstExpr->getAsInstruction();
+    ConstExprInst->setOperand(0, Mat);
+    ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst,
+                                                ConstUser.OpndIdx));
+
+    // Use the same debug location as the instruction we are about to update.
+    ConstExprInst->setDebugLoc(ConstUser.Inst->getDebugLoc());
+
+    DEBUG(dbgs() << "Create instruction: " << *ConstExprInst << '\n'
+                 << "From              : " << *ConstExpr << '\n');
+    DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
+    // If the new instruction was not used for the update, remove it first and
+    // then the add it depends on.
+    if (!updateOperand(ConstUser.Inst, ConstUser.OpndIdx, ConstExprInst)) {
+      ConstExprInst->eraseFromParent();
+      if (Offset)
+        Mat->eraseFromParent();
+    }
+    DEBUG(dbgs() << "To    : " << *ConstUser.Inst << '\n');
+    return;
+  }
+}
+
+/// \brief For every selected base constant, hoist it behind a bitcast and
+/// rewrite all users of the constants derived from it.
+/// \return true if at least one base constant was emitted.
+bool ConstantHoisting::emitBaseConstants() {
+  bool Changed = false;
+  for (auto const &Info : ConstantVec) {
+    // Hide the base constant behind a bitcast placed at a point that
+    // dominates all of its users.
+    Instruction *InsertPt = findConstantInsertionPoint(Info);
+    IntegerType *BaseTy = Info.BaseConstant->getType();
+    Instruction *Base =
+      new BitCastInst(Info.BaseConstant, BaseTy, "const", InsertPt);
+    DEBUG(dbgs() << "Hoist constant (" << *Info.BaseConstant << ") to BB "
+                 << InsertPt->getParent()->getName() << '\n' << *Base << '\n');
+    ++NumConstantsHoisted;
+
+    // Rewrite the users of every constant in the group.
+    for (auto const &Rebased : Info.RebasedConstants) {
+      ++NumConstantsRebased;
+      for (auto const &Use : Rebased.Uses)
+        emitBaseConstants(Base, Rebased.Offset, Use);
+    }
+
+    // Use the same debug location as the last user of the constant.
+    assert(!Base->use_empty() && "The use list is empty!?");
+    assert(isa<Instruction>(Base->user_back()) &&
+           "All uses should be instructions.");
+    Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc());
+
+    // The base constant itself was counted as rebased above; undo that.
+    --NumConstantsRebased;
+    Changed = true;
+  }
+  return Changed;
+}
+
+/// \brief Erase every original cast instruction that was cloned and has no
+/// users left.
+void ConstantHoisting::deleteDeadCastInst() const {
+  for (auto const &OrigAndClone : ClonedCastMap) {
+    // The map key is the original cast; the mapped value is its clone.
+    Instruction *OrigCast = OrigAndClone.first;
+    if (OrigCast->use_empty())
+      OrigCast->eraseFromParent();
+  }
+}
+
+/// \brief Drive the optimization for \p Fn: collect candidates, group them
+/// around base constants, emit the code, and clean up.
+/// \return true if the function was modified.
+bool ConstantHoisting::optimizeConstants(Function &Fn) {
+  // Step 1: gather every constant candidate. Nothing to do without any.
+  collectConstantCandidates(Fn);
+  if (ConstCandVec.empty())
+    return false;
+
+  // Step 2: combine constants that can be materialized with an add from a
+  // common base constant. Nothing to do if no group qualified.
+  findBaseConstants();
+  if (ConstantVec.empty())
+    return false;
+
+  // Step 3: hoist the base constants and emit materialization code for the
+  // constants that depend on them.
+  const bool Changed = emitBaseConstants();
+
+  // Step 4: remove cast instructions that were cloned and are now unused.
+  deleteDeadCastInst();
+
+  return Changed;
+}
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index d5a96ec..7045b36 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,9 +24,9 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include <set>
using namespace llvm;
@@ -40,9 +40,9 @@ namespace {
initializeConstantPropagationPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfo>();
}
@@ -67,7 +67,8 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
- DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
@@ -75,12 +76,11 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI)
- WorkList.insert(cast<Instruction>(*UI));
+ for (User *U : I->users())
+ WorkList.insert(cast<Instruction>(U));
// Replace all of the uses of a variable with uses of the constant.
I->replaceAllUsesWith(C);
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 995782e..0490767 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -16,11 +16,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -48,9 +48,9 @@ namespace {
initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
}
};
@@ -281,6 +281,9 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
}
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
LVI = &getAnalysis<LazyValueInfo>();
bool FnChanged = false;
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index e8a090a..8377fd9 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -19,9 +19,9 @@
#define DEBUG_TYPE "dce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -38,7 +38,9 @@ namespace {
DeadInstElimination() : BasicBlockPass(ID) {
initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnBasicBlock(BasicBlock &BB) {
+ bool runOnBasicBlock(BasicBlock &BB) override {
+ if (skipOptnoneFunction(BB))
+ return false;
TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -52,7 +54,7 @@ namespace {
return Changed;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
};
@@ -77,9 +79,9 @@ namespace {
initializeDCEPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
};
@@ -89,6 +91,9 @@ char DCE::ID = 0;
INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)
bool DCE::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
// Start out with all of the instructions in the worklist...
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 57432c7..f54c00d 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -22,12 +22,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -53,10 +53,13 @@ namespace {
initializeDSEPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = AA->getTargetLibraryInfo();
bool Changed = false;
@@ -76,13 +79,13 @@ namespace {
void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
SmallSetVector<Value*, 16> &DeadStackObjects);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
};
@@ -90,7 +93,7 @@ namespace {
char DSE::ID = 0;
INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
@@ -190,6 +193,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
/// describe the memory operations for this instruction.
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
+ const DataLayout *DL = AA.getDataLayout();
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return AA.getLocation(SI);
@@ -199,7 +203,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// memset/memcpy, which writes more than an i8.
- if (Loc.Size == AliasAnalysis::UnknownSize && AA.getDataLayout() == 0)
+ if (Loc.Size == AliasAnalysis::UnknownSize && DL == 0)
return AliasAnalysis::Location();
return Loc;
}
@@ -213,7 +217,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// init.trampoline, which writes more than an i8.
- if (AA.getDataLayout() == 0) return AliasAnalysis::Location();
+ if (DL == 0) return AliasAnalysis::Location();
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
@@ -341,6 +345,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
AliasAnalysis &AA,
int64_t &EarlierOff,
int64_t &LaterOff) {
+ const DataLayout *DL = AA.getDataLayout();
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -354,8 +359,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// If we have no DataLayout information around, then the size of the store
// is inferrable from the pointee type. If they are the same type, then
// we know that the store is safe.
- if (AA.getDataLayout() == 0 &&
- Later.Ptr->getType() == Earlier.Ptr->getType())
+ if (DL == 0 && Later.Ptr->getType() == Earlier.Ptr->getType())
return OverwriteComplete;
return OverwriteUnknown;
@@ -369,17 +373,14 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize ||
- AA.getDataLayout() == 0)
+ Earlier.Size == AliasAnalysis::UnknownSize || DL == 0)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
- // an alloca, or a byval argument). If so, then it clearly overwrites any
- // other store to the same object.
- const DataLayout *TD = AA.getDataLayout();
-
- const Value *UO1 = GetUnderlyingObject(P1, TD),
- *UO2 = GetUnderlyingObject(P2, TD);
+ // an alloca, or a byval/inalloca argument). If so, then it clearly
+ // overwrites any other store to the same object.
+ const Value *UO1 = GetUnderlyingObject(P1, DL),
+ *UO2 = GetUnderlyingObject(P2, DL);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
@@ -397,8 +398,8 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// pointers are equal, then we can reason about the two stores.
EarlierOff = 0;
LaterOff = 0;
- const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
- const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
+ const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
+ const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
@@ -679,7 +680,7 @@ bool DSE::HandleFree(CallInst *F) {
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
break;
- Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
+ Instruction *Next = std::next(BasicBlock::iterator(Dependency));
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency, *MD, TLI);
@@ -701,22 +702,6 @@ bool DSE::HandleFree(CallInst *F) {
return MadeChange;
}
-namespace {
- struct CouldRef {
- typedef Value *argument_type;
- const CallSite CS;
- AliasAnalysis *AA;
-
- bool operator()(Value *I) {
- // See if the call site touches the value.
- AliasAnalysis::ModRefResult A =
- AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
-
- return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
- }
- };
-}
-
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
/// function end block. Ex:
/// %A = alloca i32
@@ -742,11 +727,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
DeadStackObjects.insert(I);
}
- // Treat byval arguments the same, stores to them are dead at the end of the
- // function.
+ // Treat byval or inalloca arguments the same, stores to them are dead at the
+ // end of the function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
AE = BB.getParent()->arg_end(); AI != AE; ++AI)
- if (AI->hasByValAttr())
+ if (AI->hasByValOrInAllocaAttr())
DeadStackObjects.insert(AI);
// Scan the basic block backwards
@@ -776,7 +761,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
E = Pointers.end(); I != E; ++I) {
dbgs() << **I;
- if (llvm::next(I) != E)
+ if (std::next(I) != E)
dbgs() << ", ";
}
dbgs() << '\n');
@@ -818,8 +803,13 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If the call might load from any of our allocas, then any store above
// the call is live.
- CouldRef Pred = { CS, AA };
- DeadStackObjects.remove_if(Pred);
+ DeadStackObjects.remove_if([&](Value *I) {
+ // See if the call site touches the value.
+ AliasAnalysis::ModRefResult A =
+ AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+
+ return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+ });
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
@@ -862,20 +852,6 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
return MadeChange;
}
-namespace {
- struct CouldAlias {
- typedef Value *argument_type;
- const AliasAnalysis::Location &LoadedLoc;
- AliasAnalysis *AA;
-
- bool operator()(Value *I) {
- // See if the loaded location could alias the stack location.
- AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
- return !AA->isNoAlias(StackLoc, LoadedLoc);
- }
- };
-}
-
/// RemoveAccessedObjects - Check to see if the specified location may alias any
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
@@ -895,6 +871,9 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
}
// Remove objects that could alias LoadedLoc.
- CouldAlias Pred = { LoadedLoc, AA };
- DeadStackObjects.remove_if(Pred);
+ DeadStackObjects.remove_if([&](Value *I) {
+ // See if the loaded location could alias the stack location.
+ AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+ return !AA->isNoAlias(StackLoc, LoadedLoc);
+ });
}
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 5266894..af2c3d1 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -17,16 +17,16 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <deque>
+#include <vector>
using namespace llvm;
STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
@@ -262,7 +262,7 @@ namespace {
/// cases.
class EarlyCSE : public FunctionPass {
public:
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef RecyclingAllocator<BumpPtrAllocator,
@@ -303,7 +303,7 @@ public:
initializeEarlyCSEPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
private:
@@ -376,8 +376,8 @@ private:
bool processNode(DomTreeNode *Node);
// This transformation requires dominator postdominator info
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
AU.setPreservesCFG();
}
@@ -392,7 +392,7 @@ FunctionPass *llvm::createEarlyCSEPass() {
}
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
@@ -432,7 +432,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
+ if (Value *V = SimplifyInstruction(Inst, DL, TLI, DT)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -552,11 +552,15 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
bool EarlyCSE::runOnFunction(Function &F) {
- std::deque<StackNode *> nodesToProcess;
+ if (skipOptnoneFunction(F))
+ return false;
- TD = getAnalysisIfAvailable<DataLayout>();
+ std::vector<StackNode *> nodesToProcess;
+
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TLI = &getAnalysis<TargetLibraryInfo>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// Tables that the pass uses when walking the domtree.
ScopedHTType AVTable;
@@ -570,7 +574,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
bool Changed = false;
// Process the root node.
- nodesToProcess.push_front(
+ nodesToProcess.push_back(
new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
CurrentGeneration, DT->getRootNode(),
DT->getRootNode()->begin(),
@@ -583,7 +587,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
while (!nodesToProcess.empty()) {
// Grab the first item off the stack. Set the current generation, remove
// the node from the stack, and process it.
- StackNode *NodeToProcess = nodesToProcess.front();
+ StackNode *NodeToProcess = nodesToProcess.back();
// Initialize class members.
CurrentGeneration = NodeToProcess->currentGeneration();
@@ -597,7 +601,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
} else if (NodeToProcess->childIter() != NodeToProcess->end()) {
// Push the next child onto the stack.
DomTreeNode *child = NodeToProcess->nextChild();
- nodesToProcess.push_front(
+ nodesToProcess.push_back(
new StackNode(AvailableValues,
AvailableLoads,
AvailableCalls,
@@ -607,7 +611,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
// It has been processed, and there are no more children to process,
// so delete it and pop it off the stack.
delete NodeToProcess;
- nodesToProcess.pop_front();
+ nodesToProcess.pop_back();
}
} // while (!nodes...)
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index e7de07f..e7f2564 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -14,8 +14,8 @@
#define DEBUG_TYPE "flattencfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/CFG.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -26,9 +26,9 @@ public:
FlattenCFGPass() : FunctionPass(ID) {
initializeFlattenCFGPassPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 6af269d..33c387c 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -20,30 +20,29 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/PatternMatch.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -587,7 +586,7 @@ namespace {
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
SetVector<BasicBlock *> DeadBlocks;
@@ -616,7 +615,7 @@ namespace {
initializeGVNPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
/// markInstructionForDeletion - This removes the specified instruction from
/// our various maps and marks it for deletion.
@@ -625,7 +624,7 @@ namespace {
InstrsToErase.push_back(I);
}
- const DataLayout *getDataLayout() const { return TD; }
+ const DataLayout *getDataLayout() const { return DL; }
DominatorTree &getDominatorTree() const { return *DT; }
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
MemoryDependenceAnalysis &getMemDep() const { return *MD; }
@@ -677,14 +676,14 @@ namespace {
SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
// This transformation requires dominator postdominator info
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<AliasAnalysis>();
}
@@ -727,7 +726,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -818,8 +817,7 @@ SpeculationFailure:
// Mark as unavailable.
EntryVal = 0;
- for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
- BBWorklist.push_back(*I);
+ BBWorklist.append(succ_begin(Entry), succ_end(Entry));
} while (!BBWorklist.empty());
return false;
@@ -830,7 +828,7 @@ SpeculationFailure:
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
Type *LoadTy,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// If the loaded or stored value is an first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
@@ -839,8 +837,8 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
return false;
// The store has to be at least as big as the load.
- if (TD.getTypeSizeInBits(StoredVal->getType()) <
- TD.getTypeSizeInBits(LoadTy))
+ if (DL.getTypeSizeInBits(StoredVal->getType()) <
+ DL.getTypeSizeInBits(LoadTy))
return false;
return true;
@@ -855,15 +853,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
Type *LoadedTy,
Instruction *InsertPt,
- const DataLayout &TD) {
- if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
+ const DataLayout &DL) {
+ if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL))
return 0;
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
- uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+ uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
@@ -874,13 +872,13 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// Convert source pointers to integers, which can be bitcast.
if (StoredValTy->getScalarType()->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy);
+ StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
Type *TypeToCastTo = LoadedTy;
if (TypeToCastTo->getScalarType()->isPointerTy())
- TypeToCastTo = TD.getIntPtrType(TypeToCastTo);
+ TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
@@ -899,7 +897,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// Convert source pointers to integers, which can be manipulated.
if (StoredValTy->getScalarType()->isPointerTy()) {
- StoredValTy = TD.getIntPtrType(StoredValTy);
+ StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
@@ -911,7 +909,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
- if (TD.isBigEndian()) {
+ if (DL.isBigEndian()) {
Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
}
@@ -942,15 +940,15 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// If the loaded or stored value is a first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy())
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD);
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&DL);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &DL);
if (StoreBase != LoadBase)
return -1;
@@ -972,7 +970,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
// must have gotten confused.
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
if ((WriteSizeInBits & 7) | (LoadSize & 7))
return -1;
@@ -1015,51 +1013,51 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepSI->getValueOperand()->getType()->isStructTy() ||
DepSI->getValueOperand()->getType()->isArrayTy())
return -1;
Value *StorePtr = DepSI->getPointerOperand();
- uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+ uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
- StorePtr, StoreSize, TD);
+ StorePtr, StoreSize, DL);
}
/// AnalyzeLoadFromClobberingLoad - This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
- LoadInst *DepLI, const DataLayout &TD){
+ LoadInst *DepLI, const DataLayout &DL){
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
Value *DepPtr = DepLI->getPointerOperand();
- uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
- int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+ uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+ int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
if (R != -1) return R;
// If we have a load/load clobber an DepLI can be widened to cover this load,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD);
- unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &DL);
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
unsigned Size = MemoryDependenceAnalysis::
- getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, DL);
if (Size == 0) return -1;
- return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
}
static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
if (SizeCst == 0) return -1;
@@ -1069,7 +1067,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
// of the memset..
if (MI->getIntrinsicID() == Intrinsic::memset)
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
- MemSizeInBits, TD);
+ MemSizeInBits, DL);
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
@@ -1079,12 +1077,12 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (Src == 0) return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &TD));
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL));
if (GV == 0 || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
- MI->getDest(), MemSizeInBits, TD);
+ MI->getDest(), MemSizeInBits, DL);
if (Offset == -1)
return Offset;
@@ -1097,7 +1095,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, &TD))
+ if (ConstantFoldLoadFromConstPtr(Src, &DL))
return Offset;
return -1;
}
@@ -1110,11 +1108,11 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
Type *LoadTy,
- Instruction *InsertPt, const DataLayout &TD){
+ Instruction *InsertPt, const DataLayout &DL){
LLVMContext &Ctx = SrcVal->getType()->getContext();
- uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
- uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
+ uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
@@ -1122,13 +1120,13 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// to an integer type to start with.
if (SrcVal->getType()->getScalarType()->isPointerTy())
SrcVal = Builder.CreatePtrToInt(SrcVal,
- TD.getIntPtrType(SrcVal->getType()));
+ DL.getIntPtrType(SrcVal->getType()));
if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
- if (TD.isLittleEndian())
+ if (DL.isLittleEndian())
ShiftAmt = Offset*8;
else
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
@@ -1139,7 +1137,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
if (LoadSize != StoreSize)
SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
- return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
+ return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, DL);
}
/// GetLoadValueForLoad - This function is called when we have a
@@ -1150,11 +1148,11 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
GVN &gvn) {
- const DataLayout &TD = *gvn.getDataLayout();
+ const DataLayout &DL = *gvn.getDataLayout();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
// widen SrcVal out to a larger load.
- unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
- unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
if (Offset+LoadSize > SrcValSize) {
assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
@@ -1186,7 +1184,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
// Replace uses of the original load with the wider load. On a big endian
// system, we need to shift down to get the relevant bits.
Value *RV = NewLoad;
- if (TD.isBigEndian())
+ if (DL.isBigEndian())
RV = Builder.CreateLShr(RV,
NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
RV = Builder.CreateTrunc(RV, SrcVal->getType());
@@ -1201,7 +1199,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
SrcVal = NewLoad;
}
- return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+ return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
}
@@ -1209,9 +1207,9 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
- const DataLayout &TD){
+ const DataLayout &DL){
LLVMContext &Ctx = LoadTy->getContext();
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
@@ -1242,7 +1240,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
++NumBytesSet;
}
- return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
+ return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, DL);
}
// Otherwise, this is a memcpy/memmove from a constant global.
@@ -1258,7 +1256,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, &TD);
+ return ConstantFoldLoadFromConstPtr(Src, &DL);
}
@@ -1324,10 +1322,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
+ const DataLayout *DL = gvn.getDataLayout();
+ assert(DL && "Need target data to handle type mismatch case");
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *TD);
+ *DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
@@ -1346,10 +1344,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
<< *Res << '\n' << "\n\n\n");
}
} else if (isMemIntrinValue()) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
+ const DataLayout *DL = gvn.getDataLayout();
+ assert(DL && "Need target data to handle type mismatch case");
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *TD);
+ LoadTy, BB->getTerminator(), *DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1402,9 +1400,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (TD && Address) {
+ if (DL && Address) {
int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
- DepSI, *TD);
+ DepSI, *DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
DepSI->getValueOperand(),
@@ -1421,10 +1419,10 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
- if (DepLI != LI && Address && TD) {
+ if (DepLI != LI && Address && DL) {
int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
LI->getPointerOperand(),
- DepLI, *TD);
+ DepLI, *DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
@@ -1437,9 +1435,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (TD && Address) {
+ if (DL && Address) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
- DepMI, *TD);
+ DepMI, *DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
@@ -1471,8 +1469,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), *TD)) {
+ if (DL == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), *DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1488,7 +1486,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LD->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
+ if (DL == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)){
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1611,7 +1609,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// If all preds have a single successor, then we know it is safe to insert
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
- PHITransAddr Address(LI->getPointerOperand(), TD);
+ PHITransAddr Address(LI->getPointerOperand(), DL);
Value *LoadPtr = 0;
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
@@ -1712,7 +1710,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
!Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
DEBUG(
dbgs() << "GVN: non-local load ";
- WriteAsOperand(dbgs(), LI);
+ LI->printAsOperand(dbgs());
dbgs() << " has unknown dependencies\n";
);
return false;
@@ -1789,7 +1787,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD));
break;
case LLVMContext::MD_prof:
- llvm_unreachable("MD_prof in a non terminator instruction");
+ llvm_unreachable("MD_prof in a non-terminator instruction");
break;
case LLVMContext::MD_fpmath:
ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD));
@@ -1823,7 +1821,7 @@ bool GVN::processLoad(LoadInst *L) {
// If we have a clobber and target data is around, see if this is a clobber
// that we can fix up through code synthesis.
- if (Dep.isClobber() && TD) {
+ if (Dep.isClobber() && DL) {
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
@@ -1838,10 +1836,10 @@ bool GVN::processLoad(LoadInst *L) {
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
L->getPointerOperand(),
- DepSI, *TD);
+ DepSI, *DL);
if (Offset != -1)
AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
- L->getType(), L, *TD);
+ L->getType(), L, *DL);
}
// Check to see if we have something like this:
@@ -1856,7 +1854,7 @@ bool GVN::processLoad(LoadInst *L) {
int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
L->getPointerOperand(),
- DepLI, *TD);
+ DepLI, *DL);
if (Offset != -1)
AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
}
@@ -1866,9 +1864,9 @@ bool GVN::processLoad(LoadInst *L) {
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
L->getPointerOperand(),
- DepMI, *TD);
+ DepMI, *DL);
if (Offset != -1)
- AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD);
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *DL);
}
if (AvailVal) {
@@ -1890,7 +1888,7 @@ bool GVN::processLoad(LoadInst *L) {
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
- WriteAsOperand(dbgs(), L);
+ L->printAsOperand(dbgs());
Instruction *I = Dep.getInst();
dbgs() << " is clobbered by " << *I << '\n';
);
@@ -1905,7 +1903,7 @@ bool GVN::processLoad(LoadInst *L) {
DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load ";
- WriteAsOperand(dbgs(), L);
+ L->printAsOperand(dbgs());
dbgs() << " has unknown dependence\n";
);
return false;
@@ -1919,9 +1917,9 @@ bool GVN::processLoad(LoadInst *L) {
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
if (StoredVal->getType() != L->getType()) {
- if (TD) {
+ if (DL) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
- L, *TD);
+ L, *DL);
if (StoredVal == 0)
return false;
@@ -1948,9 +1946,9 @@ bool GVN::processLoad(LoadInst *L) {
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
if (DepLI->getType() != L->getType()) {
- if (TD) {
+ if (DL) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
- L, *TD);
+ L, *DL);
if (AvailableVal == 0)
return false;
@@ -2030,7 +2028,7 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ) {
- Use &U = (UI++).getUse();
+ Use &U = *UI++;
if (DT->dominates(Root, U)) {
U.set(To);
@@ -2202,7 +2200,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -2314,10 +2312,14 @@ bool GVN::processInstruction(Instruction *I) {
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
if (!NoLoads)
MD = &getAnalysis<MemoryDependenceAnalysis>();
- DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 954e545..8ffd64b 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -70,6 +70,11 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+cl::opt<bool>
+EnableGlobalMerge("global-merge", cl::Hidden,
+ cl::desc("Enable global merge pass"),
+ cl::init(true));
+
static cl::opt<bool>
EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
cl::desc("Enable global merge pass on constants"),
@@ -107,31 +112,18 @@ namespace {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
- virtual bool doFinalization(Module &M);
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ bool doFinalization(Module &M) override;
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Merge internal globals";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
}
-
- struct GlobalCmp {
- const DataLayout *TD;
-
- GlobalCmp(const DataLayout *td) : TD(td) { }
-
- bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
- Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
-
- return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
- }
- };
};
} // end anonymous namespace
@@ -143,7 +135,7 @@ INITIALIZE_PASS(GlobalMerge, "global-merge",
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
const TargetLowering *TLI = TM->getTargetLowering();
- const DataLayout *TD = TLI->getDataLayout();
+ const DataLayout *DL = TLI->getDataLayout();
// FIXME: Infer the maximum possible offset depending on the actual users
// (these max offsets are different for the users inside Thumb or ARM
@@ -151,7 +143,13 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
// FIXME: Find better heuristics
- std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+ std::stable_sort(Globals.begin(), Globals.end(),
+ [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2));
+ });
Type *Int32Ty = Type::getInt32Ty(M.getContext());
@@ -162,7 +160,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
std::vector<Constant*> Inits;
for (j = i; j != e; ++j) {
Type *Ty = Globals[j]->getType()->getElementType();
- MergedSize += TD->getTypeAllocSize(Ty);
+ MergedSize += DL->getTypeAllocSize(Ty);
if (MergedSize > MaxOffset) {
break;
}
@@ -214,7 +212,7 @@ void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
++IFn) {
for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
IBB != IEndBB; ++IBB) {
- // Follow the inwoke link to find the landing pad instruction
+ // Follow the invoke link to find the landing pad instruction
const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
if (!II) continue;
@@ -231,10 +229,13 @@ void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
}
bool GlobalMerge::doInitialization(Module &M) {
+ if (!EnableGlobalMerge)
+ return false;
+
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
BSSGlobals;
const TargetLowering *TLI = TM->getTargetLowering();
- const DataLayout *TD = TLI->getDataLayout();
+ const DataLayout *DL = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
setMustKeepGlobalVariables(M);
@@ -252,9 +253,9 @@ bool GlobalMerge::doInitialization(Module &M) {
unsigned AddressSpace = PT->getAddressSpace();
// Ignore fancy-aligned globals for now.
- unsigned Alignment = TD->getPreferredAlignment(I);
+ unsigned Alignment = DL->getPreferredAlignment(I);
Type *Ty = I->getType()->getElementType();
- if (Alignment > TD->getABITypeAlignment(Ty))
+ if (Alignment > DL->getABITypeAlignment(Ty))
continue;
// Ignore all 'special' globals.
@@ -266,7 +267,7 @@ bool GlobalMerge::doInitialization(Module &M) {
if (isMustKeepGlobalVariable(I))
continue;
- if (TD->getTypeAllocSize(Ty) < MaxOffset) {
+ if (DL->getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
.isBSSLocal())
BSSGlobals[AddressSpace].push_back(I);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 235aaaa..7537632 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -29,18 +29,18 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -63,12 +63,15 @@ static cl::opt<bool> VerifyIndvars(
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
+static cl::opt<bool> ReduceLiveIVs("liv-reduce", cl::Hidden,
+ cl::desc("Reduce live induction variables."));
+
namespace {
class IndVarSimplify : public LoopPass {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
- DataLayout *TD;
+ const DataLayout *DL;
TargetLibraryInfo *TLI;
SmallVector<WeakVH, 16> DeadInsts;
@@ -76,15 +79,15 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0),
+ IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), DL(0),
Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
@@ -96,7 +99,7 @@ namespace {
}
private:
- virtual void releaseMemory() {
+ void releaseMemory() override {
DeadInsts.clear();
}
@@ -119,7 +122,7 @@ namespace {
char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
@@ -266,11 +269,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Check Incr uses. One user is PN and the other user is an exit condition
// used by the conditional terminator.
- Value::use_iterator IncrUse = Incr->use_begin();
+ Value::user_iterator IncrUse = Incr->user_begin();
Instruction *U1 = cast<Instruction>(*IncrUse++);
- if (IncrUse == Incr->use_end()) return;
+ if (IncrUse == Incr->user_end()) return;
Instruction *U2 = cast<Instruction>(*IncrUse++);
- if (IncrUse != Incr->use_end()) return;
+ if (IncrUse != Incr->user_end()) return;
// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
// only used by a branch, we can't transform it.
@@ -278,10 +281,10 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
if (!Compare)
Compare = dyn_cast<FCmpInst>(U2);
if (Compare == 0 || !Compare->hasOneUse() ||
- !isa<BranchInst>(Compare->use_back()))
+ !isa<BranchInst>(Compare->user_back()))
return;
- BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+ BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
// We need to verify that the branch actually controls the iteration count
// of the loop. If not, the new IV can overflow and no one will notice.
@@ -494,6 +497,21 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
unsigned NumPreds = PN->getNumIncomingValues();
+ // We would like to be able to RAUW single-incoming value PHI nodes. We
+ // have to be certain this is safe even when this is an LCSSA PHI node.
+ // While the computed exit value is no longer varying in *this* loop, the
+ // exit block may be an exit block for an outer containing loop as well,
+ // the exit value may be varying in the outer loop, and thus it may still
+ // require an LCSSA PHI node. The safe case is when this is a
+ // single-predecessor PHI node (LCSSA) and the exit block containing it is
+ // part of the enclosing loop, or this is the outermost loop of the nest.
+ // In either case the exit value could (at most) be varying in the same
+ // loop body as the phi node itself. Thus if it is in turn used outside of
+ // an enclosing loop it will only be via a separate LCSSA node.
+ bool LCSSASafePhiForRAUW =
+ NumPreds == 1 &&
+ (!L->getParentLoop() || L->getParentLoop() == LI->getLoopFor(ExitBB));
+
// Iterate over all of the PHI nodes.
BasicBlock::iterator BBI = ExitBB->begin();
while ((PN = dyn_cast<PHINode>(BBI++))) {
@@ -545,8 +563,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
unsigned NumHardInternalUses = 0;
unsigned NumSoftExternalUses = 0;
unsigned NumUses = 0;
- for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end();
- IB!=IE && NumUses<=6 ; ++IB) {
+ for (auto IB = Inst->user_begin(), IE = Inst->user_end();
+ IB != IE && NumUses <= 6; ++IB) {
Instruction *UseInstr = cast<Instruction>(*IB);
unsigned Opc = UseInstr->getOpcode();
NumUses++;
@@ -558,9 +576,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Do not count the Phi as a use. LCSSA may have inserted
// plenty of trivial ones.
NumUses--;
- for (Value::use_iterator PB=UseInstr->use_begin(),
- PE=UseInstr->use_end();
- PB!=PE && NumUses<=6 ; ++PB, ++NumUses) {
+ for (auto PB = UseInstr->user_begin(),
+ PE = UseInstr->user_end();
+ PB != PE && NumUses <= 6; ++PB, ++NumUses) {
unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode();
if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret)
NumSoftExternalUses++;
@@ -594,17 +612,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
if (isInstructionTriviallyDead(Inst, TLI))
DeadInsts.push_back(Inst);
- if (NumPreds == 1) {
- // Completely replace a single-pred PHI. This is safe, because the
- // NewVal won't be variant in the loop, so we don't need an LCSSA phi
- // node anymore.
+ // If we determined that this PHI is safe to replace even if an LCSSA
+ // PHI, do so.
+ if (LCSSASafePhiForRAUW) {
PN->replaceAllUsesWith(ExitVal);
PN->eraseFromParent();
}
}
- if (NumPreds != 1) {
- // Clone the PHI and delete the original one. This lets IVUsers and
- // any other maps purge the original user from their records.
+
+ // If we were unable to completely replace the PHI node, clone the PHI
+ // and delete the original one. This lets IVUsers and any other maps
+ // purge the original user from their records.
+ if (!LCSSASafePhiForRAUW) {
PHINode *NewPN = cast<PHINode>(PN->clone());
NewPN->takeName(PN);
NewPN->insertBefore(PN);
@@ -634,34 +653,20 @@ namespace {
WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
};
-
- class WideIVVisitor : public IVVisitor {
- ScalarEvolution *SE;
- const DataLayout *TD;
-
- public:
- WideIVInfo WI;
-
- WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
- const DataLayout *TData) :
- SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
-
- // Implement the interface used by simplifyUsersOfIV.
- virtual void visitCast(CastInst *Cast);
- };
}
/// visitCast - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
-void WideIVVisitor::visitCast(CastInst *Cast) {
+static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
+ const DataLayout *DL) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
return;
Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
- if (TD && !TD->isLegalInteger(Width))
+ if (DL && !DL->isLegalInteger(Width))
return;
if (!WI.WidestNativeType) {
@@ -891,15 +896,43 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
return AddRec;
}
+/// This IV user cannot be widened. Replace this use of the original narrow IV
+/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
+static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
+ DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef
+ << " for user " << *DU.NarrowUse << "\n");
+ IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
+ Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
+ DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
+}
+
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
- if (isa<PHINode>(DU.NarrowUse) &&
- LI->getLoopFor(DU.NarrowUse->getParent()) != L)
- return 0;
-
+ if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
+ if (LI->getLoopFor(UsePhi->getParent()) != L) {
+ // For LCSSA phis, sink the truncate outside the loop.
+ // After SimplifyCFG most loop exit targets have a single predecessor.
+ // Otherwise fall back to a truncate within the loop.
+ if (UsePhi->getNumOperands() != 1)
+ truncateIVUse(DU, DT);
+ else {
+ PHINode *WidePhi =
+ PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
+ UsePhi);
+ WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
+ IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt());
+ Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
+ UsePhi->replaceAllUsesWith(Trunc);
+ DeadInsts.push_back(UsePhi);
+ DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
+ << " to " << *WidePhi << "\n");
+ }
+ return 0;
+ }
+ }
// Our raison d'etre! Eliminate sign and zero extension.
if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
Value *NewDef = DU.WideDef;
@@ -947,9 +980,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// This user does not evaluate to a recurence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
- Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
- DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
+ truncateIVUse(DU, DT);
return 0;
}
// Assume block terminators cannot evaluate to a recurrence. We can't to
@@ -987,15 +1018,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
- for (Value::use_iterator UI = NarrowDef->use_begin(),
- UE = NarrowDef->use_end(); UI != UE; ++UI) {
- Instruction *NarrowUse = cast<Instruction>(*UI);
+ for (User *U : NarrowDef->users()) {
+ Instruction *NarrowUser = cast<Instruction>(U);
// Handle data flow merges and bizarre phi cycles.
- if (!Widened.insert(NarrowUse))
+ if (!Widened.insert(NarrowUser))
continue;
- NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
+ NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef));
}
}
@@ -1080,9 +1110,36 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
}
//===----------------------------------------------------------------------===//
+// Live IV Reduction - Minimize IVs live across the loop.
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//
+namespace {
+ class IndVarSimplifyVisitor : public IVVisitor {
+ ScalarEvolution *SE;
+ const DataLayout *DL;
+ PHINode *IVPhi;
+
+ public:
+ WideIVInfo WI;
+
+ IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
+ const DataLayout *DL, const DominatorTree *DTree):
+ SE(SCEV), DL(DL), IVPhi(IV) {
+ DT = DTree;
+ WI.NarrowIV = IVPhi;
+ if (ReduceLiveIVs)
+ setSplitOverflowIntrinsics();
+ }
+
+ // Implement the interface used by simplifyUsersOfIV.
+ void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, DL); }
+ };
+}
/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
/// users. Each successive simplification may push more users which may
@@ -1114,12 +1171,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- WideIVVisitor WIV(CurrIV, SE, TD);
+ IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, DT);
- Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
+ Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
- if (WIV.WI.WidestNativeType) {
- WideIVs.push_back(WIV.WI);
+ if (Visitor.WI.WidestNativeType) {
+ WideIVs.push_back(Visitor.WI);
}
} while(!LoopPhis.empty());
@@ -1359,15 +1416,11 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
- for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
- UI != UE; ++UI) {
- if (*UI != Cond && *UI != IncV) return false;
- }
+ for (User *U : Phi->users())
+ if (U != Cond && U != IncV) return false;
- for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
- UI != UE; ++UI) {
- if (*UI != Cond && *UI != Phi) return false;
- }
+ for (User *U : IncV->users())
+ if (U != Cond && U != Phi) return false;
return true;
}
@@ -1386,7 +1439,7 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// could at least handle constant BECounts.
static PHINode *
FindLoopCounter(Loop *L, const SCEV *BECount,
- ScalarEvolution *SE, DominatorTree *DT, const DataLayout *TD) {
+ ScalarEvolution *SE, DominatorTree *DT, const DataLayout *DL) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
@@ -1415,7 +1468,7 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
- if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
+ if (PhiWidth < BCWidth || (DL && !DL->isLegalInteger(PhiWidth)))
continue;
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
@@ -1697,13 +1750,12 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
// Determine if there is a use in or before the loop (direct or
// otherwise).
bool UsedInLoop = false;
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- User *U = *UI;
- BasicBlock *UseBB = cast<Instruction>(U)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(U)) {
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ BasicBlock *UseBB = User->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(User)) {
unsigned i =
- PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ PHINode::getIncomingValueNumForOperand(U.getOperandNo());
UseBB = P->getIncomingBlock(i);
}
if (UseBB == Preheader || L->contains(UseBB)) {
@@ -1743,6 +1795,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
//===----------------------------------------------------------------------===//
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
// canonicalization can be a pessimization without LSR to "clean up"
@@ -1756,8 +1811,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
- DT = &getAnalysis<DominatorTree>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DeadInsts.clear();
@@ -1799,13 +1855,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
- PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+ PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, DL);
if (IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
// pass that uses the SCEVExpander must do it. This does not work well for
- // loop passes because SCEVExpander makes assumptions about all loops, while
- // LoopPassManager only forces the current loop to be simplified.
+ // loop passes because SCEVExpander makes assumptions about all loops,
+ // while LoopPassManager only forces the current loop to be simplified.
//
// FIXME: SCEV expansion has no way to bail out, so the caller must
// explicitly check any assumptions made by SCEV. Brittle.
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index b3ec2fc..067deb7 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -27,10 +27,10 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -76,7 +76,7 @@ namespace {
/// revectored to the false side of the second if.
///
class JumpThreading : public FunctionPass {
- DataLayout *TD;
+ const DataLayout *DL;
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
@@ -105,9 +105,9 @@ namespace {
initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
AU.addRequired<TargetLibraryInfo>();
@@ -148,8 +148,12 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
/// runOnFunction - Top level algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
@@ -251,7 +255,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->hasFnAttr(Attribute::NoDuplicate))
+ if (CI->cannotDuplicate())
// Blocks with NoDuplicate are modelled as having infinite cost, so they
// are never duplicated.
return ~0U;
@@ -490,7 +494,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
Value *LHS = PN->getIncomingValue(i);
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
- Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
if (Res == 0) {
if (!isa<Constant>(RHS))
continue;
@@ -692,7 +696,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
+ Value *SimpleVal = ConstantFoldInstruction(I, DL, TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -1431,16 +1435,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
- ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(UI) == BB)
+ if (UserPN->getIncomingBlock(U) == BB)
continue;
} else if (User->getParent() == BB)
continue;
- UsesToRename.push_back(&UI.getUse());
+ UsesToRename.push_back(&U);
}
// If there are no uses outside the block, we're done with this instruction.
@@ -1475,7 +1478,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, TD, TLI);
+ SimplifyInstructionsInBlock(NewBB, DL, TLI);
// Threaded an edge!
++NumThreads;
@@ -1557,7 +1560,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
- if (Value *IV = SimplifyInstruction(New, TD)) {
+ if (Value *IV = SimplifyInstruction(New, DL)) {
delete New;
ValueMapping[BI] = IV;
} else {
@@ -1585,16 +1588,15 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
- ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(UI) == BB)
+ if (UserPN->getIncomingBlock(U) == BB)
continue;
} else if (User->getParent() == BB)
continue;
- UsesToRename.push_back(&UI.getUse());
+ UsesToRename.push_back(&U);
}
// If there are no uses outside the block, we're done with this instruction.
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index f94cd2a..b69f2dc 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -36,23 +36,26 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
using namespace llvm;
@@ -74,26 +77,28 @@ namespace {
initializeLICMPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved("scalar-evolution");
- AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
AU.addRequired<TargetLibraryInfo>();
}
using llvm::Pass::doFinalization;
- bool doFinalization() {
+ bool doFinalization() override {
assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
return false;
}
@@ -103,7 +108,7 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
- DataLayout *TD; // DataLayout for constant folding.
+ const DataLayout *DL; // DataLayout for constant folding.
TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
// State that is updated as we process loops.
@@ -117,11 +122,12 @@ namespace {
DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
- void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
+ void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
+ Loop *L) override;
/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
/// set.
- void deleteAnalysisValue(Value *V, Loop *L);
+ void deleteAnalysisValue(Value *V, Loop *L) override;
/// SinkRegion - Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in
@@ -183,15 +189,18 @@ namespace {
void PromoteAliasSet(AliasSet &AS,
SmallVectorImpl<BasicBlock*> &ExitBlocks,
- SmallVectorImpl<Instruction*> &InsertPts);
+ SmallVectorImpl<Instruction*> &InsertPts,
+ PredIteratorCache &PIC);
};
}
char LICM::ID = 0;
INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -203,16 +212,22 @@ Pass *llvm::createLICMPass() { return new LICM(); }
/// times on one loop.
///
bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
Changed = false;
// Get our Loop and Alias Analysis information...
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TLI = &getAnalysis<TargetLibraryInfo>();
+ assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
+
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
@@ -272,16 +287,33 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
- if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
SmallVector<BasicBlock *, 8> ExitBlocks;
SmallVector<Instruction *, 8> InsertPts;
+ PredIteratorCache PIC;
// Loop over all of the alias sets in the tracker object.
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
I != E; ++I)
- PromoteAliasSet(*I, ExitBlocks, InsertPts);
+ PromoteAliasSet(*I, ExitBlocks, InsertPts, PIC);
+
+ // Once we have promoted values across the loop body we have to recursively
+ // reform LCSSA as any nested loop may now have values defined within the
+ // loop used in the outer loop.
+ // FIXME: This is really heavy handed. It would be a bit better to use an
+ // SSAUpdater strategy during promotion that was LCSSA aware and reformed
+ // it as it went.
+ if (Changed)
+ formLCSSARecursively(*L, *DT, getAnalysisIfAvailable<ScalarEvolution>());
}
+ // Check that neither this loop nor its parent have had LCSSA broken. LICM is
+ // specifically moving instructions across the loop boundary and so it is
+ // especially in need of sanity checking here.
+ assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
+ assert((!L->getParentLoop() || L->getParentLoop()->isLCSSAForm(*DT)) &&
+ "Parent loop not left in LCSSA form after LICM!");
+
// Clear out loops state information for the next iteration
CurLoop = 0;
Preheader = 0;
@@ -364,7 +396,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(&I, DL, TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -450,27 +482,54 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
return isSafeToExecuteUnconditionally(I);
}
+/// \brief Returns true if a PHINode is trivially replaceable with an
+/// Instruction.
+///
+/// This is true when all incoming values are that instruction. This pattern
+/// occurs most often with LCSSA PHI nodes.
+static bool isTriviallyReplacablePHI(PHINode &PN, Instruction &I) {
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ if (PN.getIncomingValue(i) != &I)
+ return false;
+
+ return true;
+}
+
/// isNotUsedInLoop - Return true if the only users of this instruction are
/// outside of the loop. If this is true, we can sink the instruction to the
/// exit blocks of the loop.
///
bool LICM::isNotUsedInLoop(Instruction &I) {
- for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (PHINode *PN = dyn_cast<PHINode>(User)) {
- // PHI node uses occur in predecessor blocks!
+ for (User *U : I.users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (PHINode *PN = dyn_cast<PHINode>(UI)) {
+ // PHI nodes where all of the incoming values are this instruction are
+ // special -- they can just be RAUW'ed with the instruction and thus
+ // don't require a use in the predecessor. This is a particularly
+ // important special case because it is the pattern found in LCSSA form.
+ if (isTriviallyReplacablePHI(*PN, I)) {
+ if (CurLoop->contains(PN))
+ return false;
+ else
+ continue;
+ }
+
+ // Otherwise, PHI node uses occur in the predecessor blocks of their
+ // incoming values. Check for such a use being inside the loop.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == &I)
if (CurLoop->contains(PN->getIncomingBlock(i)))
return false;
- } else if (CurLoop->contains(User)) {
- return false;
+
+ continue;
}
+
+ if (CurLoop->contains(UI))
+ return false;
}
return true;
}
-
/// sink - When an instruction is found to only be used outside of the loop,
/// this function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
@@ -479,119 +538,59 @@ bool LICM::isNotUsedInLoop(Instruction &I) {
void LICM::sink(Instruction &I) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getUniqueExitBlocks(ExitBlocks);
-
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
++NumSunk;
Changed = true;
- // The case where there is only a single exit node of this loop is common
- // enough that we handle it as a special (more efficient) case. It is more
- // efficient to handle because there are no PHI nodes that need to be placed.
- if (ExitBlocks.size() == 1) {
- if (!DT->dominates(I.getParent(), ExitBlocks[0])) {
- // Instruction is not used, just delete it.
- CurAST->deleteValue(&I);
- // If I has users in unreachable blocks, eliminate.
- // If I is not void type then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- I.eraseFromParent();
- } else {
- // Move the instruction to the start of the exit block, after any PHI
- // nodes in it.
- I.moveBefore(ExitBlocks[0]->getFirstInsertionPt());
-
- // This instruction is no longer in the AST for the current loop, because
- // we just sunk it out of the loop. If we just sunk it into an outer
- // loop, we will rediscover the operation when we process it.
- CurAST->deleteValue(&I);
- }
- return;
- }
-
- if (ExitBlocks.empty()) {
- // The instruction is actually dead if there ARE NO exit blocks.
- CurAST->deleteValue(&I);
- // If I has users in unreachable blocks, eliminate.
- // If I is not void type then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- I.eraseFromParent();
- return;
- }
-
- // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
- // hard work of inserting PHI nodes as necessary.
- SmallVector<PHINode*, 8> NewPHIs;
- SSAUpdater SSA(&NewPHIs);
-
- if (!I.use_empty())
- SSA.Initialize(I.getType(), I.getName());
-
- // Insert a copy of the instruction in each exit block of the loop that is
- // dominated by the instruction. Each exit block is known to only be in the
- // ExitBlocks list once.
- BasicBlock *InstOrigBB = I.getParent();
- unsigned NumInserted = 0;
-
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = ExitBlocks[i];
-
- if (!DT->dominates(InstOrigBB, ExitBlock))
- continue;
-
- // Insert the code after the last PHI node.
- BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
-
- // If this is the first exit block processed, just move the original
- // instruction, otherwise clone the original instruction and insert
- // the copy.
- Instruction *New;
- if (NumInserted++ == 0) {
- I.moveBefore(InsertPt);
- New = &I;
- } else {
- New = I.clone();
- if (!I.getName().empty())
- New->setName(I.getName()+".le");
- ExitBlock->getInstList().insert(InsertPt, New);
- }
-
- // Now that we have inserted the instruction, inform SSAUpdater.
- if (!I.use_empty())
- SSA.AddAvailableValue(ExitBlock, New);
- }
-
- // If the instruction doesn't dominate any exit blocks, it must be dead.
- if (NumInserted == 0) {
- CurAST->deleteValue(&I);
- if (!I.use_empty())
- I.replaceAllUsesWith(UndefValue::get(I.getType()));
- I.eraseFromParent();
- return;
- }
+#ifndef NDEBUG
+ SmallVector<BasicBlock *, 32> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
+#endif
+
+ // If this instruction is only used outside of the loop, then all users are
+ // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
+ // the instruction.
+ while (!I.use_empty()) {
+ // The user must be a PHI node.
+ PHINode *PN = cast<PHINode>(I.user_back());
+
+ BasicBlock *ExitBlock = PN->getParent();
+ assert(ExitBlockSet.count(ExitBlock) &&
+ "The LCSSA PHI is not in an exit block!");
+
+ Instruction *New = I.clone();
+ ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New);
+ if (!I.getName().empty())
+ New->setName(I.getName() + ".le");
+
+ // Build LCSSA PHI nodes for any in-loop operands. Note that this is
+ // particularly cheap because we can rip off the PHI node that we're
+ // replacing for the number and blocks of the predecessors.
+ // OPT: If this shows up in a profile, we can instead finish sinking all
+ // invariant instructions, and then walk their operands to re-establish
+ // LCSSA. That will eliminate creating PHI nodes just to nuke them when
+ // sinking bottom-up.
+ for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
+ ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(*OI))
+ if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
+ if (!OLoop->contains(PN)) {
+ PHINode *OpPN = PHINode::Create(
+ OInst->getType(), PN->getNumIncomingValues(),
+ OInst->getName() + ".lcssa", ExitBlock->begin());
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ OpPN->addIncoming(OInst, PN->getIncomingBlock(i));
+ *OI = OpPN;
+ }
- // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
- // Grab the use before incrementing the iterator.
- Use &U = UI.getUse();
- // Increment the iterator before removing the use from the list.
- ++UI;
- SSA.RewriteUseAfterInsertions(U);
+ PN->replaceAllUsesWith(New);
+ PN->eraseFromParent();
}
- // Update CurAST for NewPHIs if I had pointer type.
- if (I.getType()->isPointerTy())
- for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
- CurAST->copyValue(&I, NewPHIs[i]);
-
- // Finally, remove the instruction from CurAST. It is no longer in the loop.
CurAST->deleteValue(&I);
+ I.eraseFromParent();
}
/// hoist - When an instruction is found to only use loop invariant operands
@@ -662,24 +661,42 @@ namespace {
SmallPtrSet<Value*, 4> &PointerMustAliases;
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
SmallVectorImpl<Instruction*> &LoopInsertPts;
+ PredIteratorCache &PredCache;
AliasSetTracker &AST;
+ LoopInfo &LI;
DebugLoc DL;
int Alignment;
MDNode *TBAATag;
+
+ Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (Loop *L = LI.getLoopFor(I->getParent()))
+ if (!L->contains(BB)) {
+ // We need to create an LCSSA PHI node for the incoming value and
+ // store that.
+ PHINode *PN = PHINode::Create(
+ I->getType(), PredCache.GetNumPreds(BB),
+ I->getName() + ".lcssa", BB->begin());
+ for (BasicBlock **PI = PredCache.GetPreds(BB); *PI; ++PI)
+ PN->addIncoming(I, *PI);
+ return PN;
+ }
+ return V;
+ }
+
public:
- LoopPromoter(Value *SP,
- const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
- SmallPtrSet<Value*, 4> &PMA,
- SmallVectorImpl<BasicBlock*> &LEB,
- SmallVectorImpl<Instruction*> &LIP,
- AliasSetTracker &ast, DebugLoc dl, int alignment,
+ LoopPromoter(Value *SP, const SmallVectorImpl<Instruction *> &Insts,
+ SSAUpdater &S, SmallPtrSet<Value *, 4> &PMA,
+ SmallVectorImpl<BasicBlock *> &LEB,
+ SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
+ AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
MDNode *TBAATag)
- : LoadAndStorePromoter(Insts, S), SomePtr(SP),
- PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP),
- AST(ast), DL(dl), Alignment(alignment), TBAATag(TBAATag) {}
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
+ LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
+ LI(li), DL(dl), Alignment(alignment), TBAATag(TBAATag) {}
- virtual bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &) const {
+ bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &) const override {
Value *Ptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I))
Ptr = LI->getOperand(0);
@@ -688,7 +705,7 @@ namespace {
return PointerMustAliases.count(Ptr);
}
- virtual void doExtraRewritesBeforeFinalDeletion() const {
+ void doExtraRewritesBeforeFinalDeletion() const override {
// Insert stores after in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
// the SSA updater about the defs in the loop and the preheader
@@ -696,19 +713,21 @@ namespace {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
+ Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
Instruction *InsertPos = LoopInsertPts[i];
- StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+ StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (TBAATag) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
}
}
- virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+ void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
// Update alias analysis.
AST.copyValue(LI, V);
}
- virtual void instructionDeleted(Instruction *I) const {
+ void instructionDeleted(Instruction *I) const override {
AST.deleteValue(I);
}
};
@@ -721,7 +740,8 @@ namespace {
///
void LICM::PromoteAliasSet(AliasSet &AS,
SmallVectorImpl<BasicBlock*> &ExitBlocks,
- SmallVectorImpl<Instruction*> &InsertPts) {
+ SmallVectorImpl<Instruction*> &InsertPts,
+ PredIteratorCache &PIC) {
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
@@ -769,23 +789,22 @@ void LICM::PromoteAliasSet(AliasSet &AS,
if (SomePtr->getType() != ASIV->getType())
return;
- for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
- UI != UE; ++UI) {
+ for (User *U : ASIV->users()) {
// Ignore instructions that are outside the loop.
- Instruction *Use = dyn_cast<Instruction>(*UI);
- if (!Use || !CurLoop->contains(Use))
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || !CurLoop->contains(UI))
continue;
// If there is an non-load/store instruction in the loop, we can't promote
// it.
- if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
+ if (LoadInst *load = dyn_cast<LoadInst>(UI)) {
assert(!load->isVolatile() && "AST broken");
if (!load->isSimple())
return;
- } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
+ } else if (StoreInst *store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
- if (Use->getOperand(1) != ASIV)
+ if (UI->getOperand(1) != ASIV)
continue;
assert(!store->isVolatile() && "AST broken");
if (!store->isSimple())
@@ -801,13 +820,13 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// Larger is better, with the exception of 0 being the best alignment.
unsigned InstAlignment = store->getAlignment();
if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
- if (isGuaranteedToExecute(*Use)) {
+ if (isGuaranteedToExecute(*UI)) {
GuaranteedToExecute = true;
Alignment = InstAlignment;
}
if (!GuaranteedToExecute)
- GuaranteedToExecute = isGuaranteedToExecute(*Use);
+ GuaranteedToExecute = isGuaranteedToExecute(*UI);
} else
return; // Not a load or store.
@@ -815,13 +834,13 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// Merge the TBAA tags.
if (LoopUses.empty()) {
// On the first load/store, just take its TBAA tag.
- TBAATag = Use->getMetadata(LLVMContext::MD_tbaa);
+ TBAATag = UI->getMetadata(LLVMContext::MD_tbaa);
} else if (TBAATag) {
TBAATag = MDNode::getMostGenericTBAA(TBAATag,
- Use->getMetadata(LLVMContext::MD_tbaa));
+ UI->getMetadata(LLVMContext::MD_tbaa));
}
-
- LoopUses.push_back(Use);
+
+ LoopUses.push_back(UI);
}
}
@@ -853,7 +872,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, *CurAST, DL, Alignment, TBAATag);
+ InsertPts, PIC, *CurAST, *LI, DL, Alignment, TBAATag);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt
index cee9119..1f6df7d 100644
--- a/lib/Transforms/Scalar/LLVMBuild.txt
+++ b/lib/Transforms/Scalar/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = Scalar
parent = Transforms
library_name = ScalarOpts
-required_libraries = Analysis Core InstCombine Support Target TransformUtils
+required_libraries = Analysis Core IPA InstCombine Support Target TransformUtils
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 9e39d2e..9a520c8 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -18,9 +18,9 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Dominators.h"
using namespace llvm;
STATISTIC(NumDeleted, "Number of loops deleted");
@@ -34,17 +34,17 @@ namespace {
}
// Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfo>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
@@ -61,7 +61,7 @@ namespace {
char LoopDeletion::ID = 0;
INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion",
"Delete dead loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
@@ -130,6 +130,9 @@ bool LoopDeletion::isLoopDead(Loop *L,
/// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA
/// in order to make various safety checks work.
bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
BasicBlock *preheader = L->getLoopPreheader();
@@ -202,7 +205,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) {
// Update the dominator tree and remove the instructions and blocks that will
// be deleted from the reference counting scheme.
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 952b76b..e5e8b84 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -51,6 +51,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
@@ -78,9 +79,6 @@ namespace {
return dyn_cast<BranchInst>(BB->getTerminator());
}
- /// Return the condition of the branch terminating the given basic block.
- static Value *getBrCondtion(BasicBlock *);
-
/// Derive the precondition block (i.e the block that guards the loop
/// preheader) from the given preheader.
static BasicBlock *getPrecondBb(BasicBlock *PreHead);
@@ -108,8 +106,8 @@ namespace {
bool preliminaryScreen();
/// Check if the given conditional branch is based on the comparison
- /// beween a variable and zero, and if the variable is non-zero, the
- /// control yeilds to the loop entry. If the branch matches the behavior,
+ /// between a variable and zero, and if the variable is non-zero, the
+ /// control yields to the loop entry. If the branch matches the behavior,
/// the variable involved in the comparion is returned. This function will
/// be called to see if the precondition and postcondition of the loop
/// are in desirable form.
@@ -131,7 +129,7 @@ namespace {
class LoopIdiomRecognize : public LoopPass {
Loop *CurLoop;
- const DataLayout *TD;
+ const DataLayout *DL;
DominatorTree *DT;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
@@ -140,10 +138,10 @@ namespace {
static char ID;
explicit LoopIdiomRecognize() : LoopPass(ID) {
initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- TD = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
+ DL = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
}
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
SmallVectorImpl<BasicBlock*> &ExitBlocks);
@@ -163,7 +161,7 @@ namespace {
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -174,18 +172,23 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominatorTree>();
- AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
AU.addRequired<TargetTransformInfo>();
}
const DataLayout *getDataLayout() {
- return TD ? TD : TD=getAnalysisIfAvailable<DataLayout>();
+ if (DL)
+ return DL;
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
+ return DL;
}
DominatorTree *getDominatorTree() {
- return DT ? DT : (DT=&getAnalysis<DominatorTree>());
+ return DT ? DT
+ : (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree());
}
ScalarEvolution *getScalarEvolution() {
@@ -212,7 +215,7 @@ char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
@@ -286,11 +289,6 @@ bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
return false;
}
-Value *LIRUtil::getBrCondtion(BasicBlock *BB) {
- BranchInst *Br = getBranch(BB);
- return Br ? Br->getCondition() : 0;
-}
-
BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
BranchInst *Br = getBranch(BB);
@@ -458,9 +456,8 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
// Check if the result of the instruction is live out of the loop.
bool LiveOutLoop = false;
- for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
- I != E; I++) {
- if ((cast<Instruction>(*I))->getParent() != LoopEntry) {
+ for (User *U : Inst->users()) {
+ if ((cast<Instruction>(U))->getParent() != LoopEntry) {
LiveOutLoop = true; break;
}
}
@@ -519,7 +516,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
// TripCnt is exactly the number of iterations the loop has
TripCnt = NewCount;
- // If the popoulation counter's initial value is not zero, insert Add Inst.
+ // If the population counter's initial value is not zero, insert Add Inst.
Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
if (!InitConst || !InitConst->isZero()) {
@@ -596,11 +593,9 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
// __builtin_ctpop().
{
SmallVector<Value *, 4> CntUses;
- for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end();
- I != E; I++) {
- if (cast<Instruction>(*I)->getParent() != Body)
- CntUses.push_back(*I);
- }
+ for (User *U : CntInst->users())
+ if (cast<Instruction>(U)->getParent() != Body)
+ CntUses.push_back(U);
for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
(cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
}
@@ -705,6 +700,9 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
CurLoop = L;
// If the loop could not be converted to canonical form, it must have an
@@ -777,7 +775,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
Value *StorePtr = SI->getPointerOperand();
// Reject stores that are so large that they overflow an unsigned.
- uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
+ uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
@@ -905,7 +903,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
-static Constant *getMemSetPatternValue(Value *V, const DataLayout &TD) {
+static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
@@ -913,12 +911,12 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout &TD) {
if (C == 0) return 0;
// Only handle simple values that are a power of two bytes in size.
- uint64_t Size = TD.getTypeSizeInBits(V->getType());
+ uint64_t Size = DL.getTypeSizeInBits(V->getType());
if (Size == 0 || (Size & 7) || (Size & (Size-1)))
return 0;
// Don't care enough about darwin/ppc to implement this.
- if (TD.isBigEndian())
+ if (DL.isBigEndian())
return 0;
// Convert to size in bytes.
@@ -965,7 +963,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
PatternValue = 0;
} else if (DestAS == 0 &&
TLI->has(LibFunc::memset_pattern16) &&
- (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+ (PatternValue = getMemSetPatternValue(StoredVal, *DL))) {
// Don't create memset_pattern16s with address spaces.
// It looks like we can use PatternValue!
SplatValue = 0;
@@ -1006,7 +1004,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = Builder.getIntPtrTy(TD, DestAS);
+ Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
@@ -1120,7 +1118,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtrTy = Builder.getIntPtrTy(TD, SI->getPointerAddressSpace());
+ Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace());
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index a23860a..263ba93 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -13,13 +13,13 @@
#define DEBUG_TYPE "loop-instsimplify"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLibraryInfo.h"
@@ -36,9 +36,9 @@ namespace {
initializeLoopInstSimplifyPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop*, LPPassManager&);
+ bool runOnLoop(Loop*, LPPassManager&) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -54,7 +54,7 @@ char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopInstSimplify, "loop-instsimplify",
@@ -65,9 +65,15 @@ Pass *llvm::createLoopInstSimplifyPass() {
}
bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
+ if (skipOptnoneFunction(L))
+ return false;
+
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
LoopInfo *LI = &getAnalysis<LoopInfo>();
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -109,12 +115,11 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, TD, TLI, DT);
+ Value *V = SimplifyInstruction(I, DL, TLI, DT);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI)
- Next->insert(cast<Instruction>(*UI));
+ for (User *U : I->users())
+ Next->insert(cast<Instruction>(U));
I->replaceAllUsesWith(V);
LocalChanged = true;
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 335af81..81c1e42 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -13,9 +13,9 @@
#define DEBUG_TYPE "loop-reroll"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopPass.h"
@@ -24,6 +24,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -124,14 +125,14 @@ namespace {
initializeLoopRerollPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetLibraryInfo>();
}
@@ -140,7 +141,7 @@ protected:
AliasAnalysis *AA;
LoopInfo *LI;
ScalarEvolution *SE;
- DataLayout *DL;
+ const DataLayout *DL;
TargetLibraryInfo *TLI;
DominatorTree *DT;
@@ -189,12 +190,12 @@ protected:
iterator begin() {
assert(Valid && "Using invalid reduction");
- return llvm::next(Instructions.begin());
+ return std::next(Instructions.begin());
}
const_iterator begin() const {
assert(Valid && "Using invalid reduction");
- return llvm::next(Instructions.begin());
+ return std::next(Instructions.begin());
}
iterator end() { return Instructions.end(); }
@@ -340,7 +341,7 @@ char LoopReroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
@@ -353,12 +354,9 @@ Pass *llvm::createLoopRerollPass() {
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
// non-loop blocks to be outside the loop.
static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
- for (Value::use_iterator UI = I->use_begin(),
- UIE = I->use_end(); UI != UIE; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (!L->contains(User))
+ for (User *U : I->users())
+ if (!L->contains(cast<Instruction>(U)))
return true;
- }
return false;
}
@@ -408,7 +406,7 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) {
Instruction *C = Instructions.front();
do {
- C = cast<Instruction>(*C->use_begin());
+ C = cast<Instruction>(*C->user_begin());
if (C->hasOneUse()) {
if (!C->isBinaryOp())
return;
@@ -423,17 +421,15 @@ void LoopReroll::SimpleLoopReduction::add(Loop *L) {
if (Instructions.size() < 2 ||
!C->isSameOperationAs(Instructions.back()) ||
- C->use_begin() == C->use_end())
+ C->use_empty())
return;
// C is now the (potential) last instruction in the reduction chain.
- for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end();
- UI != UIE; ++UI) {
+ for (User *U : C->users())
// The only in-loop user can be the initial PHI.
- if (L->contains(cast<Instruction>(*UI)))
- if (cast<Instruction>(*UI ) != Instructions.front())
+ if (L->contains(cast<Instruction>(U)))
+ if (cast<Instruction>(U) != Instructions.front())
return;
- }
Instructions.push_back(C);
Valid = true;
@@ -483,12 +479,11 @@ void LoopReroll::collectInLoopUserSet(Loop *L,
continue;
if (!Final.count(I))
- for (Value::use_iterator UI = I->use_begin(),
- UIE = I->use_end(); UI != UIE; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *PN = dyn_cast<PHINode>(User)) {
// Ignore "wrap-around" uses to PHIs of this loop's header.
- if (PN->getIncomingBlock(UI) == L->getHeader())
+ if (PN->getIncomingBlock(U) == L->getHeader())
continue;
}
@@ -559,8 +554,8 @@ bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
if (RealIV->getNumUses() != 2)
return false;
const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
- Instruction *User1 = cast<Instruction>(*RealIV->use_begin()),
- *User2 = cast<Instruction>(*llvm::next(RealIV->use_begin()));
+ Instruction *User1 = cast<Instruction>(*RealIV->user_begin()),
+ *User2 = cast<Instruction>(*std::next(RealIV->user_begin()));
if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
return false;
const SCEVAddRecExpr *User1SCEV =
@@ -616,26 +611,25 @@ bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale,
SmallVector<SmallInstructionVector, 32> &Roots,
SmallInstructionSet &AllRoots,
SmallInstructionVector &LoopIncs) {
- for (Value::use_iterator UI = IV->use_begin(),
- UIE = IV->use_end(); UI != UIE; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (!SE->isSCEVable(User->getType()))
+ for (User *U : IV->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (!SE->isSCEVable(UI->getType()))
continue;
- if (User->getType() != IV->getType())
+ if (UI->getType() != IV->getType())
continue;
- if (!L->contains(User))
+ if (!L->contains(UI))
continue;
- if (hasUsesOutsideLoop(User, L))
+ if (hasUsesOutsideLoop(UI, L))
continue;
if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
- SE->getSCEV(User), SE->getSCEV(IV)))) {
+ SE->getSCEV(UI), SE->getSCEV(IV)))) {
uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
if (Idx > 0 && Idx < Scale) {
- Roots[Idx-1].push_back(User);
- AllRoots.insert(User);
+ Roots[Idx-1].push_back(UI);
+ AllRoots.insert(UI);
} else if (Idx == Scale && Inc > 1) {
- LoopIncs.push_back(User);
+ LoopIncs.push_back(UI);
}
}
}
@@ -719,10 +713,8 @@ void LoopReroll::ReductionTracker::replaceSelected() {
// Replace users with the new end-of-chain value.
SmallInstructionVector Users;
- for (Value::use_iterator UI =
- PossibleReds[i].getReducedValue()->use_begin(),
- UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI)
- Users.push_back(cast<Instruction>(*UI));
+ for (User *U : PossibleReds[i].getReducedValue()->users())
+ Users.push_back(cast<Instruction>(U));
for (SmallInstructionVector::iterator J = Users.begin(),
JE = Users.end(); J != JE; ++J)
@@ -1088,9 +1080,8 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
L, SCEV::FlagAnyWrap));
{ // Limit the lifetime of SCEVExpander.
SCEVExpander Expander(*SE, "reroll");
- PHINode *NewIV =
- cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
- Header->begin()));
+ Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
+
for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
JE = BaseUseSet.end(); J != JE; ++J)
(*J)->replaceUsesOfWith(IV, NewIV);
@@ -1101,20 +1092,23 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
if (Inc == 1)
ICSCEV =
SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
- Value *IC;
- if (isa<SCEVConstant>(ICSCEV)) {
- IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
+ // Iteration count SCEV minus 1
+ const SCEV *ICMinus1SCEV =
+ SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
+
+ Value *ICMinus1; // Iteration count minus 1
+ if (isa<SCEVConstant>(ICMinus1SCEV)) {
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
} else {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader)
Preheader = InsertPreheaderForLoop(L, this);
- IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
- Preheader->getTerminator());
+ ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
+ Preheader->getTerminator());
}
- Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
- Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
+ Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
"exitcond");
BI->setCondition(Cond);
@@ -1131,12 +1125,16 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
}
bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
TLI = &getAnalysis<TargetLibraryInfo>();
- DL = getAnalysisIfAvailable<DataLayout>();
- DT = &getAnalysis<DominatorTree>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 14c5655..fde6bac 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -20,9 +20,10 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -43,8 +44,8 @@ namespace {
}
// LCSSA form makes instruction renaming easier.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -55,7 +56,7 @@ namespace {
AU.addRequired<TargetTransformInfo>();
}
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
bool simplifyLoopLatch(Loop *L);
bool rotateLoop(Loop *L, bool SimplifiedLatch);
@@ -78,6 +79,9 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.
bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
@@ -130,7 +134,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
UE = OrigHeaderVal->use_end(); UI != UE; ) {
// Grab the use before incrementing the iterator.
- Use &U = UI.getUse();
+ Use &U = *UI;
// Increment the iterator before removing the use from the list.
++UI;
@@ -251,8 +255,9 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) {
// Nuke the Latch block.
assert(Latch->empty() && "unable to evacuate Latch");
LI->removeBlock(Latch);
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
- DT->eraseNode(Latch);
+ if (DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTWP->getDomTree().eraseNode(Latch);
Latch->eraseFromParent();
return true;
}
@@ -301,7 +306,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
CodeMetrics Metrics;
Metrics.analyzeBasicBlock(OrigHeader, *TTI);
if (Metrics.notDuplicatable) {
- DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable"
+ DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
<< " instructions: "; L->dump());
return false;
}
@@ -433,23 +438,25 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// The conditional branch can't be folded, handle the general case.
// Update DominatorTree to reflect the CFG change we just made. Then split
// edges as necessary to preserve LoopSimplify form.
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ if (DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
// Everything that was dominated by the old loop header is now dominated
// by the original loop preheader. Conceptually the header was merged
// into the preheader, even though we reuse the actual block as a new
// loop latch.
- DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
OrigHeaderNode->end());
- DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
+ DomTreeNode *OrigPreheaderNode = DT.getNode(OrigPreheader);
for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
- DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
+ DT.changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
- assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
- assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
+ assert(DT.getNode(Exit)->getIDom() == OrigPreheaderNode);
+ assert(DT.getNode(NewHeader)->getIDom() == OrigPreheaderNode);
// Update OrigHeader to be dominated by the new header block.
- DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ DT.changeImmediateDominator(OrigHeader, OrigLatch);
}
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
@@ -459,9 +466,24 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
NewPH->setName(NewHeader->getName() + ".lr.ph");
// Preserve canonical loop form, which means that 'Exit' should have only
- // one predecessor.
- BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
- ExitSplit->moveBefore(Exit);
+ // one predecessor. Note that Exit could be an exit block for multiple
+ // nested loops, causing both of the edges to now be critical and need to
+ // be split.
+ SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
+ bool SplitLatchEdge = false;
+ for (SmallVectorImpl<BasicBlock *>::iterator PI = ExitPreds.begin(),
+ PE = ExitPreds.end();
+ PI != PE; ++PI) {
+ // We only need to split loop exit edges.
+ Loop *PredLoop = LI->getLoopFor(*PI);
+ if (!PredLoop || PredLoop->contains(Exit))
+ continue;
+ SplitLatchEdge |= L->getLoopLatch() == *PI;
+ BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this);
+ ExitSplit->moveBefore(Exit);
+ }
+ assert(SplitLatchEdge &&
+ "Despite splitting all preds, failed to split latch exit?");
} else {
// We can fold the conditional branch in the preheader, this makes things
// simpler. The first step is to remove the extra edge to the Exit block.
@@ -471,15 +493,17 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
PHBI->eraseFromParent();
// With our CFG finalized, update DomTree if it is available.
- if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
+ if (DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
// Update OrigHeader to be dominated by the new header block.
- DT->changeImmediateDominator(NewHeader, OrigPreheader);
- DT->changeImmediateDominator(OrigHeader, OrigLatch);
+ DT.changeImmediateDominator(NewHeader, OrigPreheader);
+ DT.changeImmediateDominator(OrigHeader, OrigLatch);
// Brute force incremental dominator tree update. Call
// findNearestCommonDominator on all CFG predecessors of each child of the
// original header.
- DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
+ DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader);
SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
OrigHeaderNode->end());
bool Changed;
@@ -492,11 +516,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
pred_iterator PI = pred_begin(BB);
BasicBlock *NearestDom = *PI;
for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
- NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
+ NearestDom = DT.findNearestCommonDominator(NearestDom, *PI);
// Remember if this changes the DomTree.
if (Node->getIDom()->getBlock() != NearestDom) {
- DT->changeImmediateDominator(BB, NearestDom);
+ DT.changeImmediateDominator(BB, NearestDom);
Changed = true;
}
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index eff5268..272a16d 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -56,22 +56,22 @@
#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -394,7 +394,7 @@ void Formula::print(raw_ostream &OS) const {
bool First = true;
if (BaseGV) {
if (!First) OS << " + "; else First = false;
- WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
+ BaseGV->printAsOperand(OS, /*PrintType=*/false);
}
if (BaseOffset != 0) {
if (!First) OS << " + "; else First = false;
@@ -422,7 +422,7 @@ void Formula::print(raw_ostream &OS) const {
OS << ')';
}
if (UnfoldedOffset != 0) {
- if (!First) OS << " + "; else First = false;
+ if (!First) OS << " + ";
OS << "imm(" << UnfoldedOffset << ')';
}
}
@@ -723,13 +723,12 @@ static bool isHighCostExpansion(const SCEV *S,
// multiplication already generates this expression.
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
Value *UVal = U->getValue();
- for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
- UI != UE; ++UI) {
+ for (User *UR : UVal->users()) {
// If U is a constant, it may be used by a ConstantExpr.
- Instruction *User = dyn_cast<Instruction>(*UI);
- if (User && User->getOpcode() == Instruction::Mul
- && SE.isSCEVable(User->getType())) {
- return SE.getSCEV(User) == Mul;
+ Instruction *UI = dyn_cast<Instruction>(UR);
+ if (UI && UI->getOpcode() == Instruction::Mul &&
+ SE.isSCEVable(UI->getType())) {
+ return SE.getSCEV(UI) == Mul;
}
}
}
@@ -804,7 +803,7 @@ public:
bool operator<(const Cost &Other) const;
- void Loose();
+ void Lose();
#ifndef NDEBUG
// Once any of the metrics loses, they must all remain losers.
@@ -864,7 +863,7 @@ void Cost::RateRegister(const SCEV *Reg,
return;
// Otherwise, do not consider this formula at all.
- Loose();
+ Lose();
return;
}
AddRecCost += 1; /// TODO: This should be a function of the stride.
@@ -903,7 +902,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg,
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
if (LoserRegs && LoserRegs->count(Reg)) {
- Loose();
+ Lose();
return;
}
if (Regs.insert(Reg)) {
@@ -925,7 +924,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
- Loose();
+ Lose();
return;
}
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
@@ -936,7 +935,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
E = F.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (VisitedRegs.count(BaseReg)) {
- Loose();
+ Lose();
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
@@ -967,8 +966,8 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
assert(isValid() && "invalid cost");
}
-/// Loose - Set this cost to a losing value.
-void Cost::Loose() {
+/// Lose - Set this cost to a losing value.
+void Cost::Lose() {
NumRegs = ~0u;
AddRecCost = ~0u;
NumIVMuls = ~0u;
@@ -980,21 +979,11 @@ void Cost::Loose() {
/// operator< - Choose the lower cost.
bool Cost::operator<(const Cost &Other) const {
- if (NumRegs != Other.NumRegs)
- return NumRegs < Other.NumRegs;
- if (AddRecCost != Other.AddRecCost)
- return AddRecCost < Other.AddRecCost;
- if (NumIVMuls != Other.NumIVMuls)
- return NumIVMuls < Other.NumIVMuls;
- if (NumBaseAdds != Other.NumBaseAdds)
- return NumBaseAdds < Other.NumBaseAdds;
- if (ScaleCost != Other.ScaleCost)
- return ScaleCost < Other.ScaleCost;
- if (ImmCost != Other.ImmCost)
- return ImmCost < Other.ImmCost;
- if (SetupCost != Other.SetupCost)
- return SetupCost < Other.SetupCost;
- return false;
+ return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
+ ImmCost, SetupCost) <
+ std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
+ Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
+ Other.SetupCost);
}
void Cost::print(raw_ostream &OS) const {
@@ -1080,19 +1069,19 @@ void LSRFixup::print(raw_ostream &OS) const {
// Store is common and interesting enough to be worth special-casing.
if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
OS << "store ";
- WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false);
+ Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
} else if (UserInst->getType()->isVoidTy())
OS << UserInst->getOpcodeName();
else
- WriteAsOperand(OS, UserInst, /*PrintType=*/false);
+ UserInst->printAsOperand(OS, /*PrintType=*/false);
OS << ", OperandValToReplace=";
- WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
+ OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
E = PostIncLoops.end(); I != E; ++I) {
OS << ", PostIncLoop=";
- WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
+ (*I)->getHeader()->printAsOperand(OS, /*PrintType=*/false);
}
if (LUIdx != ~size_t(0))
@@ -1126,11 +1115,7 @@ struct UniquifierDenseMapInfo {
}
static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
- unsigned Result = 0;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
- E = V.end(); I != E; ++I)
- Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
- return Result;
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
}
static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
@@ -1158,6 +1143,8 @@ public:
// TODO: Add a generic icmp too?
};
+ typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
+
KindType Kind;
Type *AccessTy;
@@ -1295,7 +1282,7 @@ void LSRUse::print(raw_ostream &OS) const {
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
OS << *I;
- if (llvm::next(I) != E)
+ if (std::next(I) != E)
OS << ',';
}
OS << '}';
@@ -1504,30 +1491,6 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
namespace {
-/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
-/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
-struct UseMapDenseMapInfo {
- static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
- return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
- }
-
- static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
- return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
- }
-
- static unsigned
- getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
- unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
- Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
- return Result;
- }
-
- static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
- const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
- return LHS == RHS;
- }
-};
-
/// IVInc - An individual increment in a Chain of IV increments.
/// Relate an IV user to an expression that computes the IV it uses from the IV
/// used by the previous link in the Chain.
@@ -1562,7 +1525,7 @@ struct IVChain {
// begin - return the first increment in the chain.
const_iterator begin() const {
assert(!Incs.empty());
- return llvm::next(Incs.begin());
+ return std::next(Incs.begin());
}
const_iterator end() const {
return Incs.end();
@@ -1656,9 +1619,7 @@ class LSRInstance {
}
// Support for sharing of LSRUses between LSRFixups.
- typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
- size_t,
- UseMapDenseMapInfo> UseMapTy;
+ typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -2229,7 +2190,7 @@ LSRInstance::getUse(const SCEV *&Expr,
}
std::pair<UseMapTy::iterator, bool> P =
- UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
+ UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
if (!P.second) {
// A use already existed with this base.
size_t LUIdx = P.first->second;
@@ -2338,7 +2299,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
- llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
+ std::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
@@ -2646,9 +2607,8 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
// they will eventually be used be the current chain, or can be computed
// from one of the chain increments. To be more precise we could
// transitively follow its user and only add leaf IV users to the set.
- for (Value::use_iterator UseIter = IVOper->use_begin(),
- UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
- Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
+ for (User *U : IVOper->users()) {
+ Instruction *OtherUse = dyn_cast<Instruction>(U);
if (!OtherUse)
continue;
// Uses in the chain will no longer be uses if the chain is formed.
@@ -2738,7 +2698,7 @@ void LSRInstance::CollectChains() {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst))
ChainInstruction(I, IVOpInst, ChainUsersVec);
- IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
} // Continue walking down the instructions.
} // Continue walking down the domtree.
@@ -2829,7 +2789,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
|| SE.getSCEV(IVSrc) == Head.IncExpr) {
break;
}
- IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
if (IVOpIter == IVOpEnd) {
// Gracefully give up on this chain.
@@ -3059,18 +3019,17 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
- } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- if (!Inserted.insert(U)) continue;
- const Value *V = U->getValue();
+ } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
+ if (!Inserted.insert(US)) continue;
+ const Value *V = US->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
// Look for instructions defined outside the loop.
if (L->contains(Inst)) continue;
} else if (isa<UndefValue>(V))
// Undef doesn't have a live range, so it doesn't matter.
continue;
- for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- const Instruction *UserInst = dyn_cast<Instruction>(*UI);
+ for (const Use &U : V->uses()) {
+ const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
// Ignore non-instructions.
if (!UserInst)
continue;
@@ -3082,7 +3041,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
UserInst->getParent() :
cast<PHINode>(UserInst)->getIncomingBlock(
- PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
+ PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
// Ignore uses which are part of other SCEV expressions, to avoid
@@ -3092,7 +3051,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
// If the user is a no-op, look through to its uses.
if (!isa<SCEVUnknown>(UserS))
continue;
- if (UserS == U) {
+ if (UserS == US) {
Worklist.push_back(
SE.getUnknown(const_cast<Instruction *>(UserInst)));
continue;
@@ -3100,7 +3059,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
}
// Ignore icmp instructions which are already being analyzed.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
- unsigned OtherIdx = !UI.getOperandNo();
+ unsigned OtherIdx = !U.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
@@ -3108,7 +3067,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
- LF.OperandValToReplace = UI.getUse();
+ LF.OperandValToReplace = U;
std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
LF.LUIdx = P.first;
LF.Offset = P.second;
@@ -3118,7 +3077,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
- InsertSupplementalFormula(U, LU, LF.LUIdx);
+ InsertSupplementalFormula(US, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
}
@@ -3221,10 +3180,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
continue;
// Collect all operands except *J.
- SmallVector<const SCEV *, 8> InnerAddOps
- (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
- InnerAddOps.append
- (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+ SmallVector<const SCEV *, 8> InnerAddOps(
+ ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append(std::next(J),
+ ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
@@ -3390,6 +3349,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
if (NewBaseOffset / Factor != Base.BaseOffset)
continue;
+ // If the offset will be truncated at this use, check that it is in bounds.
+ if (!IntTy->isPointerTy() &&
+ !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
+ continue;
// Check that multiplying with the use offset doesn't overflow.
int64_t Offset = LU.MinOffset;
@@ -3398,6 +3361,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Offset = (uint64_t)Offset * Factor;
if (Offset / Factor != LU.MinOffset)
continue;
+ // If the offset will be truncated at this use, check that it is in bounds.
+ if (!IntTy->isPointerTy() &&
+ !ConstantInt::isValueValidForType(IntTy, Offset))
+ continue;
Formula F = Base;
F.BaseOffset = NewBaseOffset;
@@ -3432,6 +3399,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
continue;
+ // If the offset will be truncated, check that it is in bounds.
+ if (!IntTy->isPointerTy() &&
+ !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
+ continue;
}
// If we make it here and it's legal, add it.
@@ -3614,8 +3585,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// Conservatively examine offsets between this orig reg a few selected
// other orig regs.
ImmMapTy::const_iterator OtherImms[] = {
- Imms.begin(), prior(Imms.end()),
- Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+ Imms.begin(), std::prev(Imms.end()),
+ Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
+ 2)
};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
@@ -4210,7 +4182,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
Cost SolutionCost;
- SolutionCost.Loose();
+ SolutionCost.Lose();
Cost CurCost;
SmallPtrSet<const SCEV *, 16> CurRegs;
DenseSet<const SCEV *> VisitedRegs;
@@ -4281,7 +4253,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
- BetterPos = llvm::next(BasicBlock::iterator(Inst));
+ BetterPos = std::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
@@ -4695,7 +4667,8 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
LSRInstance::LSRInstance(Loop *L, Pass *P)
: IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
- DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
+ DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
+ LI(P->getAnalysis<LoopInfo>()),
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
@@ -4734,7 +4707,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
#endif // DEBUG
DEBUG(dbgs() << "\nLSR on loop ";
- WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+ L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
dbgs() << ":\n");
// First, perform some low-level loop optimizations.
@@ -4864,8 +4837,8 @@ public:
LoopStrengthReduce();
private:
- bool runOnLoop(Loop *L, LPPassManager &LPM);
- void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
};
}
@@ -4874,7 +4847,7 @@ char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -4899,8 +4872,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
// Requiring LoopSimplify a second time here prevents IVUsers from running
@@ -4912,6 +4885,9 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+ if (skipOptnoneFunction(L))
+ return false;
+
bool Changed = false;
// Run the main LSR transformation.
@@ -4925,10 +4901,9 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
- unsigned numFolded =
- Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
- DeadInsts,
- &getAnalysis<TargetTransformInfo>());
+ unsigned numFolded = Rewriter.replaceCongruentIVs(
+ L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
+ &getAnalysis<TargetTransformInfo>());
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 08ac38d..ecd350b 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -86,12 +87,12 @@ namespace {
bool UserAllowPartial; // CurrentAllowPartial is user-specified.
bool UserRuntime; // CurrentRuntime is user-specified.
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
@@ -105,7 +106,7 @@ namespace {
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
// For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
};
}
@@ -124,6 +125,10 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
}
+Pass *llvm::createSimpleLoopUnrollPass() {
+ return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+}
+
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable,
@@ -146,6 +151,9 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
}
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ if (skipOptnoneFunction(L))
+ return false;
+
LoopInfo *LI = &getAnalysis<LoopInfo>();
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
@@ -158,7 +166,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
TargetTransformInfo::UnrollingPreferences UP;
UP.Threshold = CurrentThreshold;
UP.OptSizeThreshold = OptSizeUnrollThreshold;
+ UP.PartialThreshold = CurrentThreshold;
+ UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
UP.Count = CurrentCount;
+ UP.MaxCount = UINT_MAX;
UP.Partial = CurrentAllowPartial;
UP.Runtime = CurrentRuntime;
TTI.getUnrollingPreferences(L, UP);
@@ -168,11 +179,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// function is marked as optimize-for-size, and the unroll threshold was
// not user specified.
unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
+ unsigned PartialThreshold =
+ UserThreshold ? CurrentThreshold : UP.PartialThreshold;
if (!UserThreshold &&
Header->getParent()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize))
+ Attribute::OptimizeForSize)) {
Threshold = UP.OptSizeThreshold;
+ PartialThreshold = UP.PartialOptSizeThreshold;
+ }
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
@@ -206,14 +221,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Enforce the threshold.
- if (Threshold != NoThreshold) {
+ if (Threshold != NoThreshold && PartialThreshold != NoThreshold) {
unsigned NumInlineCandidates;
bool notDuplicatable;
unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
notDuplicatable, TTI);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (notDuplicatable) {
- DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable"
+ DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
<< " instructions.\n");
return false;
}
@@ -233,17 +248,19 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
if (TripCount) {
// Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = Threshold / LoopSize;
+ Count = PartialThreshold / LoopSize;
while (Count != 0 && TripCount%Count != 0)
Count--;
}
else if (Runtime) {
// Reduce unroll count to be a lower power-of-two value
- while (Count != 0 && Size > Threshold) {
+ while (Count != 0 && Size > PartialThreshold) {
Count >>= 1;
Size = LoopSize*Count;
}
}
+ if (Count > UP.MaxCount)
+ Count = UP.MaxCount;
if (Count < 2) {
DEBUG(dbgs() << " could not unroll partially\n");
return false;
@@ -253,7 +270,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, this, &LPM))
return false;
return true;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index c4ebfd5..5954f4a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -40,6 +39,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
@@ -156,27 +156,27 @@ namespace {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM);
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
bool processCurrentLoop();
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
AU.addRequired<TargetTransformInfo>();
}
private:
- virtual void releaseMemory() {
+ void releaseMemory() override {
BranchesInfo.forgetLoop(currentLoop);
}
@@ -225,7 +225,7 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
LoopPropsMapIt PropsIt;
bool Inserted;
- llvm::tie(PropsIt, Inserted) =
+ std::tie(PropsIt, Inserted) =
LoopsProperties.insert(std::make_pair(L, LoopProperties()));
LoopProperties &Props = PropsIt->second;
@@ -382,9 +382,14 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
}
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
+ if (skipOptnoneFunction(L))
+ return false;
+
LI = &getAnalysis<LoopInfo>();
LPM = &LPM_Ref;
- DT = getAnalysisIfAvailable<DominatorTree>();
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : 0;
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
bool Changed = false;
@@ -397,7 +402,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
if (Changed) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
if (DT)
- DT->runOnFunction(*F);
+ DT->recalculate(*F);
}
return Changed;
}
@@ -934,9 +939,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
Worklist.push_back(Use);
// Add users to the worklist which may be simplified now.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- Worklist.push_back(cast<Instruction>(*UI));
+ for (User *U : I->users())
+ Worklist.push_back(cast<Instruction>(U));
LPM->deleteSimpleAnalysisValue(I, L);
RemoveFromWorklist(I, Worklist);
I->replaceAllUsesWith(V);
@@ -986,12 +990,11 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
!cast<ConstantInt>(Val)->getZExtValue());
- for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
- UI != E; ++UI) {
- Instruction *U = dyn_cast<Instruction>(*UI);
- if (!U || !L->contains(U))
+ for (User *U : LIC->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || !L->contains(UI))
continue;
- Worklist.push_back(U);
+ Worklist.push_back(UI);
}
for (std::vector<Instruction*>::iterator UI = Worklist.begin(),
@@ -1005,19 +1008,18 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// Otherwise, we don't know the precise value of LIC, but we do know that it
// is certainly NOT "Val". As such, simplify any uses in the loop that we
// can. This case occurs when we unswitch switch statements.
- for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
- UI != E; ++UI) {
- Instruction *U = dyn_cast<Instruction>(*UI);
- if (!U || !L->contains(U))
+ for (User *U : LIC->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI || !L->contains(UI))
continue;
- Worklist.push_back(U);
+ Worklist.push_back(UI);
// TODO: We could do other simplifications, for example, turning
// 'icmp eq LIC, Val' -> false.
// If we know that LIC is not Val, use this info to simplify code.
- SwitchInst *SI = dyn_cast<SwitchInst>(U);
+ SwitchInst *SI = dyn_cast<SwitchInst>(UI);
if (SI == 0 || !isa<ConstantInt>(Val)) continue;
SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 8ced494..7c0a623 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -111,7 +111,9 @@ namespace {
LowerAtomic() : BasicBlockPass(ID) {
initializeLowerAtomicPass(*PassRegistry::getPassRegistry());
}
- bool runOnBasicBlock(BasicBlock &BB) {
+ bool runOnBasicBlock(BasicBlock &BB) override {
+ if (skipOptnoneFunction(BB))
+ return false;
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
Instruction *Inst = DI++;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9912d3d..2603c96 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -17,16 +17,16 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -75,6 +75,13 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
const DataLayout &TD) {
Ptr1 = Ptr1->stripPointerCasts();
Ptr2 = Ptr2->stripPointerCasts();
+
+ // Handle the trivial case first.
+ if (Ptr1 == Ptr2) {
+ Offset = 0;
+ return true;
+ }
+
GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
@@ -195,9 +202,9 @@ class MemsetRanges {
/// because each element is relatively large and expensive to copy.
std::list<MemsetRange> Ranges;
typedef std::list<MemsetRange>::iterator range_iterator;
- const DataLayout &TD;
+ const DataLayout &DL;
public:
- MemsetRanges(const DataLayout &td) : TD(td) {}
+ MemsetRanges(const DataLayout &DL) : DL(DL) {}
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
@@ -212,7 +219,7 @@ public:
}
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
- int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+ int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
addRange(OffsetFromFirst, StoreSize,
SI->getPointerOperand(), SI->getAlignment(), SI);
@@ -305,23 +312,23 @@ namespace {
class MemCpyOpt : public FunctionPass {
MemoryDependenceAnalysis *MD;
TargetLibraryInfo *TLI;
- const DataLayout *TD;
+ const DataLayout *DL;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
MD = 0;
TLI = 0;
- TD = 0;
+ DL = 0;
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
private:
// This transformation requires dominator postdominator info
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetLibraryInfo>();
@@ -353,7 +360,7 @@ FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -366,13 +373,13 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
- if (TD == 0) return 0;
+ if (DL == 0) return 0;
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
- MemsetRanges Ranges(*TD);
+ MemsetRanges Ranges(*DL);
BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
@@ -396,7 +403,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
- Offset, *TD))
+ Offset, *DL))
break;
Ranges.addStore(Offset, NextStore);
@@ -409,7 +416,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
- if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *DL))
break;
Ranges.addMemSet(Offset, MSI);
@@ -441,7 +448,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Range.TheStores.size() == 1) continue;
// If it is profitable to lower this range to memset, do so now.
- if (!Range.isProfitableToUseMemset(*TD))
+ if (!Range.isProfitableToUseMemset(*DL))
continue;
// Otherwise, we do want to transform this! Create a new memset.
@@ -453,7 +460,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Alignment == 0) {
Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
- Alignment = TD->getABITypeAlignment(EltType);
+ Alignment = DL->getABITypeAlignment(EltType);
}
AMemSet =
@@ -484,7 +491,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
- if (TD == 0) return false;
+ if (DL == 0) return false;
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
@@ -514,15 +521,15 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
unsigned storeAlign = SI->getAlignment();
if (!storeAlign)
- storeAlign = TD->getABITypeAlignment(SI->getOperand(0)->getType());
+ storeAlign = DL->getABITypeAlignment(SI->getOperand(0)->getType());
unsigned loadAlign = LI->getAlignment();
if (!loadAlign)
- loadAlign = TD->getABITypeAlignment(LI->getType());
+ loadAlign = DL->getABITypeAlignment(LI->getType());
bool changed = performCallSlotOptzn(LI,
SI->getPointerOperand()->stripPointerCasts(),
LI->getPointerOperand()->stripPointerCasts(),
- TD->getTypeStoreSize(SI->getOperand(0)->getType()),
+ DL->getTypeStoreSize(SI->getOperand(0)->getType()),
std::min(storeAlign, loadAlign), C);
if (changed) {
MD->removeInstruction(SI);
@@ -596,13 +603,13 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
// Check that all of src is copied to dest.
- if (TD == 0) return false;
+ if (DL == 0) return false;
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
return false;
- uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
if (cpyLen < srcSize)
@@ -617,7 +624,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!destArraySize)
return false;
- uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
+ uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) *
destArraySize->getZExtValue();
if (destSize < srcSize)
@@ -636,7 +643,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
}
- uint64_t destSize = TD->getTypeAllocSize(StructTy);
+ uint64_t destSize = DL->getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
} else {
@@ -646,7 +653,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Check that dest points to memory that is at least as aligned as src.
unsigned srcAlign = srcAlloca->getAlignment();
if (!srcAlign)
- srcAlign = TD->getABITypeAlignment(srcAlloca->getAllocatedType());
+ srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType());
bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
// If dest is not aligned enough and we can't increase its alignment then
// bail out.
@@ -657,30 +664,28 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// guarantees that it holds only undefined values when passed in (so the final
// memcpy can be dropped), that it is not read or written between the call and
// the memcpy, and that writing beyond the end of it is undefined.
- SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
- srcAlloca->use_end());
+ SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
+ srcAlloca->user_end());
while (!srcUseList.empty()) {
- User *UI = srcUseList.pop_back_val();
+ User *U = srcUseList.pop_back_val();
- if (isa<BitCastInst>(UI)) {
- for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
- I != E; ++I)
- srcUseList.push_back(*I);
- } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
+ if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
+ for (User *UU : U->users())
+ srcUseList.push_back(UU);
+ } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
if (G->hasAllZeroIndices())
- for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
- I != E; ++I)
- srcUseList.push_back(*I);
+ for (User *UU : U->users())
+ srcUseList.push_back(UU);
else
return false;
- } else if (UI != C && UI != cpy) {
+ } else if (U != C && U != cpy) {
return false;
}
}
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
if (!DT.dominates(cpyDestInst, C))
return false;
@@ -816,9 +821,8 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
- // We can only optimize statically-sized memcpy's that are non-volatile.
- ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (CopySize == 0 || M->isVolatile()) return false;
+ // We can only optimize non-volatile memcpy's.
+ if (M->isVolatile()) return false;
// If the source and destination of the memcpy are the same, then zap it.
if (M->getSource() == M->getDest()) {
@@ -832,7 +836,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
if (GV->isConstant() && GV->hasDefinitiveInitializer())
if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
IRBuilder<> Builder(M);
- Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize,
+ Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
M->getAlignment(), false);
MD->removeInstruction(M);
M->eraseFromParent();
@@ -840,9 +844,16 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
return true;
}
- // The are two possible optimizations we can do for memcpy:
+ // The optimizations after this point require the memcpy size.
+ ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+ if (CopySize == 0) return false;
+
+ // There are three possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started its
+ // lifetime copies undefined data, and we can therefore eliminate the
+ // memcpy in favor of the data that was already at the destination.
MemDepResult DepInfo = MD->getDependency(M);
if (DepInfo.isClobber()) {
if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
@@ -862,6 +873,25 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
if (SrcDepInfo.isClobber()) {
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+ } else if (SrcDepInfo.isDef()) {
+ Instruction *I = SrcDepInfo.getInst();
+ bool hasUndefContents = false;
+
+ if (isa<AllocaInst>(I)) {
+ hasUndefContents = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ if (LTSize->getZExtValue() >= CopySize->getZExtValue())
+ hasUndefContents = true;
+ }
+
+ if (hasUndefContents) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ ++NumMemCpyInstr;
+ return true;
+ }
}
return false;
@@ -899,12 +929,12 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
- if (TD == 0) return false;
+ if (DL == 0) return false;
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
- uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
+ uint64_t ByValSize = DL->getTypeAllocSize(ByValTy);
MemDepResult DepInfo =
MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
true, CS.getInstruction(),
@@ -933,7 +963,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
if (MDep->getAlignment() < ByValAlign &&
- getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
+ getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, DL) < ByValAlign)
return false;
// Verify that the copied-from memory doesn't change in between the memcpy and
@@ -1007,9 +1037,13 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
// function.
//
bool MemCpyOpt::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TLI = &getAnalysis<TargetLibraryInfo>();
// If we don't have at least memset and memcpy, there is little point of doing
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 15cee44..2f19935 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -35,8 +35,8 @@ namespace {
initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool runOnFunction(Function &F);
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
private:
/// Optimize calls to sqrt.
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 328a9c5..b6b4d97 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -27,17 +27,16 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -67,7 +66,7 @@ static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
<< *Ops[0].Op->getType() << '\t';
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
dbgs() << "[ ";
- WriteAsOperand(dbgs(), Ops[i].Op, false, M);
+ Ops[i].Op->printAsOperand(dbgs(), false, M);
dbgs() << ", #" << Ops[i].Rank << "] ";
}
}
@@ -168,9 +167,9 @@ namespace {
initializeReassociatePass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
private:
@@ -821,7 +820,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
if (ExpressionChanged == I)
break;
ExpressionChanged->moveBefore(I);
- ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->use_begin());
+ ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
} while (1);
// Throw away any left over nodes from the original expression.
@@ -863,8 +862,7 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
+ for (User *U : V->users()) {
if (!BinaryOperator::isNeg(U)) continue;
// We found one! Now we have to make sure that the definition dominates
@@ -914,8 +912,8 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
isReassociableOp(Sub->getOperand(1), Instruction::Sub))
return true;
if (Sub->hasOneUse() &&
- (isReassociableOp(Sub->use_back(), Instruction::Add) ||
- isReassociableOp(Sub->use_back(), Instruction::Sub)))
+ (isReassociableOp(Sub->user_back(), Instruction::Add) ||
+ isReassociableOp(Sub->user_back(), Instruction::Sub)))
return true;
return false;
@@ -1293,7 +1291,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
// the same symbolic value cluster together. For instance, the input operand
// sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
// ("x | 123", "x & 789", "y & 456").
- std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
+ std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
// Step 3: Combine adjacent operands
XorOpnd *PrevOpnd = 0;
@@ -1548,19 +1546,6 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
return 0;
}
-namespace {
- /// \brief Predicate tests whether a ValueEntry's op is in a map.
- struct IsValueInMap {
- const DenseMap<Value *, unsigned> &Map;
-
- IsValueInMap(const DenseMap<Value *, unsigned> &Map) : Map(Map) {}
-
- bool operator()(const ValueEntry &Entry) {
- return Map.find(Entry.Op) != Map.end();
- }
- };
-}
-
/// \brief Build up a vector of value/power pairs factoring a product.
///
/// Given a series of multiplication operands, build a vector of factors and
@@ -1619,7 +1604,7 @@ bool Reassociate::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
// below our mininum of '4'.
assert(FactorPowerSum >= 4);
- std::sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
+ std::stable_sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
return true;
}
@@ -1795,9 +1780,9 @@ void Reassociate::EraseInst(Instruction *I) {
// If this is a node in an expression tree, climb to the expression root
// and add that since that's where optimization actually happens.
unsigned Opcode = Op->getOpcode();
- while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode &&
+ while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
Visited.insert(Op))
- Op = Op->use_back();
+ Op = Op->user_back();
RedoInsts.insert(Op);
}
}
@@ -1815,8 +1800,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(I->getOperand(0), Instruction::Mul) ||
(I->hasOneUse() &&
- (isReassociableOp(I->use_back(), Instruction::Mul) ||
- isReassociableOp(I->use_back(), Instruction::Add)))) {
+ (isReassociableOp(I->user_back(), Instruction::Mul) ||
+ isReassociableOp(I->user_back(), Instruction::Add)))) {
Instruction *NI = ConvertShiftToMul(I);
RedoInsts.insert(I);
MadeChange = true;
@@ -1869,7 +1854,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
// and if this is not an inner node of a multiply tree.
if (isReassociableOp(I->getOperand(1), Instruction::Mul) &&
(!I->hasOneUse() ||
- !isReassociableOp(I->use_back(), Instruction::Mul))) {
+ !isReassociableOp(I->user_back(), Instruction::Mul))) {
Instruction *NI = LowerNegateToMultiply(I);
RedoInsts.insert(I);
MadeChange = true;
@@ -1885,13 +1870,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
// If this is an interior node of a reassociable tree, ignore it until we
// get to the root of the tree, to avoid N^2 analysis.
unsigned Opcode = BO->getOpcode();
- if (BO->hasOneUse() && BO->use_back()->getOpcode() == Opcode)
+ if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
return;
// If this is an add tree that is used by a sub instruction, ignore it
// until we process the subtract.
if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
- cast<Instruction>(BO->use_back())->getOpcode() == Instruction::Sub)
+ cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub)
return;
ReassociateExpression(BO);
@@ -1943,7 +1928,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
// In this case we reassociate to put the negation on the outside so that we
// can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
- cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
+ cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add &&
isa<ConstantInt>(Ops.back().Op) &&
cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
ValueEntry Tmp = Ops.pop_back_val();
@@ -1972,6 +1957,9 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
}
bool Reassociate::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
// Calculate the rank map for F
BuildRankMap(F);
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 07f540a..d9809ce 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -20,12 +20,12 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
@@ -40,23 +40,22 @@ namespace {
initializeRegToMemPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(BreakCriticalEdgesID);
AU.addPreservedID(BreakCriticalEdgesID);
}
- bool valueEscapes(const Instruction *Inst) const {
- const BasicBlock *BB = Inst->getParent();
- for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end();
- UI != E; ++UI) {
- const Instruction *I = cast<Instruction>(*UI);
- if (I->getParent() != BB || isa<PHINode>(I))
+ bool valueEscapes(const Instruction *Inst) const {
+ const BasicBlock *BB = Inst->getParent();
+ for (const User *U : Inst->users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != BB || isa<PHINode>(UI))
return true;
}
return false;
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
};
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 4364720..b8f10e9 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -26,13 +26,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -153,7 +153,7 @@ namespace {
/// Constant Propagation.
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -205,8 +205,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const DataLayout *td, const TargetLibraryInfo *tli)
- : TD(td), TLI(tli) {}
+ SCCPSolver(const DataLayout *DL, const TargetLibraryInfo *tli)
+ : DL(DL), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -491,7 +491,6 @@ private:
}
void visitCallSite (CallSite CS);
void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
- void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); }
@@ -1067,7 +1066,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
}
// Transform load from a constant into a constant if possible.
- if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, TD))
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL))
return markConstant(IV, &I, C);
// Otherwise we cannot say for certain what value this load will produce.
@@ -1181,10 +1180,9 @@ void SCCPSolver::Solve() {
// since all of its users will have already been marked as overdefined
// Update all of the users of this instruction's value.
//
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- if (Instruction *I = dyn_cast<Instruction>(*UI))
- OperandChangedState(I);
+ for (User *U : I->users())
+ if (Instruction *UI = dyn_cast<Instruction>(U))
+ OperandChangedState(UI);
}
// Process the instruction work list.
@@ -1201,10 +1199,9 @@ void SCCPSolver::Solve() {
// Update all of the users of this instruction's value.
//
if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- if (Instruction *I = dyn_cast<Instruction>(*UI))
- OperandChangedState(I);
+ for (User *U : I->users())
+ if (Instruction *UI = dyn_cast<Instruction>(U))
+ OperandChangedState(UI);
}
// Process the basic block work list.
@@ -1499,7 +1496,7 @@ namespace {
/// Sparse Conditional Constant Propagator.
///
struct SCCP : public FunctionPass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfo>();
}
static char ID; // Pass identification, replacement for typeid
@@ -1510,7 +1507,7 @@ namespace {
// runOnFunction - Run the Sparse Conditional Constant Propagation
// algorithm, and return true if the function was modified.
//
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
};
} // end anonymous namespace
@@ -1553,10 +1550,14 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
// and return true if the function was modified.
//
bool SCCP::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
- SCCPSolver Solver(TD, TLI);
+ SCCPSolver Solver(DL, TLI);
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
@@ -1628,14 +1629,14 @@ namespace {
/// Constant Propagation.
///
struct IPSCCP : public ModulePass {
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfo>();
}
static char ID;
IPSCCP() : ModulePass(ID) {
initializeIPSCCPPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
};
} // end anonymous namespace
@@ -1658,21 +1659,20 @@ static bool AddressIsTaken(const GlobalValue *GV) {
// Delete any dead constantexpr klingons.
GV->removeDeadConstantUsers();
- for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
- UI != E; ++UI) {
- const User *U = *UI;
- if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ for (const Use &U : GV->uses()) {
+ const User *UR = U.getUser();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(UR)) {
if (SI->getOperand(0) == GV || SI->isVolatile())
return true; // Storing addr of GV.
- } else if (isa<InvokeInst>(U) || isa<CallInst>(U)) {
+ } else if (isa<InvokeInst>(UR) || isa<CallInst>(UR)) {
// Make sure we are calling the function, not passing the address.
- ImmutableCallSite CS(cast<Instruction>(U));
- if (!CS.isCallee(UI))
+ ImmutableCallSite CS(cast<Instruction>(UR));
+ if (!CS.isCallee(&U))
return true;
- } else if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(UR)) {
if (LI->isVolatile())
return true;
- } else if (isa<BlockAddress>(U)) {
+ } else if (isa<BlockAddress>(UR)) {
// blockaddress doesn't take the address of the function, it takes addr
// of label.
} else {
@@ -1683,9 +1683,10 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
- SCCPSolver Solver(TD, TLI);
+ SCCPSolver Solver(DL, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
// that are in the input. As IPSCCP runs through and simplifies code,
@@ -1834,8 +1835,9 @@ bool IPSCCP::runOnModule(Module &M) {
for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) {
// If there are any PHI nodes in this successor, drop entries for BB now.
BasicBlock *DeadBB = BlocksToErase[i];
- for (Value::use_iterator UI = DeadBB->use_begin(), UE = DeadBB->use_end();
- UI != UE; ) {
+ for (Value::user_iterator UI = DeadBB->user_begin(),
+ UE = DeadBB->user_end();
+ UI != UE;) {
// Grab the user and then increment the iterator early, as the user
// will be deleted. Step past all adjacent uses from the same user.
Instruction *I = dyn_cast<Instruction>(*UI);
@@ -1925,7 +1927,7 @@ bool IPSCCP::runOnModule(Module &M) {
"Overdefined values should have been taken out of the map!");
DEBUG(dbgs() << "Found that GV '" << GV->getName() << "' is constant!\n");
while (!GV->use_empty()) {
- StoreInst *SI = cast<StoreInst>(GV->use_back());
+ StoreInst *SI = cast<StoreInst>(GV->user_back());
SI->eraseFromParent();
}
M.getGlobalList().erase(GV);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 9f3fc83..ed5e618 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -29,32 +29,39 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
-#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TimeValue.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+#if __cplusplus >= 201103L && !defined(NDEBUG)
+// We only use this for a debug check in C++11
+#include <random>
+#endif
+
using namespace llvm;
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
@@ -73,6 +80,16 @@ STATISTIC(NumVectorized, "Number of vectorized aggregates");
static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
+/// Hidden option to enable randomly shuffling the slices to help uncover
+/// instability in their order.
+static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
+ cl::init(false), cl::Hidden);
+
+/// Hidden option to experiment with completely strict handling of inbounds
+/// GEPs.
+static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds",
+ cl::init(false), cl::Hidden);
+
namespace {
/// \brief A custom IRBuilder inserter which prefixes all names if they are
/// preserved.
@@ -244,8 +261,8 @@ public:
void printUse(raw_ostream &OS, const_iterator I,
StringRef Indent = " ") const;
void print(raw_ostream &OS) const;
- void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump(const_iterator I) const;
- void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump() const;
+ void dump(const_iterator I) const;
+ void dump() const;
#endif
private:
@@ -339,7 +356,7 @@ private:
bool IsSplittable = false) {
// Completely skip uses which have a zero size or start either before or
// past the end of the allocation.
- if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) {
+ if (Size == 0 || Offset.uge(AllocSize)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
<< " which has zero size or starts outside of the "
<< AllocSize << " byte alloca:\n"
@@ -380,6 +397,43 @@ private:
if (GEPI.use_empty())
return markAsDead(GEPI);
+ if (SROAStrictInbounds && GEPI.isInBounds()) {
+ // FIXME: This is a manually un-factored variant of the basic code inside
+ // of GEPs with checking of the inbounds invariant specified in the
+ // langref in a very strict sense. If we ever want to enable
+ // SROAStrictInbounds, this code should be factored cleanly into
+ // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
+ // by writing out the code here where we have the underlying allocation
+ // size readily available.
+ APInt GEPOffset = Offset;
+ for (gep_type_iterator GTI = gep_type_begin(GEPI),
+ GTE = gep_type_end(GEPI);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ break;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = DL.getStructLayout(STy);
+ GEPOffset +=
+ APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
+ } else {
+ // For array or vector indices, scale the index by the size of the type.
+ APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
+ GEPOffset += Index * APInt(Offset.getBitWidth(),
+ DL.getTypeAllocSize(GTI.getIndexedType()));
+ }
+
+ // If this index has computed an intermediate pointer which is not
+ // inbounds, then the result of the GEP is a poison value and we can
+ // delete it and all uses.
+ if (GEPOffset.ugt(AllocSize))
+ return markAsDead(GEPI);
+ }
+ }
+
return Base::visitGetElementPtrInst(GEPI);
}
@@ -426,8 +480,7 @@ private:
// risk of overflow.
// FIXME: We should instead consider the pointer to have escaped if this
// function is being instrumented for addressing bugs or race conditions.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size)) {
+ if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
<< " which extends past the end of the " << AllocSize
<< " byte alloca:\n"
@@ -446,7 +499,7 @@ private:
assert(II.getRawDest() == *U && "Pointer use is not the destination?");
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
if ((Length && Length->getValue() == 0) ||
- (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ (IsOffsetKnown && Offset.uge(AllocSize)))
// Zero-length mem transfer intrinsics can be ignored entirely.
return markAsDead(II);
@@ -461,14 +514,30 @@ private:
void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- if ((Length && Length->getValue() == 0) ||
- (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ if (Length && Length->getValue() == 0)
// Zero-length mem transfer intrinsics can be ignored entirely.
return markAsDead(II);
+ // Because we can visit these intrinsics twice, also check to see if the
+ // first time marked this instruction as dead. If so, skip it.
+ if (VisitedDeadInsts.count(&II))
+ return;
+
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ // This side of the transfer is completely out-of-bounds, and so we can
+ // nuke the entire transfer. However, we also need to nuke the other side
+ // if already added to our partitions.
+ // FIXME: Yet another place we really should bypass this when
+ // instrumenting for ASan.
+ if (Offset.uge(AllocSize)) {
+ SmallDenseMap<Instruction *, unsigned>::iterator MTPI = MemTransferSliceMap.find(&II);
+ if (MTPI != MemTransferSliceMap.end())
+ S.Slices[MTPI->second].kill();
+ return markAsDead(II);
+ }
+
uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - RawOffset;
@@ -487,7 +556,7 @@ private:
// they both point to the same alloca.
bool Inserted;
SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
- llvm::tie(MTPI, Inserted) =
+ std::tie(MTPI, Inserted) =
MemTransferSliceMap.insert(std::make_pair(&II, S.Slices.size()));
unsigned PrevIdx = MTPI->second;
if (!Inserted) {
@@ -546,7 +615,7 @@ private:
Size = 0;
do {
Instruction *I, *UsedI;
- llvm::tie(UsedI, I) = Uses.pop_back_val();
+ std::tie(UsedI, I) = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
@@ -568,10 +637,9 @@ private:
return I;
}
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
- ++UI)
- if (Visited.insert(cast<Instruction>(*UI)))
- Uses.push_back(std::make_pair(I, cast<Instruction>(*UI)));
+ for (User *U : I->users())
+ if (Visited.insert(cast<Instruction>(U)))
+ Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());
return 0;
@@ -597,8 +665,7 @@ private:
// themselves which should be replaced with undef.
// FIXME: This should instead be escaped in the event we're instrumenting
// for address sanitization.
- if ((Offset.isNegative() && (-Offset).uge(PHISize)) ||
- (!Offset.isNegative() && Offset.uge(AllocSize))) {
+ if (Offset.uge(AllocSize)) {
S.DeadOperands.push_back(U);
return;
}
@@ -638,8 +705,7 @@ private:
// themselves which should be replaced with undef.
// FIXME: This should instead be escaped in the event we're instrumenting
// for address sanitization.
- if ((Offset.isNegative() && Offset.uge(SelectSize)) ||
- (!Offset.isNegative() && Offset.uge(AllocSize))) {
+ if (Offset.uge(AllocSize)) {
S.DeadOperands.push_back(U);
return;
}
@@ -674,6 +740,13 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
std::mem_fun_ref(&Slice::isDead)),
Slices.end());
+#if __cplusplus >= 201103L && !defined(NDEBUG)
+ if (SROARandomShuffleSlices) {
+ std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
+ std::shuffle(Slices.begin(), Slices.end(), MT);
+ }
+#endif
+
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
std::sort(Slices.begin(), Slices.end());
@@ -712,8 +785,10 @@ void AllocaSlices::print(raw_ostream &OS) const {
print(OS, I);
}
-void AllocaSlices::dump(const_iterator I) const { print(dbgs(), I); }
-void AllocaSlices::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
+ print(dbgs(), I);
+}
+LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -741,12 +816,10 @@ public:
// Retain the debug information attached to the alloca for use when
// rewriting loads and stores.
if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) {
- for (Value::use_iterator UI = DebugNode->use_begin(),
- UE = DebugNode->use_end();
- UI != UE; ++UI)
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ for (User *U : DebugNode->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
- else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
DVIs.push_back(DVI);
}
@@ -760,8 +833,8 @@ public:
DVIs.pop_back_val()->eraseFromParent();
}
- virtual bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &Insts) const {
+ bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const override {
Value *Ptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I))
Ptr = LI->getOperand(0);
@@ -788,7 +861,7 @@ public:
return false;
}
- virtual void updateDebugInfo(Instruction *Inst) const {
+ void updateDebugInfo(Instruction *Inst) const override {
for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
@@ -899,10 +972,10 @@ public:
C(0), DL(0), DT(0) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
- void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
- const char *getPassName() const { return "SROA"; }
+ const char *getPassName() const override { return "SROA"; }
static char ID;
private:
@@ -915,6 +988,7 @@ private:
ArrayRef<AllocaSlices::iterator> SplitUses);
bool splitAlloca(AllocaInst &AI, AllocaSlices &S);
bool runOnAlloca(AllocaInst &AI);
+ void clobberUse(Use &U);
void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
bool promoteAllocas(Function &F);
};
@@ -928,7 +1002,7 @@ FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
false, false)
@@ -938,7 +1012,11 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
AllocaSlices::const_iterator E,
uint64_t EndOffset) {
Type *Ty = 0;
- bool IgnoreNonIntegralTypes = false;
+ bool TyIsCommon = true;
+ IntegerType *ITy = 0;
+
+ // Note that we need to look at *every* alloca slice's Use to ensure we
+ // always get consistent results regardless of the order of slices.
for (AllocaSlices::const_iterator I = B; I != E; ++I) {
Use *U = I->getUse();
if (isa<IntrinsicInst>(*U->getUser()))
@@ -951,37 +1029,30 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
UserTy = SI->getValueOperand()->getType();
- } else {
- IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
- continue;
}
- if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
+ if (!UserTy || (Ty && Ty != UserTy))
+ TyIsCommon = false; // Give up on anything but an iN type.
+ else
+ Ty = UserTy;
+
+ if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
// this for split integer operations where we want to use the type of the
// entity causing the split. Also skip if the type is not a byte width
// multiple.
- if (ITy->getBitWidth() % 8 != 0 ||
- ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
+ if (UserITy->getBitWidth() % 8 != 0 ||
+ UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
continue;
- // If we have found an integer type use covering the alloca, use that
- // regardless of the other types, as integers are often used for
- // a "bucket of bits" type.
- //
- // NB: This *must* be the only return from inside the loop so that the
- // order of slices doesn't impact the computed type.
- return ITy;
- } else if (IgnoreNonIntegralTypes) {
- continue;
+ // Track the largest bitwidth integer type used in this way in case there
+ // is no common type.
+ if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
+ ITy = UserITy;
}
-
- if (Ty && Ty != UserTy)
- IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
-
- Ty = UserTy;
}
- return Ty;
+
+ return TyIsCommon ? Ty : ITy;
}
/// PHI instructions that use an alloca and are subsequently loaded can be
@@ -1011,9 +1082,8 @@ static bool isSafePHIToSpeculate(PHINode &PN,
BasicBlock *BB = PN.getParent();
unsigned MaxAlign = 0;
bool HaveLoad = false;
- for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end(); UI != UE;
- ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ for (User *U : PN.users()) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
if (LI == 0 || !LI->isSimple())
return false;
@@ -1077,13 +1147,13 @@ static void speculatePHINodeLoads(PHINode &PN) {
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ.
- LoadInst *SomeLoad = cast<LoadInst>(*PN.use_begin());
+ LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
unsigned Align = SomeLoad->getAlignment();
// Rewrite all loads of the PN to use the new PHI.
while (!PN.use_empty()) {
- LoadInst *LI = cast<LoadInst>(*PN.use_begin());
+ LoadInst *LI = cast<LoadInst>(PN.user_back());
LI->replaceAllUsesWith(NewPN);
LI->eraseFromParent();
}
@@ -1127,9 +1197,8 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) {
bool TDerefable = TValue->isDereferenceablePointer();
bool FDerefable = FValue->isDereferenceablePointer();
- for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end(); UI != UE;
- ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ for (User *U : SI.users()) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
if (LI == 0 || !LI->isSimple())
return false;
@@ -1155,7 +1224,7 @@ static void speculateSelectInstLoads(SelectInst &SI) {
Value *FV = SI.getFalseValue();
// Replace the loads of the select with a select of two loads.
while (!SI.use_empty()) {
- LoadInst *LI = cast<LoadInst>(*SI.use_begin());
+ LoadInst *LI = cast<LoadInst>(SI.user_back());
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
@@ -1188,7 +1257,7 @@ static void speculateSelectInstLoads(SelectInst &SI) {
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices) {
+ SmallVectorImpl<Value *> &Indices, Twine NamePrefix) {
if (Indices.empty())
return BasePtr;
@@ -1197,7 +1266,7 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
+ return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1211,9 +1280,13 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
/// indicated by Indices to have the correct offset.
static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
Value *BasePtr, Type *Ty, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices) {
+ SmallVectorImpl<Value *> &Indices,
+ Twine NamePrefix) {
if (Ty == TargetTy)
- return buildGEP(IRB, BasePtr, Indices);
+ return buildGEP(IRB, BasePtr, Indices, NamePrefix);
+
+ // Pointer size to use for the indices.
+ unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType());
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1222,11 +1295,13 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
do {
if (ElementTy->isPointerTy())
break;
- if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
- ElementTy = SeqTy->getElementType();
- // Note that we use the default address space as this index is over an
- // array or a vector, not a pointer.
- Indices.push_back(IRB.getInt(APInt(DL.getPointerSizeInBits(0), 0)));
+
+ if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
+ ElementTy = ArrayTy->getElementType();
+ Indices.push_back(IRB.getIntN(PtrSize, 0));
+ } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
+ ElementTy = VectorTy->getElementType();
+ Indices.push_back(IRB.getInt32(0));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; // Nothing left to descend into.
@@ -1240,7 +1315,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
- return buildGEP(IRB, BasePtr, Indices);
+ return buildGEP(IRB, BasePtr, Indices, NamePrefix);
}
/// \brief Recursively compute indices for a natural GEP.
@@ -1250,9 +1325,10 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
- SmallVectorImpl<Value *> &Indices) {
+ SmallVectorImpl<Value *> &Indices,
+ Twine NamePrefix) {
if (Offset == 0)
- return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices);
+ return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, NamePrefix);
// We can't recurse through pointer types.
if (Ty->isPointerTy())
@@ -1272,7 +1348,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices);
+ Offset, TargetTy, Indices, NamePrefix);
}
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1285,7 +1361,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices);
+ Indices, NamePrefix);
}
StructType *STy = dyn_cast<StructType>(Ty);
@@ -1304,7 +1380,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices);
+ Indices, NamePrefix);
}
/// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1319,12 +1395,13 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
/// If no natural GEP can be constructed, this function returns null.
static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, APInt Offset, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices) {
+ SmallVectorImpl<Value *> &Indices,
+ Twine NamePrefix) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
// an i8.
- if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
+ if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
return 0;
Type *ElementTy = Ty->getElementType();
@@ -1338,7 +1415,7 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices);
+ Indices, NamePrefix);
}
/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1356,8 +1433,9 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
-static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
- Value *Ptr, APInt Offset, Type *PointerTy) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
+ APInt Offset, Type *PointerTy,
+ Twine NamePrefix) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1391,7 +1469,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
// See if we can perform a natural GEP here.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
- Indices)) {
+ Indices, NamePrefix)) {
if (P->getType() == PointerTy) {
// Zap any offset pointer that we ended up computing in previous rounds.
if (OffsetPtr && OffsetPtr->use_empty())
@@ -1425,20 +1503,21 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
if (!OffsetPtr) {
if (!Int8Ptr) {
- Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
- "raw_cast");
+ Int8Ptr = IRB.CreateBitCast(
+ Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
+ NamePrefix + "sroa_raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
- "raw_idx");
+ NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
+ Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
return Ptr;
}
@@ -1931,16 +2010,22 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
// integer type will be stored here for easy access during rewriting.
IntegerType *IntTy;
- // The offset of the slice currently being rewritten.
+ // The original offset of the slice currently being rewritten relative to
+ // the original alloca.
uint64_t BeginOffset, EndOffset;
+ // The new offsets of the slice currently being rewritten relative to the
+ // original alloca.
+ uint64_t NewBeginOffset, NewEndOffset;
+
+ uint64_t SliceSize;
bool IsSplittable;
bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
- // Output members carrying state about the result of visiting and rewriting
- // the slice of the alloca.
- bool IsUsedByRewrittenSpeculatableInstructions;
+ // Track post-rewrite users which are PHI nodes and Selects.
+ SmallPtrSetImpl<PHINode *> &PHIUsers;
+ SmallPtrSetImpl<SelectInst *> &SelectUsers;
// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
@@ -1949,11 +2034,14 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
public:
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &S, SROA &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
- uint64_t NewBeginOffset, uint64_t NewEndOffset,
- bool IsVectorPromotable = false,
- bool IsIntegerPromotable = false)
+ uint64_t NewAllocaBeginOffset,
+ uint64_t NewAllocaEndOffset, bool IsVectorPromotable,
+ bool IsIntegerPromotable,
+ SmallPtrSetImpl<PHINode *> &PHIUsers,
+ SmallPtrSetImpl<SelectInst *> &SelectUsers)
: DL(DL), S(S), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
- NewAllocaBeginOffset(NewBeginOffset), NewAllocaEndOffset(NewEndOffset),
+ NewAllocaBeginOffset(NewAllocaBeginOffset),
+ NewAllocaEndOffset(NewAllocaEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : 0),
ElementTy(VecTy ? VecTy->getElementType() : 0),
@@ -1964,7 +2052,7 @@ public:
DL.getTypeSizeInBits(NewAI.getAllocatedType()))
: 0),
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
- OldPtr(), IsUsedByRewrittenSpeculatableInstructions(false),
+ OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
if (VecTy) {
assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
@@ -1983,6 +2071,14 @@ public:
IsSplit =
BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
+ // Compute the intersecting offset range.
+ assert(BeginOffset < NewAllocaEndOffset);
+ assert(EndOffset > NewAllocaBeginOffset);
+ NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
+ NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
+
+ SliceSize = NewEndOffset - NewBeginOffset;
+
OldUse = I->getUse();
OldPtr = cast<Instruction>(OldUse->get());
@@ -1997,20 +2093,6 @@ public:
return CanSROA;
}
- /// \brief Query whether this slice is used by speculatable instructions after
- /// rewriting.
- ///
- /// These instructions (PHIs and Selects currently) require the alloca slice
- /// to run back through the rewriter. Thus, they are promotable, but not on
- /// this iteration. This is distinct from a slice which is unpromotable for
- /// some other reason, in which case we don't even want to perform the
- /// speculation. This can be querried at any time and reflects whether (at
- /// that point) a visit call has rewritten a speculatable instruction on the
- /// current slice.
- bool isUsedByRewrittenSpeculatableInstructions() const {
- return IsUsedByRewrittenSpeculatableInstructions;
- }
-
private:
// Make sure the other visit overloads are visible.
using Base::visit;
@@ -2021,30 +2103,53 @@ private:
llvm_unreachable("No rewrite rule for this instruction!");
}
- Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, uint64_t Offset,
- Type *PointerTy) {
- assert(Offset >= NewAllocaBeginOffset);
- return getAdjustedPtr(IRB, DL, &NewAI, APInt(DL.getPointerSizeInBits(),
- Offset - NewAllocaBeginOffset),
- PointerTy);
+ Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
+ // Note that the offset computation can use BeginOffset or NewBeginOffset
+ // interchangeably for unsplit slices.
+ assert(IsSplit || BeginOffset == NewBeginOffset);
+ uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
+
+#ifndef NDEBUG
+ StringRef OldName = OldPtr->getName();
+ // Skip through the last '.sroa.' component of the name.
+ size_t LastSROAPrefix = OldName.rfind(".sroa.");
+ if (LastSROAPrefix != StringRef::npos) {
+ OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
+ // Look for an SROA slice index.
+ size_t IndexEnd = OldName.find_first_not_of("0123456789");
+ if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
+ // Strip the index and look for the offset.
+ OldName = OldName.substr(IndexEnd + 1);
+ size_t OffsetEnd = OldName.find_first_not_of("0123456789");
+ if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
+ // Strip the offset.
+ OldName = OldName.substr(OffsetEnd + 1);
+ }
+ }
+ // Strip any SROA suffixes as well.
+ OldName = OldName.substr(0, OldName.find(".sroa_"));
+#endif
+
+ return getAdjustedPtr(IRB, DL, &NewAI,
+ APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
+#ifndef NDEBUG
+ Twine(OldName) + "."
+#else
+ Twine()
+#endif
+ );
}
- /// \brief Compute suitable alignment to access an offset into the new alloca.
- unsigned getOffsetAlign(uint64_t Offset) {
+ /// \brief Compute suitable alignment to access this slice of the *new* alloca.
+ ///
+ /// You can optionally pass a type to this routine and if that type's ABI
+ /// alignment is itself suitable, this will return zero.
+ unsigned getSliceAlign(Type *Ty = 0) {
unsigned NewAIAlign = NewAI.getAlignment();
if (!NewAIAlign)
NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
- return MinAlign(NewAIAlign, Offset);
- }
-
- /// \brief Compute suitable alignment to access a type at an offset of the
- /// new alloca.
- ///
- /// \returns zero if the type's ABI alignment is a suitable alignment,
- /// otherwise returns the maximal suitable alignment.
- unsigned getOffsetTypeAlign(Type *Ty, uint64_t Offset) {
- unsigned Align = getOffsetAlign(Offset);
- return Align == DL.getABITypeAlignment(Ty) ? 0 : Align;
+ unsigned Align = MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
+ return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align;
}
unsigned getIndex(uint64_t Offset) {
@@ -2062,8 +2167,7 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst(uint64_t NewBeginOffset,
- uint64_t NewEndOffset) {
+ Value *rewriteVectorizedLoadInst() {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
@@ -2073,8 +2177,7 @@ private:
return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
- Value *rewriteIntegerLoad(LoadInst &LI, uint64_t NewBeginOffset,
- uint64_t NewEndOffset) {
+ Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
@@ -2093,32 +2196,23 @@ private:
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
- uint64_t Size = NewEndOffset - NewBeginOffset;
-
- Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst(NewBeginOffset, NewEndOffset);
+ V = rewriteVectorizedLoadInst();
} else if (IntTy && LI.getType()->isIntegerTy()) {
- V = rewriteIntegerLoad(LI, NewBeginOffset, NewEndOffset);
+ V = rewriteIntegerLoad(LI);
} else if (NewBeginOffset == NewAllocaBeginOffset &&
canConvertValue(DL, NewAllocaTy, LI.getType())) {
V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- LI.isVolatile(), "load");
+ LI.isVolatile(), LI.getName());
} else {
Type *LTy = TargetTy->getPointerTo();
- V = IRB.CreateAlignedLoad(
- getAdjustedAllocaPtr(IRB, NewBeginOffset, LTy),
- getOffsetTypeAlign(TargetTy, NewBeginOffset - NewAllocaBeginOffset),
- LI.isVolatile(), "load");
+ V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
+ getSliceAlign(TargetTy), LI.isVolatile(),
+ LI.getName());
IsPtrAdjusted = true;
}
V = convertValue(DL, IRB, V, TargetTy);
@@ -2127,13 +2221,13 @@ private:
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
- assert(Size < DL.getTypeStoreSize(LI.getType()) &&
+ assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
"Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
DL.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
- IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
+ IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
@@ -2155,9 +2249,7 @@ private:
return !LI.isVolatile() && !IsPtrAdjusted;
}
- bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
- uint64_t NewBeginOffset,
- uint64_t NewEndOffset) {
+ bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) {
if (V->getType() != VecTy) {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
@@ -2183,8 +2275,7 @@ private:
return true;
}
- bool rewriteIntegerStore(Value *V, StoreInst &SI,
- uint64_t NewBeginOffset, uint64_t NewEndOffset) {
+ bool rewriteIntegerStore(Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
@@ -2217,30 +2308,22 @@ private:
if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
Pass.PostPromotionWorklist.insert(AI);
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
- uint64_t Size = NewEndOffset - NewBeginOffset;
- if (Size < DL.getTypeStoreSize(V->getType())) {
+ if (SliceSize < DL.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
DL.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+ IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset,
"extract");
}
if (VecTy)
- return rewriteVectorizedStoreInst(V, SI, OldOp, NewBeginOffset,
- NewEndOffset);
+ return rewriteVectorizedStoreInst(V, SI, OldOp);
if (IntTy && V->getType()->isIntegerTy())
- return rewriteIntegerStore(V, SI, NewBeginOffset, NewEndOffset);
+ return rewriteIntegerStore(V, SI);
StoreInst *NewSI;
if (NewBeginOffset == NewAllocaBeginOffset &&
@@ -2250,12 +2333,9 @@ private:
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
SI.isVolatile());
} else {
- Value *NewPtr = getAdjustedAllocaPtr(IRB, NewBeginOffset,
- V->getType()->getPointerTo());
- NewSI = IRB.CreateAlignedStore(
- V, NewPtr, getOffsetTypeAlign(
- V->getType(), NewBeginOffset - NewAllocaBeginOffset),
- SI.isVolatile());
+ Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo());
+ NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
+ SI.isVolatile());
}
(void)NewSI;
Pass.DeadInsts.insert(&SI);
@@ -2307,11 +2387,10 @@ private:
// pointer to the new alloca.
if (!isa<Constant>(II.getLength())) {
assert(!IsSplit);
- assert(BeginOffset >= NewAllocaBeginOffset);
- II.setDest(
- getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
+ assert(NewBeginOffset == BeginOffset);
+ II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
Type *CstTy = II.getAlignmentCst()->getType();
- II.setAlignment(ConstantInt::get(CstTy, getOffsetAlign(BeginOffset)));
+ II.setAlignment(ConstantInt::get(CstTy, getSliceAlign()));
deleteIfTriviallyDead(OldPtr);
return false;
@@ -2323,13 +2402,6 @@ private:
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
- uint64_t SliceOffset = NewBeginOffset - NewAllocaBeginOffset;
-
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memset.
if (!VecTy && !IntTy &&
@@ -2341,8 +2413,8 @@ private:
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemSet(
- getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getRawDest()->getType()),
- II.getValue(), Size, getOffsetAlign(SliceOffset), II.isVolatile());
+ getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
+ getSliceAlign(), II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
@@ -2419,25 +2491,11 @@ private:
DEBUG(dbgs() << " original: " << II << "\n");
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
- assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
- bool IsDest = II.getRawDest() == OldPtr;
-
- // Compute the relative offset within the transfer.
- unsigned IntPtrWidth = DL.getPointerSizeInBits();
- APInt RelOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
+ bool IsDest = &II.getRawDestUse() == OldUse;
+ assert((IsDest && II.getRawDest() == OldPtr) ||
+ (!IsDest && II.getRawSource() == OldPtr));
- unsigned Align = II.getAlignment();
- uint64_t SliceOffset = NewBeginOffset - NewAllocaBeginOffset;
- if (Align > 1)
- Align =
- MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
- MinAlign(II.getAlignment(), getOffsetAlign(SliceOffset)));
+ unsigned SliceAlign = getSliceAlign();
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
@@ -2447,19 +2505,20 @@ private:
// memcpy, and so simply updating the pointers is the necessary for us to
// update both source and dest of a single call.
if (!IsSplittable) {
- Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource();
+ Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
if (IsDest)
- II.setDest(
- getAdjustedAllocaPtr(IRB, BeginOffset, II.getRawDest()->getType()));
+ II.setDest(AdjustedPtr);
else
- II.setSource(getAdjustedAllocaPtr(IRB, BeginOffset,
- II.getRawSource()->getType()));
+ II.setSource(AdjustedPtr);
- Type *CstTy = II.getAlignmentCst()->getType();
- II.setAlignment(ConstantInt::get(CstTy, Align));
+ if (II.getAlignment() > SliceAlign) {
+ Type *CstTy = II.getAlignmentCst()->getType();
+ II.setAlignment(
+ ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign)));
+ }
DEBUG(dbgs() << " to: " << II << "\n");
- deleteIfTriviallyDead(OldOp);
+ deleteIfTriviallyDead(OldPtr);
return false;
}
// For split transfer intrinsics we have an incredibly useful assurance:
@@ -2495,37 +2554,39 @@ private:
// alloca that should be re-examined after rewriting this instruction.
Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI
- = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
+ = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
+ assert(AI != &OldAI && AI != &NewAI &&
+ "Splittable transfers cannot reach the same alloca on both ends.");
Pass.Worklist.insert(AI);
+ }
- if (EmitMemCpy) {
- Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
- : II.getRawDest()->getType();
+ Type *OtherPtrTy = OtherPtr->getType();
+ unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
+
+ // Compute the relative offset for the other pointer within the transfer.
+ unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
+ APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
+ unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1,
+ OtherOffset.zextOrTrunc(64).getZExtValue());
+ if (EmitMemCpy) {
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
- OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
+ OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
+ OtherPtr->getName() + ".");
- Value *OurPtr = getAdjustedAllocaPtr(
- IRB, NewBeginOffset,
- IsDest ? II.getRawDest()->getType() : II.getRawSource()->getType());
+ Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
- CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
- IsDest ? OtherPtr : OurPtr,
- Size, Align, II.isVolatile());
+ CallInst *New = IRB.CreateMemCpy(
+ IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size,
+ MinAlign(SliceAlign, OtherAlign), II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
- // Note that we clamp the alignment to 1 here as a 0 alignment for a memcpy
- // is equivalent to 1, but that isn't true if we end up rewriting this as
- // a load or store.
- if (!Align)
- Align = 1;
-
bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset;
uint64_t Size = NewEndOffset - NewBeginOffset;
@@ -2535,22 +2596,30 @@ private:
IntegerType *SubIntTy
= IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
- Type *OtherPtrTy = NewAI.getType();
+ // Reset the other pointer type to match the register type we're going to
+ // use, but using the address space of the original other pointer.
if (VecTy && !IsWholeAlloca) {
if (NumElements == 1)
OtherPtrTy = VecTy->getElementType();
else
OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
- OtherPtrTy = OtherPtrTy->getPointerTo();
+ OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
} else if (IntTy && !IsWholeAlloca) {
- OtherPtrTy = SubIntTy->getPointerTo();
+ OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
+ } else {
+ OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
}
- Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
+ Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
+ OtherPtr->getName() + ".");
+ unsigned SrcAlign = OtherAlign;
Value *DstPtr = &NewAI;
- if (!IsDest)
+ unsigned DstAlign = SliceAlign;
+ if (!IsDest) {
std::swap(SrcPtr, DstPtr);
+ std::swap(SrcAlign, DstAlign);
+ }
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
@@ -2564,7 +2633,7 @@ private:
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
} else {
- Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
+ Src = IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(),
"copyload");
}
@@ -2582,7 +2651,7 @@ private:
}
StoreInst *Store = cast<StoreInst>(
- IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
+ IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
@@ -2594,20 +2663,13 @@ private:
DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getArgOperand(1) == OldPtr);
- // Compute the intersecting offset range.
- assert(BeginOffset < NewAllocaEndOffset);
- assert(EndOffset > NewAllocaBeginOffset);
- uint64_t NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
- uint64_t NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
-
// Record this instruction for deletion.
Pass.DeadInsts.insert(&II);
ConstantInt *Size
= ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
- Value *Ptr =
- getAdjustedAllocaPtr(IRB, NewBeginOffset, II.getArgOperand(1)->getType());
+ Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);
@@ -2628,28 +2690,22 @@ private:
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
- IRBuilderTy PtrBuilder(OldPtr);
- PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
- ".");
+ IRBuilderTy PtrBuilder(IRB);
+ PtrBuilder.SetInsertPoint(OldPtr);
+ PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
- Value *NewPtr =
- getAdjustedAllocaPtr(PtrBuilder, BeginOffset, OldPtr->getType());
+ Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
// Replace the operands which were using the old pointer.
std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
- // Check whether we can speculate this PHI node, and if so remember that
- // fact and queue it up for another iteration after the speculation
- // occurs.
- if (isSafePHIToSpeculate(PN, &DL)) {
- Pass.SpeculatablePHIs.insert(&PN);
- IsUsedByRewrittenSpeculatableInstructions = true;
- return true;
- }
-
- return false; // PHIs can't be promoted on their own.
+ // PHIs can't be promoted on their own, but often can be speculated. We
+ // check the speculation outside of the rewriter so that we see the
+ // fully-rewritten alloca.
+ PHIUsers.insert(&PN);
+ return true;
}
bool visitSelectInst(SelectInst &SI) {
@@ -2659,7 +2715,7 @@ private:
assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
- Value *NewPtr = getAdjustedAllocaPtr(IRB, BeginOffset, OldPtr->getType());
+ Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
// Replace the operands which were using the old pointer.
if (SI.getOperand(1) == OldPtr)
SI.setOperand(1, NewPtr);
@@ -2669,16 +2725,11 @@ private:
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
- // Check whether we can speculate this select instruction, and if so
- // remember that fact and queue it up for another iteration after the
- // speculation occurs.
- if (isSafeSelectToSpeculate(SI, &DL)) {
- Pass.SpeculatableSelects.insert(&SI);
- IsUsedByRewrittenSpeculatableInstructions = true;
- return true;
- }
-
- return false; // Selects can't be promoted on their own.
+ // Selects can't be promoted on their own, but often can be speculated. We
+ // check the speculation outside of the rewriter so that we see the
+ // fully-rewritten alloca.
+ SelectUsers.insert(&SI);
+ return true;
}
};
@@ -2726,10 +2777,9 @@ private:
/// Enqueue all the users of the given instruction for further processing.
/// This uses a set to de-duplicate users.
void enqueueUsers(Instruction &I) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
- ++UI)
- if (Visited.insert(*UI))
- Queue.push_back(&UI.getUse());
+ for (Use &U : I.uses())
+ if (Visited.insert(U.getUser()))
+ Queue.push_back(&U);
}
// Conservative default is to not rewrite anything.
@@ -3114,17 +3164,17 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
<< "[" << BeginOffset << "," << EndOffset << ") to: " << *NewAI
<< "\n");
- // Track the high watermark on several worklists that are only relevant for
+ // Track the high watermark on the worklist as it is only relevant for
// promoted allocas. We will reset it to this point if the alloca is not in
// fact scheduled for promotion.
unsigned PPWOldSize = PostPromotionWorklist.size();
- unsigned SPOldSize = SpeculatablePHIs.size();
- unsigned SSOldSize = SpeculatableSelects.size();
unsigned NumUses = 0;
+ SmallPtrSet<PHINode *, 8> PHIUsers;
+ SmallPtrSet<SelectInst *, 8> SelectUsers;
AllocaSliceRewriter Rewriter(*DL, S, *this, AI, *NewAI, BeginOffset,
EndOffset, IsVectorPromotable,
- IsIntegerPromotable);
+ IsIntegerPromotable, PHIUsers, SelectUsers);
bool Promotable = true;
for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
SUE = SplitUses.end();
@@ -3145,50 +3195,60 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
MaxUsesPerAllocaPartition =
std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
- if (Promotable && !Rewriter.isUsedByRewrittenSpeculatableInstructions()) {
- DEBUG(dbgs() << " and queuing for promotion\n");
- PromotableAllocas.push_back(NewAI);
- } else if (NewAI != &AI ||
- (Promotable &&
- Rewriter.isUsedByRewrittenSpeculatableInstructions())) {
+ // Now that we've processed all the slices in the new partition, check if any
+ // PHIs or Selects would block promotion.
+ for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
+ E = PHIUsers.end();
+ I != E; ++I)
+ if (!isSafePHIToSpeculate(**I, DL)) {
+ Promotable = false;
+ PHIUsers.clear();
+ SelectUsers.clear();
+ break;
+ }
+ for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
+ E = SelectUsers.end();
+ I != E; ++I)
+ if (!isSafeSelectToSpeculate(**I, DL)) {
+ Promotable = false;
+ PHIUsers.clear();
+ SelectUsers.clear();
+ break;
+ }
+
+ if (Promotable) {
+ if (PHIUsers.empty() && SelectUsers.empty()) {
+ // Promote the alloca.
+ PromotableAllocas.push_back(NewAI);
+ } else {
+ // If we have either PHIs or Selects to speculate, add them to those
+ // worklists and re-queue the new alloca so that we promote it on the
+ // next iteration.
+ for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
+ E = PHIUsers.end();
+ I != E; ++I)
+ SpeculatablePHIs.insert(*I);
+ for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
+ E = SelectUsers.end();
+ I != E; ++I)
+ SpeculatableSelects.insert(*I);
+ Worklist.insert(NewAI);
+ }
+ } else {
// If we can't promote the alloca, iterate on it to check for new
// refinements exposed by splitting the current alloca. Don't iterate on an
// alloca which didn't actually change and didn't get promoted.
- //
- // Alternatively, if we could promote the alloca but have speculatable
- // instructions then we will speculate them after finishing our processing
- // of the original alloca. Mark the new one for re-visiting in the next
- // iteration so the speculated operations can be rewritten.
- //
- // FIXME: We should actually track whether the rewriter changed anything.
- Worklist.insert(NewAI);
- }
-
- // Drop any post-promotion work items if promotion didn't happen.
- if (!Promotable) {
+ if (NewAI != &AI)
+ Worklist.insert(NewAI);
+
+ // Drop any post-promotion work items if promotion didn't happen.
while (PostPromotionWorklist.size() > PPWOldSize)
PostPromotionWorklist.pop_back();
- while (SpeculatablePHIs.size() > SPOldSize)
- SpeculatablePHIs.pop_back();
- while (SpeculatableSelects.size() > SSOldSize)
- SpeculatableSelects.pop_back();
}
return true;
}
-namespace {
-struct IsSliceEndLessOrEqualTo {
- uint64_t UpperBound;
-
- IsSliceEndLessOrEqualTo(uint64_t UpperBound) : UpperBound(UpperBound) {}
-
- bool operator()(const AllocaSlices::iterator &I) {
- return I->endOffset() <= UpperBound;
- }
-};
-}
-
static void
removeFinishedSplitUses(SmallVectorImpl<AllocaSlices::iterator> &SplitUses,
uint64_t &MaxSplitUseEndOffset, uint64_t Offset) {
@@ -3200,7 +3260,9 @@ removeFinishedSplitUses(SmallVectorImpl<AllocaSlices::iterator> &SplitUses,
size_t SplitUsesOldSize = SplitUses.size();
SplitUses.erase(std::remove_if(SplitUses.begin(), SplitUses.end(),
- IsSliceEndLessOrEqualTo(Offset)),
+ [Offset](const AllocaSlices::iterator &I) {
+ return I->endOffset() <= Offset;
+ }),
SplitUses.end());
if (SplitUsesOldSize == SplitUses.size())
return;
@@ -3227,7 +3289,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
uint64_t BeginOffset = S.begin()->beginOffset();
- for (AllocaSlices::iterator SI = S.begin(), SJ = llvm::next(SI), SE = S.end();
+ for (AllocaSlices::iterator SI = S.begin(), SJ = std::next(SI), SE = S.end();
SI != SE; SI = SJ) {
uint64_t MaxEndOffset = SI->endOffset();
@@ -3326,6 +3388,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
return Changed;
}
+/// \brief Clobber a use with undef, deleting the used value if it becomes dead.
+void SROA::clobberUse(Use &U) {
+ Value *OldV = U;
+ // Replace the use with an undef value.
+ U = UndefValue::get(OldV->getType());
+
+ // Check for this making an instruction dead. We have to garbage collect
+ // all the dead instructions to ensure the uses of any alloca end up being
+ // minimal.
+ if (Instruction *OldI = dyn_cast<Instruction>(OldV))
+ if (isInstructionTriviallyDead(OldI)) {
+ DeadInsts.insert(OldI);
+ }
+}
+
/// \brief Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
@@ -3363,21 +3440,22 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
for (AllocaSlices::dead_user_iterator DI = S.dead_user_begin(),
DE = S.dead_user_end();
DI != DE; ++DI) {
- Changed = true;
+ // Free up everything used by this instruction.
+ for (Use &DeadOp : (*DI)->operands())
+ clobberUse(DeadOp);
+
+ // Now replace the uses of this instruction.
(*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
+
+ // And mark it for deletion.
DeadInsts.insert(*DI);
+ Changed = true;
}
for (AllocaSlices::dead_op_iterator DO = S.dead_op_begin(),
DE = S.dead_op_end();
DO != DE; ++DO) {
- Value *OldV = **DO;
- // Clobber the use with an undef value.
- **DO = UndefValue::get(OldV->getType());
- if (Instruction *OldI = dyn_cast<Instruction>(OldV))
- if (isInstructionTriviallyDead(OldI)) {
- Changed = true;
- DeadInsts.insert(OldI);
- }
+ clobberUse(**DO);
+ Changed = true;
}
// No slices to split. Leave the dead alloca for a later pass to clean up.
@@ -3413,10 +3491,10 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
I->replaceAllUsesWith(UndefValue::get(I->getType()));
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
- if (Instruction *U = dyn_cast<Instruction>(*OI)) {
+ for (Use &Operand : I->operands())
+ if (Instruction *U = dyn_cast<Instruction>(Operand)) {
// Zero out the operand and see if it becomes trivially dead.
- *OI = 0;
+ Operand = 0;
if (isInstructionTriviallyDead(U))
DeadInsts.insert(U);
}
@@ -3432,10 +3510,9 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
static void enqueueUsersInWorklist(Instruction &I,
SmallVectorImpl<Instruction *> &Worklist,
SmallPtrSet<Instruction *, 8> &Visited) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
- ++UI)
- if (Visited.insert(cast<Instruction>(*UI)))
- Worklist.push_back(cast<Instruction>(*UI));
+ for (User *U : I.users())
+ if (Visited.insert(cast<Instruction>(U)))
+ Worklist.push_back(cast<Instruction>(U));
}
/// \brief Promote the allocas, using the best available technique.
@@ -3521,32 +3598,24 @@ bool SROA::promoteAllocas(Function &F) {
return true;
}
-namespace {
- /// \brief A predicate to test whether an alloca belongs to a set.
- class IsAllocaInSet {
- typedef SmallPtrSet<AllocaInst *, 4> SetType;
- const SetType &Set;
-
- public:
- typedef AllocaInst *argument_type;
-
- IsAllocaInSet(const SetType &Set) : Set(Set) {}
- bool operator()(AllocaInst *AI) const { return Set.count(AI); }
- };
-}
-
bool SROA::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- DL = getAnalysisIfAvailable<DataLayout>();
- if (!DL) {
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP) {
DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
return false;
}
- DT = getAnalysisIfAvailable<DominatorTree>();
+ DL = &DLP->getDataLayout();
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : 0;
BasicBlock &EntryBB = F.getEntryBlock();
- for (BasicBlock::iterator I = EntryBB.begin(), E = llvm::prior(EntryBB.end());
+ for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
Worklist.insert(AI);
@@ -3564,11 +3633,14 @@ bool SROA::runOnFunction(Function &F) {
// Remove the deleted allocas from various lists so that we don't try to
// continue processing them.
if (!DeletedAllocas.empty()) {
- Worklist.remove_if(IsAllocaInSet(DeletedAllocas));
- PostPromotionWorklist.remove_if(IsAllocaInSet(DeletedAllocas));
+ auto IsInSet = [&](AllocaInst *AI) {
+ return DeletedAllocas.count(AI);
+ };
+ Worklist.remove_if(IsInSet);
+ PostPromotionWorklist.remove_if(IsInSet);
PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
PromotableAllocas.end(),
- IsAllocaInSet(DeletedAllocas)),
+ IsInSet),
PromotableAllocas.end());
DeletedAllocas.clear();
}
@@ -3585,6 +3657,6 @@ bool SROA::runOnFunction(Function &F) {
void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
if (RequiresDomTree)
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 9bcd702..20d6daa 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -24,26 +24,33 @@
#define DEBUG_TYPE "sample-profile"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
+#include <cctype>
using namespace llvm;
@@ -52,8 +59,161 @@ using namespace llvm;
static cl::opt<std::string> SampleProfileFile(
"sample-profile-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
+ "sample-profile-max-propagate-iterations", cl::init(100),
+ cl::desc("Maximum number of iterations to go through when propagating "
+ "sample block/edge weights through the CFG."));
namespace {
+/// \brief Represents the relative location of an instruction.
+///
+/// Instruction locations are specified by the line offset from the
+/// beginning of the function (marked by the line where the function
+/// header is) and the discriminator value within that line.
+///
+/// The discriminator value is useful to distinguish instructions
+/// that are on the same line but belong to different basic blocks
+/// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
+struct InstructionLocation {
+ InstructionLocation(int L, unsigned D) : LineOffset(L), Discriminator(D) {}
+ int LineOffset;
+ unsigned Discriminator;
+};
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<InstructionLocation> {
+ typedef DenseMapInfo<int> OffsetInfo;
+ typedef DenseMapInfo<unsigned> DiscriminatorInfo;
+ static inline InstructionLocation getEmptyKey() {
+ return InstructionLocation(OffsetInfo::getEmptyKey(),
+ DiscriminatorInfo::getEmptyKey());
+ }
+ static inline InstructionLocation getTombstoneKey() {
+ return InstructionLocation(OffsetInfo::getTombstoneKey(),
+ DiscriminatorInfo::getTombstoneKey());
+ }
+ static inline unsigned getHashValue(InstructionLocation Val) {
+ return DenseMapInfo<std::pair<int, unsigned>>::getHashValue(
+ std::pair<int, unsigned>(Val.LineOffset, Val.Discriminator));
+ }
+ static inline bool isEqual(InstructionLocation LHS, InstructionLocation RHS) {
+ return LHS.LineOffset == RHS.LineOffset &&
+ LHS.Discriminator == RHS.Discriminator;
+ }
+};
+}
+
+namespace {
+typedef DenseMap<InstructionLocation, unsigned> BodySampleMap;
+typedef DenseMap<BasicBlock *, unsigned> BlockWeightMap;
+typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
+typedef std::pair<BasicBlock *, BasicBlock *> Edge;
+typedef DenseMap<Edge, unsigned> EdgeWeightMap;
+typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
+
+/// \brief Representation of the runtime profile for a function.
+///
+/// This data structure contains the runtime profile for a given
+/// function. It contains the total number of samples collected
+/// in the function and a map of samples collected in every statement.
+class SampleFunctionProfile {
+public:
+ SampleFunctionProfile()
+ : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(0), PDT(0),
+ LI(0), Ctx(0) {}
+
+ unsigned getFunctionLoc(Function &F);
+ bool emitAnnotations(Function &F, DominatorTree *DomTree,
+ PostDominatorTree *PostDomTree, LoopInfo *Loops);
+ unsigned getInstWeight(Instruction &I);
+ unsigned getBlockWeight(BasicBlock *B);
+ void addTotalSamples(unsigned Num) { TotalSamples += Num; }
+ void addHeadSamples(unsigned Num) { TotalHeadSamples += Num; }
+ void addBodySamples(int LineOffset, unsigned Discriminator, unsigned Num) {
+ assert(LineOffset >= 0);
+ BodySamples[InstructionLocation(LineOffset, Discriminator)] += Num;
+ }
+ void print(raw_ostream &OS);
+ void printEdgeWeight(raw_ostream &OS, Edge E);
+ void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
+ void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
+ bool computeBlockWeights(Function &F);
+ void findEquivalenceClasses(Function &F);
+ void findEquivalencesFor(BasicBlock *BB1,
+ SmallVector<BasicBlock *, 8> Descendants,
+ DominatorTreeBase<BasicBlock> *DomTree);
+ void propagateWeights(Function &F);
+ unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
+ void buildEdges(Function &F);
+ bool propagateThroughEdges(Function &F);
+ bool empty() { return BodySamples.empty(); }
+
+protected:
+ /// \brief Total number of samples collected inside this function.
+ ///
+ /// Samples are cumulative, they include all the samples collected
+ /// inside this function and all its inlined callees.
+ unsigned TotalSamples;
+
+ /// \brief Total number of samples collected at the head of the function.
+ /// FIXME: Use head samples to estimate a cold/hot attribute for the function.
+ unsigned TotalHeadSamples;
+
+ /// \brief Line number for the function header. Used to compute relative
+ /// line numbers from the absolute line LOCs found in instruction locations.
+ /// The relative line numbers are needed to address the samples from the
+ /// profile file.
+ unsigned HeaderLineno;
+
+ /// \brief Map line offsets to collected samples.
+ ///
+ /// Each entry in this map contains the number of samples
+ /// collected at the corresponding line offset. All line locations
+ /// are an offset from the start of the function.
+ BodySampleMap BodySamples;
+
+ /// \brief Map basic blocks to their computed weights.
+ ///
+ /// The weight of a basic block is defined to be the maximum
+ /// of all the instruction weights in that block.
+ BlockWeightMap BlockWeights;
+
+ /// \brief Map edges to their computed weights.
+ ///
+ /// Edge weights are computed by propagating basic block weights in
+ /// SampleProfile::propagateWeights.
+ EdgeWeightMap EdgeWeights;
+
+ /// \brief Set of visited blocks during propagation.
+ SmallPtrSet<BasicBlock *, 128> VisitedBlocks;
+
+ /// \brief Set of visited edges during propagation.
+ SmallSet<Edge, 128> VisitedEdges;
+
+ /// \brief Equivalence classes for block weights.
+ ///
+ /// Two blocks BB1 and BB2 are in the same equivalence class if they
+ /// dominate and post-dominate each other, and they are in the same loop
+ /// nest. When this happens, the two blocks are guaranteed to execute
+ /// the same number of times.
+ EquivalenceClassMap EquivalenceClass;
+
+ /// \brief Dominance, post-dominance and loop information.
+ DominatorTree *DT;
+ PostDominatorTree *PDT;
+ LoopInfo *LI;
+
+ /// \brief Predecessors for each basic block in the CFG.
+ BlockEdgeMap Predecessors;
+
+ /// \brief Successors for each basic block in the CFG.
+ BlockEdgeMap Successors;
+
+ /// \brief LLVM context holding the debug data we need.
+ LLVMContext *Ctx;
+};
+
/// \brief Sample-based profile reader.
///
/// Each profile contains sample counts for all the functions
@@ -77,61 +237,33 @@ namespace {
/// 2. The samples collected at each line in F. To provide some
/// protection against source code shuffling, line numbers should
/// be relative to the start of the function.
-class SampleProfile {
+class SampleModuleProfile {
public:
- SampleProfile(StringRef F) : Profiles(0), Filename(F) {}
+ SampleModuleProfile(const Module &M, StringRef F)
+ : Profiles(0), Filename(F), M(M) {}
void dump();
- void loadText();
+ bool loadText();
void loadNative() { llvm_unreachable("not implemented"); }
- bool emitAnnotations(Function &F);
void printFunctionProfile(raw_ostream &OS, StringRef FName);
void dumpFunctionProfile(StringRef FName);
+ SampleFunctionProfile &getProfile(const Function &F) {
+ return Profiles[F.getName()];
+ }
-protected:
- typedef DenseMap<uint32_t, uint32_t> BodySampleMap;
- typedef DenseMap<BasicBlock *, uint32_t> BlockWeightMap;
-
- /// \brief Representation of the runtime profile for a function.
- ///
- /// This data structure contains the runtime profile for a given
- /// function. It contains the total number of samples collected
- /// in the function and a map of samples collected in every statement.
- struct FunctionProfile {
- /// \brief Total number of samples collected inside this function.
- ///
- /// Samples are cumulative, they include all the samples collected
- /// inside this function and all its inlined callees.
- unsigned TotalSamples;
-
- // \brief Total number of samples collected at the head of the function.
- unsigned TotalHeadSamples;
-
- /// \brief Map line offsets to collected samples.
- ///
- /// Each entry in this map contains the number of samples
- /// collected at the corresponding line offset. All line locations
- /// are an offset from the start of the function.
- BodySampleMap BodySamples;
-
- /// \brief Map basic blocks to their computed weights.
- ///
- /// The weight of a basic block is defined to be the maximum
- /// of all the instruction weights in that block.
- BlockWeightMap BlockWeights;
- };
-
- uint32_t getInstWeight(Instruction &I, unsigned FirstLineno,
- BodySampleMap &BodySamples);
- uint32_t computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
- BodySampleMap &BodySamples);
+ /// \brief Report a parse error message.
+ void reportParseError(int64_t LineNumber, Twine Msg) const {
+ DiagnosticInfoSampleProfile Diag(Filename.data(), LineNumber, Msg);
+ M.getContext().diagnose(Diag);
+ }
+protected:
/// \brief Map every function to its associated profile.
///
/// The profile of every function executed at runtime is collected
- /// in the structure FunctionProfile. This maps function objects
+ /// in the structure SampleFunctionProfile. This maps function objects
/// to their corresponding profiles.
- StringMap<FunctionProfile> Profiles;
+ StringMap<SampleFunctionProfile> Profiles;
/// \brief Path name to the file holding the profile data.
///
@@ -140,63 +272,10 @@ protected:
/// version of the profile format to be used in constructing test
/// cases and debugging.
StringRef Filename;
-};
-/// \brief Loader class for text-based profiles.
-///
-/// This class defines a simple interface to read text files containing
-/// profiles. It keeps track of line number information and location of
-/// the file pointer. Users of this class are responsible for actually
-/// parsing the lines returned by the readLine function.
-///
-/// TODO - This does not really belong here. It is a generic text file
-/// reader. It should be moved to the Support library and made more general.
-class ExternalProfileTextLoader {
-public:
- ExternalProfileTextLoader(StringRef F) : Filename(F) {
- error_code EC;
- EC = MemoryBuffer::getFile(Filename, Buffer);
- if (EC)
- report_fatal_error("Could not open profile file " + Filename + ": " +
- EC.message());
- FP = Buffer->getBufferStart();
- Lineno = 0;
- }
-
- /// \brief Read a line from the mapped file.
- StringRef readLine() {
- size_t Length = 0;
- const char *start = FP;
- while (FP != Buffer->getBufferEnd() && *FP != '\n') {
- Length++;
- FP++;
- }
- if (FP != Buffer->getBufferEnd())
- FP++;
- Lineno++;
- return StringRef(start, Length);
- }
-
- /// \brief Return true, if we've reached EOF.
- bool atEOF() const { return FP == Buffer->getBufferEnd(); }
-
- /// \brief Report a parse error message and stop compilation.
- void reportParseError(Twine Msg) const {
- report_fatal_error(Filename + ":" + Twine(Lineno) + ": " + Msg + "\n");
- }
-
-private:
- /// \brief Memory buffer holding the text file.
- OwningPtr<MemoryBuffer> Buffer;
-
- /// \brief Current position into the memory buffer.
- const char *FP;
-
- /// \brief Current line number.
- int64_t Lineno;
-
- /// \brief Path name where to the profile file.
- StringRef Filename;
+ /// \brief Module being compiled. Used mainly to access the current
+ /// LLVM context for diagnostics.
+ const Module &M;
};
/// \brief Sample profile pass.
@@ -210,148 +289,241 @@ public:
static char ID;
SampleProfileLoader(StringRef Name = SampleProfileFile)
- : FunctionPass(ID), Profiler(0), Filename(Name) {
+ : FunctionPass(ID), Profiler(), Filename(Name), ProfileIsValid(false) {
initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
}
- virtual bool doInitialization(Module &M);
+ bool doInitialization(Module &M) override;
void dump() { Profiler->dump(); }
- virtual const char *getPassName() const { return "Sample profile pass"; }
+ const char *getPassName() const override { return "Sample profile pass"; }
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
}
protected:
/// \brief Profile reader object.
- OwningPtr<SampleProfile> Profiler;
+ std::unique_ptr<SampleModuleProfile> Profiler;
/// \brief Name of the profile file to load.
StringRef Filename;
+
+ /// \brief Flag indicating whether the profile input loaded successfully.
+ bool ProfileIsValid;
};
}
-/// \brief Print the function profile for \p FName on stream \p OS.
+/// \brief Print this function profile on stream \p OS.
///
/// \param OS Stream to emit the output to.
-/// \param FName Name of the function to print.
-void SampleProfile::printFunctionProfile(raw_ostream &OS, StringRef FName) {
- FunctionProfile FProfile = Profiles[FName];
- OS << "Function: " << FName << ", " << FProfile.TotalSamples << ", "
- << FProfile.TotalHeadSamples << ", " << FProfile.BodySamples.size()
+void SampleFunctionProfile::print(raw_ostream &OS) {
+ OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
<< " sampled lines\n";
- for (BodySampleMap::const_iterator SI = FProfile.BodySamples.begin(),
- SE = FProfile.BodySamples.end();
+ for (BodySampleMap::const_iterator SI = BodySamples.begin(),
+ SE = BodySamples.end();
SI != SE; ++SI)
- OS << "\tline offset: " << SI->first
+ OS << "\tline offset: " << SI->first.LineOffset
+ << ", discriminator: " << SI->first.Discriminator
<< ", number of samples: " << SI->second << "\n";
OS << "\n";
}
+/// \brief Print the weight of edge \p E on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param E Edge to print.
+void SampleFunctionProfile::printEdgeWeight(raw_ostream &OS, Edge E) {
+ OS << "weight[" << E.first->getName() << "->" << E.second->getName()
+ << "]: " << EdgeWeights[E] << "\n";
+}
+
+/// \brief Print the equivalence class of block \p BB on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param BB Block to print.
+void SampleFunctionProfile::printBlockEquivalence(raw_ostream &OS,
+ BasicBlock *BB) {
+ BasicBlock *Equiv = EquivalenceClass[BB];
+ OS << "equivalence[" << BB->getName()
+ << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
+}
+
+/// \brief Print the weight of block \p BB on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param BB Block to print.
+void SampleFunctionProfile::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
+ OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
+}
+
+/// \brief Print the function profile for \p FName on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param FName Name of the function to print.
+void SampleModuleProfile::printFunctionProfile(raw_ostream &OS,
+ StringRef FName) {
+ OS << "Function: " << FName << ":\n";
+ Profiles[FName].print(OS);
+}
+
/// \brief Dump the function profile for \p FName.
///
/// \param FName Name of the function to print.
-void SampleProfile::dumpFunctionProfile(StringRef FName) {
+void SampleModuleProfile::dumpFunctionProfile(StringRef FName) {
printFunctionProfile(dbgs(), FName);
}
/// \brief Dump all the function profiles found.
-void SampleProfile::dump() {
- for (StringMap<FunctionProfile>::const_iterator I = Profiles.begin(),
- E = Profiles.end();
+void SampleModuleProfile::dump() {
+ for (StringMap<SampleFunctionProfile>::const_iterator I = Profiles.begin(),
+ E = Profiles.end();
I != E; ++I)
dumpFunctionProfile(I->getKey());
}
/// \brief Load samples from a text file.
///
-/// The file is divided in two segments:
-///
-/// Symbol table (represented with the string "symbol table")
-/// Number of symbols in the table
-/// symbol 1
-/// symbol 2
-/// ...
-/// symbol N
+/// The file contains a list of samples for every function executed at
+/// runtime. Each function profile has the following format:
///
-/// Function body profiles
-/// function1:total_samples:total_head_samples:number_of_locations
-/// location_offset_1: number_of_samples
-/// location_offset_2: number_of_samples
+/// function1:total_samples:total_head_samples
+/// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
+/// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
/// ...
-/// location_offset_N: number_of_samples
+/// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
///
/// Function names must be mangled in order for the profile loader to
-/// match them in the current translation unit.
+/// match them in the current translation unit. The two numbers in the
+/// function header specify how many total samples were accumulated in
+/// the function (first number), and the total number of samples accumulated
+/// at the prologue of the function (second number). This head sample
+/// count provides an indicator of how frequently the function is invoked.
+///
+/// Each sampled line may contain several items. Some are optional
+/// (marked below):
+///
+/// a- Source line offset. This number represents the line number
+/// in the function where the sample was collected. The line number
+/// is always relative to the line where the symbol of the function
+/// is defined. So, if the function has its header at line 280,
+/// the offset 13 is at line 293 in the file.
+///
+/// b- [OPTIONAL] Discriminator. This is used if the sampled program
+/// was compiled with DWARF discriminator support
+/// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators)
+///
+/// c- Number of samples. This is the number of samples collected by
+/// the profiler at this source location.
+///
+/// d- [OPTIONAL] Potential call targets and samples. If present, this
+/// line contains a call instruction. This models both direct and
+/// indirect calls. Each called target is listed together with the
+/// number of samples. For example,
+///
+/// 130: 7 foo:3 bar:2 baz:7
+///
+/// The above means that at relative line offset 130 there is a
+/// call instruction that calls one of foo(), bar() and baz(), with
+/// baz() being the most frequent call target.
+///
+/// FIXME: This is currently unhandled, but it has a lot of
+/// potential for aiding the inliner.
+///
///
/// Since this is a flat profile, a function that shows up more than
/// once gets all its samples aggregated across all its instances.
-/// TODO - flat profiles are too imprecise to provide good optimization
-/// opportunities. Convert them to context-sensitive profile.
+///
+/// FIXME: flat profiles are too imprecise to provide good optimization
+/// opportunities. Convert them to context-sensitive profile.
///
/// This textual representation is useful to generate unit tests and
/// for debugging purposes, but it should not be used to generate
/// profiles for large programs, as the representation is extremely
/// inefficient.
-void SampleProfile::loadText() {
- ExternalProfileTextLoader Loader(Filename);
-
- // Read the symbol table.
- StringRef Line = Loader.readLine();
- if (Line != "symbol table")
- Loader.reportParseError("Expected 'symbol table', found " + Line);
- int NumSymbols;
- Line = Loader.readLine();
- if (Line.getAsInteger(10, NumSymbols))
- Loader.reportParseError("Expected a number, found " + Line);
- for (int I = 0; I < NumSymbols; I++) {
- StringRef FName = Loader.readLine();
- FunctionProfile &FProfile = Profiles[FName];
- FProfile.BodySamples.clear();
- FProfile.TotalSamples = 0;
- FProfile.TotalHeadSamples = 0;
+///
+/// \returns true if the file was loaded successfully, false otherwise.
+bool SampleModuleProfile::loadText() {
+ std::unique_ptr<MemoryBuffer> Buffer;
+ error_code EC = MemoryBuffer::getFile(Filename, Buffer);
+ if (EC) {
+ std::string Msg(EC.message());
+ M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
+ return false;
}
+ line_iterator LineIt(*Buffer, '#');
// Read the profile of each function. Since each function may be
// mentioned more than once, and we are collecting flat profiles,
// accumulate samples as we parse them.
- Regex HeadRE("^([^:]+):([0-9]+):([0-9]+):([0-9]+)$");
- Regex LineSample("^([0-9]+): ([0-9]+)$");
- while (!Loader.atEOF()) {
- SmallVector<StringRef, 4> Matches;
- Line = Loader.readLine();
- if (!HeadRE.match(Line, &Matches))
- Loader.reportParseError("Expected 'mangled_name:NUM:NUM:NUM', found " +
- Line);
- assert(Matches.size() == 5);
+ Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$");
+ Regex LineSample("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$");
+ while (!LineIt.is_at_eof()) {
+ // Read the header of each function.
+ //
+ // Note that for function identifiers we are actually expecting
+ // mangled names, but we may not always get them. This happens when
+ // the compiler decides not to emit the function (e.g., it was inlined
+ // and removed). In this case, the binary will not have the linkage
+ // name for the function, so the profiler will emit the function's
+ // unmangled name, which may contain characters like ':' and '>' in its
+ // name (member functions, templates, etc).
+ //
+ // The only requirement we place on the identifier, then, is that it
+ // should not begin with a number.
+ SmallVector<StringRef, 3> Matches;
+ if (!HeadRE.match(*LineIt, &Matches)) {
+ reportParseError(LineIt.line_number(),
+ "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
+ return false;
+ }
+ assert(Matches.size() == 4);
StringRef FName = Matches[1];
- unsigned NumSamples, NumHeadSamples, NumSampledLines;
+ unsigned NumSamples, NumHeadSamples;
Matches[2].getAsInteger(10, NumSamples);
Matches[3].getAsInteger(10, NumHeadSamples);
- Matches[4].getAsInteger(10, NumSampledLines);
- FunctionProfile &FProfile = Profiles[FName];
- FProfile.TotalSamples += NumSamples;
- FProfile.TotalHeadSamples += NumHeadSamples;
- BodySampleMap &SampleMap = FProfile.BodySamples;
- unsigned I;
- for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) {
- Line = Loader.readLine();
- if (!LineSample.match(Line, &Matches))
- Loader.reportParseError("Expected 'NUM: NUM', found " + Line);
- assert(Matches.size() == 3);
- unsigned LineOffset, NumSamples;
+ Profiles[FName] = SampleFunctionProfile();
+ SampleFunctionProfile &FProfile = Profiles[FName];
+ FProfile.addTotalSamples(NumSamples);
+ FProfile.addHeadSamples(NumHeadSamples);
+ ++LineIt;
+
+ // Now read the body. The body of the function ends when we reach
+ // EOF or when we see the start of the next function.
+ while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) {
+ if (!LineSample.match(*LineIt, &Matches)) {
+ reportParseError(
+ LineIt.line_number(),
+ "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt);
+ return false;
+ }
+ assert(Matches.size() == 5);
+ unsigned LineOffset, NumSamples, Discriminator = 0;
Matches[1].getAsInteger(10, LineOffset);
- Matches[2].getAsInteger(10, NumSamples);
- SampleMap[LineOffset] += NumSamples;
- }
+ if (Matches[2] != "")
+ Matches[2].getAsInteger(10, Discriminator);
+ Matches[3].getAsInteger(10, NumSamples);
+
+ // FIXME: Handle called targets (in Matches[4]).
- if (I < NumSampledLines)
- Loader.reportParseError("Unexpected end of file");
+ // When dealing with instruction weights, we use the value
+ // zero to indicate the absence of a sample. If we read an
+ // actual zero from the profile file, return it as 1 to
+ // avoid the confusion later on.
+ if (NumSamples == 0)
+ NumSamples = 1;
+ FProfile.addBodySamples(LineOffset, Discriminator, NumSamples);
+ ++LineIt;
+ }
}
+
+ return true;
}
/// \brief Get the weight for an instruction.
@@ -359,46 +531,49 @@ void SampleProfile::loadText() {
/// The "weight" of an instruction \p Inst is the number of samples
/// collected on that instruction at runtime. To retrieve it, we
/// need to compute the line number of \p Inst relative to the start of its
-/// function. We use \p FirstLineno to compute the offset. We then
-/// look up the samples collected for \p Inst using \p BodySamples.
+/// function. We use HeaderLineno to compute the offset. We then
+/// look up the samples collected for \p Inst using BodySamples.
///
/// \param Inst Instruction to query.
-/// \param FirstLineno Line number of the first instruction in the function.
-/// \param BodySamples Map of relative source line locations to samples.
///
/// \returns The profiled weight of I.
-uint32_t SampleProfile::getInstWeight(Instruction &Inst, unsigned FirstLineno,
- BodySampleMap &BodySamples) {
- unsigned LOffset = Inst.getDebugLoc().getLine() - FirstLineno + 1;
- return BodySamples.lookup(LOffset);
+unsigned SampleFunctionProfile::getInstWeight(Instruction &Inst) {
+ DebugLoc DLoc = Inst.getDebugLoc();
+ unsigned Lineno = DLoc.getLine();
+ if (Lineno < HeaderLineno)
+ return 0;
+
+ DILocation DIL(DLoc.getAsMDNode(*Ctx));
+ int LOffset = Lineno - HeaderLineno;
+ unsigned Discriminator = DIL.getDiscriminator();
+ unsigned Weight =
+ BodySamples.lookup(InstructionLocation(LOffset, Discriminator));
+ DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst
+ << " (line offset: " << LOffset << "." << Discriminator
+ << " - weight: " << Weight << ")\n");
+ return Weight;
}
/// \brief Compute the weight of a basic block.
///
/// The weight of basic block \p B is the maximum weight of all the
-/// instructions in B.
+/// instructions in B. The weight of \p B is computed and cached in
+/// the BlockWeights map.
///
/// \param B The basic block to query.
-/// \param FirstLineno The line number for the first line in the
-/// function holding B.
-/// \param BodySamples The map containing all the samples collected in that
-/// function.
///
/// \returns The computed weight of B.
-uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
- BodySampleMap &BodySamples) {
+unsigned SampleFunctionProfile::getBlockWeight(BasicBlock *B) {
// If we've computed B's weight before, return it.
- Function *F = B->getParent();
- FunctionProfile &FProfile = Profiles[F->getName()];
std::pair<BlockWeightMap::iterator, bool> Entry =
- FProfile.BlockWeights.insert(std::make_pair(B, 0));
+ BlockWeights.insert(std::make_pair(B, 0));
if (!Entry.second)
return Entry.first->second;
// Otherwise, compute and cache B's weight.
- uint32_t Weight = 0;
+ unsigned Weight = 0;
for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
- uint32_t InstWeight = getInstWeight(*I, FirstLineno, BodySamples);
+ unsigned InstWeight = getInstWeight(*I);
if (InstWeight > Weight)
Weight = InstWeight;
}
@@ -406,31 +581,344 @@ uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
return Weight;
}
-/// \brief Generate branch weight metadata for all branches in \p F.
+/// \brief Compute and store the weights of every basic block.
///
-/// For every branch instruction B in \p F, we compute the weight of the
-/// target block for each of the edges out of B. This is the weight
-/// that we associate with that branch.
+/// This populates the BlockWeights map by computing
+/// the weights of every basic block in the CFG.
///
-/// TODO - This weight assignment will most likely be wrong if the
-/// target branch has more than two predecessors. This needs to be done
-/// using some form of flow propagation.
+/// \param F The function to query.
+bool SampleFunctionProfile::computeBlockWeights(Function &F) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Block weights\n");
+ for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
+ unsigned Weight = getBlockWeight(B);
+ Changed |= (Weight > 0);
+ DEBUG(printBlockWeight(dbgs(), B));
+ }
+
+ return Changed;
+}
+
+/// \brief Find equivalence classes for the given block.
///
-/// Once all the branch weights are computed, we emit the MD_prof
-/// metadata on B using the computed values.
+/// This finds all the blocks that are guaranteed to execute the same
+/// number of times as \p BB1. To do this, it traverses all the
+/// descendants of \p BB1 in the dominator or post-dominator tree.
+///
+/// A block BB2 will be in the same equivalence class as \p BB1 if
+/// the following holds:
+///
+/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
+/// is a descendant of \p BB1 in the dominator tree, then BB2 should
+/// dominate BB1 in the post-dominator tree.
+///
+/// 2- Both BB2 and \p BB1 must be in the same loop.
+///
+/// For every block BB2 that meets those two requirements, we set BB2's
+/// equivalence class to \p BB1.
+///
+/// \param BB1 Block to check.
+/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
+/// \param DomTree Opposite dominator tree. If \p Descendants is filled
+/// with blocks from \p BB1's dominator tree, then
+/// this is the post-dominator tree, and vice versa.
+void SampleFunctionProfile::findEquivalencesFor(
+ BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
+ DominatorTreeBase<BasicBlock> *DomTree) {
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Descendants.begin(),
+ E = Descendants.end();
+ I != E; ++I) {
+ BasicBlock *BB2 = *I;
+ bool IsDomParent = DomTree->dominates(BB2, BB1);
+ bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
+ if (BB1 != BB2 && VisitedBlocks.insert(BB2) && IsDomParent &&
+ IsInSameLoop) {
+ EquivalenceClass[BB2] = BB1;
+
+ // If BB2 is heavier than BB1, make BB1 have the same weight
+ // as BB2.
+ //
+ // Note that we don't worry about the opposite situation here
+ // (when BB2 is lighter than BB1). We will deal with this
+ // during the propagation phase. Right now, we just want to
+ // make sure that BB1 has the largest weight of all the
+ // members of its equivalence set.
+ unsigned &BB1Weight = BlockWeights[BB1];
+ unsigned &BB2Weight = BlockWeights[BB2];
+ BB1Weight = std::max(BB1Weight, BB2Weight);
+ }
+ }
+}
+
+/// \brief Find equivalence classes.
+///
+/// Since samples may be missing from blocks, we can fill in the gaps by setting
+/// the weights of all the blocks in the same equivalence class to the same
+/// weight. To compute the concept of equivalence, we use dominance and loop
+/// information. Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
///
/// \param F The function to query.
-bool SampleProfile::emitAnnotations(Function &F) {
+void SampleFunctionProfile::findEquivalenceClasses(Function &F) {
+ SmallVector<BasicBlock *, 8> DominatedBBs;
+ DEBUG(dbgs() << "\nBlock equivalence classes\n");
+ // Find equivalence sets based on dominance and post-dominance information.
+ for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
+ BasicBlock *BB1 = B;
+
+ // Compute BB1's equivalence class once.
+ if (EquivalenceClass.count(BB1)) {
+ DEBUG(printBlockEquivalence(dbgs(), BB1));
+ continue;
+ }
+
+ // By default, blocks are in their own equivalence class.
+ EquivalenceClass[BB1] = BB1;
+
+ // Traverse all the blocks dominated by BB1. We are looking for
+ // every basic block BB2 such that:
+ //
+ // 1- BB1 dominates BB2.
+ // 2- BB2 post-dominates BB1.
+ // 3- BB1 and BB2 are in the same loop nest.
+ //
+ // If all those conditions hold, it means that BB2 is executed
+ // as many times as BB1, so they are placed in the same equivalence
+ // class by making BB2's equivalence class be BB1.
+ DominatedBBs.clear();
+ DT->getDescendants(BB1, DominatedBBs);
+ findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
+
+ // Repeat the same logic for all the blocks post-dominated by BB1.
+ // We are looking for every basic block BB2 such that:
+ //
+ // 1- BB1 post-dominates BB2.
+ // 2- BB2 dominates BB1.
+ // 3- BB1 and BB2 are in the same loop nest.
+ //
+ // If all those conditions hold, BB2's equivalence class is BB1.
+ DominatedBBs.clear();
+ PDT->getDescendants(BB1, DominatedBBs);
+ findEquivalencesFor(BB1, DominatedBBs, DT);
+
+ DEBUG(printBlockEquivalence(dbgs(), BB1));
+ }
+
+ // Assign weights to equivalence classes.
+ //
+ // All the basic blocks in the same equivalence class will execute
+ // the same number of times. Since we know that the head block in
+ // each equivalence class has the largest weight, assign that weight
+ // to all the blocks in that equivalence class.
+ DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
+ for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
+ BasicBlock *BB = B;
+ BasicBlock *EquivBB = EquivalenceClass[BB];
+ if (BB != EquivBB)
+ BlockWeights[BB] = BlockWeights[EquivBB];
+ DEBUG(printBlockWeight(dbgs(), BB));
+ }
+}
+
+/// \brief Visit the given edge to decide if it has a valid weight.
+///
+/// If \p E has not been visited before, we copy it to \p UnknownEdge
+/// and increment the count of unknown edges.
+///
+/// \param E Edge to visit.
+/// \param NumUnknownEdges Current number of unknown edges.
+/// \param UnknownEdge Set if E has not been visited before.
+///
+/// \returns E's weight, if known. Otherwise, return 0.
+unsigned SampleFunctionProfile::visitEdge(Edge E, unsigned *NumUnknownEdges,
+ Edge *UnknownEdge) {
+ if (!VisitedEdges.count(E)) {
+ (*NumUnknownEdges)++;
+ *UnknownEdge = E;
+ return 0;
+ }
+
+ return EdgeWeights[E];
+}
+
+/// \brief Propagate weights through incoming/outgoing edges.
+///
+/// If the weight of a basic block is known, and there is only one edge
+/// with an unknown weight, we can calculate the weight of that edge.
+///
+/// Similarly, if all the edges have a known count, we can calculate the
+/// count of the basic block, if needed.
+///
+/// \param F Function to process.
+///
+/// \returns True if new weights were assigned to edges or blocks.
+bool SampleFunctionProfile::propagateThroughEdges(Function &F) {
bool Changed = false;
- FunctionProfile &FProfile = Profiles[F.getName()];
- unsigned FirstLineno = inst_begin(F)->getDebugLoc().getLine();
- MDBuilder MDB(F.getContext());
+ DEBUG(dbgs() << "\nPropagation through edges\n");
+ for (Function::iterator BI = F.begin(), EI = F.end(); BI != EI; ++BI) {
+ BasicBlock *BB = BI;
+
+ // Visit all the predecessor and successor edges to determine
+ // which ones have a weight assigned already. Note that it doesn't
+ // matter that we only keep track of a single unknown edge. The
+ // only case we are interested in handling is when only a single
+ // edge is unknown (see setEdgeOrBlockWeight).
+ for (unsigned i = 0; i < 2; i++) {
+ unsigned TotalWeight = 0;
+ unsigned NumUnknownEdges = 0;
+ Edge UnknownEdge, SelfReferentialEdge;
+
+ if (i == 0) {
+ // First, visit all predecessor edges.
+ for (size_t I = 0; I < Predecessors[BB].size(); I++) {
+ Edge E = std::make_pair(Predecessors[BB][I], BB);
+ TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+ if (E.first == E.second)
+ SelfReferentialEdge = E;
+ }
+ } else {
+ // On the second round, visit all successor edges.
+ for (size_t I = 0; I < Successors[BB].size(); I++) {
+ Edge E = std::make_pair(BB, Successors[BB][I]);
+ TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
+ }
+ }
+
+ // After visiting all the edges, there are three cases that we
+ // can handle immediately:
+ //
+ // - All the edge weights are known (i.e., NumUnknownEdges == 0).
+ // In this case, if the sum of all the edges is larger than
+ // BB's weight, we raise BB's weight to match the sum.
+ // Additionally, if BB had not been visited before,
+ // we mark it visited.
+ //
+ // - Only one edge is unknown and BB has already been visited.
+ // In this case, we can compute the weight of the edge by
+ // subtracting the sum of all the known edge weights from
+ // the block weight. If the edges weigh more than BB, then the
+ // weight of the last remaining edge is set to zero.
+ //
+ // - There exists a self-referential edge and the weight of BB is
+ // known. In this case, this edge can be based on BB's weight.
+ // We add up all the other known edges and set the weight on
+ // the self-referential edge as we did in the previous case.
+ //
+ // In any other case, we must continue iterating. Eventually,
+ // all edges will get a weight, or iteration will stop when
+ // it reaches SampleProfileMaxPropagateIterations.
+ if (NumUnknownEdges <= 1) {
+ unsigned &BBWeight = BlockWeights[BB];
+ if (NumUnknownEdges == 0) {
+ // If we already know the weight of all edges, the weight of the
+ // basic block can be computed. It should be no smaller than the sum
+ // of all edge weights.
+ if (TotalWeight > BBWeight) {
+ BBWeight = TotalWeight;
+ Changed = true;
+ DEBUG(dbgs() << "All edge weights for " << BB->getName()
+ << " known. Set weight for block: ";
+ printBlockWeight(dbgs(), BB););
+ }
+ if (VisitedBlocks.insert(BB))
+ Changed = true;
+ } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
+ // If there is a single unknown edge and the block has been
+ // visited, then we can compute E's weight.
+ if (BBWeight >= TotalWeight)
+ EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
+ else
+ EdgeWeights[UnknownEdge] = 0;
+ VisitedEdges.insert(UnknownEdge);
+ Changed = true;
+ DEBUG(dbgs() << "Set weight for edge: ";
+ printEdgeWeight(dbgs(), UnknownEdge));
+ }
+ } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
+ unsigned &BBWeight = BlockWeights[BB];
+ // We have a self-referential edge and the weight of BB is known.
+ if (BBWeight >= TotalWeight)
+ EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
+ else
+ EdgeWeights[SelfReferentialEdge] = 0;
+ VisitedEdges.insert(SelfReferentialEdge);
+ Changed = true;
+ DEBUG(dbgs() << "Set self-referential edge weight to: ";
+ printEdgeWeight(dbgs(), SelfReferentialEdge));
+ }
+ }
+ }
+
+ return Changed;
+}
+
+/// \brief Build in/out edge lists for each basic block in the CFG.
+///
+/// We are interested in unique edges. If a block B1 has multiple
+/// edges to another block B2, we only add a single B1->B2 edge.
+void SampleFunctionProfile::buildEdges(Function &F) {
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *B1 = I;
- // Clear the block weights cache.
- FProfile.BlockWeights.clear();
+ // Add predecessors for B1.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ if (!Predecessors[B1].empty())
+ llvm_unreachable("Found a stale predecessors list in a basic block.");
+ for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
+ BasicBlock *B2 = *PI;
+ if (Visited.insert(B2))
+ Predecessors[B1].push_back(B2);
+ }
+
+ // Add successors for B1.
+ Visited.clear();
+ if (!Successors[B1].empty())
+ llvm_unreachable("Found a stale successors list in a basic block.");
+ for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
+ BasicBlock *B2 = *SI;
+ if (Visited.insert(B2))
+ Successors[B1].push_back(B2);
+ }
+ }
+}
+
+/// \brief Propagate weights into edges
+///
+/// The following rules are applied to every block B in the CFG:
+///
+/// - If B has a single predecessor/successor, then the weight
+/// of that edge is the weight of the block.
+///
+/// - If all incoming or outgoing edges are known except one, and the
+/// weight of the block is already known, the weight of the unknown
+/// edge will be the weight of the block minus the sum of all the known
+/// edges. If the sum of all the known edges is larger than B's weight,
+/// we set the unknown edge weight to zero.
+///
+/// - If there is a self-referential edge, and the weight of the block is
+/// known, the weight for that edge is set to the weight of the block
+/// minus the weight of the other incoming edges to that block (if
+/// known).
+void SampleFunctionProfile::propagateWeights(Function &F) {
+ bool Changed = true;
+ unsigned i = 0;
+
+ // Before propagation starts, build, for each block, a list of
+ // unique predecessors and successors. This is necessary to handle
+ // identical edges in multiway branches. Since we visit all blocks and all
+ // edges of the CFG, it is cleaner to build these lists once at the start
+ // of the pass.
+ buildEdges(F);
- // When we find a branch instruction: For each edge E out of the branch,
- // the weight of E is the weight of the target block.
+ // Propagate until we converge or we go past the iteration limit.
+ while (Changed && i++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F);
+ }
+
+ // Generate MD_prof metadata for every branch instruction using the
+ // edge weights computed during propagation.
+ DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
+ MDBuilder MDB(F.getContext());
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *B = I;
TerminatorInst *TI = B->getTerminator();
@@ -439,34 +927,155 @@ bool SampleProfile::emitAnnotations(Function &F) {
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
continue;
- SmallVector<uint32_t, 4> Weights;
- unsigned NSuccs = TI->getNumSuccessors();
- for (unsigned I = 0; I < NSuccs; ++I) {
+ DEBUG(dbgs() << "\nGetting weights for branch at line "
+ << TI->getDebugLoc().getLine() << ".\n");
+ SmallVector<unsigned, 4> Weights;
+ bool AllWeightsZero = true;
+ for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
- uint32_t Weight =
- computeBlockWeight(Succ, FirstLineno, FProfile.BodySamples);
+ Edge E = std::make_pair(B, Succ);
+ unsigned Weight = EdgeWeights[E];
+ DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
Weights.push_back(Weight);
+ if (Weight != 0)
+ AllWeightsZero = false;
}
- TI->setMetadata(llvm::LLVMContext::MD_prof,
- MDB.createBranchWeights(Weights));
- Changed = true;
+ // Only set weights if there is at least one non-zero weight.
+ // In any other case, let the analyzer set weights.
+ if (!AllWeightsZero) {
+ DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
+ TI->setMetadata(llvm::LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ } else {
+ DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+ }
}
+}
- return Changed;
+/// \brief Get the line number for the function header.
+///
+/// This looks up function \p F in the current compilation unit and
+/// retrieves the line number where the function is defined. This is
+/// line 0 for all the samples read from the profile file. Every line
+/// number is relative to this line.
+///
+/// \param F Function object to query.
+///
+/// \returns the line number where \p F is defined. If it returns 0,
+/// it means that there is no debug information available for \p F.
+unsigned SampleFunctionProfile::getFunctionLoc(Function &F) {
+ NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu");
+ if (CUNodes) {
+ for (unsigned I = 0, E1 = CUNodes->getNumOperands(); I != E1; ++I) {
+ DICompileUnit CU(CUNodes->getOperand(I));
+ DIArray Subprograms = CU.getSubprograms();
+ for (unsigned J = 0, E2 = Subprograms.getNumElements(); J != E2; ++J) {
+ DISubprogram Subprogram(Subprograms.getElement(J));
+ if (Subprogram.describes(&F))
+ return Subprogram.getLineNumber();
+ }
+ }
+ }
+
+ F.getContext().diagnose(DiagnosticInfoSampleProfile(
+ "No debug information found in function " + F.getName()));
+ return 0;
}
-char SampleProfileLoader::ID = 0;
-INITIALIZE_PASS(SampleProfileLoader, "sample-profile", "Sample Profile loader",
- false, false)
+/// \brief Generate branch weight metadata for all branches in \p F.
+///
+/// Branch weights are computed out of instruction samples using a
+/// propagation heuristic. Propagation proceeds in 3 phases:
+///
+/// 1- Assignment of block weights. All the basic blocks in the function
+/// are initially assigned the same weight as their most frequently
+/// executed instruction.
+///
+/// 2- Creation of equivalence classes. Since samples may be missing from
+/// blocks, we can fill in the gaps by setting the weights of all the
+/// blocks in the same equivalence class to the same weight. To compute
+/// the concept of equivalence, we use dominance and loop information.
+/// Two blocks B1 and B2 are in the same equivalence class if B1
+/// dominates B2, B2 post-dominates B1 and both are in the same loop.
+///
+/// 3- Propagation of block weights into edges. This uses a simple
+/// propagation heuristic. The following rules are applied to every
+/// block B in the CFG:
+///
+/// - If B has a single predecessor/successor, then the weight
+/// of that edge is the weight of the block.
+///
+/// - If all the edges are known except one, and the weight of the
+/// block is already known, the weight of the unknown edge will
+/// be the weight of the block minus the sum of all the known
+/// edges. If the sum of all the known edges is larger than B's weight,
+/// we set the unknown edge weight to zero.
+///
+/// - If there is a self-referential edge, and the weight of the block is
+/// known, the weight for that edge is set to the weight of the block
+/// minus the weight of the other incoming edges to that block (if
+/// known).
+///
+/// Since this propagation is not guaranteed to finalize for every CFG, we
+/// only allow it to proceed for a limited number of iterations (controlled
+/// by -sample-profile-max-propagate-iterations).
+///
+/// FIXME: Try to replace this propagation heuristic with a scheme
+/// that is guaranteed to finalize. A work-list approach similar to
+/// the standard value propagation algorithm used by SSA-CCP might
+/// work here.
+///
+/// Once all the branch weights are computed, we emit the MD_prof
+/// metadata on B using the computed values for each of its branches.
+///
+/// \param F The function to query.
+///
+/// \returns true if \p F was modified. Returns false, otherwise.
+bool SampleFunctionProfile::emitAnnotations(Function &F, DominatorTree *DomTree,
+ PostDominatorTree *PostDomTree,
+ LoopInfo *Loops) {
+ bool Changed = false;
-bool SampleProfileLoader::runOnFunction(Function &F) {
- return Profiler->emitAnnotations(F);
+ // Initialize invariants used during computation and propagation.
+ HeaderLineno = getFunctionLoc(F);
+ if (HeaderLineno == 0)
+ return false;
+
+ DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
+ << ": " << HeaderLineno << "\n");
+ DT = DomTree;
+ PDT = PostDomTree;
+ LI = Loops;
+ Ctx = &F.getParent()->getContext();
+
+ // Compute basic block weights.
+ Changed |= computeBlockWeights(F);
+
+ if (Changed) {
+ // Find equivalence classes.
+ findEquivalenceClasses(F);
+
+ // Propagate weights to all edges.
+ propagateWeights(F);
+ }
+
+ return Changed;
}
+char SampleProfileLoader::ID = 0;
+INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
+ "Sample Profile loader", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
+INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
+ "Sample Profile loader", false, false)
+
bool SampleProfileLoader::doInitialization(Module &M) {
- Profiler.reset(new SampleProfile(Filename));
- Profiler->loadText();
+ Profiler.reset(new SampleModuleProfile(M, Filename));
+ ProfileIsValid = Profiler->loadText();
return true;
}
@@ -477,3 +1086,15 @@ FunctionPass *llvm::createSampleProfileLoaderPass() {
FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
return new SampleProfileLoader(Name);
}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
+ if (!ProfileIsValid)
+ return false;
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ PostDominatorTree *PDT = &getAnalysis<PostDominatorTree>();
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+ SampleFunctionProfile &FunctionProfile = Profiler->getProfile(F);
+ if (!FunctionProfile.empty())
+ return FunctionProfile.emitAnnotations(F, DT, PDT, LI);
+ return false;
+}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 857597e..e950eba 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -17,8 +17,8 @@
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/Scalar.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
@@ -29,11 +29,12 @@ using namespace llvm;
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
initializeSampleProfileLoaderPass(Registry);
- initializeCodeGenPreparePass(Registry);
+ initializeConstantHoistingPass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
initializeDCEPass(Registry);
initializeDeadInstEliminationPass(Registry);
+ initializeScalarizerPass(Registry);
initializeDSEPass(Registry);
initializeGVNPass(Registry);
initializeEarlyCSEPass(Registry);
@@ -81,6 +82,10 @@ void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDeadStoreEliminationPass());
}
+void LLVMAddScalarizerPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createScalarizerPass());
+}
+
void LLVMAddGVNPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createGVNPass());
}
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 57b290e..e7b5ab2 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -24,15 +24,17 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -41,10 +43,8 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -80,14 +80,14 @@ namespace {
ScalarLoadThreshold = SLT;
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
bool performScalarRepl(Function &F);
bool performPromotion(Function &F);
private:
bool HasDomTree;
- DataLayout *TD;
+ const DataLayout *DL;
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
@@ -195,8 +195,8 @@ namespace {
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
};
@@ -212,7 +212,7 @@ namespace {
// getAnalysisUsage - This pass does not require any passes, but we know it
// will not alter the CFG, so say so.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
};
@@ -224,7 +224,7 @@ char SROA_SSAUp::ID = 0;
INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
"Scalar Replacement of Aggregates (DT)", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
"Scalar Replacement of Aggregates (DT)", false, false)
@@ -258,7 +258,7 @@ namespace {
class ConvertToScalarInfo {
/// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
- const DataLayout &TD;
+ const DataLayout &DL;
unsigned ScalarLoadThreshold;
/// IsNotTrivial - This is set to true if there is some access to the object
@@ -301,9 +301,9 @@ class ConvertToScalarInfo {
bool HadDynamicAccess;
public:
- explicit ConvertToScalarInfo(unsigned Size, const DataLayout &td,
+ explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
unsigned SLT)
- : AllocaSize(Size), TD(td), ScalarLoadThreshold(SLT), IsNotTrivial(false),
+ : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false),
ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
HadDynamicAccess(false) { }
@@ -364,7 +364,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
return 0;
if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
- !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
+ !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
return 0;
// Dynamic accesses on integers aren't yet supported. They need us to shift
// by a dynamic amount which could be difficult to work out as we might not
@@ -466,10 +466,10 @@ bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
/// SawVec flag.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
Value* NonConstantIdx) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (User *U : V->users()) {
+ Instruction *UI = cast<Instruction>(U);
- if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
// Don't break volatile loads.
if (!LI->isSimple())
return false;
@@ -481,7 +481,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
continue;
}
- if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
// Storing the pointer, not into the value?
if (SI->getOperand(0) == V || !SI->isSimple()) return false;
// Don't touch MMX operations.
@@ -492,7 +492,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
continue;
}
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(UI)) {
if (!onlyUsedByLifetimeMarkers(BCI))
IsNotTrivial = true; // Can't be mem2reg'd.
if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
@@ -500,7 +500,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
continue;
}
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UI)) {
// If this is a GEP with a variable indices, we can't handle it.
PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
if (!PtrTy)
@@ -520,7 +520,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
HadDynamicAccess = true;
} else
GEPNonConstantIdx = NonConstantIdx;
- uint64_t GEPOffset = TD.getIndexedOffset(PtrTy,
+ uint64_t GEPOffset = DL.getIndexedOffset(PtrTy,
Indices);
// See if all uses can be converted.
if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
@@ -532,7 +532,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
// If this is a constant sized memset of a constant value (e.g. 0) we can
// handle it.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(UI)) {
// Store to dynamic index.
if (NonConstantIdx)
return false;
@@ -559,7 +559,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
// If this is a memcpy or memmove into or out of the whole allocation, we
// can handle it like a load or store of the scalar type.
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(UI)) {
// Store to dynamic index.
if (NonConstantIdx)
return false;
@@ -572,7 +572,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
}
// If this is a lifetime intrinsic, we can handle it.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UI)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end) {
continue;
@@ -597,7 +597,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
uint64_t Offset,
Value* NonConstantIdx) {
while (!Ptr->use_empty()) {
- Instruction *User = cast<Instruction>(Ptr->use_back());
+ Instruction *User = cast<Instruction>(Ptr->user_back());
if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
@@ -615,7 +615,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
GEPNonConstantIdx = Indices.pop_back_val();
} else
GEPNonConstantIdx = NonConstantIdx;
- uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ uint64_t GEPOffset = DL.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
GEP->eraseFromParent();
@@ -692,9 +692,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &DL, 0));
- if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
+ if (GetUnderlyingObject(MTI->getSource(), &DL, 0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
@@ -710,7 +710,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
+ } else if (GetUnderlyingObject(MTI->getDest(), &DL, 0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
@@ -770,15 +770,15 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
- unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
- unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+ unsigned FromTypeSize = DL.getTypeAllocSize(FromType);
+ unsigned ToTypeSize = DL.getTypeAllocSize(ToType);
if (FromTypeSize == ToTypeSize)
return Builder.CreateBitCast(FromVal, ToType);
// Otherwise it must be an element access.
unsigned Elt = 0;
if (Offset) {
- unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ unsigned EltSize = DL.getTypeAllocSizeInBits(VTy->getElementType());
Elt = Offset/EltSize;
assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
}
@@ -804,7 +804,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
if (StructType *ST = dyn_cast<StructType>(ToType)) {
assert(!NonConstantIdx &&
"Dynamic indexing into struct types not supported");
- const StructLayout &Layout = *TD.getStructLayout(ST);
+ const StructLayout &Layout = *DL.getStructLayout(ST);
Value *Res = UndefValue::get(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
@@ -818,7 +818,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
assert(!NonConstantIdx &&
"Dynamic indexing into array types not supported");
- uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
@@ -834,12 +834,12 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
// If this is a big-endian system and the load is narrower than the
// full alloca type, we need to do a shift to get the right bits.
int ShAmt = 0;
- if (TD.isBigEndian()) {
+ if (DL.isBigEndian()) {
// On big-endian machines, the lowest bit is stored at the bit offset
// from the pointer given by getTypeStoreSizeInBits. This matters for
// integers with a bitwidth that is not a multiple of 8.
- ShAmt = TD.getTypeStoreSizeInBits(NTy) -
- TD.getTypeStoreSizeInBits(ToType) - Offset;
+ ShAmt = DL.getTypeStoreSizeInBits(NTy) -
+ DL.getTypeStoreSizeInBits(ToType) - Offset;
} else {
ShAmt = Offset;
}
@@ -855,7 +855,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
ConstantInt::get(FromVal->getType(), -ShAmt));
// Finally, unconditionally truncate the integer to the right width.
- unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
+ unsigned LIBitWidth = DL.getTypeSizeInBits(ToType);
if (LIBitWidth < NTy->getBitWidth())
FromVal =
Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
@@ -902,8 +902,8 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
LLVMContext &Context = Old->getContext();
if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
- uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
- uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());
+ uint64_t VecSize = DL.getTypeAllocSizeInBits(VTy);
+ uint64_t ValSize = DL.getTypeAllocSizeInBits(SV->getType());
// Changing the whole vector with memset or with an access of a different
// vector type?
@@ -914,7 +914,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
Type *EltTy = VTy->getElementType();
if (SV->getType() != EltTy)
SV = Builder.CreateBitCast(SV, EltTy);
- uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSizeInBits(EltTy);
unsigned Elt = Offset/EltSize;
Value *Idx;
if (NonConstantIdx) {
@@ -933,7 +933,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
assert(!NonConstantIdx &&
"Dynamic indexing into struct types not supported");
- const StructLayout &Layout = *TD.getStructLayout(ST);
+ const StructLayout &Layout = *DL.getStructLayout(ST);
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
@@ -946,7 +946,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
assert(!NonConstantIdx &&
"Dynamic indexing into array types not supported");
- uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
@@ -956,14 +956,14 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// If SV is a float, convert it to the appropriate integer type.
// If it is a pointer, do the same.
- unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
- unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
- unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
- unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
+ unsigned SrcWidth = DL.getTypeSizeInBits(SV->getType());
+ unsigned DestWidth = DL.getTypeSizeInBits(AllocaType);
+ unsigned SrcStoreWidth = DL.getTypeStoreSizeInBits(SV->getType());
+ unsigned DestStoreWidth = DL.getTypeStoreSizeInBits(AllocaType);
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType()));
+ SV = Builder.CreatePtrToInt(SV, DL.getIntPtrType(SV->getType()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
@@ -982,7 +982,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// If this is a big-endian system and the store is narrower than the
// full alloca type, we need to do a shift to get the right bits.
int ShAmt = 0;
- if (TD.isBigEndian()) {
+ if (DL.isBigEndian()) {
// On big-endian machines, the lowest bit is stored at the bit offset
// from the pointer given by getTypeStoreSizeInBits. This matters for
// integers with a bitwidth that is not a multiple of 8.
@@ -1020,7 +1020,11 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
bool SROA::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<DataLayout>();
+ if (skipOptnoneFunction(F))
+ return false;
+
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
bool Changed = performPromotion(F);
@@ -1028,7 +1032,7 @@ bool SROA::runOnFunction(Function &F) {
// theoretically needs to. It should be refactored in order to support
// target-independent IR. Until this is done, just skip the actual
// scalar-replacement portion of this pass.
- if (!TD) return Changed;
+ if (!DL) return Changed;
while (1) {
bool LocalChange = performScalarRepl(F);
@@ -1056,11 +1060,10 @@ public:
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI)) {
- for (Value::use_iterator UI = DebugNode->use_begin(),
- E = DebugNode->use_end(); UI != E; ++UI)
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ for (User *U : DebugNode->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
- else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
DVIs.push_back(DVI);
}
@@ -1078,14 +1081,14 @@ public:
}
}
- virtual bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &Insts) const {
+ bool isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts) const override {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->getOperand(0) == AI;
return cast<StoreInst>(I)->getPointerOperand() == AI;
}
- virtual void updateDebugInfo(Instruction *Inst) const {
+ void updateDebugInfo(Instruction *Inst) const override {
for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
@@ -1134,22 +1137,21 @@ public:
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *TD) {
+static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
- for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ for (User *U : SI->users()) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
if (LI == 0 || !LI->isSimple()) return false;
// Both operands to the select need to be dereferencable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
- LI->getAlignment(), TD))
+ LI->getAlignment(), DL))
return false;
if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
- LI->getAlignment(), TD))
+ LI->getAlignment(), DL))
return false;
}
@@ -1172,16 +1174,15 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *TD) {
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *TD) {
+static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
// For now, we can only do this promotion if the load is in the same block as
// the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
// TODO: Allow stores.
BasicBlock *BB = PN->getParent();
unsigned MaxAlign = 0;
- for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
+ for (User *U : PN->users()) {
+ LoadInst *LI = dyn_cast<LoadInst>(U);
if (LI == 0 || !LI->isSimple()) return false;
// For now we only allow loads in the same block as the PHI. This is a
@@ -1222,7 +1223,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *TD) {
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
if (InVal->isDereferenceablePointer() ||
- isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL))
continue;
return false;
@@ -1236,13 +1237,10 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *TD) {
/// direct (non-volatile) loads and stores to it. If the alloca is close but
/// not quite there, this will transform the code to allow promotion. As such,
/// it is a non-pure predicate.
-static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
SetVector<Instruction*, SmallVector<Instruction*, 4>,
SmallPtrSet<Instruction*, 4> > InstsToRewrite;
-
- for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE; ++UI) {
- User *U = *UI;
+ for (User *U : AI->users()) {
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (!LI->isSimple())
return false;
@@ -1265,12 +1263,12 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
// This is very rare and we just scrambled the use list of AI, start
// over completely.
- return tryToMakeAllocaBePromotable(AI, TD);
+ return tryToMakeAllocaBePromotable(AI, DL);
}
// If it is safe to turn "load (select c, AI, ptr)" into a select of two
// loads, then we can transform this by rewriting the select.
- if (!isSafeSelectToSpeculate(SI, TD))
+ if (!isSafeSelectToSpeculate(SI, DL))
return false;
InstsToRewrite.insert(SI);
@@ -1285,7 +1283,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
// If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
// in the pred blocks, then we can transform this by rewriting the PHI.
- if (!isSafePHIToSpeculate(PN, TD))
+ if (!isSafePHIToSpeculate(PN, DL))
return false;
InstsToRewrite.insert(PN);
@@ -1312,12 +1310,9 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
// This could only be a bitcast used by nothing but lifetime intrinsics.
- for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end();
- I != E;) {
- Use &U = I.getUse();
- ++I;
- cast<Instruction>(U.getUser())->eraseFromParent();
- }
+ for (BitCastInst::user_iterator I = BCI->user_begin(), E = BCI->user_end();
+ I != E;)
+ cast<Instruction>(*I++)->eraseFromParent();
BCI->eraseFromParent();
continue;
}
@@ -1326,7 +1321,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
// Selects in InstsToRewrite only have load uses. Rewrite each as two
// loads with a new select.
while (!SI->use_empty()) {
- LoadInst *LI = cast<LoadInst>(SI->use_back());
+ LoadInst *LI = cast<LoadInst>(SI->user_back());
IRBuilder<> Builder(LI);
LoadInst *TrueLoad =
@@ -1367,13 +1362,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *TD) {
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ, it doesn't matter.
- LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
+ LoadInst *SomeLoad = cast<LoadInst>(PN->user_back());
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
unsigned Align = SomeLoad->getAlignment();
// Rewrite all loads of the PN to use the new PHI.
while (!PN->use_empty()) {
- LoadInst *LI = cast<LoadInst>(PN->use_back());
+ LoadInst *LI = cast<LoadInst>(PN->user_back());
LI->replaceAllUsesWith(NewPN);
LI->eraseFromParent();
}
@@ -1407,7 +1402,7 @@ bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
DominatorTree *DT = 0;
if (HasDomTree)
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
DIBuilder DIB(*F.getParent());
@@ -1420,7 +1415,7 @@ bool SROA::performPromotion(Function &F) {
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (tryToMakeAllocaBePromotable(AI, TD))
+ if (tryToMakeAllocaBePromotable(AI, DL))
Allocas.push_back(AI);
if (Allocas.empty()) break;
@@ -1433,9 +1428,8 @@ bool SROA::performPromotion(Function &F) {
AllocaInst *AI = Allocas[i];
// Build list of instructions to promote.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ++UI)
- Insts.push_back(cast<Instruction>(*UI));
+ for (User *U : AI->users())
+ Insts.push_back(cast<Instruction>(U));
AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
Insts.clear();
}
@@ -1496,7 +1490,7 @@ bool SROA::performScalarRepl(Function &F) {
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
// value cannot be decomposed at all.
- uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
+ uint64_t AllocaSize = DL->getTypeAllocSize(AI->getAllocatedType());
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
@@ -1520,7 +1514,7 @@ bool SROA::performScalarRepl(Function &F) {
// that we can't just check based on the type: the alloca may be of an i32
// but that has pointer arithmetic to set byte 3 of it or something.
if (AllocaInst *NewAI = ConvertToScalarInfo(
- (unsigned)AllocaSize, *TD, ScalarLoadThreshold).TryConvert(AI)) {
+ (unsigned)AllocaSize, *DL, ScalarLoadThreshold).TryConvert(AI)) {
NewAI->takeName(AI);
AI->eraseFromParent();
++NumConverted;
@@ -1598,8 +1592,8 @@ void SROA::DeleteDeadInstructions() {
/// referenced by this instruction.
void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
AllocaInfo &Info) {
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (Use &U : I->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
isSafeForScalarRepl(BC, Offset, Info);
@@ -1616,13 +1610,13 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
isSafeMemAccess(Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 0, Info, MI,
+ U.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
if (!LI->isSimple())
return MarkUnsafe(Info, User);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
LIType, false, Info, LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
@@ -1632,7 +1626,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
SIType, true, Info, SI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
@@ -1665,39 +1659,39 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (!Info.CheckedPHIs.insert(PN))
return;
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
- if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(UI)) {
isSafePHISelectUseForScalarRepl(BC, Offset, Info);
- } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
// Only allow "bitcast" GEPs for simplicity. We could generalize this,
// but would have to prove that we're staying inside of an element being
// promoted.
if (!GEPI->hasAllZeroIndices())
- return MarkUnsafe(Info, User);
+ return MarkUnsafe(Info, UI);
isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
- } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
if (!LI->isSimple())
- return MarkUnsafe(Info, User);
+ return MarkUnsafe(Info, UI);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
+ isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
LIType, false, Info, LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
// Store is ok if storing INTO the pointer, not storing the pointer
if (!SI->isSimple() || SI->getOperand(0) == I)
- return MarkUnsafe(Info, User);
+ return MarkUnsafe(Info, UI);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
+ isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
SIType, true, Info, SI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
- } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
- isSafePHISelectUseForScalarRepl(User, Offset, Info);
+ } else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
+ isSafePHISelectUseForScalarRepl(UI, Offset, Info);
} else {
- return MarkUnsafe(Info, User);
+ return MarkUnsafe(Info, UI);
}
if (Info.isUnsafe) return;
}
@@ -1731,12 +1725,12 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
// Compute the offset due to this GEP and check if the alloca has a
// component element at that offset.
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
- // If this GEP is non constant then the last operand must have been a
+ // If this GEP is non-constant then the last operand must have been a
// dynamic index into a vector. Pop this now as it has no impact on the
// constant part of the offset.
if (NonConstant)
Indices.pop_back();
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
NonConstantIdxSize))
MarkUnsafe(Info, GEPI);
@@ -1795,7 +1789,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
bool AllowWholeAccess) {
// Check if this is a load/store of the entire alloca.
if (Offset == 0 && AllowWholeAccess &&
- MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ MemSize == DL->getTypeAllocSize(Info.AI->getAllocatedType())) {
// This can be safe for MemIntrinsics (where MemOpType is 0) and integer
// loads/stores (which are essentially the same as the MemIntrinsics with
// regard to copying padding between elements). But, if an alloca is
@@ -1832,20 +1826,20 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
Type *EltTy;
uint64_t EltSize;
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = TD->getStructLayout(ST);
+ const StructLayout *Layout = DL->getStructLayout(ST);
unsigned EltIdx = Layout->getElementContainingOffset(Offset);
EltTy = ST->getContainedType(EltIdx);
- EltSize = TD->getTypeAllocSize(EltTy);
+ EltSize = DL->getTypeAllocSize(EltTy);
Offset -= Layout->getElementOffset(EltIdx);
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
EltTy = AT->getElementType();
- EltSize = TD->getTypeAllocSize(EltTy);
+ EltSize = DL->getTypeAllocSize(EltTy);
if (Offset >= AT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
} else if (VectorType *VT = dyn_cast<VectorType>(T)) {
EltTy = VT->getElementType();
- EltSize = TD->getTypeAllocSize(EltTy);
+ EltSize = DL->getTypeAllocSize(EltTy);
if (Offset >= VT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
@@ -1867,8 +1861,8 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVectorImpl<AllocaInst *> &NewElts) {
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
- Use &TheUse = UI.getUse();
- Instruction *User = cast<Instruction>(*UI++);
+ Use &TheUse = *UI++;
+ Instruction *User = cast<Instruction>(TheUse.getUser());
if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
RewriteBitCast(BC, AI, Offset, NewElts);
@@ -1884,7 +1878,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
if (Offset == 0 &&
- MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
+ MemSize == DL->getTypeAllocSize(AI->getAllocatedType()))
RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
// Otherwise the intrinsic can only touch a single element and the
// address operand will be updated, so nothing else needs to be done.
@@ -1920,8 +1914,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
LI->replaceAllUsesWith(Insert);
DeadInsts.push_back(LI);
} else if (LIType->isIntegerTy() &&
- TD->getTypeAllocSize(LIType) ==
- TD->getTypeAllocSize(AI->getAllocatedType())) {
+ DL->getTypeAllocSize(LIType) ==
+ DL->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
@@ -1947,8 +1941,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
DeadInsts.push_back(SI);
} else if (SIType->isIntegerTy() &&
- TD->getTypeAllocSize(SIType) ==
- TD->getTypeAllocSize(AI->getAllocatedType())) {
+ DL->getTypeAllocSize(SIType) ==
+ DL->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
@@ -2010,7 +2004,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
Type *&IdxTy) {
uint64_t Idx = 0;
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = TD->getStructLayout(ST);
+ const StructLayout *Layout = DL->getStructLayout(ST);
Idx = Layout->getElementContainingOffset(Offset);
T = ST->getContainedType(Idx);
Offset -= Layout->getElementOffset(Idx);
@@ -2018,7 +2012,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
return Idx;
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
T = AT->getElementType();
- uint64_t EltSize = TD->getTypeAllocSize(T);
+ uint64_t EltSize = DL->getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
@@ -2026,7 +2020,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
}
VectorType *VT = cast<VectorType>(T);
T = VT->getElementType();
- uint64_t EltSize = TD->getTypeAllocSize(T);
+ uint64_t EltSize = DL->getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
@@ -2047,7 +2041,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
Value* NonConstantIdx = 0;
if (!GEPI->hasAllConstantIndices())
NonConstantIdx = Indices.pop_back_val();
- Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
@@ -2118,7 +2112,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
+ uint64_t EltSize = DL->getTypeAllocSize(IdxTy) - NewOffset;
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2134,7 +2128,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
for (; Idx != NewElts.size() && Size; ++Idx) {
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
+ uint64_t EltSize = DL->getTypeAllocSize(IdxTy);
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2226,10 +2220,10 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
Type *OtherTy = OtherPtrTy->getElementType();
if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
- EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
+ EltOffset = DL->getStructLayout(ST)->getElementOffset(i);
} else {
Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
- EltOffset = TD->getTypeAllocSize(EltTy)*i;
+ EltOffset = DL->getTypeAllocSize(EltTy)*i;
}
// The alignment of the other pointer is the guaranteed alignment of the
@@ -2270,7 +2264,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
Type *ValTy = EltTy->getScalarType();
// Construct an integer with the right value.
- unsigned EltSize = TD->getTypeSizeInBits(ValTy);
+ unsigned EltSize = DL->getTypeSizeInBits(ValTy);
APInt OneVal(EltSize, CI->getZExtValue());
APInt TotalVal(OneVal);
// Set each byte.
@@ -2300,7 +2294,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// this element.
}
- unsigned EltSize = TD->getTypeAllocSize(EltTy);
+ unsigned EltSize = DL->getTypeAllocSize(EltTy);
if (!EltSize)
continue;
@@ -2334,12 +2328,12 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+ uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
// Handle tail padding by extending the operand
- if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ if (DL->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = Builder.CreateZExt(SrcVal,
IntegerType::get(SI->getContext(), AllocaSizeBits));
@@ -2349,15 +2343,15 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- const StructLayout *Layout = TD->getStructLayout(EltSTy);
+ const StructLayout *Layout = DL->getStructLayout(EltSTy);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
- if (TD->isBigEndian())
- Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
+ if (DL->isBigEndian())
+ Shift = AllocaSizeBits-Shift-DL->getTypeAllocSizeInBits(FieldTy);
Value *EltVal = SrcVal;
if (Shift) {
@@ -2366,7 +2360,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
// Truncate down to an integer of the right size.
- uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2391,12 +2385,12 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
} else {
ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
Type *ArrayEltTy = ATy->getElementType();
- uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
- uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
+ uint64_t ElementOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = DL->getTypeSizeInBits(ArrayEltTy);
uint64_t Shift;
- if (TD->isBigEndian())
+ if (DL->isBigEndian())
Shift = AllocaSizeBits-ElementOffset;
else
Shift = 0;
@@ -2430,7 +2424,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
new StoreInst(EltVal, DestField, SI);
- if (TD->isBigEndian())
+ if (DL->isBigEndian())
Shift -= ElementOffset;
else
Shift += ElementOffset;
@@ -2448,7 +2442,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
+ uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
@@ -2458,10 +2452,10 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const StructLayout *Layout = 0;
uint64_t ArrayEltBitOffset = 0;
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- Layout = TD->getStructLayout(EltSTy);
+ Layout = DL->getStructLayout(EltSTy);
} else {
Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
- ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
+ ArrayEltBitOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
}
Value *ResultVal =
@@ -2473,7 +2467,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
Value *SrcField = NewElts[i];
Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
- uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2504,7 +2498,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
else // Array case.
Shift = i*ArrayEltBitOffset;
- if (TD->isBigEndian())
+ if (DL->isBigEndian())
Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
if (Shift) {
@@ -2521,7 +2515,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
}
// Handle tail padding by truncating the result
- if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ if (DL->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
LI->replaceAllUsesWith(ResultVal);
@@ -2531,15 +2525,15 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding in between the elements that would be split apart
/// by SROA; return false otherwise.
-static bool HasPadding(Type *Ty, const DataLayout &TD) {
+static bool HasPadding(Type *Ty, const DataLayout &DL) {
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Ty = ATy->getElementType();
- return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
+ return DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty);
}
// SROA currently handles only Arrays and Structs.
StructType *STy = cast<StructType>(Ty);
- const StructLayout *SL = TD.getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
@@ -2548,7 +2542,7 @@ static bool HasPadding(Type *Ty, const DataLayout &TD) {
// previous one.
if (i) {
unsigned PrevFieldEnd =
- PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
+ PrevFieldBitOffset+DL.getTypeSizeInBits(STy->getElementType(i-1));
if (PrevFieldEnd < FieldBitOffset)
return true;
}
@@ -2557,7 +2551,7 @@ static bool HasPadding(Type *Ty, const DataLayout &TD) {
// Check for tail padding.
if (unsigned EltCount = STy->getNumElements()) {
unsigned PrevFieldEnd = PrevFieldBitOffset +
- TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
+ DL.getTypeSizeInBits(STy->getElementType(EltCount-1));
if (PrevFieldEnd < SL->getSizeInBits())
return true;
}
@@ -2584,7 +2578,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// types, but may actually be used. In these cases, we refuse to promote the
// struct.
if (Info.isMemCpySrc && Info.isMemCpyDst &&
- HasPadding(AI->getAllocatedType(), *TD))
+ HasPadding(AI->getAllocatedType(), *DL))
return false;
// If the alloca never has an access to just *part* of it, but is accessed
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
new file mode 100644
index 0000000..006375c
--- /dev/null
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -0,0 +1,662 @@
+//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass converts vector operations into scalar operations, in order
+// to expose optimization opportunities on the individual scalar operations.
+// It is mainly intended for targets that do not have vector units, but it
+// may also be useful for revectorizing code to different vector widths.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scalarizer"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+// Used to store the scattered form of a vector.
+typedef SmallVector<Value *, 8> ValueVector;
+
+// Used to map a vector Value to its scattered form. We use std::map
+// because we want iterators to persist across insertion and because the
+// values are relatively large.
+typedef std::map<Value *, ValueVector> ScatterMap;
+
+// Lists Instructions that have been replaced with scalar implementations,
+// along with a pointer to their scattered forms.
+typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
+
+// Provides a very limited vector-like interface for lazily accessing one
+// component of a scattered vector or vector pointer.
+class Scatterer {
+public:
+ Scatterer() {}
+
+ // Scatter V into Size components. If new instructions are needed,
+ // insert them before BBI in BB. If CachePtr is nonnull, use it to cache
+ // the results.
+ Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+ ValueVector *cachePtr = 0);
+
+ // Return component I, creating a new Value for it if necessary.
+ Value *operator[](unsigned I);
+
+ // Return the number of components.
+ unsigned size() const { return Size; }
+
+private:
+ BasicBlock *BB;
+ BasicBlock::iterator BBI;
+ Value *V;
+ ValueVector *CachePtr;
+ PointerType *PtrTy;
+ ValueVector Tmp;
+ unsigned Size;
+};
+
+// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
+// called Name that compares X and Y in the same way as FCI.
+struct FCmpSplitter {
+ FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
+ Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+ const Twine &Name) const {
+ return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
+ }
+ FCmpInst &FCI;
+};
+
+// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
+// called Name that compares X and Y in the same way as ICI.
+struct ICmpSplitter {
+ ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
+ Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+ const Twine &Name) const {
+ return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
+ }
+ ICmpInst &ICI;
+};
+
+// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
+// a binary operator like BO called Name with operands X and Y.
+struct BinarySplitter {
+ BinarySplitter(BinaryOperator &bo) : BO(bo) {}
+ Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+ const Twine &Name) const {
+ return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
+ }
+ BinaryOperator &BO;
+};
+
+// Information about a load or store that we're scalarizing.
+struct VectorLayout {
+ VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {}
+
+ // Return the alignment of element I.
+ uint64_t getElemAlign(unsigned I) {
+ return MinAlign(VecAlign, I * ElemSize);
+ }
+
+ // The type of the vector.
+ VectorType *VecTy;
+
+ // The type of each element.
+ Type *ElemTy;
+
+ // The alignment of the vector.
+ uint64_t VecAlign;
+
+ // The size of each element.
+ uint64_t ElemSize;
+};
+
+class Scalarizer : public FunctionPass,
+ public InstVisitor<Scalarizer, bool> {
+public:
+ static char ID;
+
+ Scalarizer() :
+ FunctionPass(ID) {
+ initializeScalarizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+ // InstVisitor methods. They return true if the instruction was scalarized,
+ // false if nothing changed.
+ bool visitInstruction(Instruction &) { return false; }
+ bool visitSelectInst(SelectInst &SI);
+ bool visitICmpInst(ICmpInst &);
+ bool visitFCmpInst(FCmpInst &);
+ bool visitBinaryOperator(BinaryOperator &);
+ bool visitGetElementPtrInst(GetElementPtrInst &);
+ bool visitCastInst(CastInst &);
+ bool visitBitCastInst(BitCastInst &);
+ bool visitShuffleVectorInst(ShuffleVectorInst &);
+ bool visitPHINode(PHINode &);
+ bool visitLoadInst(LoadInst &);
+ bool visitStoreInst(StoreInst &);
+
+private:
+ Scatterer scatter(Instruction *, Value *);
+ void gather(Instruction *, const ValueVector &);
+ bool canTransferMetadata(unsigned Kind);
+ void transferMetadata(Instruction *, const ValueVector &);
+ bool getVectorLayout(Type *, unsigned, VectorLayout &);
+ bool finish();
+
+ template<typename T> bool splitBinary(Instruction &, const T &);
+
+ ScatterMap Scattered;
+ GatherList Gathered;
+ unsigned ParallelLoopAccessMDKind;
+ const DataLayout *DL;
+};
+
+char Scalarizer::ID = 0;
+} // end anonymous namespace
+
+// This is disabled by default because having separate loads and stores makes
+// it more likely that the -combiner-alias-analysis limits will be reached.
+static cl::opt<bool> ScalarizeLoadStore
+ ("scalarize-load-store", cl::Hidden, cl::init(false),
+ cl::desc("Allow the scalarizer pass to scalarize loads and store"));
+
+INITIALIZE_PASS(Scalarizer, "scalarizer", "Scalarize vector operations",
+ false, false)
+
+Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+ ValueVector *cachePtr)
+ : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
+ Type *Ty = V->getType();
+ PtrTy = dyn_cast<PointerType>(Ty);
+ if (PtrTy)
+ Ty = PtrTy->getElementType();
+ Size = Ty->getVectorNumElements();
+ if (!CachePtr)
+ Tmp.resize(Size, 0);
+ else if (CachePtr->empty())
+ CachePtr->resize(Size, 0);
+ else
+ assert(Size == CachePtr->size() && "Inconsistent vector sizes");
+}
+
+// Return component I, creating a new Value for it if necessary.
+Value *Scatterer::operator[](unsigned I) {
+ ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
+ // Try to reuse a previous value.
+ if (CV[I])
+ return CV[I];
+ IRBuilder<> Builder(BB, BBI);
+ if (PtrTy) {
+ if (!CV[0]) {
+ Type *Ty =
+ PointerType::get(PtrTy->getElementType()->getVectorElementType(),
+ PtrTy->getAddressSpace());
+ CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
+ }
+ if (I != 0)
+ CV[I] = Builder.CreateConstGEP1_32(CV[0], I,
+ V->getName() + ".i" + Twine(I));
+ } else {
+ // Search through a chain of InsertElementInsts looking for element I.
+ // Record other elements in the cache. The new V is still suitable
+ // for all uncached indices.
+ for (;;) {
+ InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
+ if (!Insert)
+ break;
+ ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
+ if (!Idx)
+ break;
+ unsigned J = Idx->getZExtValue();
+ CV[J] = Insert->getOperand(1);
+ V = Insert->getOperand(0);
+ if (I == J)
+ return CV[J];
+ }
+ CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
+ V->getName() + ".i" + Twine(I));
+ }
+ return CV[I];
+}
+
+bool Scalarizer::doInitialization(Module &M) {
+ ParallelLoopAccessMDKind =
+ M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
+ return false;
+}
+
+bool Scalarizer::runOnFunction(Function &F) {
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
+ for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
+ BasicBlock *BB = BBI;
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
+ Instruction *I = II;
+ bool Done = visit(I);
+ ++II;
+ if (Done && I->getType()->isVoidTy())
+ I->eraseFromParent();
+ }
+ }
+ return finish();
+}
+
+// Return a scattered form of V that can be accessed by Point. V must be a
+// vector or a pointer to a vector.
+Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
+ if (Argument *VArg = dyn_cast<Argument>(V)) {
+ // Put the scattered form of arguments in the entry block,
+ // so that it can be used everywhere.
+ Function *F = VArg->getParent();
+ BasicBlock *BB = &F->getEntryBlock();
+ return Scatterer(BB, BB->begin(), V, &Scattered[V]);
+ }
+ if (Instruction *VOp = dyn_cast<Instruction>(V)) {
+ // Put the scattered form of an instruction directly after the
+ // instruction.
+ BasicBlock *BB = VOp->getParent();
+ return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
+ V, &Scattered[V]);
+ }
+ // In the fallback case, just put the scattered form before Point and
+ // keep the result local to Point.
+ return Scatterer(Point->getParent(), Point, V);
+}
+
+// Replace Op with the gathered form of the components in CV. Defer the
+// deletion of Op and creation of the gathered form to the end of the pass,
+// so that we can avoid creating the gathered form if all uses of Op are
+// replaced with uses of CV.
+void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
+ // Since we're not deleting Op yet, stub out its operands, so that it
+ // doesn't make anything live unnecessarily.
+ for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
+ Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
+
+ transferMetadata(Op, CV);
+
+ // If we already have a scattered form of Op (created from ExtractElements
+ // of Op itself), replace them with the new form.
+ ValueVector &SV = Scattered[Op];
+ if (!SV.empty()) {
+ for (unsigned I = 0, E = SV.size(); I != E; ++I) {
+ Instruction *Old = cast<Instruction>(SV[I]);
+ CV[I]->takeName(Old);
+ Old->replaceAllUsesWith(CV[I]);
+ Old->eraseFromParent();
+ }
+ }
+ SV = CV;
+ Gathered.push_back(GatherList::value_type(Op, &SV));
+}
+
+// Return true if it is safe to transfer the given metadata tag from
+// vector to scalar instructions.
+bool Scalarizer::canTransferMetadata(unsigned Tag) {
+ return (Tag == LLVMContext::MD_tbaa
+ || Tag == LLVMContext::MD_fpmath
+ || Tag == LLVMContext::MD_tbaa_struct
+ || Tag == LLVMContext::MD_invariant_load
+ || Tag == ParallelLoopAccessMDKind);
+}
+
+// Transfer metadata from Op to the instructions in CV if it is known
+// to be safe to do so.
+void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ Op->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned I = 0, E = CV.size(); I != E; ++I) {
+ if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
+ if (canTransferMetadata(MI->first))
+ New->setMetadata(MI->first, MI->second);
+ New->setDebugLoc(Op->getDebugLoc());
+ }
+ }
+}
+
+// Try to fill in Layout from Ty, returning true on success. Alignment is
+// the alignment of the vector, or 0 if the ABI default should be used.
+bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
+ VectorLayout &Layout) {
+ if (!DL)
+ return false;
+
+ // Make sure we're dealing with a vector.
+ Layout.VecTy = dyn_cast<VectorType>(Ty);
+ if (!Layout.VecTy)
+ return false;
+
+ // Check that we're dealing with full-byte elements.
+ Layout.ElemTy = Layout.VecTy->getElementType();
+ if (DL->getTypeSizeInBits(Layout.ElemTy) !=
+ DL->getTypeStoreSizeInBits(Layout.ElemTy))
+ return false;
+
+ if (Alignment)
+ Layout.VecAlign = Alignment;
+ else
+ Layout.VecAlign = DL->getABITypeAlignment(Layout.VecTy);
+ Layout.ElemSize = DL->getTypeStoreSize(Layout.ElemTy);
+ return true;
+}
+
+// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
+// to create an instruction like I with operands X and Y and name Name.
+template<typename Splitter>
+bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
+ VectorType *VT = dyn_cast<VectorType>(I.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(I.getParent(), &I);
+ Scatterer Op0 = scatter(&I, I.getOperand(0));
+ Scatterer Op1 = scatter(&I, I.getOperand(1));
+ assert(Op0.size() == NumElems && "Mismatched binary operation");
+ assert(Op1.size() == NumElems && "Mismatched binary operation");
+ ValueVector Res;
+ Res.resize(NumElems);
+ for (unsigned Elem = 0; Elem < NumElems; ++Elem)
+ Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
+ I.getName() + ".i" + Twine(Elem));
+ gather(&I, Res);
+ return true;
+}
+
+bool Scalarizer::visitSelectInst(SelectInst &SI) {
+ VectorType *VT = dyn_cast<VectorType>(SI.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(SI.getParent(), &SI);
+ Scatterer Op1 = scatter(&SI, SI.getOperand(1));
+ Scatterer Op2 = scatter(&SI, SI.getOperand(2));
+ assert(Op1.size() == NumElems && "Mismatched select");
+ assert(Op2.size() == NumElems && "Mismatched select");
+ ValueVector Res;
+ Res.resize(NumElems);
+
+ if (SI.getOperand(0)->getType()->isVectorTy()) {
+ Scatterer Op0 = scatter(&SI, SI.getOperand(0));
+ assert(Op0.size() == NumElems && "Mismatched select");
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
+ SI.getName() + ".i" + Twine(I));
+ } else {
+ Value *Op0 = SI.getOperand(0);
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
+ SI.getName() + ".i" + Twine(I));
+ }
+ gather(&SI, Res);
+ return true;
+}
+
+bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
+ return splitBinary(ICI, ICmpSplitter(ICI));
+}
+
+bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
+ return splitBinary(FCI, FCmpSplitter(FCI));
+}
+
+bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
+ return splitBinary(BO, BinarySplitter(BO));
+}
+
+bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
+ if (!VT)
+ return false;
+
+ IRBuilder<> Builder(GEPI.getParent(), &GEPI);
+ unsigned NumElems = VT->getNumElements();
+ unsigned NumIndices = GEPI.getNumIndices();
+
+ Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));
+
+ SmallVector<Scatterer, 8> Ops;
+ Ops.resize(NumIndices);
+ for (unsigned I = 0; I < NumIndices; ++I)
+ Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));
+
+ ValueVector Res;
+ Res.resize(NumElems);
+ for (unsigned I = 0; I < NumElems; ++I) {
+ SmallVector<Value *, 8> Indices;
+ Indices.resize(NumIndices);
+ for (unsigned J = 0; J < NumIndices; ++J)
+ Indices[J] = Ops[J][I];
+ Res[I] = Builder.CreateGEP(Base[I], Indices,
+ GEPI.getName() + ".i" + Twine(I));
+ if (GEPI.isInBounds())
+ if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
+ NewGEPI->setIsInBounds();
+ }
+ gather(&GEPI, Res);
+ return true;
+}
+
+bool Scalarizer::visitCastInst(CastInst &CI) {
+ VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(CI.getParent(), &CI);
+ Scatterer Op0 = scatter(&CI, CI.getOperand(0));
+ assert(Op0.size() == NumElems && "Mismatched cast");
+ ValueVector Res;
+ Res.resize(NumElems);
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
+ CI.getName() + ".i" + Twine(I));
+ gather(&CI, Res);
+ return true;
+}
+
+bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
+ VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
+ VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
+ if (!DstVT || !SrcVT)
+ return false;
+
+ unsigned DstNumElems = DstVT->getNumElements();
+ unsigned SrcNumElems = SrcVT->getNumElements();
+ IRBuilder<> Builder(BCI.getParent(), &BCI);
+ Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
+ ValueVector Res;
+ Res.resize(DstNumElems);
+
+ if (DstNumElems == SrcNumElems) {
+ for (unsigned I = 0; I < DstNumElems; ++I)
+ Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
+ BCI.getName() + ".i" + Twine(I));
+ } else if (DstNumElems > SrcNumElems) {
+ // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
+ // individual elements to the destination.
+ unsigned FanOut = DstNumElems / SrcNumElems;
+ Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
+ unsigned ResI = 0;
+ for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
+ Value *V = Op0[Op0I];
+ Instruction *VI;
+ // Look through any existing bitcasts before converting to <N x t2>.
+ // In the best case, the resulting conversion might be a no-op.
+ while ((VI = dyn_cast<Instruction>(V)) &&
+ VI->getOpcode() == Instruction::BitCast)
+ V = VI->getOperand(0);
+ V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
+ Scatterer Mid = scatter(&BCI, V);
+ for (unsigned MidI = 0; MidI < FanOut; ++MidI)
+ Res[ResI++] = Mid[MidI];
+ }
+ } else {
+ // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
+ unsigned FanIn = SrcNumElems / DstNumElems;
+ Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
+ unsigned Op0I = 0;
+ for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
+ Value *V = UndefValue::get(MidTy);
+ for (unsigned MidI = 0; MidI < FanIn; ++MidI)
+ V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
+ BCI.getName() + ".i" + Twine(ResI)
+ + ".upto" + Twine(MidI));
+ Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
+ BCI.getName() + ".i" + Twine(ResI));
+ }
+ }
+ gather(&BCI, Res);
+ return true;
+}
+
+bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
+ VectorType *VT = dyn_cast<VectorType>(SVI.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
+ Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
+ ValueVector Res;
+ Res.resize(NumElems);
+
+ for (unsigned I = 0; I < NumElems; ++I) {
+ int Selector = SVI.getMaskValue(I);
+ if (Selector < 0)
+ Res[I] = UndefValue::get(VT->getElementType());
+ else if (unsigned(Selector) < Op0.size())
+ Res[I] = Op0[Selector];
+ else
+ Res[I] = Op1[Selector - Op0.size()];
+ }
+ gather(&SVI, Res);
+ return true;
+}
+
+bool Scalarizer::visitPHINode(PHINode &PHI) {
+ VectorType *VT = dyn_cast<VectorType>(PHI.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(PHI.getParent(), &PHI);
+ ValueVector Res;
+ Res.resize(NumElems);
+
+ unsigned NumOps = PHI.getNumOperands();
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
+ PHI.getName() + ".i" + Twine(I));
+
+ for (unsigned I = 0; I < NumOps; ++I) {
+ Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
+ BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
+ for (unsigned J = 0; J < NumElems; ++J)
+ cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
+ }
+ gather(&PHI, Res);
+ return true;
+}
+
+bool Scalarizer::visitLoadInst(LoadInst &LI) {
+ if (!ScalarizeLoadStore)
+ return false;
+ if (!LI.isSimple())
+ return false;
+
+ VectorLayout Layout;
+ if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
+ return false;
+
+ unsigned NumElems = Layout.VecTy->getNumElements();
+ IRBuilder<> Builder(LI.getParent(), &LI);
+ Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
+ ValueVector Res;
+ Res.resize(NumElems);
+
+ for (unsigned I = 0; I < NumElems; ++I)
+ Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
+ LI.getName() + ".i" + Twine(I));
+ gather(&LI, Res);
+ return true;
+}
+
+bool Scalarizer::visitStoreInst(StoreInst &SI) {
+ if (!ScalarizeLoadStore)
+ return false;
+ if (!SI.isSimple())
+ return false;
+
+ VectorLayout Layout;
+ Value *FullValue = SI.getValueOperand();
+ if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
+ return false;
+
+ unsigned NumElems = Layout.VecTy->getNumElements();
+ IRBuilder<> Builder(SI.getParent(), &SI);
+ Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
+ Scatterer Val = scatter(&SI, FullValue);
+
+ ValueVector Stores;
+ Stores.resize(NumElems);
+ for (unsigned I = 0; I < NumElems; ++I) {
+ unsigned Align = Layout.getElemAlign(I);
+ Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
+ }
+ transferMetadata(&SI, Stores);
+ return true;
+}
+
+// Delete the instructions that we scalarized. If a full vector result
+// is still needed, recreate it using InsertElements.
+bool Scalarizer::finish() {
+ if (Gathered.empty())
+ return false;
+ for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
+ GMI != GME; ++GMI) {
+ Instruction *Op = GMI->first;
+ ValueVector &CV = *GMI->second;
+ if (!Op->use_empty()) {
+ // The value is still needed, so recreate it using a series of
+ // InsertElements.
+ Type *Ty = Op->getType();
+ Value *Res = UndefValue::get(Ty);
+ BasicBlock *BB = Op->getParent();
+ unsigned Count = Ty->getVectorNumElements();
+ IRBuilder<> Builder(BB, Op);
+ if (isa<PHINode>(Op))
+ Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
+ for (unsigned I = 0; I < Count; ++I)
+ Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
+ Op->getName() + ".upto" + Twine(I));
+ Res->takeName(Op);
+ Op->replaceAllUsesWith(Res);
+ }
+ Op->eraseFromParent();
+ }
+ Gathered.clear();
+ Scattered.clear();
+ return true;
+}
+
+FunctionPass *llvm::createScalarizerPass() {
+ return new Scalarizer();
+}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 8371f6d..ceae5a7 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -28,13 +28,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -46,9 +46,9 @@ struct CFGSimplifyPass : public FunctionPass {
CFGSimplifyPass() : FunctionPass(ID) {
initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfo>();
}
};
@@ -145,7 +145,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
- const DataLayout *TD) {
+ const DataLayout *DL) {
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@@ -154,7 +154,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, TD)) {
+ if (SimplifyCFG(BBIt++, TTI, DL)) {
LocalChange = true;
++NumSimpl;
}
@@ -168,11 +168,15 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// simplify the CFG.
//
bool CFGSimplifyPass::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, TD);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, DL);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -186,7 +190,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, TD);
+ EverChanged = iterativelySimplifyCFG(F, TTI, DL);
EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index d4595bb..4107374 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -16,12 +16,11 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -41,15 +40,15 @@ namespace {
initializeSinkingPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfo>();
}
private:
@@ -63,7 +62,7 @@ namespace {
char Sinking::ID = 0;
INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false)
@@ -77,15 +76,14 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
// This may leave a referencing dbg_value in the original block, before
// the definition of the vreg. Dwarf generator handles this although the
// user might not get the right info at runtime.
- for (Value::use_iterator I = Inst->use_begin(),
- E = Inst->use_end(); I != E; ++I) {
+ for (Use &U : Inst->uses()) {
// Determine the block of the use.
- Instruction *UseInst = cast<Instruction>(*I);
+ Instruction *UseInst = cast<Instruction>(U.getUser());
BasicBlock *UseBlock = UseInst->getParent();
if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
- unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo());
+ unsigned Num = PHINode::getIncomingValueNumForOperand(U.getOperandNo());
UseBlock = PN->getIncomingBlock(Num);
}
// Check that it dominates.
@@ -96,7 +94,7 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
}
bool Sinking::runOnFunction(Function &F) {
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
@@ -218,6 +216,13 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
/// instruction out of its current block into a successor.
bool Sinking::SinkInstruction(Instruction *Inst,
SmallPtrSet<Instruction *, 8> &Stores) {
+
+ // Don't sink static alloca instructions. CodeGen assumes allocas outside the
+ // entry block are dynamically sized stack objects.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+ if (AI->isStaticAlloca())
+ return false;
+
// Check if it's safe to move the instruction.
if (!isSafeToMove(Inst, AA, Stores))
return false;
@@ -259,9 +264,9 @@ bool Sinking::SinkInstruction(Instruction *Inst,
return false;
DEBUG(dbgs() << "Sink" << *Inst << " (";
- WriteAsOperand(dbgs(), Inst->getParent(), false);
+ Inst->getParent()->printAsOperand(dbgs(), false);
dbgs() << " -> ";
- WriteAsOperand(dbgs(), SuccToSinkTo, false);
+ SuccToSinkTo->printAsOperand(dbgs(), false);
dbgs() << ")\n");
// Move the instruction.
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 5045ff8..8fd2268 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -15,7 +15,7 @@
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/PatternMatch.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -235,18 +235,18 @@ public:
}
using Pass::doInitialization;
- virtual bool doInitialization(Region *R, RGPassManager &RGM);
+ bool doInitialization(Region *R, RGPassManager &RGM) override;
- virtual bool runOnRegion(Region *R, RGPassManager &RGM);
+ bool runOnRegion(Region *R, RGPassManager &RGM) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Structurize control flow";
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(LowerSwitchID);
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
}
};
@@ -258,7 +258,7 @@ char StructurizeCFG::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfo)
INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
@@ -277,9 +277,8 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
/// \brief Build up the general order of nodes
void StructurizeCFG::orderNodes() {
- scc_iterator<Region *> I = scc_begin(ParentRegion),
- E = scc_end(ParentRegion);
- for (Order.clear(); I != E; ++I) {
+ scc_iterator<Region *> I = scc_begin(ParentRegion);
+ for (Order.clear(); !I.isAtEnd(); ++I) {
std::vector<RegionNode *> &Nodes = *I;
Order.append(Nodes.begin(), Nodes.end());
}
@@ -326,16 +325,10 @@ Value *StructurizeCFG::invert(Value *Condition) {
if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
// Third: Check all the users for an invert
BasicBlock *Parent = Inst->getParent();
- for (Value::use_iterator I = Condition->use_begin(),
- E = Condition->use_end(); I != E; ++I) {
-
- Instruction *User = dyn_cast<Instruction>(*I);
- if (!User || User->getParent() != Parent)
- continue;
-
- if (match(*I, m_Not(m_Specific(Condition))))
- return *I;
- }
+ for (User *U : Condition->users())
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
+ return I;
// Last option: Create a new instruction
return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
@@ -830,25 +823,19 @@ void StructurizeCFG::createFlow() {
/// no longer dominate all their uses. Not sure if this is really nessasary
void StructurizeCFG::rebuildSSA() {
SSAUpdater Updater;
- for (Region::block_iterator I = ParentRegion->block_begin(),
- E = ParentRegion->block_end();
- I != E; ++I) {
-
- BasicBlock *BB = *I;
+ for (const auto &BB : ParentRegion->blocks())
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
bool Initialized = false;
- for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
-
- Next = I->getNext();
-
- Instruction *User = cast<Instruction>(I->getUser());
+ for (auto I = II->use_begin(), E = II->use_end(); I != E;) {
+ Use &U = *I++;
+ Instruction *User = cast<Instruction>(U.getUser());
if (User->getParent() == BB) {
continue;
} else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
- if (UserPN->getIncomingBlock(*I) == BB)
+ if (UserPN->getIncomingBlock(U) == BB)
continue;
}
@@ -862,10 +849,9 @@ void StructurizeCFG::rebuildSSA() {
Updater.AddAvailableValue(BB, II);
Initialized = true;
}
- Updater.RewriteUseAfterInsertions(*I);
+ Updater.RewriteUseAfterInsertions(U);
}
}
- }
}
/// \brief Run the transformation for each region found
@@ -876,7 +862,7 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
Func = R->getEntry()->getParent();
ParentRegion = R;
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
orderNodes();
collectInfos();
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 9fb8ddc..6d02777 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -60,17 +60,17 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -89,9 +89,9 @@ namespace {
initializeTailCallElimPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
private:
CallInst *FindTRECandidate(Instruction *I,
@@ -149,16 +149,16 @@ namespace {
struct AllocaCaptureTracker : public CaptureTracker {
AllocaCaptureTracker() : Captured(false) {}
- void tooManyUses() LLVM_OVERRIDE { Captured = true; }
+ void tooManyUses() override { Captured = true; }
- bool shouldExplore(Use *U) LLVM_OVERRIDE {
+ bool shouldExplore(const Use *U) override {
Value *V = U->getUser();
if (isa<CallInst>(V) || isa<InvokeInst>(V))
UsesAlloca.insert(V);
return true;
}
- bool captured(Use *U) LLVM_OVERRIDE {
+ bool captured(const Use *U) override {
if (isa<ReturnInst>(U->getUser()))
return false;
Captured = true;
@@ -171,6 +171,9 @@ struct AllocaCaptureTracker : public CaptureTracker {
} // end anonymous namespace
bool TailCallElim::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
// If this function is a varargs function, we won't be able to PHI the args
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg()) return false;
@@ -377,13 +380,13 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
return 0;
// The only user of this instruction we allow is a single return instruction.
- if (!I->hasOneUse() || !isa<ReturnInst>(I->use_back()))
+ if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
return 0;
// Ok, now we have to check all of the other return instructions in this
// function. If they return non-constants or differing values, then we cannot
// transform the function safely.
- return getCommonReturnValue(cast<ReturnInst>(I->use_back()), CI);
+ return getCommonReturnValue(cast<ReturnInst>(I->user_back()), CI);
}
static Instruction *FirstNonDbg(BasicBlock::iterator I) {
@@ -426,7 +429,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
// lower the call to fabs into inline code.
if (BB == &F->getEntryBlock() &&
FirstNonDbg(BB->front()) == CI &&
- FirstNonDbg(llvm::next(BB->begin())) == TI &&
+ FirstNonDbg(std::next(BB->begin())) == TI &&
CI->getCalledFunction() &&
!TTI->isLoweredToCall(CI->getCalledFunction())) {
// A single-block function with just a call and a return. Check that
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
new file mode 100644
index 0000000..cce016a
--- /dev/null
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -0,0 +1,114 @@
+//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h).
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+namespace llvm {
+
+// Ordering predicate for stack variables: a variable with a larger alignment
+// sorts before one with a smaller alignment, which keeps alignment-induced
+// gaps small.
+// Size is deliberately not part of the ordering.  Sorting by size would give
+// larger variables larger redzones at both ends, but the extra reordering
+// makes reports harder to analyze, especially with temporary unnamed
+// variables.  Until more information (type, line number, etc) is available
+// for the stack variables, reordering is kept to a minimum.
+static inline bool CompareVars(const ASanStackVariableDescription &a,
+                               const ASanStackVariableDescription &b) {
+  return b.Alignment < a.Alignment;
+}
+
+// We also force minimal alignment for all vars to kMinAlignment so that vars
+// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
+// ComputeASanStackFrameLayout applies this floor to every variable before it
+// runs the stable sort, so variables at or below the floor compare equal and
+// keep their relative order.
+static const size_t kMinAlignment = 16;
+
+// Rounds X up to a multiple of RoundTo using a bit mask.  RoundTo is expected
+// to be a power of two; the assert rejects any value with more than one bit
+// set.
+static size_t RoundUpTo(size_t X, size_t RoundTo) {
+  const size_t Mask = RoundTo - 1;
+  assert((RoundTo & Mask) == 0);
+  return (X + Mask) & ~Mask;
+}
+
+// Returns the number of bytes reserved for a variable of Size bytes together
+// with the redzone that follows it, rounded up to a multiple of Alignment.
+// Bigger variables get bigger redzones:
+//   Size <= 4     -> 16 bytes total
+//   Size <= 16    -> 32 bytes total
+//   Size <= 128   -> Size + 32
+//   Size <= 512   -> Size + 64
+//   Size <= 4096  -> Size + 128
+//   otherwise     -> Size + 256
+static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
+  size_t Padded;
+  if (Size > 4096)
+    Padded = Size + 256;
+  else if (Size > 512)
+    Padded = Size + 128;
+  else if (Size > 128)
+    Padded = Size + 64;
+  else if (Size > 16)
+    Padded = Size + 32;
+  else if (Size > 4)
+    Padded = 32;
+  else
+    Padded = 16;
+  return RoundUpTo(Padded, Alignment);
+}
+
+// Lays out the variables in Vars inside one stack frame and fills *Layout
+// with the frame alignment, frame size, shadow bytes and description string.
+// Vars is modified in place: alignments are raised to at least kMinAlignment,
+// the vector is stably sorted by decreasing alignment, and each entry's
+// Offset is set.  Vars must not be empty; Granularity must be a power of two
+// in [8, 64]; MinHeaderSize must be a power of two that is >= 16 and
+// >= Granularity (all asserted).
+void
+ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
+                            size_t Granularity, size_t MinHeaderSize,
+                            ASanStackFrameLayout *Layout) {
+  assert(Granularity >= 8 && Granularity <= 64 &&
+         (Granularity & (Granularity - 1)) == 0);
+  assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 &&
+         MinHeaderSize >= Granularity);
+  size_t NumVars = Vars.size();
+  assert(NumVars > 0);
+
+  // Apply the alignment floor, then order by decreasing alignment.
+  for (size_t Idx = 0; Idx != NumVars; ++Idx)
+    Vars[Idx].Alignment = std::max(Vars[Idx].Alignment, kMinAlignment);
+  std::stable_sort(Vars.begin(), Vars.end(), CompareVars);
+
+  // The description starts with the variable count; one record per variable
+  // is appended in the loop below.
+  SmallString<2048> DescriptionBuf;
+  raw_svector_ostream Description(DescriptionBuf);
+  Description << NumVars;
+
+  // After sorting, Vars[0] carries the largest alignment.
+  Layout->FrameAlignment = std::max(Granularity, Vars[0].Alignment);
+  SmallVector<uint8_t, 64> &Shadow(Layout->ShadowBytes);
+  Shadow.clear();
+
+  // Left redzone: everything in front of the first variable.
+  size_t Offset = std::max(std::max(MinHeaderSize, Granularity),
+                           Vars[0].Alignment);
+  assert((Offset % Granularity) == 0);
+  Shadow.insert(Shadow.end(), Offset / Granularity,
+                kAsanStackLeftRedzoneMagic);
+
+  for (size_t Idx = 0; Idx != NumVars; ++Idx) {
+    bool IsLast = Idx == NumVars - 1;
+    size_t Alignment = std::max(Granularity, Vars[Idx].Alignment);
+    (void)Alignment; // Used only in asserts.
+    size_t Size = Vars[Idx].Size;
+    const char *Name = Vars[Idx].Name;
+    assert((Alignment & (Alignment - 1)) == 0);
+    assert(Layout->FrameAlignment >= Alignment);
+    assert((Offset % Alignment) == 0);
+    assert(Size > 0);
+    Description << " " << Offset << " " << Size << " " << strlen(Name)
+                << " " << Name;
+
+    // The variable plus its redzone must end on a boundary suitable for
+    // whatever comes next: the following variable, or just the granularity
+    // for the last one.
+    size_t NextAlignment = Granularity;
+    if (!IsLast)
+      NextAlignment = std::max(Granularity, Vars[Idx + 1].Alignment);
+    size_t SizeWithRedzone = VarAndRedzoneSize(Size, NextAlignment);
+
+    // Shadow for the variable itself: zero bytes for each full granule, then
+    // one byte holding the leftover byte count if Size is not a multiple of
+    // Granularity.
+    Shadow.insert(Shadow.end(), Size / Granularity, 0);
+    if (Size % Granularity)
+      Shadow.insert(Shadow.end(), Size % Granularity);
+    // Shadow for the redzone that follows the variable.
+    Shadow.insert(Shadow.end(), (SizeWithRedzone - Size) / Granularity,
+                  IsLast ? kAsanStackRightRedzoneMagic
+                         : kAsanStackMidRedzoneMagic);
+
+    Vars[Idx].Offset = Offset;
+    Offset += SizeWithRedzone;
+  }
+
+  // Pad the frame with extra right redzone up to a multiple of MinHeaderSize.
+  if (Offset % MinHeaderSize) {
+    size_t Padding = MinHeaderSize - (Offset % MinHeaderSize);
+    Shadow.insert(Shadow.end(), Padding / Granularity,
+                  kAsanStackRightRedzoneMagic);
+    Offset += Padding;
+  }
+
+  Layout->DescriptionString = Description.str();
+  Layout->FrameSize = Offset;
+  assert((Layout->FrameSize % MinHeaderSize) == 0);
+  assert(Layout->FrameSize / Granularity == Layout->ShadowBytes.size());
+}
+
+} // llvm namespace
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
new file mode 100644
index 0000000..f42635e
--- /dev/null
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -0,0 +1,217 @@
+//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file adds DWARF discriminators to the IR. Path discriminators are
+// used to decide what CFG path was taken inside sub-graphs whose instructions
+// share the same line and column number information.
+//
+// The main user of this is the sample profiler. Instruction samples are
+// mapped to line number information. Since a single line may be spread
+// out over several basic blocks, discriminators add more precise location
+// for the samples.
+//
+// For example,
+//
+// 1 #define ASSERT(P)
+// 2 if (!(P))
+// 3 abort()
+// ...
+// 100 while (true) {
+// 101 ASSERT (sum < 0);
+// 102 ...
+// 130 }
+//
+// when converted to IR, this snippet looks something like:
+//
+// while.body: ; preds = %entry, %if.end
+// %0 = load i32* %sum, align 4, !dbg !15
+// %cmp = icmp slt i32 %0, 0, !dbg !15
+// br i1 %cmp, label %if.end, label %if.then, !dbg !15
+//
+// if.then: ; preds = %while.body
+// call void @abort(), !dbg !15
+// br label %if.end, !dbg !15
+//
+// Notice that all the instructions in blocks 'while.body' and 'if.then'
+// have exactly the same debug information. When this program is sampled
+// at runtime, the profiler will assume that all these instructions are
+// equally frequent. This, in turn, will consider the edge while.body->if.then
+// to be frequently taken (which is incorrect).
+//
+// By adding a discriminator value to the instructions in block 'if.then',
+// we can distinguish instructions at line 101 with discriminator 0 from
+// the instructions at line 101 with discriminator 1.
+//
+// For more details about DWARF discriminators, please visit
+// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "add-discriminators"
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ // Function pass that assigns DWARF path discriminators; the work is done in
+ // runOnFunction, defined further down in this file.
+ struct AddDiscriminators : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ // Constructing the pass also runs its initializer against the global
+ // PassRegistry.
+ AddDiscriminators() : FunctionPass(ID) {
+ initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Returns true if any instruction's debug location was changed.
+ bool runOnFunction(Function &F) override;
+ };
+}
+
+// Storage for the pass identifier handed to the FunctionPass constructor.
+char AddDiscriminators::ID = 0;
+// Pass registration under the argument string "add-discriminators" with the
+// description "Add DWARF path discriminators".  No dependencies are declared
+// between BEGIN and END.
+// NOTE(review): the two trailing `false` flags are presumably the
+// CFG-only / is-analysis flags -- confirm against PassSupport.h.
+INITIALIZE_PASS_BEGIN(AddDiscriminators, "add-discriminators",
+ "Add DWARF path discriminators", false, false)
+INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators",
+ "Add DWARF path discriminators", false, false)
+
+// Command line option to disable discriminator generation even in the
+// presence of debug information. This is only needed when debugging
+// debug info generation issues.
+// Defaults to false; AddDiscriminators::runOnFunction returns early without
+// changing anything when it is set.
+static cl::opt<bool>
+NoDiscriminators("no-discriminators", cl::init(false),
+ cl::desc("Disable generation of discriminator information."));
+
+// Factory for the AddDiscriminators pass: returns a newly allocated instance
+// as a FunctionPass pointer.
+FunctionPass *llvm::createAddDiscriminatorsPass() {
+ return new AddDiscriminators();
+}
+
+/// \brief Returns true when the module containing \p F has a named metadata
+/// node called "llvm.dbg.cu".
+static bool hasDebugInfo(const Function &F) {
+  return F.getParent()->getNamedMetadata("llvm.dbg.cu") != nullptr;
+}
+
+/// \brief Assign DWARF discriminators.
+///
+/// To assign discriminators, we examine the boundaries of every
+/// basic block and its successors. Suppose there is a basic block B1
+/// with successor B2. The last instruction I1 in B1 and the first
+/// instruction I2 in B2 are located at the same file and line number.
+/// This situation is illustrated in the following code snippet:
+///
+/// if (i < 10) x = i;
+///
+/// entry:
+/// br i1 %cmp, label %if.then, label %if.end, !dbg !10
+/// if.then:
+/// %1 = load i32* %i.addr, align 4, !dbg !10
+/// store i32 %1, i32* %x, align 4, !dbg !10
+/// br label %if.end, !dbg !10
+/// if.end:
+/// ret void, !dbg !12
+///
+/// Notice how the branch instruction in block 'entry' and all the
+/// instructions in block 'if.then' have the exact same debug location
+/// information (!dbg !10).
+///
+/// To distinguish instructions in block 'entry' from instructions in
+/// block 'if.then', we generate a new lexical block for all the
+/// instructions in block 'if.then' that share the same file and line
+/// location with the last instruction of block 'entry'.
+///
+/// This new lexical block will have the same location information as
+/// the previous one, but with a new DWARF discriminator value.
+///
+/// One of the main uses of this discriminator value is in runtime
+/// sample profilers. It allows the profiler to distinguish instructions
+/// at location !dbg !10 that execute on different basic blocks. This is
+/// important because while the predicate 'if (i < 10)' may have been
+/// executed millions of times, the assignment 'x = i' may have only
+/// executed a handful of times (meaning that the entry->if.then edge is
+/// seldom taken).
+///
+/// If we did not have discriminator information, the profiler would
+/// assign the same weight to both blocks 'entry' and 'if.then', which
+/// in turn will make it conclude that the entry->if.then edge is very
+/// hot.
+///
+/// To decide where to create new discriminator values, this function
+/// traverses the CFG and examines instruction at basic block boundaries.
+/// If the last instruction I1 of a block B1 is at the same file and line
+/// location as instruction I2 of successor B2, then it creates a new
+/// lexical block for I2 and all the instructions in B2 that share the same
+/// file and line location as I2. This new lexical block will have a
+/// different discriminator number than I1.
+bool AddDiscriminators::runOnFunction(Function &F) {
+  // Bail out when the module has no debug info at all, or when the user
+  // explicitly disabled discriminators on the command line.
+  if (!hasDebugInfo(F) || NoDiscriminators)
+    return false;
+
+  bool Changed = false;
+  Module *Mod = F.getParent();
+  LLVMContext &Ctx = Mod->getContext();
+  DIBuilder Builder(*Mod);
+
+  // Visit each block and compare the location of its terminator against the
+  // location of the first interesting instruction of each successor.
+  for (Function::iterator BBIt = F.begin(), BBEnd = F.end(); BBIt != BBEnd;
+       ++BBIt) {
+    BasicBlock *B = BBIt;
+    TerminatorInst *Last = B->getTerminator();
+    DebugLoc LastLoc = Last->getDebugLoc();
+    if (LastLoc.isUnknown())
+      continue;
+    DILocation LastDIL(LastLoc.getAsMDNode(Ctx));
+
+    for (unsigned SuccIdx = 0; SuccIdx < Last->getNumSuccessors();
+         ++SuccIdx) {
+      BasicBlock *Succ = Last->getSuccessor(SuccIdx);
+      Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
+      DebugLoc FirstLoc = First->getDebugLoc();
+      if (FirstLoc.isUnknown())
+        continue;
+      DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx));
+
+      // Only successors whose first instruction (First) sits on the same
+      // line as B's last instruction (Last) need a new discriminator.
+      if (!FirstDIL.atSameLineAs(LastDIL))
+        continue;
+
+      // Build a new lexical scope at First's location, carrying a freshly
+      // computed discriminator number.
+      StringRef Filename = FirstDIL.getFilename();
+      unsigned Line = FirstDIL.getLineNumber();
+      unsigned Column = FirstDIL.getColumnNumber();
+      DIScope Scope = FirstDIL.getScope();
+      DIFile File = Builder.createFile(Filename, Scope.getDirectory());
+      unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx);
+      DILexicalBlock NewScope =
+          Builder.createLexicalBlock(Scope, File, Line, Column, Discriminator);
+      DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope);
+      DebugLoc NewLoc = DebugLoc::getFromDILocation(NewDIL);
+
+      // Re-tag First and the run of instructions right after it that carry
+      // the same debug location; stop at the first one that differs.
+      for (BasicBlock::iterator Cur(*First), End = Succ->end(); Cur != End;
+           ++Cur) {
+        if (Cur->getDebugLoc() != FirstLoc)
+          break;
+        Cur->setDebugLoc(NewLoc);
+        DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber()
+                     << ":" << NewDIL.getColumnNumber() << ":"
+                     << NewDIL.getDiscriminator() << *Cur << "\n");
+      }
+      DEBUG(dbgs() << "\n");
+      Changed = true;
+    }
+  }
+  return Changed;
+}
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index 73bb3bf..ab4d8a8 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -1,6 +1,8 @@
LOCAL_PATH:= $(call my-dir)
transforms_utils_SRC_FILES := \
+ AddDiscriminators.cpp \
+ ASanStackFrameLayout.cpp \
BasicBlockUtils.cpp \
BreakCriticalEdges.cpp \
BuildLibCalls.cpp \
@@ -50,6 +52,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
# For the device
# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_utils_SRC_FILES)
@@ -60,3 +63,4 @@ LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 12de9ee..b3cd5ce 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -15,17 +15,17 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -167,15 +167,17 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Finally, erase the old block and update dominator info.
if (P) {
- if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
- if (DomTreeNode *DTN = DT->getNode(BB)) {
- DomTreeNode *PredDTN = DT->getNode(PredBB);
+ if (DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
+ if (DomTreeNode *DTN = DT.getNode(BB)) {
+ DomTreeNode *PredDTN = DT.getNode(PredBB);
SmallVector<DomTreeNode*, 8> Children(DTN->begin(), DTN->end());
for (SmallVectorImpl<DomTreeNode *>::iterator DI = Children.begin(),
DE = Children.end(); DI != DE; ++DI)
- DT->changeImmediateDominator(*DI, PredDTN);
+ DT.changeImmediateDominator(*DI, PredDTN);
- DT->eraseNode(BB);
+ DT.eraseNode(BB);
}
if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
@@ -280,18 +282,20 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
if (Loop *L = LI->getLoopFor(Old))
L->addBasicBlockToLoop(New, LI->getBase());
- if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
+ if (DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
// Old dominates New. New node dominates all other nodes dominated by Old.
- if (DomTreeNode *OldNode = DT->getNode(Old)) {
+ if (DomTreeNode *OldNode = DT.getNode(Old)) {
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
I != E; ++I)
Children.push_back(*I);
- DomTreeNode *NewNode = DT->addNewBlock(New,Old);
+ DomTreeNode *NewNode = DT.addNewBlock(New, Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
E = Children.end(); I != E; ++I)
- DT->changeImmediateDominator(*I, NewNode);
+ DT.changeImmediateDominator(*I, NewNode);
}
}
@@ -336,9 +340,9 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
}
// Update dominator tree if available.
- DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
- if (DT)
- DT->splitBlock(NewBB);
+ if (DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DTWP->getDomTree().splitBlock(NewBB);
if (!L) return;
@@ -630,28 +634,29 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
}
/// SplitBlockAndInsertIfThen - Split the containing block at the
-/// specified instruction - everything before and including Cmp stays
-/// in the old basic block, and everything after Cmp is moved to a
+/// specified instruction - everything before SplitBefore stays in the old
+/// basic block, and SplitBefore and everything after it are moved to a
/// new block. The two blocks are connected by a conditional branch
/// (with value of Cond being the condition).
/// Before:
/// Head
-/// Cmp
+/// SplitBefore
/// Tail
/// After:
/// Head
-/// Cmp
-/// if (Cmp)
+/// if (Cond)
/// ThenBlock
+/// SplitBefore
/// Tail
///
/// If Unreachable is true, then ThenBlock ends with
/// UnreachableInst, otherwise it branches to Tail.
/// Returns the NewBasicBlock's terminator.
-TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
- bool Unreachable, MDNode *BranchWeights) {
- Instruction *SplitBefore = Cmp->getNextNode();
+TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+ Instruction *SplitBefore,
+ bool Unreachable,
+ MDNode *BranchWeights) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
TerminatorInst *HeadOldTerm = Head->getTerminator();
@@ -662,13 +667,51 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
CheckTerm = new UnreachableInst(C, ThenBlock);
else
CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
+ HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc());
HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
return CheckTerm;
}
+/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
+/// but also creates the ElseBlock.
+/// Before:
+/// Head
+/// SplitBefore
+/// Tail
+/// After:
+/// Head
+/// if (Cond)
+/// ThenBlock
+/// else
+/// ElseBlock
+/// SplitBefore
+/// Tail
+void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
+ TerminatorInst **ThenTerm,
+ TerminatorInst **ElseTerm,
+ MDNode *BranchWeights) {
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ TerminatorInst *HeadOldTerm = Head->getTerminator();
+ LLVMContext &C = Head->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ *ThenTerm = BranchInst::Create(Tail, ThenBlock);
+ (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc());
+ *ElseTerm = BranchInst::Create(Tail, ElseBlock);
+ (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc());
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond);
+ HeadNewTerm->setDebugLoc(SplitBefore->getDebugLoc());
+ HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+}
+
+
/// GetIfCondition - Given a basic block (BB) with two predecessors,
/// check to see if the merge at this block is due
/// to an "if condition". If so, return the boolean condition that determines
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 0e7f7f7..76ebb9f 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -20,12 +20,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -39,10 +39,10 @@ namespace {
initializeBreakCriticalEdgesPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfo>();
// No loop canonicalization guarantees are broken by this pass.
@@ -209,7 +209,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If we don't have a pass object, we can't update anything...
if (P == 0) return NewBB;
- DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
+ DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
// If we have nothing to update, just return.
@@ -297,9 +299,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
P->addBasicBlockToLoop(NewBB, LI->getBase());
}
}
- // If TIBB is in a loop and DestBB is outside of that loop, split the
- // other exit blocks of the loop that also have predecessors outside
- // the loop, to maintain a LoopSimplify guarantee.
+ // If TIBB is in a loop and DestBB is outside of that loop, we may need
+ // to update LoopSimplify form and LCSSA form.
if (!TIL->contains(DestBB) &&
P->mustPreserveAnalysisID(LoopSimplifyID)) {
assert(!TIL->contains(NewBB) &&
@@ -309,50 +310,35 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
if (P->mustPreserveAnalysisID(LCSSAID))
createPHIsForSplitLoopExit(TIBB, NewBB, DestBB);
- // For each unique exit block...
- // FIXME: This code is functionally equivalent to the corresponding
- // loop in LoopSimplify.
- SmallVector<BasicBlock *, 4> ExitBlocks;
- TIL->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- // Collect all the preds that are inside the loop, and note
- // whether there are any preds outside the loop.
- SmallVector<BasicBlock *, 4> Preds;
- bool HasPredOutsideOfLoop = false;
- BasicBlock *Exit = ExitBlocks[i];
- for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
- I != E; ++I) {
- BasicBlock *P = *I;
- if (TIL->contains(P)) {
- if (isa<IndirectBrInst>(P->getTerminator())) {
- Preds.clear();
- break;
- }
- Preds.push_back(P);
- } else {
- HasPredOutsideOfLoop = true;
- }
- }
- // If there are any preds not in the loop, we'll need to split
- // the edges. The Preds.empty() check is needed because a block
- // may appear multiple times in the list. We can't use
- // getUniqueExitBlocks above because that depends on LoopSimplify
- // form, which we're in the process of restoring!
- if (!Preds.empty() && HasPredOutsideOfLoop) {
- if (!Exit->isLandingPad()) {
- BasicBlock *NewExitBB =
- SplitBlockPredecessors(Exit, Preds, "split", P);
- if (P->mustPreserveAnalysisID(LCSSAID))
- createPHIsForSplitLoopExit(Preds, NewExitBB, Exit);
- } else if (SplitLandingPads) {
- SmallVector<BasicBlock*, 8> NewBBs;
- SplitLandingPadPredecessors(Exit, Preds,
- ".split1", ".split2",
- P, NewBBs);
- if (P->mustPreserveAnalysisID(LCSSAID))
- createPHIsForSplitLoopExit(Preds, NewBBs[0], Exit);
- }
+ // The only way that we can break LoopSimplify form by splitting a critical
+ // edge is if after the split there exists some edge from TIL to DestBB
+ // *and* the only edge into DestBB from outside of TIL is that of
+ // NewBB. If the first isn't true, then LoopSimplify still holds, NewBB
+ // is the new exit block and it has no non-loop predecessors. If the
+ // second isn't true, then DestBB was not in LoopSimplify form prior to
+ // the split as it had a non-loop predecessor. In both of these cases,
+ // the predecessor must be directly in TIL, not in a subloop, or again
+ // LoopSimplify doesn't hold.
+ SmallVector<BasicBlock *, 4> LoopPreds;
+ for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E;
+ ++I) {
+ BasicBlock *P = *I;
+ if (P == NewBB)
+ continue; // The new block is known.
+ if (LI->getLoopFor(P) != TIL) {
+ // No need to re-simplify, it wasn't to start with.
+ LoopPreds.clear();
+ break;
}
+ LoopPreds.push_back(P);
+ }
+ if (!LoopPreds.empty()) {
+ assert(!DestBB->isLandingPad() &&
+ "We don't split edges to landing pads!");
+ BasicBlock *NewExitBB =
+ SplitBlockPredecessors(DestBB, LoopPreds, "split", P);
+ if (P->mustPreserveAnalysisID(LCSSAID))
+ createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB);
}
}
// LCSSA form was updated above for the case where LoopSimplify is
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 6d13217..82384a1 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -286,6 +286,21 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
return CI;
}
+/// Append a suffix to the function name according to the type of 'Op'.
+static void AppendTypeSuffix(Value *Op, StringRef &Name, SmallString<20> &NameBuffer) {
+ if (!Op->getType()->isDoubleTy()) {
+ NameBuffer += Name;
+
+ if (Op->getType()->isFloatTy())
+ NameBuffer += 'f';
+ else
+ NameBuffer += 'l';
+
+ Name = NameBuffer;
+ }
+ return;
+}
+
/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g.
/// 'floor'). This function is known to take a single argument of type matching 'Op' and
/// returns one value with the same type. If 'Op' is a long double, 'l' is
@@ -293,15 +308,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
const AttributeSet &Attrs) {
SmallString<20> NameBuffer;
- if (!Op->getType()->isDoubleTy()) {
- // If we need to add a suffix, copy into NameBuffer.
- NameBuffer += Name;
- if (Op->getType()->isFloatTy())
- NameBuffer += 'f'; // floorf
- else
- NameBuffer += 'l'; // floorl
- Name = NameBuffer;
- }
+ AppendTypeSuffix(Op, Name, NameBuffer);
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
@@ -314,6 +321,27 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
return CI;
}
+/// EmitBinaryFloatFnCall - Emit a call to the binary function named 'Name'
+/// (e.g. 'fmin'). This function is known to take type matching 'Op1' and 'Op2'
+/// and return one value with the same type. If 'Op1/Op2' are long double, 'l'
+/// is added as the suffix of name, if 'Op1/Op2' is a float, we add a 'f'
+/// suffix.
+Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
+ IRBuilder<> &B, const AttributeSet &Attrs) {
+ SmallString<20> NameBuffer;
+ AppendTypeSuffix(Op1, Name, NameBuffer);
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, Op1->getType(),
+ Op1->getType(), Op2->getType(), NULL);
+ CallInst *CI = B.CreateCall2(Callee, Op1, Op2, Name);
+ CI->setAttributes(Attrs);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 5afd6b8..dac2090 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,6 @@
add_llvm_library(LLVMTransformUtils
+ AddDiscriminators.cpp
+ ASanStackFrameLayout.cpp
BasicBlockUtils.cpp
BreakCriticalEdges.cpp
BuildLibCalls.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d105f5e..a199086 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -17,8 +17,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -26,7 +27,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -88,26 +89,28 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(I) && "No mapping from source argument specified!");
#endif
+ // Copy all attributes other than those stored in the AttributeSet. We need
+ // to remap the parameter indices of the AttributeSet.
+ AttributeSet NewAttrs = NewFunc->getAttributes();
+ NewFunc->copyAttributesFrom(OldFunc);
+ NewFunc->setAttributes(NewAttrs);
+
AttributeSet OldAttrs = OldFunc->getAttributes();
// Clone any argument attributes that are present in the VMap.
- for (Function::const_arg_iterator I = OldFunc->arg_begin(),
- E = OldFunc->arg_end();
- I != E; ++I)
- if (Argument *Anew = dyn_cast<Argument>(VMap[I])) {
+ for (const Argument &OldArg : OldFunc->args())
+ if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
AttributeSet attrs =
- OldAttrs.getParamAttributes(I->getArgNo() + 1);
+ OldAttrs.getParamAttributes(OldArg.getArgNo() + 1);
if (attrs.getNumSlots() > 0)
- Anew->addAttr(attrs);
+ NewArg->addAttr(attrs);
}
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::ReturnIndex,
- OldAttrs.getRetAttributes()));
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::FunctionIndex,
- OldAttrs.getFnAttributes()));
+ NewFunc->setAttributes(
+ NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex,
+ OldAttrs.getRetAttributes())
+ .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex,
+ OldAttrs.getFnAttributes()));
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
@@ -151,6 +154,54 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
TypeMapper, Materializer);
}
+// Find the MDNode which corresponds to the DISubprogram data that describes F.
+static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) {
+ for (DISubprogram Subprogram : Finder.subprograms()) {
+ if (Subprogram.describes(F)) return Subprogram;
+ }
+ return NULL;
+}
+
+// Add an operand to an existing MDNode. The new operand will be added at the
+// back of the operand list.
+static void AddOperand(MDNode *Node, Value *Operand) {
+ SmallVector<Value*, 16> Operands;
+ for (unsigned i = 0; i < Node->getNumOperands(); i++) {
+ Operands.push_back(Node->getOperand(i));
+ }
+ Operands.push_back(Operand);
+ MDNode *NewNode = MDNode::get(Node->getContext(), Operands);
+ Node->replaceAllUsesWith(NewNode);
+}
+
+// Clone the module-level debug info associated with OldFunc. The cloned data
+// will point to NewFunc instead.
+static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
+ ValueToValueMapTy &VMap) {
+ DebugInfoFinder Finder;
+ Finder.processModule(*OldFunc->getParent());
+
+ const MDNode *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
+ if (!OldSubprogramMDNode) return;
+
+ // Ensure that OldFunc appears in the map.
+ // (if it's already there it must point to NewFunc anyway)
+ VMap[OldFunc] = NewFunc;
+ DISubprogram NewSubprogram(MapValue(OldSubprogramMDNode, VMap));
+
+ for (DICompileUnit CU : Finder.compile_units()) {
+ DIArray Subprograms(CU.getSubprograms());
+
+ // If the compile unit's function list contains the old function, it should
+ // also contain the new one.
+ for (unsigned i = 0; i < Subprograms.getNumElements(); i++) {
+ if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) {
+ AddOperand(Subprograms, NewSubprogram);
+ }
+ }
+ }
+}
+
/// CloneFunction - Return a copy of the specified function, but without
/// embedding the function into another module. Also, any references specified
/// in the VMap are changed to refer to their mapped value instead of the
@@ -188,6 +239,9 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
VMap[I] = DestI++; // Add mapping to VMap
}
+ if (ModuleLevelChanges)
+ CloneDebugInfoMetadata(NewF, F, VMap);
+
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
return NewF;
@@ -205,17 +259,17 @@ namespace {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
- const DataLayout *TD;
+ const DataLayout *DL;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap,
bool moduleLevelChanges,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
- const DataLayout *td)
+ const DataLayout *DL)
: NewFunc(newFunc), OldFunc(oldFunc),
VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL) {
}
/// CloneBlock - The specified block is found to be reachable, clone it and
@@ -272,7 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
// the basic block.
- if (Value *V = SimplifyInstruction(NewInst, TD)) {
+ if (Value *V = SimplifyInstruction(NewInst, DL)) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
if (Value *MappedV = VMap.lookup(V))
@@ -368,7 +422,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const DataLayout *TD,
+ const DataLayout *DL,
Instruction *TheCall) {
assert(NameSuffix && "NameSuffix cannot be null!");
@@ -379,7 +433,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo, TD);
+ NameSuffix, CodeInfo, DL);
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
@@ -509,7 +563,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// node).
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
- recursivelySimplifyInstruction(PN, TD);
+ recursivelySimplifyInstruction(PN, DL);
// Now that the inlined function body has been fully constructed, go through
// and zap unconditional fall-through branches. This happens all the time when
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 6f00864..b814842 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -14,20 +14,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -86,7 +86,7 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
}
#ifndef NDEBUG
- for (SetVector<BasicBlock *>::iterator I = llvm::next(Result.begin()),
+ for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()),
E = Result.end();
I != E; ++I)
for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I);
@@ -171,9 +171,8 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs,
if (definedInCaller(Blocks, *OI))
Inputs.insert(*OI);
- for (Value::use_iterator UI = II->use_begin(), UE = II->use_end();
- UI != UE; ++UI)
- if (!definedInRegion(Blocks, *UI)) {
+ for (User *U : II->users())
+ if (!definedInRegion(Blocks, U)) {
Outputs.insert(II);
break;
}
@@ -369,7 +368,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
} else
RewriteVal = AI++;
- std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+ std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end());
for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
use != useE; ++use)
if (Instruction* inst = dyn_cast<Instruction>(*use))
@@ -389,7 +388,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// Rewrite branches to basic blocks outside of the loop to new dummy blocks
// within the new function. This must be done before we lose track of which
// blocks were originally in the code region.
- std::vector<User*> Users(header->use_begin(), header->use_end());
+ std::vector<User*> Users(header->user_begin(), header->user_end());
for (unsigned i = 0, e = Users.size(); i != e; ++i)
// The BasicBlock which contains the branch is not in the region
// modify the branch target to a new block
@@ -405,13 +404,12 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
/// that uses the value within the basic block, and return the predecessor
/// block associated with that use, or return 0 if none is found.
static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
- for (Value::use_iterator UI = Used->use_begin(),
- UE = Used->use_end(); UI != UE; ++UI) {
- PHINode *P = dyn_cast<PHINode>(*UI);
+ for (Use &U : Used->uses()) {
+ PHINode *P = dyn_cast<PHINode>(U.getUser());
if (P && P->getParent() == BB)
- return P->getIncomingBlock(UI);
+ return P->getIncomingBlock(U);
}
-
+
return 0;
}
@@ -502,7 +500,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
Reloads.push_back(load);
codeReplacer->getInstList().push_back(load);
- std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+ std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end());
for (unsigned u = 0, e = Users.size(); u != e; ++u) {
Instruction *inst = cast<Instruction>(Users[u]);
if (!Blocks.count(inst->getParent()))
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 0723b35..ac6926f 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -8,12 +8,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
/// DemoteRegToStack - This function takes a virtual register computed by an
@@ -41,7 +41,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
// Change all of the users of the instruction to read from the stack slot.
while (!I.use_empty()) {
- Instruction *U = cast<Instruction>(I.use_back());
+ Instruction *U = cast<Instruction>(I.user_back());
if (PHINode *PN = dyn_cast<PHINode>(U)) {
// If this is a PHI node, we can't insert a load of the value before the
// use. Instead insert the load in the predecessor block corresponding
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 1da226b..39c80f8 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -240,7 +240,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
CmpInst::Predicate Predicate = CI->getPredicate();
- // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
+ // Canonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
BI->swapSuccessors();
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index 5f0a563..e9ebc45 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -9,9 +9,9 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
@@ -35,9 +35,8 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
if (isa<GlobalValue>(C))
return false;
- for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
- ++UI)
- if (const Constant *CU = dyn_cast<Constant>(*UI)) {
+ for (const User *U : C->users())
+ if (const Constant *CU = dyn_cast<Constant>(U)) {
if (!isSafeToDestroyConstant(CU))
return false;
} else
@@ -47,10 +46,9 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
SmallPtrSet<const PHINode *, 16> &PhiUsers) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
- ++UI) {
- const User *U = *UI;
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ for (const Use &U : V->uses()) {
+ const User *UR = U.getUser();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(UR)) {
GS.HasNonInstructionUser = true;
// If the result of the constantexpr isn't pointer type, then we won't
@@ -60,7 +58,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
if (analyzeGlobalAux(CE, GS, PhiUsers))
return true;
- } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
if (!GS.HasMultipleAccessingFunctions) {
const Function *F = I->getParent()->getParent();
if (GS.AccessingFunction == 0)
@@ -150,13 +148,13 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
return true;
GS.StoredType = GlobalStatus::Stored;
} else if (ImmutableCallSite C = I) {
- if (!C.isCallee(UI))
+ if (!C.isCallee(&U))
return true;
GS.IsLoaded = true;
} else {
return true; // Any other non-load instruction might take address!
}
- } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ } else if (const Constant *C = dyn_cast<Constant>(UR)) {
GS.HasNonInstructionUser = true;
// We might have a dead and dangling constant hanging off of here.
if (!isSafeToDestroyConstant(C))
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index d021bce..86def3e 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -17,17 +17,17 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/DebugInfo.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -144,7 +144,6 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
void InvokeInliningInfo::forwardResume(ResumeInst *RI,
SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) {
BasicBlock *Dest = getInnerResumeDest();
- LandingPadInst *OuterLPad = getLandingPadInst();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -155,16 +154,6 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
RI->eraseFromParent();
-
- // Append the clauses from the outer landing pad instruction into the inlined
- // landing pad instructions.
- for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
- E = InlinedLPads.end(); I != E; ++I) {
- LandingPadInst *InlinedLPad = *I;
- for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses();
- OuterIdx != OuterNum; ++OuterIdx)
- InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
- }
}
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
@@ -172,22 +161,11 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
/// invokes. This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
-///
-/// Returns true to indicate that the next block should be skipped.
-static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
InvokeInliningInfo &Invoke) {
- LandingPadInst *LPI = Invoke.getLandingPadInst();
-
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *I = BBI++;
- if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
- unsigned NumClauses = LPI->getNumClauses();
- L->reserveClauses(NumClauses);
- for (unsigned i = 0; i != NumClauses; ++i)
- L->addClause(LPI->getClause(i));
- }
-
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
@@ -223,10 +201,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
// Update any PHI nodes in the exceptional block to indicate that there is
// now a new entry in them.
Invoke.addIncomingPHIValuesFor(BB);
- return false;
+ return;
}
-
- return false;
}
/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
@@ -252,13 +228,23 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
InlinedLPads.insert(II->getLandingPadInst());
+ // Append the clauses from the outer landing pad instruction into the inlined
+ // landing pad instructions.
+ LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
+ for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(),
+ E = InlinedLPads.end(); I != E; ++I) {
+ LandingPadInst *InlinedLPad = *I;
+ unsigned OuterNum = OuterLPad->getNumClauses();
+ InlinedLPad->reserveClauses(OuterNum);
+ for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
+ InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
+ if (OuterLPad->isCleanup())
+ InlinedLPad->setCleanup(true);
+ }
+
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
- if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
- // Honor a request to skip the next block.
- ++BB;
- continue;
- }
+ HandleCallsInBlockInlinedThroughInvoke(BB, Invoke);
// Forward any resumes that are remaining here.
if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
@@ -357,7 +343,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
- IFI.TD) >= ByValAlignment)
+ IFI.DL) >= ByValAlignment)
return Arg;
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
@@ -370,8 +356,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// Create the alloca. If we have DataLayout, use nice alignment.
unsigned Align = 1;
- if (IFI.TD)
- Align = IFI.TD->getPrefTypeAlignment(AggTy);
+ if (IFI.DL)
+ Align = IFI.DL->getPrefTypeAlignment(AggTy);
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
@@ -391,11 +377,11 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
Value *Size;
- if (IFI.TD == 0)
+ if (IFI.DL == 0)
Size = ConstantExpr::getSizeOf(AggTy);
else
Size = ConstantInt::get(Type::getInt64Ty(Context),
- IFI.TD->getTypeStoreSize(AggTy));
+ IFI.DL->getTypeStoreSize(AggTy));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
@@ -415,9 +401,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
// intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
- for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
- ++UI) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI)) {
+ for (User *U : V->users()) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::lifetime_start:
@@ -437,11 +422,10 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
return isUsedByLifetimeMarker(AI);
// Do a scan to find all the casts to i8*.
- for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
- ++I) {
- if (I->getType() != Int8PtrTy) continue;
- if (I->stripPointerCasts() != AI) continue;
- if (isUsedByLifetimeMarker(*I))
+ for (User *U : AI->users()) {
+ if (U->getType() != Int8PtrTy) continue;
+ if (U->stripPointerCasts() != AI) continue;
+ if (isUsedByLifetimeMarker(U))
return true;
}
return false;
@@ -613,7 +597,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, IFI.TD, TheCall);
+ &InlinedFunctionInfo, IFI.DL, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
@@ -683,9 +667,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
ConstantInt *AllocaSize = 0;
if (ConstantInt *AIArraySize =
dyn_cast<ConstantInt>(AI->getArraySize())) {
- if (IFI.TD) {
+ if (IFI.DL) {
Type *AllocaType = AI->getAllocatedType();
- uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType);
+ uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType);
uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
// Check that array size doesn't saturate uint64_t and doesn't
@@ -922,7 +906,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
if (PHI) {
- if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
+ if (Value *V = SimplifyInstruction(PHI, IFI.DL)) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index a020bc7..da890a2 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -27,11 +27,11 @@ namespace {
initializeInstNamerPass(*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &Info) const {
+ void getAnalysisUsage(AnalysisUsage &Info) const override {
Info.setPreservesAll();
}
- bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
AI != AE; ++AI)
if (!AI->hasName() && !AI->getType()->isVoidTy())
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 3cb8ded..e73a543 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains an implementation of 32bit scalar integer division for
-// targets that don't have native support. It's largely derived from
-// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the
-// amount of control flow
+// This file contains an implementation of 32bit and 64bit scalar integer
+// division for targets that don't have native support. It's largely derived
+// from compiler-rt's implementations of __udivsi3 and __udivmoddi4,
+// but hand-tuned for targets that prefer less control flow.
//
//===----------------------------------------------------------------------===//
@@ -20,6 +20,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include <utility>
using namespace llvm;
@@ -31,7 +32,18 @@ using namespace llvm;
/// be expanded if the user wishes
static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
- ConstantInt *ThirtyOne = Builder.getInt32(31);
+ unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
+ ConstantInt *Shift;
+
+ if (BitWidth == 64) {
+ Shift = Builder.getInt64(63);
+ } else {
+ assert(BitWidth == 32 && "Unexpected bit width");
+ Shift = Builder.getInt32(31);
+ }
+
+ // Following instructions are generated for both i32 (shift 31) and
+ // i64 (shift 63).
// ; %dividend_sgn = ashr i32 %dividend, 31
// ; %divisor_sgn = ashr i32 %divisor, 31
@@ -42,8 +54,8 @@ static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
// ; %urem = urem i32 %dividend, %divisor
// ; %xored = xor i32 %urem, %dividend_sgn
// ; %srem = sub i32 %xored, %dividend_sgn
- Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne);
- Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *DividendSign = Builder.CreateAShr(Dividend, Shift);
+ Value *DivisorSign = Builder.CreateAShr(Divisor, Shift);
Value *DvdXor = Builder.CreateXor(Dividend, DividendSign);
Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign);
Value *UDividend = Builder.CreateSub(DvdXor, DividendSign);
@@ -68,6 +80,8 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
// Remainder = Dividend - Quotient*Divisor
+ // Following instructions are generated for both i32 and i64
+
// ; %quotient = udiv i32 %dividend, %divisor
// ; %product = mul i32 %divisor, %quotient
// ; %remainder = sub i32 %dividend, %product
@@ -88,9 +102,20 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
/// present, i.e. not folded), ready to be expanded if the user wishes.
static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
- // Implementation taken from compiler-rt's __divsi3
+ // Implementation taken from compiler-rt's __divsi3 and __divdi3
- ConstantInt *ThirtyOne = Builder.getInt32(31);
+ unsigned BitWidth = Dividend->getType()->getIntegerBitWidth();
+ ConstantInt *Shift;
+
+ if (BitWidth == 64) {
+ Shift = Builder.getInt64(63);
+ } else {
+ assert(BitWidth == 32 && "Unexpected bit width");
+ Shift = Builder.getInt32(31);
+ }
+
+ // Following instructions are generated for both i32 (shift 31) and
+ // i64 (shift 63).
// ; %tmp = ashr i32 %dividend, 31
// ; %tmp1 = ashr i32 %divisor, 31
@@ -102,8 +127,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
// ; %q_mag = udiv i32 %u_dvnd, %u_dvsr
// ; %tmp4 = xor i32 %q_mag, %q_sgn
// ; %q = sub i32 %tmp4, %q_sgn
- Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne);
- Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne);
+ Value *Tmp = Builder.CreateAShr(Dividend, Shift);
+ Value *Tmp1 = Builder.CreateAShr(Divisor, Shift);
Value *Tmp2 = Builder.CreateXor(Tmp, Dividend);
Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor);
@@ -119,9 +144,9 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
return Q;
}
-/// Generates code to divide two unsigned scalar 32-bit integers. Returns the
-/// quotient, rounded towards 0. Builder's insert point should be pointing where
-/// the caller wants code generated, e.g. at the udiv instruction.
+/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers.
+/// Returns the quotient, rounded towards 0. Builder's insert point should
+/// point where the caller wants code generated, e.g. at the udiv instruction.
static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
IRBuilder<> &Builder) {
// The basic algorithm can be found in the compiler-rt project's
@@ -129,18 +154,33 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// that's been hand-tuned to lessen the amount of control flow involved.
// Some helper values
- IntegerType *I32Ty = Builder.getInt32Ty();
+ IntegerType *DivTy = cast<IntegerType>(Dividend->getType());
+ unsigned BitWidth = DivTy->getBitWidth();
+
+ ConstantInt *Zero;
+ ConstantInt *One;
+ ConstantInt *NegOne;
+ ConstantInt *MSB;
+
+ if (BitWidth == 64) {
+ Zero = Builder.getInt64(0);
+ One = Builder.getInt64(1);
+ NegOne = ConstantInt::getSigned(DivTy, -1);
+ MSB = Builder.getInt64(63);
+ } else {
+ assert(BitWidth == 32 && "Unexpected bit width");
+ Zero = Builder.getInt32(0);
+ One = Builder.getInt32(1);
+ NegOne = ConstantInt::getSigned(DivTy, -1);
+ MSB = Builder.getInt32(31);
+ }
- ConstantInt *Zero = Builder.getInt32(0);
- ConstantInt *One = Builder.getInt32(1);
- ConstantInt *ThirtyOne = Builder.getInt32(31);
- ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1);
- ConstantInt *True = Builder.getTrue();
+ ConstantInt *True = Builder.getTrue();
BasicBlock *IBB = Builder.GetInsertBlock();
Function *F = IBB->getParent();
- Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
- I32Ty);
+ Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ DivTy);
// Our CFG is going to look like:
// +---------------------+
@@ -190,6 +230,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// We'll be overwriting the terminator to insert our extra blocks
SpecialCases->getTerminator()->eraseFromParent();
+ // Same instructions are generated for both i32 (msb 31) and i64 (msb 63).
+
// First off, check for special cases: dividend or divisor is zero, divisor
// is greater than dividend, and divisor is 1.
// ; special-cases:
@@ -209,12 +251,12 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero);
Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero);
Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2);
- Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True);
- Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True);
+ Value *Tmp0 = Builder.CreateCall2(CTLZ, Divisor, True);
+ Value *Tmp1 = Builder.CreateCall2(CTLZ, Dividend, True);
Value *SR = Builder.CreateSub(Tmp0, Tmp1);
- Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne);
+ Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4);
- Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne);
+ Value *RetDividend = Builder.CreateICmpEQ(SR, MSB);
Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend);
Builder.CreateCondBr(EarlyRet, End, BB1);
@@ -227,7 +269,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// ; br i1 %skipLoop, label %loop-exit, label %preheader
Builder.SetInsertPoint(BB1);
Value *SR_1 = Builder.CreateAdd(SR, One);
- Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR);
+ Value *Tmp2 = Builder.CreateSub(MSB, SR);
Value *Q = Builder.CreateShl(Dividend, Tmp2);
Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
@@ -260,17 +302,17 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// ; %tmp12 = icmp eq i32 %sr_2, 0
// ; br i1 %tmp12, label %loop-exit, label %do-while
Builder.SetInsertPoint(DoWhile);
- PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2);
- PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2);
- PHINode *R_1 = Builder.CreatePHI(I32Ty, 2);
- PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2);
+ PHINode *SR_3 = Builder.CreatePHI(DivTy, 2);
+ PHINode *R_1 = Builder.CreatePHI(DivTy, 2);
+ PHINode *Q_2 = Builder.CreatePHI(DivTy, 2);
Value *Tmp5 = Builder.CreateShl(R_1, One);
- Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne);
+ Value *Tmp6 = Builder.CreateLShr(Q_2, MSB);
Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6);
Value *Tmp8 = Builder.CreateShl(Q_2, One);
Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8);
Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7);
- Value *Tmp10 = Builder.CreateAShr(Tmp9, 31);
+ Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB);
Value *Carry = Builder.CreateAnd(Tmp10, One);
Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
Value *R = Builder.CreateSub(Tmp7, Tmp11);
@@ -285,8 +327,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// ; %q_4 = or i32 %carry_2, %tmp13
// ; br label %end
Builder.SetInsertPoint(LoopExit);
- PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2);
- PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2);
+ PHINode *Q_3 = Builder.CreatePHI(DivTy, 2);
Value *Tmp13 = Builder.CreateShl(Q_3, One);
Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13);
Builder.CreateBr(End);
@@ -295,7 +337,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
// ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
// ; ret i32 %q_5
Builder.SetInsertPoint(End, End->begin());
- PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2);
+ PHINode *Q_5 = Builder.CreatePHI(DivTy, 2);
// Populate the Phis, since all values have now been created. Our Phis were:
// ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
@@ -326,9 +368,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
/// Generate code to calculate the remainder of two integers, replacing Rem with
/// the generated code. This currently generates code using the udiv expansion,
/// but future work includes generating more specialized code, e.g. when more
-/// information about the operands are known. Currently only implements 32bit
-/// scalar division (due to udiv's limitation), but future work is removing this
-/// limitation.
+/// information about the operands is known. Implements both 32bit and 64bit
+/// scalar division.
///
/// @brief Replace Rem with generated code.
bool llvm::expandRemainder(BinaryOperator *Rem) {
@@ -338,6 +379,15 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
IRBuilder<> Builder(Rem);
+ Type *RemTy = Rem->getType();
+ if (RemTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+ if (RemTyBitWidth != 32 && RemTyBitWidth != 64)
+ llvm_unreachable("Div of bitwidth other than 32 or 64 not supported");
+
// First prepare the sign if it's a signed remainder
if (Rem->getOpcode() == Instruction::SRem) {
Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
@@ -376,9 +426,8 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {
/// Generate code to divide two integers, replacing Div with the generated
/// code. This currently generates code similarly to compiler-rt's
/// implementations, but future work includes generating more specialized code
-/// when more information about the operands are known. Currently only
-/// implements 32bit scalar division, but future work is removing this
-/// limitation.
+/// when more information about the operands is known. Implements both
+/// 32bit and 64bit scalar division.
///
/// @brief Replace Div with generated code.
bool llvm::expandDivision(BinaryOperator *Div) {
@@ -388,9 +437,15 @@ bool llvm::expandDivision(BinaryOperator *Div) {
IRBuilder<> Builder(Div);
- if (Div->getType()->isVectorTy())
+ Type *DivTy = Div->getType();
+ if (DivTy->isVectorTy())
llvm_unreachable("Div over vectors not supported");
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ if (DivTyBitWidth != 32 && DivTyBitWidth != 64)
+ llvm_unreachable("Div of bitwidth other than 32 or 64 not supported");
+
// First prepare the sign if it's a signed division
if (Div->getOpcode() == Instruction::SDiv) {
// Lower the code to unsigned division, and reset Div to point to the udiv.
@@ -443,7 +498,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
if (RemTyBitWidth == 32)
return expandRemainder(Rem);
- // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+ // If bitwidth smaller than 32 extend inputs, extend output and proceed
// with 32 bit division.
IRBuilder<> Builder(Rem);
@@ -471,6 +526,55 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
return expandRemainder(cast<BinaryOperator>(ExtRem));
}
+/// Expand an srem/urem of a scalar integer of bitwidth up to 64 bits. A 64-bit
+/// Rem goes straight to expandRemainder; a narrower Rem is redone in i64 on
+/// sign-/zero-extended operands and its result truncated back to Rem's type.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ Type *RemTy = Rem->getType();
+ if (RemTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+ if (RemTyBitWidth > 64)
+ llvm_unreachable("Div of bitwidth greater than 64 not supported");
+
+ if (RemTyBitWidth == 64)
+ return expandRemainder(Rem);
+
+ // If bitwidth smaller than 64 extend inputs, truncate output and proceed
+ // with the 64 bit remainder expansion.
+ IRBuilder<> Builder(Rem);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtRem;
+ Value *Trunc;
+ Type *Int64Ty = Builder.getInt64Ty();
+
+ if (Rem->getOpcode() == Instruction::SRem) {
+ ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty);
+ ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty);
+ ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+ Rem->replaceAllUsesWith(Trunc);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
/// above routines and extends the inputs/truncates the outputs to operate
@@ -495,7 +599,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
if (DivTyBitWidth == 32)
return expandDivision(Div);
- // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+ // If bitwidth smaller than 32 extend inputs, extend output and proceed
// with 32 bit division.
IRBuilder<> Builder(Div);
@@ -522,3 +626,53 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
return expandDivision(cast<BinaryOperator>(ExtDiv));
}
+
+/// Expand an sdiv/udiv of a scalar integer of bitwidth up to 64 bits. A 64-bit
+/// Div goes straight to expandDivision; a narrower Div is redone in i64 on
+/// sign-/zero-extended operands and its result truncated back to Div's type.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ Type *DivTy = Div->getType();
+ if (DivTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ if (DivTyBitWidth > 64)
+ llvm_unreachable("Div of bitwidth greater than 64 not supported");
+
+ if (DivTyBitWidth == 64)
+ return expandDivision(Div);
+
+ // If bitwidth smaller than 64 extend inputs, truncate output and proceed
+ // with 64 bit division.
+ IRBuilder<> Builder(Div);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtDiv;
+ Value *Trunc;
+ Type *Int64Ty = Builder.getInt64Ty();
+
+ if (Div->getOpcode() == Instruction::SDiv) {
+ ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty);
+ ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty);
+ ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+ Div->replaceAllUsesWith(Trunc);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index f15e8d5..d538175 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -31,216 +31,103 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Pass.h"
-#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumLCSSA, "Number of live out of a loop variables");
-namespace {
- struct LCSSA : public LoopPass {
- static char ID; // Pass identification, replacement for typeid
- LCSSA() : LoopPass(ID) {
- initializeLCSSAPass(*PassRegistry::getPassRegistry());
- }
-
- // Cached analysis information for the current function.
- DominatorTree *DT;
- LoopInfo *LI;
- ScalarEvolution *SE;
- PredIteratorCache PredCache;
- Loop *L;
-
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG. It maintains both of these,
- /// as well as the CFG. It also requires dominator information.
- ///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
-
- AU.addRequired<DominatorTree>();
- AU.addRequired<LoopInfo>();
- AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<ScalarEvolution>();
- }
- private:
- bool ProcessInstruction(Instruction *Inst,
- const SmallVectorImpl<BasicBlock*> &ExitBlocks);
-
- /// verifyAnalysis() - Verify loop nest.
- virtual void verifyAnalysis() const {
- // Check the special guarantees that LCSSA makes.
- assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
- }
- };
-}
-
-char LCSSA::ID = 0;
-INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
-
-Pass *llvm::createLCSSAPass() { return new LCSSA(); }
-char &llvm::LCSSAID = LCSSA::ID;
-
-
-/// BlockDominatesAnExit - Return true if the specified block dominates at least
-/// one of the blocks in the specified list.
-static bool BlockDominatesAnExit(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock*> &ExitBlocks,
- DominatorTree *DT) {
- DomTreeNode *DomNode = DT->getNode(BB);
+/// Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+ if (ExitBlocks[i] == BB)
return true;
-
return false;
}
+/// Given an instruction in the loop, check to see if it has any uses that are
+/// outside the current loop. If so, insert LCSSA PHI nodes and rewrite the
+/// uses.
+static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ PredIteratorCache &PredCache) {
+ SmallVector<Use *, 16> UsesToRewrite;
-/// runOnFunction - Process all loops in the function, inner-most out.
-bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
- L = TheLoop;
-
- DT = &getAnalysis<DominatorTree>();
- LI = &getAnalysis<LoopInfo>();
- SE = getAnalysisIfAvailable<ScalarEvolution>();
+ BasicBlock *InstBB = Inst.getParent();
- // Get the set of exiting blocks.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
-
- if (ExitBlocks.empty())
- return false;
-
- // Look at all the instructions in the loop, checking to see if they have uses
- // outside the loop. If so, rewrite those uses.
- bool MadeChange = false;
-
- for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
- BBI != E; ++BBI) {
- BasicBlock *BB = *BBI;
-
- // For large loops, avoid use-scanning by using dominance information: In
- // particular, if a block does not dominate any of the loop exits, then none
- // of the values defined in the block could be used outside the loop.
- if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
- continue;
-
- for (BasicBlock::iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- // Reject two common cases fast: instructions with no uses (like stores)
- // and instructions with one use that is in the same block as this.
- if (I->use_empty() ||
- (I->hasOneUse() && I->use_back()->getParent() == BB &&
- !isa<PHINode>(I->use_back())))
- continue;
-
- MadeChange |= ProcessInstruction(I, ExitBlocks);
- }
- }
-
- // If we modified the code, remove any caches about the loop from SCEV to
- // avoid dangling entries.
- // FIXME: This is a big hammer, can we clear the cache more selectively?
- if (SE && MadeChange)
- SE->forgetLoop(L);
-
- assert(L->isLCSSAForm(*DT));
- PredCache.clear();
-
- return MadeChange;
-}
-
-/// isExitBlock - Return true if the specified block is in the list.
-static bool isExitBlock(BasicBlock *BB,
- const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i] == BB)
- return true;
- return false;
-}
+ for (Use &U : Inst.uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User))
+ UserBB = PN->getIncomingBlock(U);
-/// ProcessInstruction - Given an instruction in the loop, check to see if it
-/// has any uses that are outside the current loop. If so, insert LCSSA PHI
-/// nodes and rewrite the uses.
-bool LCSSA::ProcessInstruction(Instruction *Inst,
- const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
- SmallVector<Use*, 16> UsesToRewrite;
-
- BasicBlock *InstBB = Inst->getParent();
-
- for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
- UI != E; ++UI) {
- User *U = *UI;
- BasicBlock *UserBB = cast<Instruction>(U)->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(U))
- UserBB = PN->getIncomingBlock(UI);
-
- if (InstBB != UserBB && !L->contains(UserBB))
- UsesToRewrite.push_back(&UI.getUse());
+ if (InstBB != UserBB && !L.contains(UserBB))
+ UsesToRewrite.push_back(&U);
}
// If there are no uses outside the loop, exit with no change.
- if (UsesToRewrite.empty()) return false;
-
+ if (UsesToRewrite.empty())
+ return false;
+
++NumLCSSA; // We are applying the transformation
// Invoke instructions are special in that their result value is not available
- // along their unwind edge. The code below tests to see whether DomBB dominates
+ // along their unwind edge. The code below tests to see whether DomBB
+ // dominates
// the value, so adjust DomBB to the normal destination block, which is
// effectively where the value is first usable.
- BasicBlock *DomBB = Inst->getParent();
- if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
+ BasicBlock *DomBB = Inst.getParent();
+ if (InvokeInst *Inv = dyn_cast<InvokeInst>(&Inst))
DomBB = Inv->getNormalDest();
- DomTreeNode *DomNode = DT->getNode(DomBB);
+ DomTreeNode *DomNode = DT.getNode(DomBB);
- SmallVector<PHINode*, 16> AddedPHIs;
+ SmallVector<PHINode *, 16> AddedPHIs;
SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(Inst->getType(), Inst->getName());
-
+ SSAUpdate.Initialize(Inst.getType(), Inst.getName());
+
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
- for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
- BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+ for (SmallVectorImpl<BasicBlock *>::const_iterator BBI = ExitBlocks.begin(),
+ BBE = ExitBlocks.end();
+ BBI != BBE; ++BBI) {
BasicBlock *ExitBB = *BBI;
- if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
-
+ if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
+ continue;
+
// If we already inserted something for this BB, don't reprocess it.
- if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
-
- PHINode *PN = PHINode::Create(Inst->getType(),
- PredCache.GetNumPreds(ExitBB),
- Inst->getName()+".lcssa",
- ExitBB->begin());
+ if (SSAUpdate.HasValueForBlock(ExitBB))
+ continue;
+
+ PHINode *PN = PHINode::Create(Inst.getType(), PredCache.GetNumPreds(ExitBB),
+ Inst.getName() + ".lcssa", ExitBB->begin());
// Add inputs from inside the loop for this PHI.
for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
- PN->addIncoming(Inst, *PI);
+ PN->addIncoming(&Inst, *PI);
// If the exit block has a predecessor not within the loop, arrange for
// the incoming value use corresponding to that predecessor to be
// rewritten in terms of a different LCSSA PHI.
- if (!L->contains(*PI))
+ if (!L.contains(*PI))
UsesToRewrite.push_back(
- &PN->getOperandUse(
- PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ &PN->getOperandUse(PN->getOperandNumForIncomingValue(
+ PN->getNumIncomingValues() - 1)));
}
AddedPHIs.push_back(PN);
-
+
// Remember that this phi makes the value alive in this block.
SSAUpdate.AddAvailableValue(ExitBB, PN);
}
@@ -257,15 +144,14 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
if (PHINode *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
- if (isa<PHINode>(UserBB->begin()) &&
- isExitBlock(UserBB, ExitBlocks)) {
+ if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
// Tell the VHs that the uses changed. This updates SCEV's caches.
if (UsesToRewrite[i]->get()->hasValueHandle())
ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin());
UsesToRewrite[i]->set(UserBB->begin());
continue;
}
-
+
// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UsesToRewrite[i]);
}
@@ -275,7 +161,154 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
if (AddedPHIs[i]->use_empty())
AddedPHIs[i]->eraseFromParent();
}
-
+
return true;
}
+/// Return true if \p BB dominates at least one of the blocks in
+/// \p ExitBlocks, according to the dominator tree \p DT.
+static bool
+blockDominatesAnExit(BasicBlock *BB,
+ DominatorTree &DT,
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
+ DomTreeNode *DomNode = DT.getNode(BB);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ if (DT.dominates(DomNode, DT.getNode(ExitBlocks[i])))
+ return true;
+
+ return false; // BB dominates none of the listed blocks.
+}
+
+bool llvm::formLCSSA(Loop &L, DominatorTree &DT, ScalarEvolution *SE) {
+ bool Changed = false;
+
+ // Get the set of exit blocks; without any there is nothing to process.
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L.getExitBlocks(ExitBlocks);
+
+ if (ExitBlocks.empty())
+ return false;
+
+ PredIteratorCache PredCache;
+
+ // Look at all the instructions in the loop, checking to see if they have uses
+ // outside the loop. If so, rewrite those uses.
+ for (Loop::block_iterator BBI = L.block_begin(), BBE = L.block_end();
+ BBI != BBE; ++BBI) {
+ BasicBlock *BB = *BBI;
+
+ // For large loops, avoid use-scanning by using dominance information: In
+ // particular, if a block does not dominate any of the loop exits, then none
+ // of the values defined in the block could be used outside the loop.
+ if (!blockDominatesAnExit(BB, DT, ExitBlocks))
+ continue;
+
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Reject two common cases fast: instructions with no uses (like stores)
+ // and instructions whose single use is a non-PHI user in this same block.
+ if (I->use_empty() ||
+ (I->hasOneUse() && I->user_back()->getParent() == BB &&
+ !isa<PHINode>(I->user_back())))
+ continue;
+
+ Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache);
+ }
+ }
+
+ // If we modified the code, remove any caches about the loop from SCEV to
+ // avoid dangling entries. SE may be null, in which case this is skipped.
+ // FIXME: This is a big hammer, can we clear the cache more selectively?
+ if (SE && Changed)
+ SE->forgetLoop(&L);
+
+ assert(L.isLCSSAForm(DT)); // Post-condition: L must now be in LCSSA form.
+
+ return Changed; // True if processInstruction reported any change.
+}
+
+/// Form LCSSA for a whole loop nest, visiting subloops before the loop itself.
+bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT,
+ ScalarEvolution *SE) {
+ bool Changed = false;
+
+ // Recurse depth-first through inner loops.
+ for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
+ Changed |= formLCSSARecursively(**LI, DT, SE);
+
+ Changed |= formLCSSA(L, DT, SE); // Finally handle L itself.
+ return Changed;
+}
+
+namespace {
+struct LCSSA : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LCSSA() : FunctionPass(ID) {
+ initializeLCSSAPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Cached analysis information for the current function.
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+ bool runOnFunction(Function &F) override;
+
+ /// This transformation requires dominator and natural loop information.
+ /// It preserves the CFG and the LoopSimplify form, and it marks alias
+ /// analysis and scalar evolution as preserved.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ }
+
+private:
+ bool processLoop(Loop &L);
+
+ void verifyAnalysis() const override;
+};
+} // end anonymous namespace
+
+char LCSSA::ID = 0; // Pass identification; re-exported below as llvm::LCSSAID.
+INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false)
+
+Pass *llvm::createLCSSAPass() { return new LCSSA(); }
+char &llvm::LCSSAID = LCSSA::ID;
+
+
+/// Process all loops in the function, inner-most out.
+bool LCSSA::runOnFunction(Function &F) {
+ bool Changed = false;
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = getAnalysisIfAvailable<ScalarEvolution>(); // Optional; may be null.
+
+ // Form LCSSA for each loop nest in the function.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ Changed |= formLCSSARecursively(**I, *DT, SE);
+
+ return Changed;
+}
+
+static void verifyLoop(Loop &L, DominatorTree &DT) {
+ // Recurse depth-first through inner loops.
+ for (Loop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
+ verifyLoop(**LI, DT);
+
+ // Check the special guarantees that LCSSA makes (check currently disabled).
+ //assert(L.isLCSSAForm(DT) && "LCSSA form not preserved!");
+}
+
+void LCSSA::verifyAnalysis() const {
+ // Verify each loop nest using the LI and DT pointers cached by runOnFunction,
+ // assuming they still refer to that function's analyses.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ verifyLoop(**I, *DT);
+}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 2768041..9d0be8b 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -17,15 +17,17 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
@@ -35,11 +37,9 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -127,8 +127,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// dest. If so, eliminate it as an explicit compare.
if (i.getCaseSuccessor() == DefaultDest) {
MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
- // MD should have 2 + NumCases operands.
- if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) {
+ unsigned NCases = SI->getNumCases();
+ // Fold the case metadata into the default if there will be any branches
+ // left, unless the metadata doesn't match the switch.
+ if (NCases > 1 && MD && MD->getNumOperands() == 2 + NCases) {
// Collect branch weights into a vector.
SmallVector<uint32_t, 8> Weights;
for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
@@ -352,8 +354,8 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
/// true when there are no uses or multiple uses that all refer to the same
/// value.
static bool areAllUsesEqual(Instruction *I) {
- Value::use_iterator UI = I->use_begin();
- Value::use_iterator UE = I->use_end();
+ Value::user_iterator UI = I->user_begin();
+ Value::user_iterator UE = I->user_end();
if (UI == UE)
return true;
@@ -374,7 +376,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
const TargetLibraryInfo *TLI) {
SmallPtrSet<Instruction*, 4> Visited;
for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects();
- I = cast<Instruction>(*I->use_begin())) {
+ I = cast<Instruction>(*I->user_begin())) {
if (I->use_empty())
return RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
@@ -506,11 +508,12 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
if (P) {
- DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
- BasicBlock *PredBBIDom = DT->getNode(PredBB)->getIDom()->getBlock();
- DT->changeImmediateDominator(DestBB, PredBBIDom);
- DT->eraseNode(PredBB);
+ if (DominatorTreeWrapperPass *DTWP =
+ P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DominatorTree &DT = DTWP->getDomTree();
+ BasicBlock *PredBBIDom = DT.getNode(PredBB)->getIDom()->getBlock();
+ DT.changeImmediateDominator(DestBB, PredBBIDom);
+ DT.eraseNode(PredBB);
}
}
// Nuke BB.
@@ -749,10 +752,9 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
if (!Succ->getSinglePredecessor()) {
BasicBlock::iterator BBI = BB->begin();
while (isa<PHINode>(*BBI)) {
- for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
- UI != E; ++UI) {
- if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
- if (PN->getIncomingBlock(UI) != BB)
+ for (Use &U : BBI->uses()) {
+ if (PHINode* PN = dyn_cast<PHINode>(U.getUser())) {
+ if (PN->getIncomingBlock(U) != BB)
return false;
} else {
return false;
@@ -1034,17 +1036,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
bool llvm::LowerDbgDeclare(Function &F) {
DIBuilder DIB(*F.getParent());
SmallVector<DbgDeclareInst *, 4> Dbgs;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ for (auto &FI : F)
+ for (BasicBlock::iterator BI : FI)
+ if (auto DDI = dyn_cast<DbgDeclareInst>(BI))
Dbgs.push_back(DDI);
- }
+
if (Dbgs.empty())
return false;
- for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(),
- E = Dbgs.end(); I != E; ++I) {
- DbgDeclareInst *DDI = *I;
+ for (auto &I : Dbgs) {
+ DbgDeclareInst *DDI = I;
AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
// If this is an alloca for a scalar variable, insert a dbg.value
// at each load and store to the alloca and erase the dbg.declare.
@@ -1053,11 +1054,10 @@ bool llvm::LowerDbgDeclare(Function &F) {
// We only remove the dbg.declare intrinsic if all uses are
// converted to dbg.value intrinsics.
bool RemoveDDI = true;
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E; ++UI)
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ for (User *U : AI->users())
+ if (StoreInst *SI = dyn_cast<StoreInst>(U))
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
- else if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+ else if (LoadInst *LI = dyn_cast<LoadInst>(U))
ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
else
RemoveDDI = false;
@@ -1072,9 +1072,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
/// alloca 'V', if any.
DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V))
- for (Value::use_iterator UI = DebugNode->use_begin(),
- E = DebugNode->use_end(); UI != E; ++UI)
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ for (User *U : DebugNode->users())
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
return DDI;
return 0;
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 6d5f16c..47083ea 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -42,20 +42,21 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -65,303 +66,41 @@ using namespace llvm;
STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
STATISTIC(NumNested , "Number of nested loops split out");
-namespace {
- struct LoopSimplify : public LoopPass {
- static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : LoopPass(ID) {
- initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
- }
-
- // AA - If we have an alias analysis object to update, this is it, otherwise
- // this is null.
- AliasAnalysis *AA;
- LoopInfo *LI;
- DominatorTree *DT;
- ScalarEvolution *SE;
- Loop *L;
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- // We need loop information to identify the loops...
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
-
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
-
- AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DependenceAnalysis>();
- AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
- }
-
- /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
- void verifyAnalysis() const;
-
- private:
- bool ProcessLoop(Loop *L, LPPassManager &LPM);
- BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
- Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
- BasicBlock *Preheader);
- BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
- };
-}
-
-static void PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L);
-
-char LoopSimplify::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
- "Canonicalize natural loops", true, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
- "Canonicalize natural loops", true, false)
-
-// Publicly exposed interface to pass...
-char &llvm::LoopSimplifyID = LoopSimplify::ID;
-Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
-
-/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
-/// it in any convenient order) inserting preheaders...
-///
-bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
- L = l;
- bool Changed = false;
- LI = &getAnalysis<LoopInfo>();
- AA = getAnalysisIfAvailable<AliasAnalysis>();
- DT = &getAnalysis<DominatorTree>();
- SE = getAnalysisIfAvailable<ScalarEvolution>();
-
- Changed |= ProcessLoop(L, LPM);
-
- return Changed;
-}
-
-/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
-/// all loops have preheaders.
-///
-bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
- bool Changed = false;
-ReprocessLoop:
-
- // Check to see that no blocks (other than the header) in this loop have
- // predecessors that are not in the loop. This is not valid for natural
- // loops, but can occur if the blocks are unreachable. Since they are
- // unreachable we can just shamelessly delete those CFG edges!
- for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
- BB != E; ++BB) {
- if (*BB == L->getHeader()) continue;
-
- SmallPtrSet<BasicBlock*, 4> BadPreds;
- for (pred_iterator PI = pred_begin(*BB),
- PE = pred_end(*BB); PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (!L->contains(P))
- BadPreds.insert(P);
- }
-
- // Delete each unique out-of-loop (and thus dead) predecessor.
- for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
- E = BadPreds.end(); I != E; ++I) {
-
- DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
- << (*I)->getName() << "\n");
-
- // Inform each successor of each dead pred.
- for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
- (*SI)->removePredecessor(*I);
- // Zap the dead pred's terminator and replace it with unreachable.
- TerminatorInst *TI = (*I)->getTerminator();
- TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
- (*I)->getTerminator()->eraseFromParent();
- new UnreachableInst((*I)->getContext(), *I);
- Changed = true;
- }
- }
-
- // If there are exiting blocks with branches on undef, resolve the undef in
- // the direction which will exit the loop. This will help simplify loop
- // trip count computations.
- SmallVector<BasicBlock*, 8> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
- for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
- E = ExitingBlocks.end(); I != E; ++I)
- if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
- if (BI->isConditional()) {
- if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
-
- DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
- << (*I)->getName() << "\n");
-
- BI->setCondition(ConstantInt::get(Cond->getType(),
- !L->contains(BI->getSuccessor(0))));
-
- // This may make the loop analyzable, force SCEV recomputation.
- if (SE)
- SE->forgetLoop(L);
-
- Changed = true;
- }
- }
-
- // Does the loop already have a preheader? If so, don't insert one.
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L, this);
- if (Preheader) {
- ++NumInserted;
- Changed = true;
- }
- }
-
- // Next, check to make sure that all exit nodes of the loop only have
- // predecessors that are inside of the loop. This check guarantees that the
- // loop preheader/header will dominate the exit blocks. If the exit block has
- // predecessors from outside of the loop, split the edge now.
- SmallVector<BasicBlock*, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
-
- SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
- ExitBlocks.end());
- for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
- E = ExitBlockSet.end(); I != E; ++I) {
- BasicBlock *ExitBlock = *I;
- for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
- PI != PE; ++PI)
- // Must be exactly this loop: no subloops, parent loops, or non-loop preds
- // allowed.
- if (!L->contains(*PI)) {
- if (RewriteLoopExitBlock(L, ExitBlock)) {
- ++NumInserted;
- Changed = true;
- }
- break;
- }
- }
-
- // If the header has more than two predecessors at this point (from the
- // preheader and from multiple backedges), we must adjust the loop.
- BasicBlock *LoopLatch = L->getLoopLatch();
- if (!LoopLatch) {
- // If this is really a nested loop, rip it out into a child loop. Don't do
- // this for loops with a giant number of backedges, just factor them into a
- // common backedge instead.
- if (L->getNumBackEdges() < 8) {
- if (SeparateNestedLoop(L, LPM, Preheader)) {
- ++NumNested;
- // This is a big restructuring change, reprocess the whole loop.
- Changed = true;
- // GCC doesn't tail recursion eliminate this.
- goto ReprocessLoop;
- }
- }
-
- // If we either couldn't, or didn't want to, identify nesting of the loops,
- // insert a new block that all backedges target, then make it jump to the
- // loop header.
- LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
- if (LoopLatch) {
- ++NumInserted;
- Changed = true;
- }
+// If the new block is not already placed right after one of the 'outside'
+// predecessor blocks, move it there. This prevents the preheader from being
+// placed inside the loop body, e.g. when the loop hasn't been rotated.
+static void placeSplitBlockCarefully(BasicBlock *NewBB,
+ SmallVectorImpl<BasicBlock *> &SplitPreds,
+ Loop *L) {
+ // Check to see if NewBB is already well placed.
+ Function::iterator BBI = NewBB; --BBI;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ if (&*BBI == SplitPreds[i])
+ return;
}
- // Scan over the PHI nodes in the loop header. Since they now have only two
- // incoming values (the loop is canonicalized), we may have simplified the PHI
- // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
- PHINode *PN;
- for (BasicBlock::iterator I = L->getHeader()->begin();
- (PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
- if (AA) AA->deleteValue(PN);
- if (SE) SE->forgetValue(PN);
- PN->replaceAllUsesWith(V);
- PN->eraseFromParent();
- }
-
- // If this loop has multiple exits and the exits all go to the same
- // block, attempt to merge the exits. This helps several passes, such
- // as LoopRotation, which do not support loops with multiple exits.
- // SimplifyCFG also does this (and this code uses the same utility
- // function), however this code is loop-aware, where SimplifyCFG is
- // not. That gives it the advantage of being able to hoist
- // loop-invariant instructions out of the way to open up more
- // opportunities, and the disadvantage of having the responsibility
- // to preserve dominator information.
- bool UniqueExit = true;
- if (!ExitBlocks.empty())
- for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i] != ExitBlocks[0]) {
- UniqueExit = false;
- break;
- }
- if (UniqueExit) {
- for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- BasicBlock *ExitingBlock = ExitingBlocks[i];
- if (!ExitingBlock->getSinglePredecessor()) continue;
- BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!BI || !BI->isConditional()) continue;
- CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
- if (!CI || CI->getParent() != ExitingBlock) continue;
-
- // Attempt to hoist out all instructions except for the
- // comparison and the branch.
- bool AllInvariant = true;
- for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
- Instruction *Inst = I++;
- // Skip debug info intrinsics.
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
- if (Inst == CI)
- continue;
- if (!L->makeLoopInvariant(Inst, Changed,
- Preheader ? Preheader->getTerminator() : 0)) {
- AllInvariant = false;
- break;
- }
- }
- if (!AllInvariant) continue;
-
- // The block has now been cleared of all instructions except for
- // a comparison and a conditional branch. SimplifyCFG may be able
- // to fold it now.
- if (!FoldBranchToCommonDest(BI)) continue;
-
- // Success. The block is now dead, so remove it from the loop,
- // update the dominator tree and delete it.
- DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
- << ExitingBlock->getName() << "\n");
-
- // If any reachable control flow within this loop has changed, notify
- // ScalarEvolution. Currently assume the parent loop doesn't change
- // (spliting edges doesn't count). If blocks, CFG edges, or other values
- // in the parent loop change, then we need call to forgetLoop() for the
- // parent instead.
- if (SE)
- SE->forgetLoop(L);
-
- assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
- Changed = true;
- LI->removeBlock(ExitingBlock);
-
- DomTreeNode *Node = DT->getNode(ExitingBlock);
- const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
- Node->getChildren();
- while (!Children.empty()) {
- DomTreeNode *Child = Children.front();
- DT->changeImmediateDominator(Child, Node->getIDom());
- }
- DT->eraseNode(ExitingBlock);
+ // If it isn't already after an outside block, move it after one. This is
+ // always good as it makes the uncond branch from the outside block into a
+ // fall-through.
- BI->getSuccessor(0)->removePredecessor(ExitingBlock);
- BI->getSuccessor(1)->removePredecessor(ExitingBlock);
- ExitingBlock->eraseFromParent();
+ // Figure out *which* outside block to put this after. Prefer an outside
+ // block that neighbors a BB actually in the loop.
+ BasicBlock *FoundBB = 0;
+ for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+ Function::iterator BBI = SplitPreds[i];
+ if (++BBI != NewBB->getParent()->end() &&
+ L->contains(BBI)) {
+ FoundBB = SplitPreds[i];
+ break;
}
}
- return Changed;
+ // If our heuristic for a *good* bb to place this after doesn't find
+ // anything, just pick something. It's likely better than leaving it within
+ // the loop.
+ if (!FoundBB)
+ FoundBB = SplitPreds[0];
+ NewBB->moveAfter(FoundBB);
}
/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
@@ -406,15 +145,16 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
+ placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
return PreheaderBB;
}
-/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
-/// blocks. This method is used to split exit blocks that have predecessors
-/// outside of the loop.
-BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+/// \brief Ensure that the loop preheader dominates all exit blocks.
+///
+/// This method is used to split exit blocks that have predecessors outside of
+/// the loop.
+static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
BasicBlock *P = *I;
@@ -434,10 +174,10 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
SplitLandingPadPredecessors(Exit, ArrayRef<BasicBlock*>(&LoopBlocks[0],
LoopBlocks.size()),
".loopexit", ".nonloopexit",
- this, NewBBs);
+ PP, NewBBs);
NewExitBB = NewBBs[0];
} else {
- NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", PP);
}
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
@@ -445,29 +185,29 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
return NewExitBB;
}
-/// AddBlockAndPredsToSet - Add the specified block, and all of its
-/// predecessors, to the specified set, if it's not already in there. Stop
-/// predecessor traversal when we reach StopBlock.
-static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+/// Add the specified block, and all of its predecessors, to the specified set,
+/// if it's not already in there. Stop predecessor traversal when we reach
+/// StopBlock.
+static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
std::set<BasicBlock*> &Blocks) {
- std::vector<BasicBlock *> WorkList;
- WorkList.push_back(InputBB);
+ SmallVector<BasicBlock *, 8> Worklist;
+ Worklist.push_back(InputBB);
do {
- BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+ BasicBlock *BB = Worklist.pop_back_val();
if (Blocks.insert(BB).second && BB != StopBlock)
// If BB is not already processed and it is not a stop block then
// insert its predecessor in the work list
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
BasicBlock *WBB = *I;
- WorkList.push_back(WBB);
+ Worklist.push_back(WBB);
}
- } while(!WorkList.empty());
+ } while (!Worklist.empty());
}
-/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
-/// PHI node that tells us how to partition the loops.
-static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
- AliasAnalysis *AA, LoopInfo *LI) {
+/// \brief The first part of loop-nestification is to find a PHI node that tells
+/// us how to partition the loops.
+static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
+ DominatorTree *DT) {
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
@@ -489,46 +229,10 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
return 0;
}
-// PlaceSplitBlockCarefully - If the block isn't already, move the new block to
-// right after some 'outside block' block. This prevents the preheader from
-// being placed inside the loop body, e.g. when the loop hasn't been rotated.
-void PlaceSplitBlockCarefully(BasicBlock *NewBB,
- SmallVectorImpl<BasicBlock*> &SplitPreds,
- Loop *L) {
- // Check to see if NewBB is already well placed.
- Function::iterator BBI = NewBB; --BBI;
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- if (&*BBI == SplitPreds[i])
- return;
- }
-
- // If it isn't already after an outside block, move it after one. This is
- // always good as it makes the uncond branch from the outside block into a
- // fall-through.
-
- // Figure out *which* outside block to put this after. Prefer an outside
- // block that neighbors a BB actually in the loop.
- BasicBlock *FoundBB = 0;
- for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
- Function::iterator BBI = SplitPreds[i];
- if (++BBI != NewBB->getParent()->end() &&
- L->contains(BBI)) {
- FoundBB = SplitPreds[i];
- break;
- }
- }
-
- // If our heuristic for a *good* bb to place this after doesn't find
- // anything, just pick something. It's likely better than leaving it within
- // the loop.
- if (!FoundBB)
- FoundBB = SplitPreds[0];
- NewBB->moveAfter(FoundBB);
-}
-
-
-/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
-/// them out into a nested loop. This is important for code that looks like
+/// \brief If this loop has multiple backedges, try to pull one of them out into
+/// a nested loop.
+///
+/// This is important for code that looks like
/// this:
///
/// Loop:
@@ -544,8 +248,9 @@ void PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
- BasicBlock *Preheader) {
+static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
+ AliasAnalysis *AA, DominatorTree *DT,
+ LoopInfo *LI, ScalarEvolution *SE, Pass *PP) {
// Don't try to separate loops without a preheader.
if (!Preheader)
return 0;
@@ -554,7 +259,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
assert(!L->getHeader()->isLandingPad() &&
"Can't insert backedge to landing pad");
- PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
+ PHINode *PN = findPHIToPartitionLoops(L, AA, DT);
if (PN == 0) return 0; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
@@ -580,11 +285,11 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
BasicBlock *Header = L->getHeader();
BasicBlock *NewBB =
- SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", PP);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+ placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
// Create the new outer loop.
Loop *NewOuter = new Loop();
@@ -598,9 +303,6 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
// L is now a subloop of our outer loop.
NewOuter->addChildLoop(L);
- // Add the new loop to the pass manager queue.
- LPM.insertLoopIntoQueue(NewOuter);
-
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
NewOuter->addBlockEntry(*I);
@@ -615,7 +317,7 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
BasicBlock *P = *PI;
if (DT->dominates(Header, P))
- AddBlockAndPredsToSet(P, Header, BlocksInL);
+ addBlockAndPredsToSet(P, Header, BlocksInL);
}
// Scan all of the loop children of L, moving them to OuterLoop if they are
@@ -643,15 +345,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
return NewOuter;
}
-
-
-/// InsertUniqueBackedgeBlock - This method is called when the specified loop
-/// has more than one backedge in it. If this occurs, revector all of these
-/// backedges to target a new basic block and have that block branch to the loop
-/// header. This ensures that loops have exactly one backedge.
+/// \brief This method is called when the specified loop has more than one
+/// backedge in it.
///
-BasicBlock *
-LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+/// If this occurs, revector all of these backedges to target a new basic block
+/// and have that block branch to the loop header. This ensures that loops
+/// have exactly one backedge.
+static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
+ AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
// Get information about the loop
@@ -762,7 +464,349 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
return BEBlock;
}
-void LoopSimplify::verifyAnalysis() const {
+/// \brief Simplify one loop and queue further loops for simplification.
+///
+/// FIXME: Currently this accepts both lots of analyses that it uses and a raw
+/// Pass pointer. The Pass pointer is used by numerous utilities to update
+/// specific analyses. Rather than a pass it would be much cleaner and more
+/// explicit if they accepted the analysis directly and then updated it.
+static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
+ AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
+ ScalarEvolution *SE, Pass *PP) {
+ bool Changed = false;
+ReprocessLoop:
+
+ // Check to see that no blocks (other than the header) in this loop have
+ // predecessors that are not in the loop. This is not valid for natural
+ // loops, but can occur if the blocks are unreachable. Since they are
+ // unreachable we can just shamelessly delete those CFG edges!
+ for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+ BB != E; ++BB) {
+ if (*BB == L->getHeader()) continue;
+
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P))
+ BadPreds.insert(P);
+ }
+
+ // Delete each unique out-of-loop (and thus dead) predecessor.
+ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
+ E = BadPreds.end(); I != E; ++I) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+ << (*I)->getName() << "\n");
+
+ // Inform each successor of each dead pred.
+ for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+ (*SI)->removePredecessor(*I);
+ // Zap the dead pred's terminator and replace it with unreachable.
+ TerminatorInst *TI = (*I)->getTerminator();
+ TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+ (*I)->getTerminator()->eraseFromParent();
+ new UnreachableInst((*I)->getContext(), *I);
+ Changed = true;
+ }
+ }
+
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ E = ExitingBlocks.end(); I != E; ++I)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+ << (*I)->getName() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+
+ // This may make the loop analyzable, force SCEV recomputation.
+ if (SE)
+ SE->forgetLoop(L);
+
+ Changed = true;
+ }
+ }
+
+ // Does the loop already have a preheader? If so, don't insert one.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ Preheader = InsertPreheaderForLoop(L, PP);
+ if (Preheader) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Next, check to make sure that all exit nodes of the loop only have
+ // predecessors that are inside of the loop. This check guarantees that the
+ // loop preheader/header will dominate the exit blocks. If the exit block has
+ // predecessors from outside of the loop, split the edge now.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(),
+ ExitBlocks.end());
+ for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(),
+ E = ExitBlockSet.end(); I != E; ++I) {
+ BasicBlock *ExitBlock = *I;
+ for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
+ PI != PE; ++PI)
+ // Must be exactly this loop: no subloops, parent loops, or non-loop preds
+ // allowed.
+ if (!L->contains(*PI)) {
+ if (rewriteLoopExitBlock(L, ExitBlock, PP)) {
+ ++NumInserted;
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // If the header has more than two predecessors at this point (from the
+ // preheader and from multiple backedges), we must adjust the loop.
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ // If this is really a nested loop, rip it out into a child loop. Don't do
+ // this for loops with a giant number of backedges, just factor them into a
+ // common backedge instead.
+ if (L->getNumBackEdges() < 8) {
+ if (Loop *OuterL = separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP)) {
+ ++NumNested;
+ // Enqueue the outer loop as it should be processed next in our
+ // depth-first nest walk.
+ Worklist.push_back(OuterL);
+
+ // This is a big restructuring change, reprocess the whole loop.
+ Changed = true;
+ // GCC doesn't tail recursion eliminate this.
+ // FIXME: It isn't clear we can't rely on LLVM to TRE this.
+ goto ReprocessLoop;
+ }
+ }
+
+ // If we either couldn't, or didn't want to, identify nesting of the loops,
+ // insert a new block that all backedges target, then make it jump to the
+ // loop header.
+ LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI);
+ if (LoopLatch) {
+ ++NumInserted;
+ Changed = true;
+ }
+ }
+
+ // Scan over the PHI nodes in the loop header. Since they now have only two
+ // incoming values (the loop is canonicalized), we may have simplified the PHI
+ // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+ PHINode *PN;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ (PN = dyn_cast<PHINode>(I++)); )
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+ if (AA) AA->deleteValue(PN);
+ if (SE) SE->forgetValue(PN);
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
+
+ // If this loop has multiple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ bool AnyInvariant = false;
+ for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
+ Instruction *Inst = I++;
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+ if (Inst == CI)
+ continue;
+ if (!L->makeLoopInvariant(Inst, AnyInvariant,
+ Preheader ? Preheader->getTerminator() : 0)) {
+ AllInvariant = false;
+ break;
+ }
+ }
+ if (AnyInvariant) {
+ Changed = true;
+ // The loop disposition of all SCEV expressions that depend on any
+ // hoisted values have also changed.
+ if (SE)
+ SE->forgetLoopDispositions(L);
+ }
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI)) continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and delete it.
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+ << ExitingBlock->getName() << "\n");
+
+ // Notify ScalarEvolution before deleting this block. Currently assume the
+ // parent loop doesn't change (splitting edges doesn't count). If blocks,
+ // CFG edges, or other values in the parent loop change, then we need to
+ // call forgetLoop() for the parent instead.
+ if (SE)
+ SE->forgetLoop(L);
+
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ Changed = true;
+ LI->removeBlock(ExitingBlock);
+
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+ Node->getChildren();
+ while (!Children.empty()) {
+ DomTreeNode *Child = Children.front();
+ DT->changeImmediateDominator(Child, Node->getIDom());
+ }
+ DT->eraseNode(ExitingBlock);
+
+ BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+ BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
+ return Changed;
+}
+
+bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
+ AliasAnalysis *AA, ScalarEvolution *SE) {
+ bool Changed = false;
+
+ // Worklist maintains our depth-first queue of loops in this nest to process.
+ SmallVector<Loop *, 4> Worklist;
+ Worklist.push_back(L);
+
+ // Walk the worklist from front to back, pushing newly found sub loops onto
+ // the back. This will let us process loops from back to front in depth-first
+ // order. We can use this simple process because loops form a tree.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ Loop *L2 = Worklist[Idx];
+ for (Loop::iterator I = L2->begin(), E = L2->end(); I != E; ++I)
+ Worklist.push_back(*I);
+ }
+
+ while (!Worklist.empty())
+ Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, SE, PP);
+
+ return Changed;
+}
+
+namespace {
+ struct LoopSimplify : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ LoopSimplify() : FunctionPass(ID) {
+ initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ // AA - If we have an alias analysis object to update, this is it, otherwise
+ // this is null.
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // We need loop information to identify the loops...
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.addPreserved<DependenceAnalysis>();
+ AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ }
+
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const override;
+
+ private:
+ bool ProcessLoop(Loop *L);
+ BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
+ Loop *SeparateNestedLoop(Loop *L, BasicBlock *Preheader);
+ BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
+ };
+}
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+ "Canonicalize natural loops", true, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnFunction(Function &F) {
+ bool Changed = false;
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
+ LI = &getAnalysis<LoopInfo>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
+
+ // Simplify each loop nest in the function.
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ Changed |= simplifyLoop(*I, DT, LI, this, AA, SE);
+
+ return Changed;
+}
+
+// FIXME: Restore this code when we re-enable verification in verifyAnalysis
+// below.
+#if 0
+static void verifyLoop(Loop *L) {
+ // Verify subloops.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ verifyLoop(*I);
+
// It used to be possible to just assert L->isLoopSimplifyForm(), however
// with the introduction of indirectbr, there are now cases where it's
// not possible to transform a loop as necessary. We can at least check
@@ -799,3 +843,15 @@ void LoopSimplify::verifyAnalysis() const {
(void)HasIndBrExiting;
}
}
+#endif
+
+void LoopSimplify::verifyAnalysis() const {
+ // FIXME: This routine is being called mid-way through the loop pass manager
+ // as loop passes destroy this analysis. That's actually fine, but we have no
+ // way of expressing that here. Once all of the passes that destroy this are
+ // hoisted out of the loop pass manager we can add back verification here.
+#if 0
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+ verifyLoop(*I);
+#endif
+}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 162807d..d2dfc20 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -24,11 +24,13 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
@@ -137,10 +139,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
-/// available it must also preserve those analyses.
+/// available from the Pass it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
bool AllowRuntime, unsigned TripMultiple,
- LoopInfo *LI, LPPassManager *LPM) {
+ LoopInfo *LI, Pass *PP, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -208,8 +210,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Notify ScalarEvolution that the loop will be substantially changed,
// if not outright eliminated.
- if (LPM) {
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (PP) {
+ ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
if (SE)
SE->forgetLoop(L);
}
@@ -409,14 +411,18 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
}
}
- if (LPM) {
+ DominatorTree *DT = 0;
+ if (PP) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
// Incrementally updating domtree after loop unrolling would be easy.
- if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
- DT->runOnFunction(*L->getHeader()->getParent());
+ if (DominatorTreeWrapperPass *DTWP =
+ PP->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) {
+ DT = &DTWP->getDomTree();
+ DT->recalculate(*L->getHeader()->getParent());
+ }
// Simplify any new induction variables in the partially unrolled loop.
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
if (SE && !CompletelyUnroll) {
SmallVector<WeakVH, 16> DeadInsts;
simplifyLoopIVs(L, SE, LPM, DeadInsts);
@@ -449,9 +455,25 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
+
+ Loop *OuterL = L->getParentLoop();
// Remove the loop from the LoopPassManager if it's completely removed.
if (CompletelyUnroll && LPM != NULL)
LPM->deleteLoopFromQueue(L);
+ // If we have a pass and a DominatorTree we should re-simplify impacted loops
+ // to ensure subsequent analyses can rely on this form. We want to simplify
+ // at least one layer outside of the loop that was unrolled so that any
+ // changes to the parent loop exposed by the unrolling are considered.
+ if (PP && DT) {
+ if (!OuterL && !CompletelyUnroll)
+ OuterL = L;
+ if (OuterL) {
+ ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ 0, SE);
+ formLCSSARecursively(*OuterL, *DT, SE);
+ }
+ }
+
return true;
}
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index e017f50..3e61289 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -52,7 +52,7 @@ namespace {
initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
};
}
@@ -94,15 +94,25 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
return false;
// Handle non-optimized IR code like:
- // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+ // %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1)
// %tobool = icmp ne i64 %expval, 0
// br i1 %tobool, label %if.then, label %if.end
+ //
+ // Or the following simpler case:
+ // %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1)
+ // br i1 %expval, label %if.then, label %if.end
+
+ CallInst *CI;
ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
- if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
- return false;
+ if (!CmpI) {
+ CI = dyn_cast<CallInst>(BI->getCondition());
+ } else {
+ if (CmpI->getPredicate() != CmpInst::ICMP_NE)
+ return false;
+ CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+ }
- CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
if (!CI)
return false;
@@ -127,7 +137,10 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
BI->setMetadata(LLVMContext::MD_prof, Node);
- CmpI->setOperand(0, ArgValue);
+ if (CmpI)
+ CmpI->setOperand(0, ArgValue);
+ else
+ BI->setCondition(ArgValue);
return true;
}
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 9799a30..b1f758e 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -1,4 +1,4 @@
-//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,29 +8,9 @@
//===----------------------------------------------------------------------===//
//
// This transformation is designed for use by code generators which do not yet
-// support stack unwinding. This pass supports two models of exception handling
-// lowering, the 'cheap' support and the 'expensive' support.
-//
-// 'Cheap' exception handling support gives the program the ability to execute
-// any program which does not "throw an exception", by turning 'invoke'
-// instructions into calls and by turning 'unwind' instructions into calls to
-// abort(). If the program does dynamically use the unwind instruction, the
-// program will print a message then abort.
-//
-// 'Expensive' exception handling support gives the full exception handling
-// support to the program at the cost of making the 'invoke' instruction
-// really expensive. It basically inserts setjmp/longjmp calls to emulate the
-// exception handling as necessary.
-//
-// Because the 'expensive' support slows down programs a lot, and EH is only
-// used for a subset of the programs, it must be specifically enabled by an
-// option.
-//
-// Note that after this pass runs the CFG is not entirely accurate (exceptional
-// control flow edges are not correct anymore) so only very simple things should
-// be done after the lowerinvoke pass has run (like generation of native code).
-// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't
-// support the invoke instruction yet" lowering pass.
+// support stack unwinding. This pass converts 'invoke' instructions to 'call'
+// instructions, so that any exception-handling 'landingpad' blocks become dead
+// code (which can be removed by running the '-simplifycfg' pass afterwards).
//
//===----------------------------------------------------------------------===//
@@ -38,64 +18,23 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include <csetjmp>
-#include <set>
using namespace llvm;
STATISTIC(NumInvokes, "Number of invokes replaced");
-STATISTIC(NumSpilled, "Number of registers live across unwind edges");
-
-static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
- cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
namespace {
class LowerInvoke : public FunctionPass {
- const TargetMachine *TM;
-
- // Used for both models.
- Constant *AbortFn;
-
- // Used for expensive EH support.
- StructType *JBLinkTy;
- GlobalVariable *JBListHead;
- Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
- bool useExpensiveEHSupport;
-
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke(const TargetMachine *TM = 0,
- bool useExpensiveEHSupport = ExpensiveEHSupport)
- : FunctionPass(ID), TM(TM),
- useExpensiveEHSupport(useExpensiveEHSupport) {
+ explicit LowerInvoke() : FunctionPass(ID) {
initializeLowerInvokePass(*PassRegistry::getPassRegistry());
}
- bool doInitialization(Module &M);
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- // This is a cluster of orthogonal Transforms
- AU.addPreserved("mem2reg");
- AU.addPreservedID(LowerSwitchID);
- }
-
- private:
- bool insertCheapEHSupport(Function &F);
- void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
- void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
- AllocaInst *InvokeNum, AllocaInst *StackPtr,
- SwitchInst *CatchSwitch);
- bool insertExpensiveEHSupport(Function &F);
+ bool runOnFunction(Function &F) override;
};
}
@@ -107,65 +46,11 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass(const TargetMachine *TM,
- bool useExpensiveEHSupport) {
- return new LowerInvoke(TM, useExpensiveEHSupport || ExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass() {
+ return new LowerInvoke();
}
-// doInitialization - Make sure that there is a prototype for abort in the
-// current module.
-bool LowerInvoke::doInitialization(Module &M) {
- Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
- if (useExpensiveEHSupport) {
- // Insert a type for the linked list of jump buffers.
- const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
- unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
- JBSize = JBSize ? JBSize : 200;
- Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
-
- JBLinkTy = StructType::create(M.getContext(), "llvm.sjljeh.jmpbufty");
- Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
- JBLinkTy->setBody(Elts);
-
- Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
-
- // Now that we've done that, insert the jmpbuf list head global, unless it
- // already exists.
- if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
- JBListHead = new GlobalVariable(M, PtrJBList, false,
- GlobalValue::LinkOnceAnyLinkage,
- Constant::getNullValue(PtrJBList),
- "llvm.sjljeh.jblist");
- }
-
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
- SetJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::setjmp);
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-# pragma pop_macro("setjmp")
-# undef setjmp_undefined_for_msvc
-#endif
-
- LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp);
- StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
- StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
- }
-
- // We need the 'write' and 'abort' functions for both models.
- AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
- (Type *)0);
- return true;
-}
-
-bool LowerInvoke::insertCheapEHSupport(Function &F) {
+bool LowerInvoke::runOnFunction(Function &F) {
bool Changed = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
@@ -192,388 +77,3 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
}
return Changed;
}
-
-/// rewriteExpensiveInvoke - Insert code and hack the function to replace the
-/// specified invoke instruction with a call.
-void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
- AllocaInst *InvokeNum,
- AllocaInst *StackPtr,
- SwitchInst *CatchSwitch) {
- ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo);
-
- // If the unwind edge has phi nodes, split the edge.
- if (isa<PHINode>(II->getUnwindDest()->begin())) {
- SplitCriticalEdge(II, 1, this);
-
- // If there are any phi nodes left, they must have a single predecessor.
- while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- PN->eraseFromParent();
- }
- }
-
- // Insert a store of the invoke num before the invoke and store zero into the
- // location afterward.
- new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile
-
- // Insert a store of the stack ptr before the invoke, so we can restore it
- // later in the exception case.
- CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II);
- new StoreInst(StackSaveRet, StackPtr, true, II); // volatile
-
- BasicBlock::iterator NI = II->getNormalDest()->getFirstInsertionPt();
- // nonvolatile.
- new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),
- InvokeNum, false, NI);
-
- Instruction* StackPtrLoad =
- new LoadInst(StackPtr, "stackptr.restore", true,
- II->getUnwindDest()->getFirstInsertionPt());
- CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad);
-
- // Add a switch case to our unwind block.
- CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
-
- // Insert a normal call instruction.
- SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(),
- CallArgs, "", II);
- NewCall->takeName(II);
- NewCall->setCallingConv(II->getCallingConv());
- NewCall->setAttributes(II->getAttributes());
- NewCall->setDebugLoc(II->getDebugLoc());
- II->replaceAllUsesWith(NewCall);
-
- // Replace the invoke with an uncond branch.
- BranchInst::Create(II->getNormalDest(), NewCall->getParent());
- II->eraseFromParent();
-}
-
-/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until
-/// we reach blocks we've already seen.
-static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
- if (!LiveBBs.insert(BB).second) return; // already been here.
-
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- MarkBlocksLiveIn(*PI, LiveBBs);
-}
-
-// First thing we need to do is scan the whole function for values that are
-// live across unwind edges. Each value that is live across an unwind edge
-// we spill into a stack location, guaranteeing that there is nothing live
-// across the unwind edge. This process also splits all critical edges
-// coming out of invoke's.
-void LowerInvoke::
-splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
- // First step, split all critical edges from invoke instructions.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- InvokeInst *II = Invokes[i];
- SplitCriticalEdge(II, 0, this);
- SplitCriticalEdge(II, 1, this);
- assert(!isa<PHINode>(II->getNormalDest()) &&
- !isa<PHINode>(II->getUnwindDest()) &&
- "critical edge splitting left single entry phi nodes?");
- }
-
- Function *F = Invokes.back()->getParent()->getParent();
-
- // To avoid having to handle incoming arguments specially, we lower each arg
- // to a copy instruction in the entry block. This ensures that the argument
- // value itself cannot be live across the entry block.
- BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
- while (isa<AllocaInst>(AfterAllocaInsertPt) &&
- isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
- ++AfterAllocaInsertPt;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI) {
- Type *Ty = AI->getType();
- // Aggregate types can't be cast, but are legal argument types, so we have
- // to handle them differently. We use an extract/insert pair as a
- // lightweight method to achieve the same goal.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
- Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
- Instruction *NI = InsertValueInst::Create(AI, EI, 0);
- NI->insertAfter(EI);
- AI->replaceAllUsesWith(NI);
- // Set the operand of the instructions back to the AllocaInst.
- EI->setOperand(0, AI);
- NI->setOperand(0, AI);
- } else {
- // This is always a no-op cast because we're casting AI to AI->getType()
- // so src and destination types are identical. BitCast is the only
- // possibility.
- CastInst *NC = new BitCastInst(
- AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
- AI->replaceAllUsesWith(NC);
- // Set the operand of the cast instruction back to the AllocaInst.
- // Normally it's forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However,
- // we're replacing it here with the same value it was constructed with.
- // We do this because the above replaceAllUsesWith() clobbered the
- // operand, but we want this one to remain.
- NC->setOperand(0, AI);
- }
- }
-
- // Finally, scan the code looking for instructions with bad live ranges.
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- // Ignore obvious cases we don't have to handle. In particular, most
- // instructions either have no uses or only have a single use inside the
- // current block. Ignore them quickly.
- Instruction *Inst = II;
- if (Inst->use_empty()) continue;
- if (Inst->hasOneUse() &&
- cast<Instruction>(Inst->use_back())->getParent() == BB &&
- !isa<PHINode>(Inst->use_back())) continue;
-
- // If this is an alloca in the entry block, it's not a real register
- // value.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
- if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
- continue;
-
- // Avoid iterator invalidation by copying users to a temporary vector.
- SmallVector<Instruction*,16> Users;
- for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (User->getParent() != BB || isa<PHINode>(User))
- Users.push_back(User);
- }
-
- // Scan all of the uses and see if the live range is live across an unwind
- // edge. If we find a use live across an invoke edge, create an alloca
- // and spill the value.
-
- // Find all of the blocks that this value is live in.
- std::set<BasicBlock*> LiveBBs;
- LiveBBs.insert(Inst->getParent());
- while (!Users.empty()) {
- Instruction *U = Users.back();
- Users.pop_back();
-
- if (!isa<PHINode>(U)) {
- MarkBlocksLiveIn(U->getParent(), LiveBBs);
- } else {
- // Uses for a PHI node occur in their predecessor block.
- PHINode *PN = cast<PHINode>(U);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Inst)
- MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
- }
- }
-
- // Now that we know all of the blocks that this thing is live in, see if
- // it includes any of the unwind locations.
- bool NeedsSpill = false;
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
- if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
- NeedsSpill = true;
- }
- }
-
- // If we decided we need a spill, do it.
- if (NeedsSpill) {
- ++NumSpilled;
- DemoteRegToStack(*Inst, true);
- }
- }
-}
-
-bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
- SmallVector<ReturnInst*,16> Returns;
- SmallVector<InvokeInst*,16> Invokes;
- UnreachableInst* UnreachablePlaceholder = 0;
-
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- // Remember all return instructions in case we insert an invoke into this
- // function.
- Returns.push_back(RI);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Invokes.push_back(II);
- }
-
- if (Invokes.empty()) return false;
-
- NumInvokes += Invokes.size();
-
- // TODO: This is not an optimal way to do this. In particular, this always
- // inserts setjmp calls into the entries of functions with invoke instructions
- // even though there are possibly paths through the function that do not
- // execute any invokes. In particular, for functions with early exits, e.g.
- // the 'addMove' method in hexxagon, it would be nice to not have to do the
- // setjmp stuff on the early exit path. This requires a bit of dataflow, but
- // would not be too hard to do.
-
- // If we have an invoke instruction, insert a setjmp that dominates all
- // invokes. After the setjmp, use a cond branch that goes to the original
- // code path on zero, and to a designated 'catch' block of nonzero.
- Value *OldJmpBufPtr = 0;
- if (!Invokes.empty()) {
- // First thing we need to do is scan the whole function for values that are
- // live across unwind edges. Each value that is live across an unwind edge
- // we spill into a stack location, guaranteeing that there is nothing live
- // across the unwind edge. This process also splits all critical edges
- // coming out of invoke's.
- splitLiveRangesLiveAcrossInvokes(Invokes);
-
- BasicBlock *EntryBB = F.begin();
-
- // Create an alloca for the incoming jump buffer ptr and the new jump buffer
- // that needs to be restored on all exits from the function. This is an
- // alloca because the value needs to be live across invokes.
- const TargetLowering *TLI = TM ? TM->getTargetLowering() : 0;
- unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
- AllocaInst *JmpBuf =
- new AllocaInst(JBLinkTy, 0, Align,
- "jblink", F.begin()->begin());
-
- Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
- ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
- OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "OldBuf",
- EntryBB->getTerminator());
-
- // Copy the JBListHead to the alloca.
- Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
- EntryBB->getTerminator());
- new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
-
- // Add the new jumpbuf to the list.
- new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
-
- // Create the catch block. The catch block is basically a big switch
- // statement that goes to all of the invoke catch blocks.
- BasicBlock *CatchBB =
- BasicBlock::Create(F.getContext(), "setjmp.catch", &F);
-
- // Create an alloca which keeps track of the stack pointer before every
- // invoke, this allows us to properly restore the stack pointer after
- // long jumping.
- AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0,
- "stackptr", EntryBB->begin());
-
- // Create an alloca which keeps track of which invoke is currently
- // executing. For normal calls it contains zero.
- AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0,
- "invokenum",EntryBB->begin());
- new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
- InvokeNum, true, EntryBB->getTerminator());
-
- // Insert a load in the Catch block, and a switch on its value. By default,
- // we go to a block that just does an unwind (which is the correct action
- // for a standard call). We insert an unreachable instruction here and
- // modify the block to jump to the correct unwinding pad later.
- BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F);
- UnreachablePlaceholder = new UnreachableInst(F.getContext(), UnwindBB);
-
- Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
- SwitchInst *CatchSwitch =
- SwitchInst::Create(CatchLoad, UnwindBB, Invokes.size(), CatchBB);
-
- // Now that things are set up, insert the setjmp call itself.
-
- // Split the entry block to insert the conditional branch for the setjmp.
- BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
- "setjmp.cont");
-
- Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
- Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx, "TheJmpBuf",
- EntryBB->getTerminator());
- JmpBufPtr = new BitCastInst(JmpBufPtr,
- Type::getInt8PtrTy(F.getContext()),
- "tmp", EntryBB->getTerminator());
- Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",
- EntryBB->getTerminator());
-
- // Compare the return value to zero.
- Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
- ICmpInst::ICMP_EQ, SJRet,
- Constant::getNullValue(SJRet->getType()),
- "notunwind");
- // Nuke the uncond branch.
- EntryBB->getTerminator()->eraseFromParent();
-
- // Put in a new condbranch in its place.
- BranchInst::Create(ContBlock, CatchBB, IsNormal, EntryBB);
-
- // At this point, we are all set up, rewrite each invoke instruction.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
- rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch);
- }
-
- // We know that there is at least one unwind.
-
- // Create three new blocks, the block to load the jmpbuf ptr and compare
- // against null, the block to do the longjmp, and the error block for if it
- // is null. Add them at the end of the function because they are not hot.
- BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(),
- "dounwind", &F);
- BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F);
- BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);
-
- // If this function contains an invoke, restore the old jumpbuf ptr.
- Value *BufPtr;
- if (OldJmpBufPtr) {
- // Before the return, insert a copy from the saved value to the new value.
- BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler);
- new StoreInst(BufPtr, JBListHead, UnwindHandler);
- } else {
- BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler);
- }
-
- // Load the JBList, if it's null, then there was no catch!
- Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,
- Constant::getNullValue(BufPtr->getType()),
- "notnull");
- BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);
-
- // Create the block to do the longjmp.
- // Get a pointer to the jmpbuf and longjmp.
- Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
- ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
- Idx[0] = GetElementPtrInst::Create(BufPtr, Idx, "JmpBuf", UnwindBlock);
- Idx[0] = new BitCastInst(Idx[0],
- Type::getInt8PtrTy(F.getContext()),
- "tmp", UnwindBlock);
- Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
- CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
- new UnreachableInst(F.getContext(), UnwindBlock);
-
- // Set up the term block ("throw without a catch").
- new UnreachableInst(F.getContext(), TermBlock);
-
- // Insert a call to abort()
- CallInst::Create(AbortFn, "",
- TermBlock->getTerminator())->setTailCall();
-
- // Replace the inserted unreachable with a branch to the unwind handler.
- if (UnreachablePlaceholder) {
- BranchInst::Create(UnwindHandler, UnreachablePlaceholder);
- UnreachablePlaceholder->eraseFromParent();
- }
-
- // Finally, for any returns from this function, if this function contains an
- // invoke, restore the old jmpbuf pointer to its input value.
- if (OldJmpBufPtr) {
- for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
- ReturnInst *R = Returns[i];
-
- // Before the return, insert a copy from the saved value to the new value.
- Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R);
- new StoreInst(OldBuf, JBListHead, true, R);
- }
- }
-
- return true;
-}
-
-bool LowerInvoke::runOnFunction(Function &F) {
- if (useExpensiveEHSupport)
- return insertExpensiveEHSupport(F);
- else
- return insertCheapEHSupport(F);
-}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 2d2a8a5..6fb7410 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -37,9 +37,9 @@ namespace {
initializeLowerSwitchPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
AU.addPreserved("mem2reg");
@@ -245,7 +245,8 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
// Merge case into clusters
if (Cases.size()>=2)
- for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
+ for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
+ J != Cases.end();) {
int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
BasicBlock* nextBB = J->BB;
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 61b3965..a188ac5 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -15,7 +15,7 @@
#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -34,10 +34,10 @@ namespace {
// runOnFunction - To run this pass, first we calculate the alloca
// instructions that are safe for promotion, then we promote each one.
//
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
@@ -50,7 +50,7 @@ namespace {
char PromotePass::ID = 0;
INITIALIZE_PASS_BEGIN(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(PromotePass, "mem2reg", "Promote Memory to Register",
false, false)
@@ -61,7 +61,7 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
while (1) {
Allocas.clear();
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index c370453..395a46b 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -48,11 +48,11 @@ namespace {
initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
- bool runOnModule(Module &M) {
+ bool runOnModule(Module &M) override {
static const char *const metaNames[] = {
// See http://en.wikipedia.org/wiki/Metasyntactic_variable
"foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 8f6eee3..25fab89 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -34,18 +34,18 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <queue>
@@ -61,9 +61,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
// assignments to subsections of the memory unit.
// Only allow direct and non-volatile loads and stores...
- for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE; ++UI) { // Loop over all of the uses of the alloca
- const User *U = *UI;
+ for (const User *U : AI->users()) {
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Note that atomic loads can be transformed; atomic semantics do
// not have any meaning for a local alloca.
@@ -131,8 +129,7 @@ struct AllocaInfo {
// As we scan the uses of the alloca instruction, keep track of stores,
// and decide whether all of the loads and stores to the alloca are within
// the same basic block.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
- UI != E;) {
+ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
@@ -317,8 +314,7 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
// instructions except for load and store.
- for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE;) {
+ for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) {
Instruction *I = cast<Instruction>(*UI);
++UI;
if (isa<LoadInst>(I) || isa<StoreInst>(I))
@@ -328,10 +324,9 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
// The only users of this bitcast/GEP instruction are lifetime intrinsics.
// Follow the use/def chain to erase them now instead of leaving it for
// dead code elimination later.
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE;) {
- Instruction *Inst = cast<Instruction>(*UI);
- ++UI;
+ for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) {
+ Instruction *Inst = cast<Instruction>(*UUI);
+ ++UUI;
Inst->eraseFromParent();
}
}
@@ -359,7 +354,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// Clear out UsingBlocks. We will reconstruct it here if needed.
Info.UsingBlocks.clear();
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
Instruction *UserInst = cast<Instruction>(*UI++);
if (!isa<LoadInst>(UserInst)) {
assert(UserInst == OnlyStore && "Should only have load/stores");
@@ -456,9 +451,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
StoresByIndexTy StoresByIndex;
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
- ++UI)
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
+ for (User *U : AI->users())
+ if (StoreInst *SI = dyn_cast<StoreInst>(U))
StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
// Sort the stores by their index, making it efficient to do a lookup with a
@@ -467,7 +461,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
- for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
+ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
LoadInst *LI = dyn_cast<LoadInst>(*UI++);
if (!LI)
continue;
@@ -485,7 +479,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
else
// Otherwise, there was a store before this load, the load takes its value.
- LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0));
+ LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
@@ -495,7 +489,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Remove the (now dead) stores and alloca.
while (!AI->use_empty()) {
- StoreInst *SI = cast<StoreInst>(AI->use_back());
+ StoreInst *SI = cast<StoreInst>(AI->user_back());
// Record debuginfo for the store before removing it.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
DIBuilder DIB(*AI->getParent()->getParent()->getParent());
@@ -679,8 +673,8 @@ void PromoteMem2Reg::run() {
// Iterating over NewPhiNodes is deterministic, so it is safe to try to
// simplify and RAUW them as we go. If it was not, we could add uses to
- // the values we replace with in a non deterministic order, thus creating
- // non deterministic def->use chains.
+ // the values we replace with in a non-deterministic order, thus creating
+ // non-deterministic def->use chains.
for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
I = NewPhiNodes.begin(),
E = NewPhiNodes.end();
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 30adbfa..28f5c44 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -16,12 +16,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/AlignOf.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index ff50b12..1e88587 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -23,6 +23,8 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -34,14 +36,12 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/NoFolder.h"
-#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
@@ -62,12 +62,13 @@ static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
-static cl::opt<bool>
-HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
- cl::desc("Hoist conditional stores if an unconditional store preceeds"));
+static cl::opt<bool> HoistCondStores(
+ "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores if an unconditional store precedes"));
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
+STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
@@ -90,7 +91,7 @@ namespace {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
- const DataLayout *const TD;
+ const DataLayout *const DL;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
std::vector<ValueEqualityComparisonCase> &Cases);
@@ -109,8 +110,8 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD)
- : TTI(TTI), TD(TD) {}
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *DL)
+ : TTI(TTI), DL(DL) {}
bool run(BasicBlock *BB);
};
}
@@ -306,15 +307,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
+static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
- if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy())
return CI;
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType()));
+ IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
@@ -340,13 +341,13 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) {
/// Values vector.
static Value *
GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
- const DataLayout *TD, bool isEQ, unsigned &UsedICmps) {
+ const DataLayout *DL, bool isEQ, unsigned &UsedICmps) {
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return 0;
// If this is an icmp against a constant, handle this as one of the cases.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
- if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) {
+ if (ConstantInt *C = GetConstantInt(I->getOperand(1), DL)) {
Value *RHSVal;
ConstantInt *RHSC;
@@ -405,11 +406,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
unsigned NumValsBeforeLHS = Vals.size();
unsigned UsedICmpsBeforeLHS = UsedICmps;
- if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD,
+ if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, DL,
isEQ, UsedICmps)) {
unsigned NumVals = Vals.size();
unsigned UsedICmpsBeforeRHS = UsedICmps;
- if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL,
isEQ, UsedICmps)) {
if (LHS == RHS)
return LHS;
@@ -434,7 +435,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
if (Extra == 0 || Extra == I->getOperand(0)) {
Value *OldExtra = Extra;
Extra = I->getOperand(0);
- if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, TD,
+ if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL,
isEQ, UsedICmps))
return RHS;
assert(Vals.size() == NumValsBeforeLHS);
@@ -472,14 +473,14 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), TD))
+ if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
- if (TD && CV) {
+ if (DL && CV) {
if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
Value *Ptr = PTII->getPointerOperand();
- if (PTII->getType() == TD->getIntPtrType(Ptr->getType()))
+ if (PTII->getType() == DL->getIntPtrType(Ptr->getType()))
CV = Ptr;
}
}
@@ -504,7 +505,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1),
- TD),
+ DL),
Succ));
return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
}
@@ -732,8 +733,7 @@ static void GetBranchWeights(TerminatorInst *TI,
MDNode* MD = TI->getMetadata(LLVMContext::MD_prof);
assert(MD);
for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
- ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i));
- assert(CI);
+ ConstantInt *CI = cast<ConstantInt>(MD->getOperand(i));
Weights.push_back(CI->getValue().getZExtValue());
}
@@ -748,21 +748,14 @@ static void GetBranchWeights(TerminatorInst *TI,
}
}
-/// Sees if any of the weights are too big for a uint32_t, and halves all the
-/// weights if any are.
+/// Keep halving the weights until all can fit in uint32_t.
static void FitWeights(MutableArrayRef<uint64_t> Weights) {
- bool Halve = false;
- for (unsigned i = 0; i < Weights.size(); ++i)
- if (Weights[i] > UINT_MAX) {
- Halve = true;
- break;
- }
-
- if (! Halve)
- return;
-
- for (unsigned i = 0; i < Weights.size(); ++i)
- Weights[i] /= 2;
+ uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
+ if (Max > UINT_MAX) {
+ unsigned Offset = 32 - countLeadingZeros(Max);
+ for (uint64_t &I : Weights)
+ I >>= Offset;
+ }
}
/// FoldValueComparisonIntoPredecessors - The specified terminator is a value
@@ -929,8 +922,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()),
+ assert(DL && "Cannot switch on pointer without DataLayout");
+ CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()),
"magicptr");
}
@@ -1421,7 +1414,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
Value *SpeculatedStoreValue = 0;
StoreInst *SpeculatedStore = 0;
for (BasicBlock::iterator BBI = ThenBB->begin(),
- BBE = llvm::prior(ThenBB->end());
+ BBE = std::prev(ThenBB->end());
BBI != BBE; ++BBI) {
Instruction *I = BBI;
// Skip debug info.
@@ -1531,7 +1524,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
// Hoist the instructions.
BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
- llvm::prior(ThenBB->end()));
+ std::prev(ThenBB->end()));
// Insert selects and rewrite the PHI operands.
IRBuilder<true, NoFolder> Builder(BI);
@@ -1589,10 +1582,9 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
// We can only support instructions that do not define values that are
// live outside of the current basic block.
- for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
- UI != E; ++UI) {
- Instruction *U = cast<Instruction>(*UI);
- if (U->getParent() != BB || isa<PHINode>(U)) return false;
+ for (User *U : BBI->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != BB || isa<PHINode>(UI)) return false;
}
// Looks ok, continue checking.
@@ -1605,7 +1597,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -1674,7 +1666,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
}
// Check for trivial simplification.
- if (Value *V = SimplifyInstruction(N, TD)) {
+ if (Value *V = SimplifyInstruction(N, DL)) {
TranslateMap[BBI] = V;
delete N; // Instruction folded away, don't need actual inst
} else {
@@ -1695,7 +1687,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
}
// Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, TD) | true;
+ return FoldCondBranchOnPHI(BI, DL) | true;
}
return false;
@@ -1703,7 +1695,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -1737,7 +1729,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) {
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
PHINode *PN = cast<PHINode>(II++);
- if (Value *V = SimplifyInstruction(PN, TD)) {
+ if (Value *V = SimplifyInstruction(PN, DL)) {
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
continue;
@@ -2015,7 +2007,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// register pressure or inhibit out-of-order execution.
Instruction *BonusInst = 0;
if (&*FrontIt != Cond &&
- FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+ FrontIt->hasOneUse() && FrontIt->user_back() == Cond &&
isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
@@ -2094,7 +2086,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// instructions that are used by the terminator's condition because it
// exposes more merging opportunities.
bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() &&
- *BonusInst->use_begin() == Cond);
+ BonusInst->user_back() == Cond);
if (BonusInst && !UsedByBranch) {
// Collect the values used by the bonus inst
@@ -2153,6 +2145,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *NewBonus = 0;
if (BonusInst) {
NewBonus = BonusInst->clone();
+
+ // If we moved a load, we cannot any longer claim any knowledge about
+ // its potential value. The previous information might have been valid
+ // only given the branch precondition.
+ // For an analogous reason, we must also drop all the metadata whose
+ // semantics we don't understand.
+ NewBonus->dropUnknownMetadata(LLVMContext::MD_dbg);
+
PredBlock->getInstList().insert(PBI, NewBonus);
NewBonus->takeName(BonusInst);
BonusInst->setName(BonusInst->getName()+".old");
@@ -2625,7 +2625,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(
ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
- const DataLayout *TD) {
+ const DataLayout *DL) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -2653,12 +2653,12 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
assert(VVal && "Should have a unique destination value");
ICI->setOperand(0, VVal);
- if (Value *V = SimplifyInstruction(ICI, TD)) {
+ if (Value *V = SimplifyInstruction(ICI, DL)) {
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -2674,13 +2674,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
// the block.
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
- PHINode *PHIUse = dyn_cast<PHINode>(ICI->use_back());
+ PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
isa<PHINode>(++BasicBlock::iterator(PHIUse)))
return false;
@@ -2730,7 +2730,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
IRBuilder<> &Builder) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (Cond == 0) return false;
@@ -2746,10 +2746,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
unsigned UsedICmps = 0;
if (Cond->getOpcode() == Instruction::Or) {
- CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true,
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, true,
UsedICmps);
} else if (Cond->getOpcode() == Instruction::And) {
- CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, false,
+ CompVal = GatherConstantCompares(Cond, Values, ExtraCase, DL, false,
UsedICmps);
TrueWhenEqual = false;
}
@@ -2811,9 +2811,9 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
Builder.SetInsertPoint(BI);
// Convert pointer to int before we switch.
if (CompVal->getType()->isPointerTy()) {
- assert(TD && "Cannot switch on pointer without DataLayout");
+ assert(DL && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
- TD->getIntPtrType(CompVal->getType()),
+ DL->getIntPtrType(CompVal->getType()),
"magicptr");
}
@@ -3222,7 +3222,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
Case.getCaseSuccessor()->removePredecessor(SI->getParent());
SI->removeCase(Case);
}
- if (HasWeight) {
+ if (HasWeight && Weights.size() >= 2) {
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
SI->setMetadata(LLVMContext::MD_prof,
MDBuilder(SI->getParent()->getContext()).
@@ -3428,7 +3428,7 @@ GetCaseResults(SwitchInst *SI,
Res.push_back(std::make_pair(PHI, ConstVal));
}
- return true;
+ return Res.size() > 0;
}
namespace {
@@ -3444,7 +3444,7 @@ namespace {
ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
- const DataLayout *TD);
+ const DataLayout *DL);
/// BuildLookup - Build instructions with Builder to retrieve the value at
/// the position given by Index in the lookup table.
@@ -3452,7 +3452,7 @@ namespace {
/// WouldFitInRegister - Return true if a table with TableSize elements of
/// type ElementType would fit in a target-legal register.
- static bool WouldFitInRegister(const DataLayout *TD,
+ static bool WouldFitInRegister(const DataLayout *DL,
uint64_t TableSize,
const Type *ElementType);
@@ -3491,7 +3491,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
ConstantInt *Offset,
const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
- const DataLayout *TD)
+ const DataLayout *DL)
: SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
assert(Values.size() && "Can't build lookup table without values!");
assert(TableSize >= Values.size() && "Can't fit values in table!");
@@ -3499,12 +3499,14 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
// If all values in the table are equal, this is that value.
SingleValue = Values.begin()->second;
+ Type *ValueType = Values.begin()->second->getType();
+
// Build up the table contents.
SmallVector<Constant*, 64> TableContents(TableSize);
for (size_t I = 0, E = Values.size(); I != E; ++I) {
ConstantInt *CaseVal = Values[I].first;
Constant *CaseRes = Values[I].second;
- assert(CaseRes->getType() == DefaultValue->getType());
+ assert(CaseRes->getType() == ValueType);
uint64_t Idx = (CaseVal->getValue() - Offset->getValue())
.getLimitedValue();
@@ -3516,6 +3518,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
// Fill in any holes in the table with the default result.
if (Values.size() < TableSize) {
+ assert(DefaultValue && "Need a default value to fill the lookup table holes.");
+ assert(DefaultValue->getType() == ValueType);
for (uint64_t I = 0; I < TableSize; ++I) {
if (!TableContents[I])
TableContents[I] = DefaultValue;
@@ -3533,8 +3537,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
}
// If the type is integer and the table fits in a register, build a bitmap.
- if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
- IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
+ if (WouldFitInRegister(DL, TableSize, ValueType)) {
+ IntegerType *IT = cast<IntegerType>(ValueType);
APInt TableInt(TableSize * IT->getBitWidth(), 0);
for (uint64_t I = TableSize; I > 0; --I) {
TableInt <<= IT->getBitWidth();
@@ -3552,7 +3556,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
}
// Store the table in an array.
- ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
+ ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
@@ -3598,10 +3602,10 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
llvm_unreachable("Unknown lookup table kind!");
}
-bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL,
uint64_t TableSize,
const Type *ElementType) {
- if (!TD)
+ if (!DL)
return false;
const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
if (!IT)
@@ -3612,7 +3616,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
// Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
if (TableSize >= UINT_MAX/IT->getBitWidth())
return false;
- return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
+ return DL->fitsInLegalInteger(TableSize * IT->getBitWidth());
}
/// ShouldBuildLookupTable - Determine whether a lookup table should be built
@@ -3621,7 +3625,7 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
static bool ShouldBuildLookupTable(SwitchInst *SI,
uint64_t TableSize,
const TargetTransformInfo &TTI,
- const DataLayout *TD,
+ const DataLayout *DL,
const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
return false; // TableSize overflowed, or mul below might overflow.
@@ -3637,7 +3641,7 @@ static bool ShouldBuildLookupTable(SwitchInst *SI,
// Saturate this flag to false.
AllTablesFitInRegister = AllTablesFitInRegister &&
- SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty);
+ SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
// If both flags saturate, we're done. NOTE: This *only* works with
// saturating flags, and all flags have to saturate first due to the
@@ -3666,7 +3670,7 @@ static bool ShouldBuildLookupTable(SwitchInst *SI,
static bool SwitchToLookupTable(SwitchInst *SI,
IRBuilder<> &Builder,
const TargetTransformInfo &TTI,
- const DataLayout* TD) {
+ const DataLayout* DL) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// Only build lookup table when we have a target that supports it.
@@ -3680,11 +3684,9 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// GEP needs a runtime relocation in PIC code. We should just build one big
// string and lookup indices into that.
- // Ignore the switch if the number of cases is too small.
- // This is similar to the check when building jump tables in
- // SelectionDAGBuilder::handleJTSwitchCase.
- // FIXME: Determine the best cut-off.
- if (SI->getNumCases() < 4)
+ // Ignore switches with less than three cases. Lookup tables will not make them
+ // faster, so we don't analyze them.
+ if (SI->getNumCases() < 3)
return false;
// Figure out the corresponding result for each case value and phi node in the
@@ -3712,7 +3714,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
ResultsTy Results;
if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
- Results, TD))
+ Results, DL))
return false;
// Append the result from this case to the list for each phi.
@@ -3723,21 +3725,41 @@ static bool SwitchToLookupTable(SwitchInst *SI,
}
}
- // Get the resulting values for the default case.
+ // Keep track of the result types.
+ for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
+ PHINode *PHI = PHIs[I];
+ ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
+ }
+
+ uint64_t NumResults = ResultLists[PHIs[0]].size();
+ APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+ uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+ bool TableHasHoles = (NumResults < TableSize);
+
+ // If the table has holes, we need a constant result for the default case
+ // or a bitmask that fits in a register.
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
- if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
- DefaultResultsList, TD))
- return false;
+ bool HasDefaultResults = false;
+ if (TableHasHoles) {
+ HasDefaultResults = GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList, DL);
+ }
+ bool NeedMask = (TableHasHoles && !HasDefaultResults);
+ if (NeedMask) {
+ // As an extra penalty for the validity test we require more cases.
+ if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
+ return false;
+ if (!(DL && DL->fitsInLegalInteger(TableSize)))
+ return false;
+ }
+
for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
PHINode *PHI = DefaultResultsList[I].first;
Constant *Result = DefaultResultsList[I].second;
DefaultResults[PHI] = Result;
- ResultTypes[PHI] = Result->getType();
}
- APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
- uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
- if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes))
+ if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
return false;
// Create the BB that does the lookups.
@@ -3755,7 +3777,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// Compute the maximum table size representable by the integer type we are
// switching upon.
unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
- uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize;
+ uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
assert(MaxTableSize >= TableSize &&
"It is impossible for a switch to have more entries than the max "
"representable value of its input integer type's size.");
@@ -3776,19 +3798,61 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// Populate the BB that does the lookups.
Builder.SetInsertPoint(LookupBB);
+
+ if (NeedMask) {
+ // Before doing the lookup we do the hole check.
+ // The LookupBB is therefore re-purposed to do the hole check
+ // and we create a new LookupBB.
+ BasicBlock *MaskBB = LookupBB;
+ MaskBB->setName("switch.hole_check");
+ LookupBB = BasicBlock::Create(Mod.getContext(),
+ "switch.lookup",
+ CommonDest->getParent(),
+ CommonDest);
+
+ // Build bitmask; fill in a 1 bit for every case.
+ APInt MaskInt(TableSize, 0);
+ APInt One(TableSize, 1);
+ const ResultListTy &ResultList = ResultLists[PHIs[0]];
+ for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
+ uint64_t Idx = (ResultList[I].first->getValue() -
+ MinCaseVal->getValue()).getLimitedValue();
+ MaskInt |= One << Idx;
+ }
+ ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
+
+ // Get the TableIndex'th bit of the bitmask.
+ // If this bit is 0 (meaning hole) jump to the default destination,
+ // else continue with table lookup.
+ IntegerType *MapTy = TableMask->getType();
+ Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy,
+ "switch.maskindex");
+ Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex,
+ "switch.shifted");
+ Value *LoBit = Builder.CreateTrunc(Shifted,
+ Type::getInt1Ty(Mod.getContext()),
+ "switch.lobit");
+ Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
+
+ Builder.SetInsertPoint(LookupBB);
+ AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
+ }
+
bool ReturnedEarly = false;
for (size_t I = 0, E = PHIs.size(); I != E; ++I) {
PHINode *PHI = PHIs[I];
+ // If using a bitmask, use any value to fill the lookup table holes.
+ Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
- DefaultResults[PHI], TD);
+ DV, DL);
Value *Result = Table.BuildLookup(TableIndex, Builder);
// If the result is used to return immediately from the function, we want to
// do that right here.
- if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) &&
- *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) {
+ if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) &&
+ PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) {
Builder.CreateRet(Result);
ReturnedEarly = true;
break;
@@ -3811,6 +3875,8 @@ static bool SwitchToLookupTable(SwitchInst *SI,
SI->eraseFromParent();
++NumLookupTables;
+ if (NeedMask)
+ ++NumLookupTablesHoles;
return true;
}
@@ -3822,12 +3888,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
// If the block only contains the switch, see if we can fold the block
// away into any preds.
@@ -3837,22 +3903,22 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
++BBI;
if (SI == &*BBI)
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
// Remove unreachable cases.
if (EliminateDeadSwitchCases(SI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
if (ForwardSwitchConditionToPHI(SI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
- if (SwitchToLookupTable(SI, Builder, TTI, TD))
- return SimplifyCFG(BB, TTI, TD) | true;
+ if (SwitchToLookupTable(SI, Builder, TTI, DL))
+ return SimplifyCFG(BB, TTI, DL) | true;
return false;
}
@@ -3889,7 +3955,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
return Changed;
}
@@ -3913,7 +3979,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD))
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, DL))
return true;
}
@@ -3922,7 +3988,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
return false;
}
@@ -3937,7 +4003,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
@@ -3947,26 +4013,26 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
++I;
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())){
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
}
// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
- if (SimplifyBranchOnICmpChain(BI, TD, Builder))
+ if (SimplifyBranchOnICmpChain(BI, DL, Builder))
return true;
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -3975,7 +4041,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
if (HoistThenElseCodeToIf(BI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to successor #1.
@@ -3983,7 +4049,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
// If Successor #0 has multiple preds, we may be able to conditionally
@@ -3992,22 +4058,22 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
}
// If this is a branch on a phi node in the current block, thread control
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, TD))
- return SimplifyCFG(BB, TTI, TD) | true;
+ if (FoldCondBranchOnPHI(BI, DL))
+ return SimplifyCFG(BB, TTI, DL) | true;
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB, TTI, TD) | true;
+ return SimplifyCFG(BB, TTI, DL) | true;
return false;
}
@@ -4023,7 +4089,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
if (C->isNullValue()) {
// Only look at the first use, avoid hurting compile time with long uselists
- User *Use = *I->use_begin();
+ User *Use = *I->user_begin();
// Now make sure that there are no instructions in between that can alter
// control flow (eg. calls)
@@ -4119,7 +4185,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// eliminate it, do so now.
if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, TD);
+ Changed |= FoldTwoEntryPHINode(PN, DL);
Builder.SetInsertPoint(BB->getTerminator());
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -4151,6 +4217,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- const DataLayout *TD) {
- return SimplifyCFGOpt(TTI, TD).run(BB);
+ const DataLayout *DL) {
+ return SimplifyCFGOpt(TTI, DL).run(BB);
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index bf3442a..30f56be 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "indvars"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/IVUsers.h"
@@ -23,7 +24,10 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -44,7 +48,7 @@ namespace {
Loop *L;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *TD; // May be NULL
+ const DataLayout *DL; // May be NULL
SmallVectorImpl<WeakVH> &DeadInsts;
@@ -56,9 +60,10 @@ namespace {
L(Loop),
LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
SE(SE),
- TD(LPM->getAnalysisIfAvailable<DataLayout>()),
DeadInsts(Dead),
Changed(false) {
+ DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
assert(LI && "IV simplification requires LoopInfo");
}
@@ -75,6 +80,9 @@ namespace {
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
+
+ Instruction *splitOverflowIntrinsic(Instruction *IVUser,
+ const DominatorTree *DT);
};
}
@@ -263,6 +271,69 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
return true;
}
+/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
+/// analysis and optimization.
+///
+/// \return A new value representing the non-overflowing add if possible,
+/// otherwise return the original value.
+Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
+ const DominatorTree *DT) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
+ if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
+ return IVUser;
+
+ // Find a branch guarded by the overflow check.
+ BranchInst *Branch = 0;
+ Instruction *AddVal = 0;
+ for (User *U : II->users()) {
+ if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(U)) {
+ if (ExtractInst->getNumIndices() != 1)
+ continue;
+ if (ExtractInst->getIndices()[0] == 0)
+ AddVal = ExtractInst;
+ else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
+ Branch = dyn_cast<BranchInst>(ExtractInst->user_back());
+ }
+ }
+ if (!AddVal || !Branch)
+ return IVUser;
+
+ BasicBlock *ContinueBB = Branch->getSuccessor(1);
+ if (std::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
+ return IVUser;
+
+ // Check if all users of the add are provably NSW.
+ bool AllNSW = true;
+ for (Use &U : AddVal->uses()) {
+ if (Instruction *UseInst = dyn_cast<Instruction>(U.getUser())) {
+ BasicBlock *UseBB = UseInst->getParent();
+ if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
+ UseBB = PHI->getIncomingBlock(U);
+ if (!DT->dominates(ContinueBB, UseBB)) {
+ AllNSW = false;
+ break;
+ }
+ }
+ }
+ if (!AllNSW)
+ return IVUser;
+
+ // Go for it...
+ IRBuilder<> Builder(IVUser);
+ Instruction *AddInst = dyn_cast<Instruction>(
+ Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));
+
+ // The caller expects the new add to have the same form as the intrinsic. The
+ // IV operand position must be the same.
+ assert((AddInst->getOpcode() == Instruction::Add &&
+ AddInst->getOperand(0) == II->getOperand(0)) &&
+ "Bad add instruction created from overflow intrinsic.");
+
+ AddVal->replaceAllUsesWith(AddInst);
+ DeadInsts.push_back(AddVal);
+ return AddInst;
+}
+
/// pushIVUsers - Add all uses of Def to the current IV's worklist.
///
static void pushIVUsers(
@@ -270,16 +341,15 @@ static void pushIVUsers(
SmallPtrSet<Instruction*,16> &Simplified,
SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
- for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
+ for (User *U : Def->users()) {
+ Instruction *UI = cast<Instruction>(U);
// Avoid infinite or exponential worklist processing.
// Also ensure unique worklist users.
// If Def is a LoopPhi, it may not be in the Simplified set, so check for
// self edges first.
- if (User != Def && Simplified.insert(User))
- SimpleIVUsers.push_back(std::make_pair(User, Def));
+ if (UI != Def && Simplified.insert(UI))
+ SimpleIVUsers.push_back(std::make_pair(UI, Def));
}
}
@@ -334,8 +404,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
while (!SimpleIVUsers.empty()) {
std::pair<Instruction*, Instruction*> UseOper =
SimpleIVUsers.pop_back_val();
+ Instruction *UseInst = UseOper.first;
+
// Bypass back edges to avoid extra work.
- if (UseOper.first == CurrIV) continue;
+ if (UseInst == CurrIV) continue;
+
+ if (V && V->shouldSplitOverflowInstrinsics()) {
+ UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree());
+ if (!UseInst)
+ continue;
+ }
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index f9687e4..bbd65f1 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -19,9 +19,9 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
@@ -38,15 +38,18 @@ namespace {
initializeInstSimplifierPass(*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfo>();
}
/// runOnFunction - Remove instructions that simplify.
- bool runOnFunction(Function &F) {
- const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
+ bool runOnFunction(Function &F) override {
+ const DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -63,11 +66,10 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT)) {
// Mark all uses for resimplification next time round the loop.
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI)
- Next->insert(cast<Instruction>(*UI));
+ for (User *U : I->users())
+ Next->insert(cast<Instruction>(U));
I->replaceAllUsesWith(V);
++NumSimplified;
Changed = true;
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 15b3e66..b5bc391 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -43,7 +43,7 @@ namespace {
class LibCallOptimization {
protected:
Function *Caller;
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
const LibCallSimplifier *LCS;
LLVMContext* Context;
@@ -63,11 +63,11 @@ public:
/// change the calling convention.
virtual bool ignoreCallingConv() { return false; }
- Value *optimizeCall(CallInst *CI, const DataLayout *TD,
+ Value *optimizeCall(CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
const LibCallSimplifier *LCS, IRBuilder<> &B) {
Caller = CI->getParent()->getParent();
- this->TD = TD;
+ this->DL = DL;
this->TLI = TLI;
this->LCS = LCS;
if (CI->getCalledFunction())
@@ -88,9 +88,8 @@ public:
/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
/// value is equal or not-equal to zero.
static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
if (IC->isEquality())
if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
if (C->isNullValue())
@@ -104,9 +103,8 @@ static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality
/// comparisons with With.
static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ for (User *U : V->users()) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
if (IC->isEquality() && IC->getOperand(1) == With)
continue;
// Unknown instruction.
@@ -152,7 +150,8 @@ protected:
struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
CallInst *CI;
- bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+ bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
+ bool isString) const override {
if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
return true;
if (ConstantInt *SizeCI =
@@ -175,7 +174,8 @@ struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
};
struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
LLVMContext &Context = CI->getParent()->getContext();
@@ -184,8 +184,8 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != DL->getIntPtrType(Context) ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return 0;
if (isFoldable(3, 2, false)) {
@@ -198,7 +198,8 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
};
struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
LLVMContext &Context = CI->getParent()->getContext();
@@ -207,8 +208,8 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != DL->getIntPtrType(Context) ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return 0;
if (isFoldable(3, 2, false)) {
@@ -221,7 +222,8 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
};
struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
FunctionType *FT = Callee->getFunctionType();
LLVMContext &Context = CI->getParent()->getContext();
@@ -230,8 +232,8 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(Context) ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != DL->getIntPtrType(Context) ||
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return 0;
if (isFoldable(3, 2, false)) {
@@ -245,7 +247,8 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
};
struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
StringRef Name = Callee->getName();
FunctionType *FT = Callee->getFunctionType();
@@ -256,7 +259,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != TD->getIntPtrType(Context))
+ FT->getParamType(2) != DL->getIntPtrType(Context))
return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -269,7 +272,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
return Ret;
} else {
 // Maybe we can still fold __strcpy_chk to __memcpy_chk.
@@ -277,12 +280,12 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
if (Len == 0) return 0;
 // This optimization requires DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
Value *Ret =
EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(Context), Len),
- CI->getArgOperand(2), B, TD, TLI);
+ ConstantInt::get(DL->getIntPtrType(Context), Len),
+ CI->getArgOperand(2), B, DL, TLI);
return Ret;
}
return 0;
@@ -290,7 +293,8 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
};
struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
StringRef Name = Callee->getName();
FunctionType *FT = Callee->getFunctionType();
@@ -301,12 +305,12 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
- FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
+ FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ Value *StrLen = EmitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
}
@@ -316,7 +320,7 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
return Ret;
} else {
 // Maybe we can still fold __stpcpy_chk to __memcpy_chk.
@@ -324,14 +328,14 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
if (Len == 0) return 0;
 // This optimization requires DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+ Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
Value *DstEnd = B.CreateGEP(Dst,
- ConstantInt::get(TD->getIntPtrType(PT),
+ ConstantInt::get(DL->getIntPtrType(PT),
Len - 1));
- if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI))
+ if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI))
return 0;
return DstEnd;
}
@@ -340,7 +344,8 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
};
struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
this->CI = CI;
StringRef Name = Callee->getName();
FunctionType *FT = Callee->getFunctionType();
@@ -351,12 +356,12 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
!FT->getParamType(2)->isIntegerTy() ||
- FT->getParamType(3) != TD->getIntPtrType(Context))
+ FT->getParamType(3) != DL->getIntPtrType(Context))
return 0;
if (isFoldable(3, 2, false)) {
Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TD, TLI,
+ CI->getArgOperand(2), B, DL, TLI,
Name.substr(2, 7));
return Ret;
}
@@ -369,7 +374,8 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
//===----------------------------------------------------------------------===//
struct StrCatOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strcat" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -392,7 +398,7 @@ struct StrCatOpt : public LibCallOptimization {
return Dst;
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
return emitStrLenMemCpy(Src, Dst, Len, B);
}
@@ -401,7 +407,7 @@ struct StrCatOpt : public LibCallOptimization {
IRBuilder<> &B) {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ Value *DstLen = EmitStrLen(Dst, B, DL, TLI);
if (!DstLen)
return 0;
@@ -413,13 +419,14 @@ struct StrCatOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(CpyDst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ ConstantInt::get(DL->getIntPtrType(*Context), Len + 1), 1);
return Dst;
}
};
struct StrNCatOpt : public StrCatOpt {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strncat" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
@@ -451,7 +458,7 @@ struct StrNCatOpt : public StrCatOpt {
if (SrcLen == 0 || Len == 0) return Dst;
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// We don't optimize this case
if (Len < SrcLen) return 0;
@@ -463,7 +470,8 @@ struct StrNCatOpt : public StrCatOpt {
};
struct StrChrOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strchr" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -479,22 +487,25 @@ struct StrChrOpt : public LibCallOptimization {
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (CharC == 0) {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
uint64_t Len = GetStringLength(SrcStr);
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
return 0;
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- B, TD, TLI);
+ ConstantInt::get(DL->getIntPtrType(*Context), Len),
+ B, DL, TLI);
}
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
StringRef Str;
- if (!getConstantStringInfo(SrcStr, Str))
+ if (!getConstantStringInfo(SrcStr, Str)) {
+ if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
+ return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
return 0;
+ }
// Compute the offset, make sure to handle the case when we're searching for
// zero (a weird way to spell strlen).
@@ -509,7 +520,8 @@ struct StrChrOpt : public LibCallOptimization {
};
struct StrRChrOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strrchr" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -528,8 +540,8 @@ struct StrRChrOpt : public LibCallOptimization {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
- if (TD && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TD, TLI);
+ if (DL && CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, DL, TLI);
return 0;
}
@@ -545,7 +557,8 @@ struct StrRChrOpt : public LibCallOptimization {
};
struct StrCmpOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strcmp" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -578,11 +591,11 @@ struct StrCmpOpt : public LibCallOptimization {
uint64_t Len2 = GetStringLength(Str2P);
if (Len1 && Len2) {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, TD, TLI);
+ ConstantInt::get(DL->getIntPtrType(*Context),
+ std::min(Len1, Len2)), B, DL, TLI);
}
return 0;
@@ -590,7 +603,8 @@ struct StrCmpOpt : public LibCallOptimization {
};
struct StrNCmpOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strncmp" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
@@ -614,8 +628,8 @@ struct StrNCmpOpt : public LibCallOptimization {
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
- if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
+ if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -640,7 +654,8 @@ struct StrNCmpOpt : public LibCallOptimization {
};
struct StrCpyOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "strcpy" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -654,7 +669,7 @@ struct StrCpyOpt : public LibCallOptimization {
return Src;
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -663,13 +678,14 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ ConstantInt::get(DL->getIntPtrType(*Context), Len), 1);
return Dst;
}
};
struct StpCpyOpt: public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Verify the "stpcpy" function prototype.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
@@ -679,11 +695,11 @@ struct StpCpyOpt: public LibCallOptimization {
return 0;
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
- Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ Value *StrLen = EmitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
}
@@ -692,9 +708,9 @@ struct StpCpyOpt: public LibCallOptimization {
if (Len == 0) return 0;
Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len);
+ Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
Value *DstEnd = B.CreateGEP(Dst,
- ConstantInt::get(TD->getIntPtrType(PT),
+ ConstantInt::get(DL->getIntPtrType(PT),
Len - 1));
// We have enough information to now generate the memcpy call to do the
@@ -705,7 +721,8 @@ struct StpCpyOpt: public LibCallOptimization {
};
struct StrNCpyOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
@@ -737,7 +754,7 @@ struct StrNCpyOpt : public LibCallOptimization {
if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// Let strncpy handle the zero padding
if (Len > SrcLen+1) return 0;
@@ -745,15 +762,16 @@ struct StrNCpyOpt : public LibCallOptimization {
Type *PT = FT->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(PT), Len), 1);
+ ConstantInt::get(DL->getIntPtrType(PT), Len), 1);
return Dst;
}
};
struct StrLenOpt : public LibCallOptimization {
- virtual bool ignoreCallingConv() { return true; }
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ bool ignoreCallingConv() override { return true; }
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -775,7 +793,8 @@ struct StrLenOpt : public LibCallOptimization {
};
struct StrPBrkOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -802,15 +821,16 @@ struct StrPBrkOpt : public LibCallOptimization {
}
// strpbrk(s, "a") -> strchr(s, 'a')
- if (TD && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
+ if (DL && HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI);
return 0;
}
};
struct StrToOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
!FT->getParamType(0)->isPointerTy() ||
@@ -829,7 +849,8 @@ struct StrToOpt : public LibCallOptimization {
};
struct StrSpnOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -858,7 +879,8 @@ struct StrSpnOpt : public LibCallOptimization {
};
struct StrCSpnOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
@@ -882,15 +904,16 @@ struct StrCSpnOpt : public LibCallOptimization {
}
// strcspn(s, "") -> strlen(s)
- if (TD && HasS2 && S2.empty())
- return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
+ if (DL && HasS2 && S2.empty())
+ return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
return 0;
}
};
struct StrStrOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
!FT->getParamType(0)->isPointerTy() ||
@@ -903,16 +926,15 @@ struct StrStrOpt : public LibCallOptimization {
return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
- if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+ if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
if (!StrLen)
return 0;
Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
- StrLen, B, TD, TLI);
+ StrLen, B, DL, TLI);
if (!StrNCmp)
return 0;
- for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
- UI != UE; ) {
+ for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
ICmpInst *Old = cast<ICmpInst>(*UI++);
Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
ConstantInt::getNullValue(StrNCmp->getType()),
@@ -946,7 +968,7 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+ Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI);
return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
}
return 0;
@@ -954,7 +976,8 @@ struct StrStrOpt : public LibCallOptimization {
};
struct MemCmpOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
@@ -1006,15 +1029,16 @@ struct MemCmpOpt : public LibCallOptimization {
};
struct MemCpyOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != DL->getIntPtrType(*Context))
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -1025,15 +1049,16 @@ struct MemCpyOpt : public LibCallOptimization {
};
struct MemMoveOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != DL->getIntPtrType(*Context))
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
@@ -1044,15 +1069,16 @@ struct MemMoveOpt : public LibCallOptimization {
};
struct MemSetOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
+ FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -1072,7 +1098,8 @@ struct MemSetOpt : public LibCallOptimization {
struct UnaryDoubleFPOpt : public LibCallOptimization {
bool CheckRetType;
UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
!FT->getParamType(0)->isDoubleTy())
@@ -1080,9 +1107,8 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
if (CheckRetType) {
// Check if all the uses for function like 'sin' are converted to float.
- for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
- ++UseI) {
- FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+ for (User *U : CI->users()) {
+ FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
if (Cast == 0 || !Cast->getType()->isFloatTy())
return 0;
}
@@ -1100,6 +1126,49 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
}
};
+// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax'
+//
+// Rewrites a call whose two double arguments are both extensions of float
+// values into a call on the float values, extending the result back to double.
+// When CheckRetType is set, the rewrite is only performed if every user of the
+// call truncates the result to float.
+struct BinaryDoubleFPOpt : public LibCallOptimization {
+  bool CheckRetType;
+  BinaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+  Value *callOptimizer(Function *Callee, CallInst *CI,
+                       IRBuilder<> &B) override {
+    FunctionType *FT = Callee->getFunctionType();
+    // The replacement built below is always extended to double, so the callee
+    // must be exactly 'double f(double, double)'. Accepting any other
+    // floating-point prototype would yield a replacement value whose type
+    // differs from the call being replaced.
+    if (FT->getNumParams() != 2 || !FT->getReturnType()->isDoubleTy() ||
+        !FT->getParamType(0)->isDoubleTy() ||
+        !FT->getParamType(1)->isDoubleTy())
+      return 0;
+
+    if (CheckRetType) {
+      // Check if all the uses for function like 'fmin/fmax' are converted to
+      // float.
+      for (User *U : CI->users()) {
+        FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
+        if (Cast == 0 || !Cast->getType()->isFloatTy())
+          return 0;
+      }
+    }
+
+    // If this is something like 'fmin((double)floatval1, (double)floatval2)',
+    // we convert it to fminf.
+    FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+    FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1));
+    if (Cast1 == 0 || !Cast1->getOperand(0)->getType()->isFloatTy() ||
+        Cast2 == 0 || !Cast2->getOperand(0)->getType()->isFloatTy())
+      return 0;
+
+    // fmin((double)floatval1, (double)floatval2)
+    //   -> (double)fminf(floatval1, floatval2)
+    Value *V1 = Cast1->getOperand(0);
+    Value *V2 = Cast2->getOperand(0);
+    Value *V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
+                                     Callee->getAttributes());
+    return B.CreateFPExt(V, B.getDoubleTy());
+  }
+};
+
struct UnsafeFPLibCallOptimization : public LibCallOptimization {
bool UnsafeFPShrink;
UnsafeFPLibCallOptimization(bool UnsafeFPShrink) {
@@ -1109,7 +1178,8 @@ struct UnsafeFPLibCallOptimization : public LibCallOptimization {
struct CosOpt : public UnsafeFPLibCallOptimization {
CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
Value *Ret = NULL;
if (UnsafeFPShrink && Callee->getName() == "cos" &&
TLI->has(LibFunc::cosf)) {
@@ -1136,7 +1206,8 @@ struct CosOpt : public UnsafeFPLibCallOptimization {
struct PowOpt : public UnsafeFPLibCallOptimization {
PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
Value *Ret = NULL;
if (UnsafeFPShrink && Callee->getName() == "pow" &&
TLI->has(LibFunc::powf)) {
@@ -1162,6 +1233,12 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
LibFunc::exp2l))
return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
+ // pow(10.0, x) -> exp10(x)
+ if (Op1C->isExactlyValue(10.0) &&
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
+ LibFunc::exp10l))
+ return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B,
+ Callee->getAttributes());
}
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
@@ -1204,7 +1281,8 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
struct Exp2Opt : public UnsafeFPLibCallOptimization {
Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
Value *Ret = NULL;
if (UnsafeFPShrink && Callee->getName() == "exp2" &&
TLI->has(LibFunc::exp2f)) {
@@ -1222,37 +1300,37 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- Value *LdExpArg = 0;
- if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
- LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
- } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
- if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
- LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
- }
+ LibFunc::Func LdExp = LibFunc::ldexpl;
+ if (Op->getType()->isFloatTy())
+ LdExp = LibFunc::ldexpf;
+ else if (Op->getType()->isDoubleTy())
+ LdExp = LibFunc::ldexp;
+
+ if (TLI->has(LdExp)) {
+ Value *LdExpArg = 0;
+ if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
+ LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
+ } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
+ if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
+ LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty());
+ }
- if (LdExpArg) {
- const char *Name;
- if (Op->getType()->isFloatTy())
- Name = "ldexpf";
- else if (Op->getType()->isDoubleTy())
- Name = "ldexp";
- else
- Name = "ldexpl";
-
- Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
- if (!Op->getType()->isFloatTy())
- One = ConstantExpr::getFPExtend(One, Op->getType());
-
- Module *M = Caller->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType(),
- B.getInt32Ty(), NULL);
- CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
+ if (LdExpArg) {
+ Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+ if (!Op->getType()->isFloatTy())
+ One = ConstantExpr::getFPExtend(One, Op->getType());
- return CI;
+ Module *M = Caller->getParent();
+ Value *Callee =
+ M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
+ Op->getType(), B.getInt32Ty(), NULL);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+ }
}
return Ret;
}
@@ -1261,7 +1339,8 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
struct SinCosPiOpt : public LibCallOptimization {
SinCosPiOpt() {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Make sure the prototype is as expected, otherwise the rest of the
// function is probably invalid and likely to abort.
if (!isTrigLibCall(CI))
@@ -1277,9 +1356,8 @@ struct SinCosPiOpt : public LibCallOptimization {
// Look for all compatible sinpi, cospi and sincospi calls with the same
// argument. If there are enough (in some sense) we can make the
// substitution.
- for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
- UI != UE; ++UI)
- classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls,
+ for (User *U : Arg->users())
+ classifyArgUse(U, CI->getParent(), IsFloat, SinCalls, CosCalls,
SinCosCalls);
// It's only worthwhile if both sinpi and cospi are actually used.
@@ -1334,7 +1412,7 @@ struct SinCosPiOpt : public LibCallOptimization {
SinCalls.push_back(CI);
else if (Func == LibFunc::cospif)
CosCalls.push_back(CI);
- else if (Func == LibFunc::sincospi_stretf)
+ else if (Func == LibFunc::sincospif_stret)
SinCosCalls.push_back(CI);
} else {
if (Func == LibFunc::sinpi)
@@ -1363,7 +1441,7 @@ struct SinCosPiOpt : public LibCallOptimization {
Triple T(OrigCallee->getParent()->getTargetTriple());
if (UseFloat) {
- Name = "__sincospi_stretf";
+ Name = "__sincospif_stret";
assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
// x86_64 can't use {float, float} since that would be returned in both
@@ -1412,7 +1490,8 @@ struct SinCosPiOpt : public LibCallOptimization {
//===----------------------------------------------------------------------===//
struct FFSOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// Just make sure this has 2 arguments of the same FP type, which match the
// result type.
@@ -1445,8 +1524,9 @@ struct FFSOpt : public LibCallOptimization {
};
struct AbsOpt : public LibCallOptimization {
- virtual bool ignoreCallingConv() { return true; }
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ bool ignoreCallingConv() override { return true; }
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// We require integer(integer) where the types agree.
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
@@ -1463,7 +1543,8 @@ struct AbsOpt : public LibCallOptimization {
};
struct IsDigitOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
@@ -1479,7 +1560,8 @@ struct IsDigitOpt : public LibCallOptimization {
};
struct IsAsciiOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
@@ -1494,7 +1576,8 @@ struct IsAsciiOpt : public LibCallOptimization {
};
struct ToAsciiOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
FunctionType *FT = Callee->getFunctionType();
// We require i32(i32)
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
@@ -1514,7 +1597,8 @@ struct ToAsciiOpt : public LibCallOptimization {
struct ErrorReportingOpt : public LibCallOptimization {
ErrorReportingOpt(int S = -1) : StreamArg(S) {}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &) override {
// Error reporting calls should be cold, mark them as such.
// This applies even to non-builtin calls: it is only a hint and applies to
// functions that the frontend might not understand as builtins.
@@ -1580,7 +1664,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI);
if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1592,7 +1676,7 @@ struct PrintFOpt : public LibCallOptimization {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- Value *NewCI = EmitPutS(GV, B, TD, TLI);
+ Value *NewCI = EmitPutS(GV, B, DL, TLI);
return (CI->use_empty() || !NewCI) ?
NewCI :
ConstantInt::get(CI->getType(), FormatStr.size()+1);
@@ -1602,7 +1686,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI);
if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -1611,12 +1695,13 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy()) {
- return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
+ return EmitPutS(CI->getArgOperand(1), B, DL, TLI);
}
return 0;
}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Require one fixed pointer argument and an integer/void result.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
@@ -1660,11 +1745,11 @@ struct SPrintFOpt : public LibCallOptimization {
return 0; // we found a format specifier, bail out.
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+ ConstantInt::get(DL->getIntPtrType(*Context), // Copy the
FormatStr.size() + 1), 1); // nul byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1690,12 +1775,12 @@ struct SPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 's') {
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI);
if (!Len)
return 0;
Value *IncLen = B.CreateAdd(Len,
@@ -1709,7 +1794,8 @@ struct SPrintFOpt : public LibCallOptimization {
return 0;
}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Require two fixed pointer arguments and an integer result.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
@@ -1760,12 +1846,12 @@ struct FPrintFOpt : public LibCallOptimization {
return 0; // We found a format specifier.
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
return EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
+ ConstantInt::get(DL->getIntPtrType(*Context),
FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
+ CI->getArgOperand(0), B, DL, TLI);
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1778,19 +1864,20 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return 0;
- return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
}
return 0;
}
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Require two fixed paramters as pointers and integer result.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
@@ -1818,7 +1905,8 @@ struct FPrintFOpt : public LibCallOptimization {
};
struct FWriteOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
ErrorReportingOpt ER(/* StreamArg = */ 3);
(void) ER.callOptimizer(Callee, CI, B);
@@ -1845,7 +1933,7 @@ struct FWriteOpt : public LibCallOptimization {
// This optimisation is only valid, if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
@@ -1854,12 +1942,13 @@ struct FWriteOpt : public LibCallOptimization {
};
struct FPutsOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
ErrorReportingOpt ER(/* StreamArg = */ 1);
(void) ER.callOptimizer(Callee, CI, B);
// These optimizations require DataLayout.
- if (!TD) return 0;
+ if (!DL) return 0;
// Require two pointers. Also, we can't optimize if return value is used.
FunctionType *FT = Callee->getFunctionType();
@@ -1873,13 +1962,14 @@ struct FPutsOpt : public LibCallOptimization {
if (!Len) return 0;
// Known to have no uses (see above).
return EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, TD, TLI);
+ ConstantInt::get(DL->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, DL, TLI);
}
};
struct PutsOpt : public LibCallOptimization {
- virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ Value *callOptimizer(Function *Callee, CallInst *CI,
+ IRBuilder<> &B) override {
// Require one fixed pointer argument and an integer/void result.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
@@ -1894,7 +1984,7 @@ struct PutsOpt : public LibCallOptimization {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI);
if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1908,7 +1998,7 @@ struct PutsOpt : public LibCallOptimization {
namespace llvm {
class LibCallSimplifierImpl {
- const DataLayout *TD;
+ const DataLayout *DL;
const TargetLibraryInfo *TLI;
const LibCallSimplifier *LCS;
bool UnsafeFPShrink;
@@ -1918,11 +2008,11 @@ class LibCallSimplifierImpl {
PowOpt Pow;
Exp2Opt Exp2;
public:
- LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
+ LibCallSimplifierImpl(const DataLayout *DL, const TargetLibraryInfo *TLI,
const LibCallSimplifier *LCS,
bool UnsafeFPShrink = false)
: Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
- this->TD = TD;
+ this->DL = DL;
this->TLI = TLI;
this->LCS = LCS;
this->UnsafeFPShrink = UnsafeFPShrink;
@@ -1975,6 +2065,7 @@ static MemSetOpt MemSet;
// Math library call optimizations.
static UnaryDoubleFPOpt UnaryDoubleFP(false);
+static BinaryDoubleFPOpt BinaryDoubleFP(false);
static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
static SinCosPiOpt SinCosPi;
@@ -2144,6 +2235,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
if (UnsafeFPShrink && hasFloatVersion(FuncName))
return &UnsafeUnaryDoubleFP;
return 0;
+ case LibFunc::fmin:
+ case LibFunc::fmax:
+ if (hasFloatVersion(FuncName))
+ return &BinaryDoubleFP;
+ return 0;
case LibFunc::memcpy_chk:
return &MemCpyChk;
default:
@@ -2175,15 +2271,15 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
LibCallOptimization *LCO = lookupOptimization(CI);
if (LCO) {
IRBuilder<> Builder(CI);
- return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
+ return LCO->optimizeCall(CI, DL, TLI, LCS, Builder);
}
return 0;
}
-LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
+LibCallSimplifier::LibCallSimplifier(const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool UnsafeFPShrink) {
- Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink);
+ Impl = new LibCallSimplifierImpl(DL, TLI, this, UnsafeFPShrink);
}
LibCallSimplifier::~LibCallSimplifier() {
@@ -2242,8 +2338,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const {
// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
//
-// strchr:
-// * strchr(p, 0) -> strlen(p)
// tan, tanf, tanl:
// * tan(atan(x)) -> x
//
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp
index 2ef692c..c318560 100644
--- a/lib/Transforms/Utils/SpecialCaseList.cpp
+++ b/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -15,9 +15,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SpecialCaseList.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/DerivedTypes.h"
@@ -55,7 +54,7 @@ SpecialCaseList *SpecialCaseList::create(
const StringRef Path, std::string &Error) {
if (Path.empty())
return new SpecialCaseList();
- OwningPtr<MemoryBuffer> File;
+ std::unique_ptr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(Path, File)) {
Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
return 0;
@@ -65,10 +64,10 @@ SpecialCaseList *SpecialCaseList::create(
SpecialCaseList *SpecialCaseList::create(
const MemoryBuffer *MB, std::string &Error) {
- OwningPtr<SpecialCaseList> SCL(new SpecialCaseList());
+ std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
if (!SCL->parse(MB, Error))
return 0;
- return SCL.take();
+ return SCL.release();
}
SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) {
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index c3df215..ed4f45c 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
/// initializeTransformUtils - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeTransformUtils(PassRegistry &Registry) {
+ initializeAddDiscriminatorsPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
initializeInstNamerPass(Registry);
initializeLCSSAPass(Registry);
diff --git a/lib/Transforms/Vectorize/Android.mk b/lib/Transforms/Vectorize/Android.mk
index 2778900..ea090c0 100644
--- a/lib/Transforms/Vectorize/Android.mk
+++ b/lib/Transforms/Vectorize/Android.mk
@@ -21,6 +21,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
# For the device
# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_vectorize_SRC_FILES)
@@ -31,3 +32,4 @@ LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index c5e1dcb..71350e7 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -26,7 +26,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -34,6 +33,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -41,10 +41,10 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -199,9 +199,10 @@ namespace {
BBVectorize(Pass *P, const VectorizeConfig &C)
: BasicBlockPass(ID), Config(C) {
AA = &P->getAnalysis<AliasAnalysis>();
- DT = &P->getAnalysis<DominatorTree>();
+ DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &P->getAnalysis<ScalarEvolution>();
- TD = P->getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TTI = IgnoreTargetInfo ? 0 : &P->getAnalysis<TargetTransformInfo>();
}
@@ -214,7 +215,7 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
ScalarEvolution *SE;
- DataLayout *TD;
+ const DataLayout *DL;
const TargetTransformInfo *TTI;
// FIXME: const correct?
@@ -388,6 +389,8 @@ namespace {
void combineMetadata(Instruction *K, const Instruction *J);
bool vectorizeBB(BasicBlock &BB) {
+ if (skipOptnoneFunction(BB))
+ return false;
if (!DT->isReachableFromEntry(&BB)) {
DEBUG(dbgs() << "BBV: skipping unreachable " << BB.getName() <<
" in " << BB.getParent()->getName() << "\n");
@@ -428,24 +431,27 @@ namespace {
return changed;
}
- virtual bool runOnBasicBlock(BasicBlock &BB) {
+ bool runOnBasicBlock(BasicBlock &BB) override {
+ // OptimizeNone check deferred to vectorizeBB().
+
AA = &getAnalysis<AliasAnalysis>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
- TD = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TTI = IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>();
return vectorizeBB(BB);
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
BasicBlockPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetTransformInfo>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<ScalarEvolution>();
AU.setPreservesCFG();
}
@@ -528,7 +534,11 @@ namespace {
// Returns the cost of the provided instruction using TTI.
// This does not handle loads and stores.
- unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) {
+ unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2,
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_AnyValue) {
switch (Opcode) {
default: break;
case Instruction::GetElementPtr:
@@ -558,7 +568,7 @@ namespace {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- return TTI->getArithmeticInstrCost(Opcode, T1);
+ return TTI->getArithmeticInstrCost(Opcode, T1, Op1VK, Op2VK);
case Instruction::Select:
case Instruction::ICmp:
case Instruction::FCmp:
@@ -626,11 +636,11 @@ namespace {
int64_t Offset = IntOff->getSExtValue();
Type *VTy = IPtr->getType()->getPointerElementType();
- int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
+ int64_t VTyTSS = (int64_t) DL->getTypeStoreSize(VTy);
Type *VTy2 = JPtr->getType()->getPointerElementType();
if (VTy != VTy2 && Offset < 0) {
- int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2);
+ int64_t VTy2TSS = (int64_t) DL->getTypeStoreSize(VTy2);
OffsetInElmts = Offset/VTy2TSS;
return (abs64(Offset) % VTy2TSS) == 0;
}
@@ -813,7 +823,7 @@ namespace {
// It is important to cleanup here so that future iterations of this
// function have less work to do.
- (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo());
+ (void) SimplifyInstructionsInBlock(&BB, DL, AA->getTargetLibraryInfo());
return true;
}
@@ -868,7 +878,7 @@ namespace {
}
// We can't vectorize memory operations without target data
- if (TD == 0 && IsSimpleLoadStore)
+ if (DL == 0 && IsSimpleLoadStore)
return false;
Type *T1, *T2;
@@ -905,7 +915,7 @@ namespace {
if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
return false;
- if ((!Config.VectorizePointers || TD == 0) &&
+ if ((!Config.VectorizePointers || DL == 0) &&
(T1->getScalarType()->isPointerTy() ||
T2->getScalarType()->isPointerTy()))
return false;
@@ -969,7 +979,7 @@ namespace {
// with the lower offset has an alignment suitable for the
// vector type.
- unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
+ unsigned VecAlignment = DL->getPrefTypeAlignment(VType);
if (BottomAlignment < VecAlignment)
return false;
}
@@ -1009,13 +1019,49 @@ namespace {
unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
Type *VT1 = getVecTypeForPair(IT1, JT1),
*VT2 = getVecTypeForPair(IT2, JT2);
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_AnyValue;
+
+ // On some targets (for example, X86) the cost of a vector shift may vary
+ // depending on whether the second operand is a Uniform or
+ // NonUniform Constant.
+ switch (I->getOpcode()) {
+ default : break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+
+ // If both I and J are scalar shifts by constant, then the
+ // merged vector shift count would be either a constant splat value
+ // or a non-uniform vector of constants.
+ if (ConstantInt *CII = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (ConstantInt *CIJ = dyn_cast<ConstantInt>(J->getOperand(1)))
+ Op2VK = CII == CIJ ? TargetTransformInfo::OK_UniformConstantValue :
+ TargetTransformInfo::OK_NonUniformConstantValue;
+ } else {
+ // Check for a splat of a constant or for a non-uniform vector
+ // of constants.
+ Value *IOp = I->getOperand(1);
+ Value *JOp = J->getOperand(1);
+ if ((isa<ConstantVector>(IOp) || isa<ConstantDataVector>(IOp)) &&
+ (isa<ConstantVector>(JOp) || isa<ConstantDataVector>(JOp))) {
+ Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
+ Constant *SplatValue = cast<Constant>(IOp)->getSplatValue();
+ if (SplatValue != NULL &&
+ SplatValue == cast<Constant>(JOp)->getSplatValue())
+ Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+ }
+ }
+ }
// Note that this procedure is incorrect for insert and extract element
// instructions (because combining these often results in a shuffle),
// but this cost is ignored (because insert and extract element
// instructions are assigned a zero depth factor and are not really
// fused in general).
- unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2);
+ unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2, Op1VK, Op2VK);
if (VCost > ICost + JCost)
return false;
@@ -1185,7 +1231,7 @@ namespace {
if (I->mayWriteToMemory()) WriteSet.add(I);
bool JAfterStart = IAfterStart;
- BasicBlock::iterator J = llvm::next(I);
+ BasicBlock::iterator J = std::next(I);
for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
if (J == Start) JAfterStart = true;
@@ -1230,7 +1276,7 @@ namespace {
// The next call to this function must start after the last instruction
// selected during this invocation.
if (JAfterStart) {
- Start = llvm::next(J);
+ Start = std::next(J);
IAfterStart = JAfterStart = false;
}
@@ -1272,13 +1318,15 @@ namespace {
// For each possible pairing for this variable, look at the uses of
// the first value...
- for (Value::use_iterator I = P.first->use_begin(),
- E = P.first->use_end(); I != E; ++I) {
- if (isa<LoadInst>(*I)) {
+ for (Value::user_iterator I = P.first->user_begin(),
+ E = P.first->user_end();
+ I != E; ++I) {
+ User *UI = *I;
+ if (isa<LoadInst>(UI)) {
// A pair cannot be connected to a load because the load only takes one
// operand (the address) and it is a scalar even after vectorization.
continue;
- } else if ((SI = dyn_cast<StoreInst>(*I)) &&
+ } else if ((SI = dyn_cast<StoreInst>(UI)) &&
P.first == SI->getPointerOperand()) {
// Similarly, a pair cannot be connected to a store through its
// pointer operand.
@@ -1287,22 +1335,21 @@ namespace {
// For each use of the first variable, look for uses of the second
// variable...
- for (Value::use_iterator J = P.second->use_begin(),
- E2 = P.second->use_end(); J != E2; ++J) {
- if ((SJ = dyn_cast<StoreInst>(*J)) &&
+ for (User *UJ : P.second->users()) {
+ if ((SJ = dyn_cast<StoreInst>(UJ)) &&
P.second == SJ->getPointerOperand())
continue;
// Look for <I, J>:
- if (CandidatePairsSet.count(ValuePair(*I, *J))) {
- VPPair VP(P, ValuePair(*I, *J));
+ if (CandidatePairsSet.count(ValuePair(UI, UJ))) {
+ VPPair VP(P, ValuePair(UI, UJ));
ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect));
}
// Look for <J, I>:
- if (CandidatePairsSet.count(ValuePair(*J, *I))) {
- VPPair VP(P, ValuePair(*J, *I));
+ if (CandidatePairsSet.count(ValuePair(UJ, UI))) {
+ VPPair VP(P, ValuePair(UJ, UI));
ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap));
}
@@ -1311,13 +1358,14 @@ namespace {
if (Config.SplatBreaksChain) continue;
// Look for cases where just the first value in the pair is used by
// both members of another pair (splatting).
- for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
- if ((SJ = dyn_cast<StoreInst>(*J)) &&
+ for (Value::user_iterator J = P.first->user_begin(); J != E; ++J) {
+ User *UJ = *J;
+ if ((SJ = dyn_cast<StoreInst>(UJ)) &&
P.first == SJ->getPointerOperand())
continue;
- if (CandidatePairsSet.count(ValuePair(*I, *J))) {
- VPPair VP(P, ValuePair(*I, *J));
+ if (CandidatePairsSet.count(ValuePair(UI, UJ))) {
+ VPPair VP(P, ValuePair(UI, UJ));
ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
}
@@ -1327,21 +1375,24 @@ namespace {
if (Config.SplatBreaksChain) return;
// Look for cases where just the second value in the pair is used by
// both members of another pair (splatting).
- for (Value::use_iterator I = P.second->use_begin(),
- E = P.second->use_end(); I != E; ++I) {
- if (isa<LoadInst>(*I))
+ for (Value::user_iterator I = P.second->user_begin(),
+ E = P.second->user_end();
+ I != E; ++I) {
+ User *UI = *I;
+ if (isa<LoadInst>(UI))
continue;
- else if ((SI = dyn_cast<StoreInst>(*I)) &&
+ else if ((SI = dyn_cast<StoreInst>(UI)) &&
P.second == SI->getPointerOperand())
continue;
- for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
- if ((SJ = dyn_cast<StoreInst>(*J)) &&
+ for (Value::user_iterator J = P.second->user_begin(); J != E; ++J) {
+ User *UJ = *J;
+ if ((SJ = dyn_cast<StoreInst>(UJ)) &&
P.second == SJ->getPointerOperand())
continue;
- if (CandidatePairsSet.count(ValuePair(*I, *J))) {
- VPPair VP(P, ValuePair(*I, *J));
+ if (CandidatePairsSet.count(ValuePair(UI, UJ))) {
+ VPPair VP(P, ValuePair(UI, UJ));
ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
}
@@ -1407,7 +1458,7 @@ namespace {
AliasSetTracker WriteSet(*AA);
if (I->mayWriteToMemory()) WriteSet.add(I);
- for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) {
+ for (BasicBlock::iterator J = std::next(I); J != E; ++J) {
(void) trackUsesOfI(Users, WriteSet, I, J);
if (J == EL)
@@ -1901,16 +1952,15 @@ namespace {
Type *VTy = getVecTypeForPair(Ty1, Ty2);
bool NeedsExtraction = false;
- for (Value::use_iterator I = S->first->use_begin(),
- IE = S->first->use_end(); I != IE; ++I) {
- if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ for (User *U : S->first->users()) {
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(U)) {
// Shuffle can be folded if it has no other input
if (isa<UndefValue>(SI->getOperand(1)))
continue;
}
- if (isa<ExtractElementInst>(*I))
+ if (isa<ExtractElementInst>(U))
continue;
- if (PrunedDAGInstrs.count(*I))
+ if (PrunedDAGInstrs.count(U))
continue;
NeedsExtraction = true;
break;
@@ -1933,16 +1983,15 @@ namespace {
}
NeedsExtraction = false;
- for (Value::use_iterator I = S->second->use_begin(),
- IE = S->second->use_end(); I != IE; ++I) {
- if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(*I)) {
+ for (User *U : S->second->users()) {
+ if (ShuffleVectorInst *SI = dyn_cast<ShuffleVectorInst>(U)) {
// Shuffle can be folded if it has no other input
if (isa<UndefValue>(SI->getOperand(1)))
continue;
}
- if (isa<ExtractElementInst>(*I))
+ if (isa<ExtractElementInst>(U))
continue;
- if (PrunedDAGInstrs.count(*I))
+ if (PrunedDAGInstrs.count(U))
continue;
NeedsExtraction = true;
break;
@@ -2795,7 +2844,7 @@ namespace {
DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I, Instruction *J) {
// Skip to the first instruction past I.
- BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+ BasicBlock::iterator L = std::next(BasicBlock::iterator(I));
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
@@ -2817,7 +2866,7 @@ namespace {
Instruction *&InsertionPt,
Instruction *I, Instruction *J) {
// Skip to the first instruction past I.
- BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+ BasicBlock::iterator L = std::next(BasicBlock::iterator(I));
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
@@ -2848,7 +2897,7 @@ namespace {
DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I) {
// Skip to the first instruction past I.
- BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+ BasicBlock::iterator L = std::next(BasicBlock::iterator(I));
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
@@ -3119,7 +3168,7 @@ namespace {
}
// Before removing I, set the iterator to the next instruction.
- PI = llvm::next(BasicBlock::iterator(I));
+ PI = std::next(BasicBlock::iterator(I));
if (cast<Instruction>(PI) == J)
++PI;
@@ -3141,7 +3190,7 @@ static const char bb_vectorize_name[] = "Basic-Block Vectorization";
INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
diff --git a/lib/Transforms/Vectorize/LLVMBuild.txt b/lib/Transforms/Vectorize/LLVMBuild.txt
index 7167d27..b57ce6c 100644
--- a/lib/Transforms/Vectorize/LLVMBuild.txt
+++ b/lib/Transforms/Vectorize/LLVMBuild.txt
@@ -20,5 +20,4 @@ type = Library
name = Vectorize
parent = Transforms
library_name = Vectorize
-required_libraries = Analysis Core InstCombine Support Target TransformUtils
-
+required_libraries = Analysis Core Support Target TransformUtils
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5e75871..9a98c44 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -56,7 +56,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
@@ -65,24 +65,26 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -114,6 +116,21 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
"trip count that is smaller than this "
"value."));
+/// This enables versioning on the strides of symbolically striding memory
+/// accesses in code like the following.
+/// for (i = 0; i < N; ++i)
+/// A[i * Stride1] += B[i * Stride2] ...
+///
+/// Will be roughly translated to
+/// if (Stride1 == 1 && Stride2 == 1) {
+/// for (i = 0; i < N; i+=4)
+/// A[i:i+3] += ...
+/// } else
+/// ...
+static cl::opt<bool> EnableMemAccessVersioning(
+ "enable-mem-access-versioning", cl::init(true), cl::Hidden,
+ cl::desc("Enable symblic stride memory access versioning"));
+
/// We don't unroll loops with a known constant trip count below this number.
static const unsigned TinyTripCountUnrollThreshold = 128;
@@ -124,11 +141,60 @@ static const unsigned RuntimeMemoryCheckThreshold = 8;
/// Maximum simd width.
static const unsigned MaxVectorWidth = 64;
+static cl::opt<unsigned> ForceTargetNumScalarRegs(
+ "force-target-num-scalar-regs", cl::init(0), cl::Hidden,
+ cl::desc("A flag that overrides the target's number of scalar registers."));
+
+static cl::opt<unsigned> ForceTargetNumVectorRegs(
+ "force-target-num-vector-regs", cl::init(0), cl::Hidden,
+ cl::desc("A flag that overrides the target's number of vector registers."));
+
/// Maximum vectorization unroll count.
static const unsigned MaxUnrollFactor = 16;
-/// The cost of a loop that is considered 'small' by the unroller.
-static const unsigned SmallLoopCost = 20;
+static cl::opt<unsigned> ForceTargetMaxScalarUnrollFactor(
+ "force-target-max-scalar-unroll", cl::init(0), cl::Hidden,
+ cl::desc("A flag that overrides the target's max unroll factor for scalar "
+ "loops."));
+
+static cl::opt<unsigned> ForceTargetMaxVectorUnrollFactor(
+ "force-target-max-vector-unroll", cl::init(0), cl::Hidden,
+ cl::desc("A flag that overrides the target's max unroll factor for "
+ "vectorized loops."));
+
+static cl::opt<unsigned> ForceTargetInstructionCost(
+ "force-target-instruction-cost", cl::init(0), cl::Hidden,
+ cl::desc("A flag that overrides the target's expected cost for "
+ "an instruction to a single constant value. Mostly "
+ "useful for getting consistent testing."));
+
+static cl::opt<unsigned> SmallLoopCost(
+ "small-loop-cost", cl::init(20), cl::Hidden,
+ cl::desc("The cost of a loop that is considered 'small' by the unroller."));
+
+static cl::opt<bool> LoopVectorizeWithBlockFrequency(
+ "loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden,
+ cl::desc("Enable the use of the block frequency analysis to access PGO "
+ "heuristics minimizing code growth in cold regions and being more "
+ "aggressive in hot regions."));
+
+// Runtime unroll loops for load/store throughput.
+static cl::opt<bool> EnableLoadStoreRuntimeUnroll(
+ "enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden,
+ cl::desc("Enable runtime unrolling until load/store ports are saturated"));
+
+/// The number of stores in a loop that are allowed to need predication.
+static cl::opt<unsigned> NumberOfStoresToPredicate(
+ "vectorize-num-stores-pred", cl::init(1), cl::Hidden,
+ cl::desc("Max number of stores to be predicated behind an if."));
+
+static cl::opt<bool> EnableIndVarRegisterHeur(
+ "enable-ind-var-reg-heur", cl::init(true), cl::Hidden,
+ cl::desc("Count the induction variable only once when unrolling"));
+
+static cl::opt<bool> EnableCondStoresVectorization(
+ "enable-cond-stores-vec", cl::init(false), cl::Hidden,
+ cl::desc("Enable if predication of stores during vectorization."));
namespace {
@@ -153,20 +219,21 @@ class LoopVectorizationCostModel;
class InnerLoopVectorizer {
public:
InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, DataLayout *DL,
+ DominatorTree *DT, const DataLayout *DL,
const TargetLibraryInfo *TLI, unsigned VecWidth,
unsigned UnrollFactor)
: OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
- OldInduction(0), WidenMap(UnrollFactor) {}
+ OldInduction(0), WidenMap(UnrollFactor), Legal(0) {}
// Perform the actual loop widening (vectorization).
- void vectorize(LoopVectorizationLegality *Legal) {
+ void vectorize(LoopVectorizationLegality *L) {
+ Legal = L;
// Create a new empty loop. Unlink the old loop and connect the new one.
- createEmptyLoop(Legal);
+ createEmptyLoop();
// Widen each instruction in the old loop to a new one in the new loop.
// Use the Legality module to find the induction and reduction variables.
- vectorizeLoop(Legal);
+ vectorizeLoop();
// Register the new loop and update the analysis passes.
updateAnalysis();
}
@@ -186,14 +253,23 @@ protected:
typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
VectorParts> EdgeMaskCache;
- /// Add code that checks at runtime if the accessed arrays overlap.
- /// Returns the comparator value or NULL if no check is needed.
- Instruction *addRuntimeCheck(LoopVectorizationLegality *Legal,
- Instruction *Loc);
+ /// \brief Add code that checks at runtime if the accessed arrays overlap.
+ ///
+ /// Returns a pair of instructions where the first element is the first
+ /// instruction generated in possibly a sequence of instructions and the
+ /// second value is the final comparator value or NULL if no check is needed.
+ std::pair<Instruction *, Instruction *> addRuntimeCheck(Instruction *Loc);
+
+ /// \brief Add checks for strides that were assumed to be 1.
+ ///
+ /// Returns the first check instruction and the last check instruction as the
+ /// pair (first, last).
+ std::pair<Instruction *, Instruction *> addStrideCheck(Instruction *Loc);
+
/// Create an empty loop, based on the loop ranges of the old loop.
- void createEmptyLoop(LoopVectorizationLegality *Legal);
+ void createEmptyLoop();
/// Copy and widen the instructions from the old loop.
- virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
+ virtual void vectorizeLoop();
/// \brief The Loop exit block may have single value PHI nodes where the
/// incoming value is 'Undef'. While vectorizing we only handled real values
@@ -210,14 +286,12 @@ protected:
VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
/// A helper function to vectorize a single BB within the innermost loop.
- void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
- PhiVector *PV);
+ void vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV);
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
void widenPHIInstruction(Instruction *PN, VectorParts &Entry,
- LoopVectorizationLegality *Legal,
unsigned UF, unsigned VF, PhiVector *PV);
/// Insert the new loop to the loop hierarchy and pass manager
@@ -225,12 +299,14 @@ protected:
void updateAnalysis();
/// This instruction is un-vectorizable. Implement it as a sequence
- /// of scalars.
- virtual void scalarizeInstruction(Instruction *Instr);
+ /// of scalars. If \p IfPredicateStore is true we need to 'hide' each
+ /// scalarized instruction behind an if block predicated on the control
+ /// dependence of the instruction.
+ virtual void scalarizeInstruction(Instruction *Instr,
+ bool IfPredicateStore=false);
/// Vectorize Load and Store instructions,
- virtual void vectorizeMemoryInstruction(Instruction *Instr,
- LoopVectorizationLegality *Legal);
+ virtual void vectorizeMemoryInstruction(Instruction *Instr);
/// Create a broadcast instruction. This method generates a broadcast
/// instruction (shuffle) for loop invariant values and for the induction
@@ -303,7 +379,7 @@ protected:
/// Dominator Tree.
DominatorTree *DT;
/// Data Layout.
- DataLayout *DL;
+ const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
@@ -330,7 +406,7 @@ protected:
///The ExitBlock of the scalar loop.
BasicBlock *LoopExitBlock;
///The vector loop body.
- BasicBlock *LoopVectorBody;
+ SmallVector<BasicBlock *, 4> LoopVectorBody;
///The scalar loop body.
BasicBlock *LoopScalarBody;
/// A list of all bypass blocks. The first block is the entry of the loop.
@@ -345,22 +421,24 @@ protected:
/// Maps scalars to widened vectors.
ValueMap WidenMap;
EdgeMaskCache MaskCache;
+
+ LoopVectorizationLegality *Legal;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
public:
InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, DataLayout *DL,
+ DominatorTree *DT, const DataLayout *DL,
const TargetLibraryInfo *TLI, unsigned UnrollFactor) :
InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
private:
- virtual void scalarizeInstruction(Instruction *Instr);
- virtual void vectorizeMemoryInstruction(Instruction *Instr,
- LoopVectorizationLegality *Legal);
- virtual Value *getBroadcastInstrs(Value *V);
- virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
- virtual Value *reverseVector(Value *Vec);
+ void scalarizeInstruction(Instruction *Instr,
+ bool IfPredicateStore = false) override;
+ void vectorizeMemoryInstruction(Instruction *Instr) override;
+ Value *getBroadcastInstrs(Value *V) override;
+ Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate) override;
+ Value *reverseVector(Value *Vec) override;
};
/// \brief Look for a meaningful debug location on the instruction or it's
@@ -406,10 +484,14 @@ static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
+ unsigned NumLoads;
+ unsigned NumStores;
+ unsigned NumPredStores;
+
+ LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
DominatorTree *DT, TargetLibraryInfo *TLI)
- : TheLoop(L), SE(SE), DL(DL), DT(DT), TLI(TLI),
- Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
+ : NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
+ DT(DT), TLI(TLI), Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
MaxSafeDepDistBytes(-1U) {}
/// This enum represents the kinds of reductions that we support.
@@ -500,7 +582,7 @@ public:
/// Insert a pointer and calculate the start and end SCEVs.
void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
- unsigned DepSetId);
+ unsigned DepSetId, ValueToValueMap &Strides);
/// This flag indicates if we need to add the runtime check.
bool Need;
@@ -564,7 +646,7 @@ public:
/// pointer itself is an induction variable.
/// This check allows us to vectorize A[idx] into a wide load/store.
/// Returns:
- /// 0 - Stride is unknown or non consecutive.
+ /// 0 - Stride is unknown or non-consecutive.
/// 1 - Address is consecutive.
/// -1 - Address is consecutive, and decreasing.
int isConsecutivePtr(Value *Ptr);
@@ -584,6 +666,13 @@ public:
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+ bool hasStride(Value *V) { return StrideSet.count(V); }
+ bool mustCheckStrides() { return !StrideSet.empty(); }
+ SmallPtrSet<Value *, 8>::iterator strides_begin() {
+ return StrideSet.begin();
+ }
+ SmallPtrSet<Value *, 8>::iterator strides_end() { return StrideSet.end(); }
+
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -626,12 +715,18 @@ private:
/// if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi);
+ /// \brief Collect memory accesses with loop-invariant strides.
+ ///
+ /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
+ /// invariant.
+ void collectStridedAcccess(Value *LoadOrStoreInst);
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
ScalarEvolution *SE;
/// DataLayout analysis.
- DataLayout *DL;
+ const DataLayout *DL;
/// Dominators.
DominatorTree *DT;
/// Target Library Info.
@@ -664,6 +759,9 @@ private:
bool HasFunNoNaNAttr;
unsigned MaxSafeDepDistBytes;
+
+ ValueToValueMap Strides;
+ SmallPtrSet<Value *, 8> StrideSet;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -678,7 +776,7 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- DataLayout *DL, const TargetLibraryInfo *TLI)
+ const DataLayout *DL, const TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {}
/// Information about vectorization costs
@@ -751,7 +849,7 @@ private:
/// Vector target information.
const TargetTransformInfo &TTI;
/// Target data layout information.
- DataLayout *DL;
+ const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
};
@@ -763,10 +861,13 @@ struct LoopVectorizeHints {
unsigned Width;
/// Vectorization unroll factor.
unsigned Unroll;
+ /// Vectorization forced (-1 not selected, 0 force disabled, 1 force enabled)
+ int Force;
LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
: Width(VectorizationFactor)
, Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
+ , Force(-1)
, LoopID(L->getLoopID()) {
getHints(L);
// The command line options override any loop metadata except for when
@@ -877,66 +978,117 @@ private:
Unroll = Val;
else
DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
+ } else if (Hint == "enable") {
+ if (C->getBitWidth() == 1)
+ Force = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n");
} else {
DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
}
}
};
+static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
+ if (L.empty())
+ return V.push_back(&L);
+
+ for (Loop *InnerL : L)
+ addInnerLoop(*InnerL, V);
+}
+
/// The LoopVectorize Pass.
-struct LoopVectorize : public LoopPass {
+struct LoopVectorize : public FunctionPass {
/// Pass identification, replacement for typeid
static char ID;
- explicit LoopVectorize(bool NoUnrolling = false)
- : LoopPass(ID), DisableUnrolling(NoUnrolling) {
+ explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true)
+ : FunctionPass(ID),
+ DisableUnrolling(NoUnrolling),
+ AlwaysVectorize(AlwaysVectorize) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
ScalarEvolution *SE;
- DataLayout *DL;
+ const DataLayout *DL;
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
+ BlockFrequencyInfo *BFI;
TargetLibraryInfo *TLI;
bool DisableUnrolling;
+ bool AlwaysVectorize;
- virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
- // We only vectorize innermost loops.
- if (!L->empty())
- return false;
+ BlockFrequency ColdEntryFreq;
+ bool runOnFunction(Function &F) override {
SE = &getAnalysis<ScalarEvolution>();
- DL = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ BFI = &getAnalysis<BlockFrequencyInfo>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ // Compute some weights outside of the loop over the loops. Compute this
+ // using a BranchProbability to re-use its scaling math.
+ const BranchProbability ColdProb(1, 5); // 20%
+ ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
+
// If the target claims to have no vector registers don't attempt
// vectorization.
if (!TTI->getNumberOfRegisters(true))
return false;
if (DL == NULL) {
- DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout\n");
+ DEBUG(dbgs() << "LV: Not vectorizing: Missing data layout\n");
return false;
}
+ // Build up a worklist of inner-loops to vectorize. This is necessary as
+ // the act of vectorizing or partially unrolling a loop creates new loops
+ // and can invalidate iterators across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *L : *LI)
+ addInnerLoop(*L, Worklist);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ while (!Worklist.empty())
+ Changed |= processLoop(Worklist.pop_back_val());
+
+ // Process each loop nest in the function.
+ return Changed;
+ }
+
+ bool processLoop(Loop *L) {
+ assert(L->empty() && "Only process inner loops.");
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
LoopVectorizeHints Hints(L, DisableUnrolling);
+ if (Hints.Force == 0) {
+ DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
+ return false;
+ }
+
+ if (!AlwaysVectorize && Hints.Force != 1) {
+ DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
+ return false;
+ }
+
if (Hints.Width == 1 && Hints.Unroll == 1) {
- DEBUG(dbgs() << "LV: Not vectorizing.\n");
+ DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
return false;
}
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
if (!LVL.canVectorize()) {
- DEBUG(dbgs() << "LV: Not vectorizing.\n");
+ DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
return false;
}
@@ -946,13 +1098,25 @@ struct LoopVectorize : public LoopPass {
// Check the function attributes to find out if this function should be
// optimized for size.
Function *F = L->getHeader()->getParent();
- Attribute::AttrKind SzAttr = Attribute::OptimizeForSize;
- Attribute::AttrKind FlAttr = Attribute::NoImplicitFloat;
- unsigned FnIndex = AttributeSet::FunctionIndex;
- bool OptForSize = F->getAttributes().hasAttribute(FnIndex, SzAttr);
- bool NoFloat = F->getAttributes().hasAttribute(FnIndex, FlAttr);
+ bool OptForSize =
+ Hints.Force != 1 && F->hasFnAttribute(Attribute::OptimizeForSize);
+
+ // Compute the weighted frequency of this loop being executed and see if it
+ // is less than 20% of the function entry baseline frequency. Note that we
+ // always have a canonical loop here because we think we *can* vectorize.
+ // FIXME: This is hidden behind a flag due to pervasive problems with
+ // exactly what block frequency models.
+ if (LoopVectorizeWithBlockFrequency) {
+ BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
+ if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
+ OptForSize = true;
+ }
- if (NoFloat) {
+ // Check the function attributes to see if implicit floats are allowed.
+ // FIXME: This check doesn't seem possibly correct -- what if the loop is
+ // an integer loop and the vector instructions selected are purely integer
+ // vector instructions?
+ if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
"attribute is used.\n");
return false;
@@ -973,6 +1137,7 @@ struct LoopVectorize : public LoopPass {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
if (UF == 1)
return false;
+ DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n");
// We decided not to vectorize, but we may want to unroll.
InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
Unroller.vectorize(&LVL);
@@ -989,16 +1154,16 @@ struct LoopVectorize : public LoopPass {
return true;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- LoopPass::getAnalysisUsage(AU);
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<DominatorTree>();
+ AU.addRequired<BlockFrequencyInfo>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetTransformInfo>();
AU.addPreserved<LoopInfo>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
};
@@ -1010,12 +1175,53 @@ struct LoopVectorize : public LoopPass {
// LoopVectorizationCostModel.
//===----------------------------------------------------------------------===//
-void
-LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
- Loop *Lp, Value *Ptr,
- bool WritePtr,
- unsigned DepSetId) {
- const SCEV *Sc = SE->getSCEV(Ptr);
+static Value *stripIntegerCast(Value *V) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if (CI->getOperand(0)->getType()->isIntegerTy())
+ return CI->getOperand(0);
+ return V;
+}
+
+///\brief Replaces the symbolic stride in a pointer SCEV expression by one.
+///
+/// If \p OrigPtr is not null, use it to look up the stride value instead of
+/// \p Ptr.
+static const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+ ValueToValueMap &PtrToStride,
+ Value *Ptr, Value *OrigPtr = 0) {
+
+ const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+
+ // If there is an entry in the map return the SCEV of the pointer with the
+ // symbolic stride replaced by one.
+ ValueToValueMap::iterator SI = PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
+ if (SI != PtrToStride.end()) {
+ Value *StrideVal = SI->second;
+
+ // Strip casts.
+ StrideVal = stripIntegerCast(StrideVal);
+
+ // Replace symbolic stride by one.
+ Value *One = ConstantInt::get(StrideVal->getType(), 1);
+ ValueToValueMap RewriteMap;
+ RewriteMap[StrideVal] = One;
+
+ const SCEV *ByOne =
+ SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
+ DEBUG(dbgs() << "LV: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+ << "\n");
+ return ByOne;
+ }
+
+ // Otherwise, just return the SCEV of the original pointer.
+ return SE->getSCEV(Ptr);
+}
+
+void LoopVectorizationLegality::RuntimePointerCheck::insert(
+ ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
+ ValueToValueMap &Strides) {
+ // Get the stride replaced scev.
+ const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
@@ -1030,7 +1236,9 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
// We need to place the broadcast of invariant variables outside the loop.
Instruction *Instr = dyn_cast<Instruction>(V);
- bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
+ bool NewInstr =
+ (Instr && std::find(LoopVectorBody.begin(), LoopVectorBody.end(),
+ Instr->getParent()) != LoopVectorBody.end());
bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
// Place the code for broadcasting invariant variables in the new preheader.
@@ -1070,7 +1278,7 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
-static unsigned getGEPInductionOperand(DataLayout *DL,
+static unsigned getGEPInductionOperand(const DataLayout *DL,
const GetElementPtrInst *Gep) {
unsigned LastOperand = Gep->getNumOperands() - 1;
unsigned GEPAllocSize = DL->getTypeAllocSize(
@@ -1093,7 +1301,7 @@ static unsigned getGEPInductionOperand(DataLayout *DL,
}
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
- assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
+ assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
// Make sure that the pointer does not point to structs.
if (Ptr->getType()->getPointerElementType()->isAggregateType())
return 0;
@@ -1147,7 +1355,27 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// We can emit wide load/stores only if the last non-zero index is the
// induction variable.
- const SCEV *Last = SE->getSCEV(Gep->getOperand(InductionOperand));
+ const SCEV *Last = 0;
+ if (!Strides.count(Gep))
+ Last = SE->getSCEV(Gep->getOperand(InductionOperand));
+ else {
+ // Because of the multiplication by a stride we can have a s/zext cast.
+ // We are going to replace this stride by 1 so the cast is safe to ignore.
+ //
+ // %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ // %0 = trunc i64 %indvars.iv to i32
+ // %mul = mul i32 %0, %Stride1
+ // %idxprom = zext i32 %mul to i64 << Safe cast.
+ // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
+ //
+ Last = replaceSymbolicStrideSCEV(SE, Strides,
+ Gep->getOperand(InductionOperand), Gep);
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
+ Last =
+ (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
+ ? C->getOperand()
+ : Last;
+ }
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
const SCEV *Step = AR->getStepRecurrence(*SE);
@@ -1171,6 +1399,10 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
assert(V != Induction && "The new induction variable should not be used.");
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
+ // If we have a stride that is replaced by one, do it here.
+ if (Legal->hasStride(V))
+ V = ConstantInt::get(V->getType(), 1);
+
// If we have this scalar in the map, return it.
if (WidenMap.has(V))
return WidenMap.get(V);
@@ -1192,9 +1424,7 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
"reverse");
}
-
-void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
- LoopVectorizationLegality *Legal) {
+void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(Instr);
StoreInst *SI = dyn_cast<StoreInst>(Instr);
@@ -1213,10 +1443,13 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+ if (SI && Legal->blockNeedsPredication(SI->getParent()))
+ return scalarizeInstruction(Instr, true);
+
if (ScalarAllocatedSize != VectorElementSize)
return scalarizeInstruction(Instr);
- // If the pointer is loop invariant or if it is non consecutive,
+ // If the pointer is loop invariant or if it is non-consecutive,
// scalarize the load.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
@@ -1331,7 +1564,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
}
}
-void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
+void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// Holds vector parameters or scalars, in case of uniform vals.
SmallVector<VectorParts, 4> Params;
@@ -1376,10 +1609,38 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
+ Instruction *InsertPt = Builder.GetInsertPoint();
+ BasicBlock *IfBlock = Builder.GetInsertBlock();
+ BasicBlock *CondBlock = 0;
+
+ VectorParts Cond;
+ Loop *VectorLp = 0;
+ if (IfPredicateStore) {
+ assert(Instr->getParent()->getSinglePredecessor() &&
+ "Only support single predecessor blocks");
+ Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
+ Instr->getParent());
+ VectorLp = LI->getLoopFor(IfBlock);
+ assert(VectorLp && "Must have a loop for this block");
+ }
+
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
// For each scalar that we create:
for (unsigned Width = 0; Width < VF; ++Width) {
+
+ // Start if-block.
+ Value *Cmp = 0;
+ if (IfPredicateStore) {
+ Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width));
+ Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1));
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ LoopVectorBody.push_back(CondBlock);
+ VectorLp->addBasicBlockToLoop(CondBlock, LI->getBase());
+ // Update Builder with newly created basic block.
+ Builder.SetInsertPoint(InsertPt);
+ }
+
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
@@ -1400,18 +1661,75 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
if (!IsVoidRetTy)
VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned,
Builder.getInt32(Width));
+ // End if-block.
+ if (IfPredicateStore) {
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ LoopVectorBody.push_back(NewIfBlock);
+ VectorLp->addBasicBlockToLoop(NewIfBlock, LI->getBase());
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
}
}
}
-Instruction *
-InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
- Instruction *Loc) {
+static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
+ Instruction *Loc) {
+ if (FirstInst) // A first instruction was already recorded; keep it.
+ return FirstInst;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == Loc->getParent() ? I : 0; // Only if in Loc's block.
+ return 0; // V is not an Instruction, so there is nothing to record.
+}
+
+std::pair<Instruction *, Instruction *>
+InnerLoopVectorizer::addStrideCheck(Instruction *Loc) {
+ Instruction *tnullptr = 0;
+ if (!Legal->mustCheckStrides())
+ return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
+
+ IRBuilder<> ChkBuilder(Loc);
+
+ // Emit one "stride != 1" compare per collected stride and OR them together.
+ Value *Check = 0;
+ Instruction *FirstInst = 0;
+ for (SmallPtrSet<Value *, 8>::iterator SI = Legal->strides_begin(),
+ SE = Legal->strides_end();
+ SI != SE; ++SI) {
+ Value *Ptr = stripIntegerCast(*SI);
+ Value *C = ChkBuilder.CreateICmpNE(Ptr, ConstantInt::get(Ptr->getType(), 1),
+ "stride.chk");
+ // Store the first instruction we create (it is returned as pair.first).
+ FirstInst = getFirstInst(FirstInst, C, Loc);
+ if (Check)
+ Check = ChkBuilder.CreateOr(Check, C);
+ else
+ Check = C;
+ }
+
+ // We have to do this trickery because the IRBuilder might fold the check to a
+ // constant expression, in which case there is no Instruction anchored in
+ // the block. The explicit "and ..., true" below is always an Instruction.
+ LLVMContext &Ctx = Loc->getContext();
+ Instruction *TheCheck =
+ BinaryOperator::CreateAnd(Check, ConstantInt::getTrue(Ctx));
+ ChkBuilder.Insert(TheCheck, "stride.not.one");
+ FirstInst = getFirstInst(FirstInst, TheCheck, Loc);
+
+ return std::make_pair(FirstInst, TheCheck);
+}
+
+std::pair<Instruction *, Instruction *>
+InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
Legal->getRuntimePointerCheck();
+ Instruction *tnullptr = 0;
if (!PtrRtCheck->Need)
- return NULL;
+ return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
unsigned NumPointers = PtrRtCheck->Pointers.size();
SmallVector<TrackingVH<Value> , 2> Starts;
@@ -1419,6 +1737,7 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, "induction");
+ Instruction *FirstInst = 0;
for (unsigned i = 0; i < NumPointers; ++i) {
Value *Ptr = PtrRtCheck->Pointers[i];
@@ -1472,11 +1791,16 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
- if (MemoryRuntimeCheck)
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
"conflict.rdx");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ }
MemoryRuntimeCheck = IsConflict;
}
}
@@ -1487,11 +1811,11 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
ConstantInt::getTrue(Ctx));
ChkBuilder.Insert(Check, "memcheck.conflict");
- return Check;
+ FirstInst = getFirstInst(FirstInst, Check, Loc);
+ return std::make_pair(FirstInst, Check);
}
-void
-InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
+void InnerLoopVectorizer::createEmptyLoop() {
/*
In this function we generate a new loop. The new loop will contain
the vectorized instructions while the old loop will continue to run the
@@ -1642,22 +1966,48 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
BasicBlock *LastBypassBlock = BypassBlock;
+ // Generate the code to check that the strides we assumed to be one are really
+ // one. We want the new basic block to start at the first instruction in a
+ // sequence of instructions that form a check.
+ Instruction *StrideCheck;
+ Instruction *FirstCheckInst;
+ std::tie(FirstCheckInst, StrideCheck) =
+ addStrideCheck(BypassBlock->getTerminator());
+ if (StrideCheck) {
+ // Create a new block containing the stride check.
+ BasicBlock *CheckBlock =
+ BypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ LoopBypassBlocks.push_back(CheckBlock);
+
+ // Replace the branch into the stride check block with a conditional branch
+ // for the "few elements case".
+ Instruction *OldTerm = BypassBlock->getTerminator();
+ BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
+ OldTerm->eraseFromParent();
+
+ Cmp = StrideCheck;
+ LastBypassBlock = CheckBlock;
+ }
+
// Generate the code that checks in runtime if arrays overlap. We put the
// checks into a separate block to make the more common case of few elements
// faster.
- Instruction *MemRuntimeCheck = addRuntimeCheck(Legal,
- BypassBlock->getTerminator());
+ Instruction *MemRuntimeCheck;
+ std::tie(FirstCheckInst, MemRuntimeCheck) =
+ addRuntimeCheck(LastBypassBlock->getTerminator());
if (MemRuntimeCheck) {
// Create a new block containing the memory check.
- BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemRuntimeCheck,
- "vector.memcheck");
+ BasicBlock *CheckBlock =
+ LastBypassBlock->splitBasicBlock(MemRuntimeCheck, "vector.memcheck");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
// for the "few elements case".
- Instruction *OldTerm = BypassBlock->getTerminator();
+ Instruction *OldTerm = LastBypassBlock->getTerminator();
BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
OldTerm->eraseFromParent();
@@ -1825,7 +2175,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
LoopScalarPreHeader = ScalarPH;
LoopMiddleBlock = MiddleBlock;
LoopExitBlock = ExitBlock;
- LoopVectorBody = VecBody;
+ LoopVectorBody.push_back(VecBody);
LoopScalarBody = OldBasicBlock;
LoopVectorizeHints Hints(Lp, true);
@@ -2093,30 +2443,56 @@ struct CSEDenseMapInfo {
};
}
+/// \brief Check whether this block is a predicated block.
+/// Due to if-predication of stores we might create a sequence of "if(pred) a[i]
+/// = ...; " blocks. We start with one vectorized basic block. For every
+/// conditional block we split this vectorized block. Therefore, every second
+/// block (i.e. every odd \p BlockNum) will be a predicated one.
+static bool isPredicatedBlock(unsigned BlockNum) {
+ return BlockNum % 2;
+}
+
///\brief Perform cse of induction variable instructions.
-static void cse(BasicBlock *BB) {
+static void cse(SmallVector<BasicBlock *, 4> &BBs) {
// Perform simple cse.
SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *In = I++;
+ for (unsigned i = 0, e = BBs.size(); i != e; ++i) {
+ BasicBlock *BB = BBs[i];
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *In = I++;
- if (!CSEDenseMapInfo::canHandle(In))
- continue;
+ if (!CSEDenseMapInfo::canHandle(In))
+ continue;
- // Check if we can replace this instruction with any of the
- // visited instructions.
- if (Instruction *V = CSEMap.lookup(In)) {
- In->replaceAllUsesWith(V);
- In->eraseFromParent();
- continue;
+ // Check if we can replace this instruction with any of the
+ // visited instructions.
+ if (Instruction *V = CSEMap.lookup(In)) {
+ In->replaceAllUsesWith(V);
+ In->eraseFromParent();
+ continue;
+ }
+ // Ignore instructions in conditional blocks. We create "if (pred) a[i] =
+ // ...;" blocks for predicated stores. Every second block is a predicated
+ // block.
+ if (isPredicatedBlock(i))
+ continue;
+
+ CSEMap[In] = In;
}
+ }
+}
- CSEMap[In] = In;
+/// \brief Set the unsafe-algebra flag on \p V if it is an FP op; returns \p V.
+static Value *addFastMathFlag(Value *V) {
+ if (isa<FPMathOperator>(V)){
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+ cast<Instruction>(V)->setFastMathFlags(Flags);
}
+ return V;
}
-void
-InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+void InnerLoopVectorizer::vectorizeLoop() {
//===------------------------------------------------===//
//
// Notice: any optimization or new instruction that go
@@ -2144,7 +2520,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Vectorize all of the blocks in the original loop.
for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
be = DFS.endRPO(); bb != be; ++bb)
- vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
+ vectorizeBlockInLoop(*bb, &RdxPHIsToFix);
// At this point every instruction in the original loop is widened to
// a vector form. We are almost done. Now, we need to fix the PHI nodes
@@ -2169,7 +2545,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
setDebugLocFromInst(Builder, RdxDesc.StartValue);
// We need to generate a reduction vector from the incoming scalar.
- // To do so, we need to generate the 'identity' vector and overide
+ // To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
@@ -2228,7 +2604,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// first unroll part.
Value *StartVal = (part == 0) ? VectorStart : Identity;
cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
- cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody);
+ cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part],
+ LoopVectorBody.back());
}
// Before each round, move the insertion point right between
@@ -2247,7 +2624,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
Value *StartVal = (part == 0) ? VectorStart : Identity;
for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
- NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
+ NewPhi->addIncoming(RdxExitVal[part],
+ LoopVectorBody.back());
RdxParts.push_back(NewPhi);
}
@@ -2257,9 +2635,10 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
setDebugLocFromInst(Builder, ReducedPartRdx);
for (unsigned part = 1; part < UF; ++part) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
- ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
- RdxParts[part], ReducedPartRdx,
- "bin.rdx");
+ // Floating point operations had to be 'fast' to enable the reduction.
+ ReducedPartRdx = addFastMathFlag(
+ Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part],
+ ReducedPartRdx, "bin.rdx"));
else
ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
ReducedPartRdx, RdxParts[part]);
@@ -2289,8 +2668,9 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
"rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
- TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
- "bin.rdx");
+ // Floating point operations had to be 'fast' to enable the reduction.
+ TmpVec = addFastMathFlag(Builder.CreateBinOp(
+ (Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx"));
else
TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
}
@@ -2411,7 +2791,6 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
InnerLoopVectorizer::VectorParts &Entry,
- LoopVectorizationLegality *Legal,
unsigned UF, unsigned VF, PhiVector *PV) {
PHINode* P = cast<PHINode>(PN);
// Handle reduction variables:
@@ -2421,7 +2800,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Type *VecTy = (VF == 1) ? PN->getType() :
VectorType::get(PN->getType(), VF);
Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
- LoopVectorBody-> getFirstInsertionPt());
+ LoopVectorBody.back()-> getFirstInsertionPt());
}
PV->push_back(P);
return;
@@ -2430,7 +2809,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
setDebugLocFromInst(Builder, P);
// Check for PHI nodes that are lowered to vector selects.
if (P->getParent() != OrigLoop->getHeader()) {
- // We know that all PHIs in non header blocks are converted into
+ // We know that all PHIs in non-header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
// At this point we generate the predication tree. There may be
@@ -2573,9 +2952,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
}
}
-void
-InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
- BasicBlock *BB, PhiVector *PV) {
+void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
VectorParts &Entry = WidenMap.get(it);
@@ -2586,7 +2963,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
continue;
case Instruction::PHI:{
// Vectorize PHINodes.
- widenPHIInstruction(it, Entry, Legal, UF, VF, PV);
+ widenPHIInstruction(it, Entry, UF, VF, PV);
continue;
}// End of PHI.
@@ -2627,6 +3004,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
if (VecOp && isa<PossiblyExactOperator>(VecOp))
VecOp->setIsExact(BinOp->isExact());
+ // Copy the fast-math flags.
+ if (VecOp && isa<FPMathOperator>(V))
+ VecOp->setFastMathFlags(it->getFastMathFlags());
+
Entry[Part] = V;
}
break;
@@ -2680,7 +3061,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case Instruction::Store:
case Instruction::Load:
- vectorizeMemoryInstruction(it, Legal);
+ vectorizeMemoryInstruction(it);
break;
case Instruction::ZExt:
case Instruction::SExt:
@@ -2772,13 +3153,25 @@ void InnerLoopVectorizer::updateAnalysis() {
for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
- DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
+
+ // Due to if predication of stores we might create a sequence of "if(pred)
+ // a[i] = ...; " blocks.
+ for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) {
+ if (i == 0)
+ DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader);
+ else if (isPredicatedBlock(i)) {
+ DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-1]);
+ } else {
+ DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-2]);
+ }
+ }
+
DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front());
DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
- DEBUG(DT->verifyAnalysis());
+ DEBUG(DT->verifyDomTree());
}
/// \brief Check whether it is safe to if-convert this phi node.
@@ -2868,7 +3261,7 @@ bool LoopVectorizationLegality::canVectorize() {
DEBUG(dbgs() << "LV: Found a loop: " <<
TheLoop->getHeader()->getName() << '\n');
- // Check if we can if-convert non single-bb loops.
+ // Check if we can if-convert non-single-bb loops.
unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
@@ -2916,7 +3309,7 @@ bool LoopVectorizationLegality::canVectorize() {
return true;
}
-static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
+static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
if (Ty->isPointerTy())
return DL.getIntPtrType(Ty);
@@ -2928,7 +3321,7 @@ static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
return Ty;
}
-static Type* getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1) {
+static Type* getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
Ty0 = convertPointerToIntegerType(DL, Ty0);
Ty1 = convertPointerToIntegerType(DL, Ty1);
if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
@@ -2944,12 +3337,11 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
// instructions must not have external users.
if (!Reductions.count(Inst))
//Check that all of the users of the loop are inside the BB.
- for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
- I != E; ++I) {
- Instruction *U = cast<Instruction>(*I);
+ for (User *U : Inst->users()) {
+ Instruction *UI = cast<Instruction>(U);
// This user may be a reduction exit value.
- if (!TheLoop->contains(U)) {
- DEBUG(dbgs() << "LV: Found an outside user for : " << *U << '\n');
+ if (!TheLoop->contains(UI)) {
+ DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
return true;
}
}
@@ -3097,8 +3489,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
Type *T = ST->getValueOperand()->getType();
if (!VectorType::isValidElementType(T))
return false;
+ if (EnableMemAccessVersioning)
+ collectStridedAcccess(ST);
}
+ if (EnableMemAccessVersioning)
+ if (LoadInst *LI = dyn_cast<LoadInst>(it))
+ collectStridedAcccess(LI);
+
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
@@ -3117,6 +3515,138 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return true;
}
+///\brief If \p Ptr is a GEP whose operands, other than the induction operand,
+/// are all loop invariant in \p Lp, return that operand; else return \p Ptr.
+static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE,
+ const DataLayout *DL, Loop *Lp) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return Ptr;
+
+ unsigned InductionOperand = getGEPInductionOperand(DL, GEP);
+
+ // Check that all of the gep operands are loop invariant except for our
+ // induction operand; otherwise leave the pointer untouched.
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
+ if (i != InductionOperand &&
+ !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
+ return Ptr;
+ return GEP->getOperand(InductionOperand);
+}
+
+///\brief Return the only cast user of \p Ptr with type \p Ty; null if 0 or >1.
+static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { // NOTE(review): Lp is unused here.
+ Value *UniqueCast = 0;
+ for (User *U : Ptr->users()) {
+ CastInst *CI = dyn_cast<CastInst>(U);
+ if (CI && CI->getType() == Ty) {
+ if (!UniqueCast)
+ UniqueCast = CI;
+ else
+ return 0; // A second matching cast exists, so the cast is not unique.
+ }
+ }
+ return UniqueCast;
+}
+
+///\brief Get the stride of a pointer access in a loop.
+/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a
+/// pointer to the Value, or null if no such loop-invariant stride is found.
+static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
+ const DataLayout *DL, Loop *Lp) {
+ const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ if (!PtrTy || PtrTy->isAggregateType())
+ return 0;
+
+ // Try to remove a gep instruction to make the pointer (actually index at this
+ // point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing the
+ // pointer, otherwise, we are analyzing the index.
+ Value *OrigPtr = Ptr;
+
+ // The size of the pointer access; a constant step factor must equal this.
+ int64_t PtrAccessSize = 1;
+
+ Ptr = stripGetElementPtr(Ptr, SE, DL, Lp);
+ const SCEV *V = SE->getSCEV(Ptr);
+
+ if (Ptr != OrigPtr)
+ // We are analyzing the index: strip off all casts.
+ while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
+ V = C->getOperand();
+
+ const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
+ if (!S)
+ return 0;
+
+ V = S->getStepRecurrence(*SE);
+ if (!V)
+ return 0;
+
+ // Strip off the size of access multiplication if we are still analyzing the
+ // pointer.
+ if (OrigPtr == Ptr) {
+ DL->getTypeAllocSize(PtrTy->getElementType()); // NOTE(review): result is discarded, so PtrAccessSize stays 1 - confirm intent.
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
+ if (M->getOperand(0)->getSCEVType() != scConstant)
+ return 0;
+
+ const APInt &APStepVal =
+ cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return 0;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+ if (PtrAccessSize != StepVal)
+ return 0;
+ V = M->getOperand(1);
+ }
+ }
+
+ // Strip off a single cast, remembering the type it produced.
+ Type *StripedOffRecurrenceCast = 0;
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
+ StripedOffRecurrenceCast = C->getType();
+ V = C->getOperand();
+ }
+
+ // Look for the loop invariant symbolic value.
+ const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
+ if (!U)
+ return 0;
+
+ Value *Stride = U->getValue();
+ if (!Lp->isLoopInvariant(Stride))
+ return 0;
+
+ // If we have stripped off the recurrence cast we have to make sure that we
+ // return the value that is used in this loop so that we can replace it later.
+ if (StripedOffRecurrenceCast)
+ Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
+
+ return Stride;
+}
+
+void LoopVectorizationLegality::collectStridedAcccess(Value *MemAccess) {
+ Value *Ptr = 0;
+ if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
+ Ptr = LI->getPointerOperand();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
+ Ptr = SI->getPointerOperand();
+ else
+ return; // Neither a load nor a store: nothing to record.
+
+ Value *Stride = getStrideFromPointer(Ptr, SE, DL, TheLoop);
+ if (!Stride)
+ return; // No symbolic stride was found for this pointer.
+
+ DEBUG(dbgs() << "LV: Found a strided access that we can version");
+ DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
+ Strides[Ptr] = Stride; // Map the accessed pointer to its symbolic stride.
+ StrideSet.insert(Stride); // Also keep the set of distinct stride values.
+}
+
void LoopVectorizationLegality::collectLoopUniforms() {
// We now know that the loop is vectorizable!
// Collect variables that will remain uniform after vectorization.
@@ -3126,6 +3656,16 @@ void LoopVectorizationLegality::collectLoopUniforms() {
// Start with the conditional branch and walk up the block.
Worklist.push_back(Latch->getTerminator()->getOperand(0));
+ // Also add all consecutive pointer values; these values will be uniform
+ // after vectorization (and subsequent cleanup) and, until revectorization is
+ // supported, all dependencies must also be uniform.
+ for (Loop::block_iterator B = TheLoop->block_begin(),
+ BE = TheLoop->block_end(); B != BE; ++B)
+ for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end();
+ I != IE; ++I)
+ if (I->getType()->isPointerTy() && isConsecutivePtr(I))
+ Worklist.insert(Worklist.end(), I->op_begin(), I->op_end());
+
while (Worklist.size()) {
Instruction *I = dyn_cast<Instruction>(Worklist.back());
Worklist.pop_back();
@@ -3158,7 +3698,7 @@ public:
/// \brief Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
- AccessAnalysis(DataLayout *Dl, DepCandidates &DA) :
+ AccessAnalysis(const DataLayout *Dl, DepCandidates &DA) :
DL(Dl), DepCands(DA), AreAllWritesIdentified(true),
AreAllReadsIdentified(true), IsRTCheckNeeded(false) {}
@@ -3178,7 +3718,8 @@ public:
/// non-intersection.
bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
unsigned &NumComparisons, ScalarEvolution *SE,
- Loop *TheLoop, bool ShouldCheckStride = false);
+ Loop *TheLoop, ValueToValueMap &Strides,
+ bool ShouldCheckStride = false);
/// \brief Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
@@ -3223,7 +3764,7 @@ private:
/// Set of underlying objects already written to.
SmallPtrSet<Value*, 16> WriteObjects;
- DataLayout *DL;
+ const DataLayout *DL;
/// Sets of potentially dependent accesses - members of one set share an
/// underlying pointer. The set "CheckDeps" identfies which sets really need a
@@ -3238,8 +3779,9 @@ private:
} // end anonymous namespace
/// \brief Check whether a pointer can participate in a runtime bounds check.
-static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr) {
- const SCEV *PtrScev = SE->getSCEV(Ptr);
+static bool hasComputableBounds(ScalarEvolution *SE, ValueToValueMap &Strides,
+ Value *Ptr) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR)
return false;
@@ -3249,13 +3791,13 @@ static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr) {
/// \brief Check the stride of the pointer and ensure that it does not wrap in
/// the address space.
-static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
- const Loop *Lp);
+static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
+ const Loop *Lp, ValueToValueMap &StridesMap);
bool AccessAnalysis::canCheckPtrAtRT(
- LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
- unsigned &NumComparisons, ScalarEvolution *SE,
- Loop *TheLoop, bool ShouldCheckStride) {
+ LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
+ unsigned &NumComparisons, ScalarEvolution *SE, Loop *TheLoop,
+ ValueToValueMap &StridesMap, bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
unsigned NumReadPtrChecks = 0;
@@ -3283,10 +3825,11 @@ bool AccessAnalysis::canCheckPtrAtRT(
else
++NumReadPtrChecks;
- if (hasComputableBounds(SE, Ptr) &&
+ if (hasComputableBounds(SE, StridesMap, Ptr) &&
// When we run after a failing dependency check we have to make sure we
// don't have wrapping pointers.
- (!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) == 1)) {
+ (!ShouldCheckStride ||
+ isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
// The id of the dependence set.
unsigned DepId;
@@ -3300,7 +3843,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
// Each access has its own dependence set.
DepId = RunningDepId++;
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
+ RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, StridesMap);
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
} else {
@@ -3372,8 +3915,8 @@ void AccessAnalysis::processMemAccesses(bool UseDeferred) {
}
bool NeedDepCheck = false;
- // Check whether there is the possiblity of dependency because of underlying
- // objects being the same.
+ // Check whether there is the possibility of dependency because of
+ // underlying objects being the same.
typedef SmallVector<Value*, 16> ValueVector;
ValueVector TempObjects;
GetUnderlyingObjects(Ptr, TempObjects, DL);
@@ -3468,7 +4011,7 @@ public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
- MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L)
+ MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L)
: SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
ShouldRetryWithRuntimeCheck(false) {}
@@ -3494,7 +4037,7 @@ public:
///
/// Only checks sets with elements in \p CheckDeps.
bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps);
+ MemAccessInfoSet &CheckDeps, ValueToValueMap &Strides);
/// \brief The maximum number of bytes of a vector register we can vectorize
/// the accesses safely with.
@@ -3506,7 +4049,7 @@ public:
private:
ScalarEvolution *SE;
- DataLayout *DL;
+ const DataLayout *DL;
const Loop *InnermostLoop;
/// \brief Maps access locations (ptr, read/write) to program order.
@@ -3521,7 +4064,7 @@ private:
// We can access this many bytes in parallel safely.
unsigned MaxSafeDepDistBytes;
- /// \brief If we see a non constant dependence distance we can still try to
+ /// \brief If we see a non-constant dependence distance we can still try to
/// vectorize this loop with runtime checks.
bool ShouldRetryWithRuntimeCheck;
@@ -3538,7 +4081,8 @@ private:
/// distance is smaller than any other distance encountered so far).
/// Otherwise, this function returns true signaling a possible dependence.
bool isDependent(const MemAccessInfo &A, unsigned AIdx,
- const MemAccessInfo &B, unsigned BIdx);
+ const MemAccessInfo &B, unsigned BIdx,
+ ValueToValueMap &Strides);
/// \brief Check whether the data dependence could prevent store-load
/// forwarding.
@@ -3554,10 +4098,10 @@ static bool isInBoundsGep(Value *Ptr) {
}
/// \brief Check whether the access through \p Ptr has a constant stride.
-static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
- const Loop *Lp) {
+static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
+ const Loop *Lp, ValueToValueMap &StridesMap) {
const Type *Ty = Ptr->getType();
- assert(Ty->isPointerTy() && "Unexpected non ptr");
+ assert(Ty->isPointerTy() && "Unexpected non-ptr");
// Make sure that the pointer does not point to aggregate types.
const PointerType *PtrTy = cast<PointerType>(Ty);
@@ -3567,7 +4111,8 @@ static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
return 0;
}
- const SCEV *PtrScev = SE->getSCEV(Ptr);
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (!AR) {
DEBUG(dbgs() << "LV: Bad stride - Not an AddRecExpr pointer "
@@ -3671,7 +4216,8 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
}
bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
- const MemAccessInfo &B, unsigned BIdx) {
+ const MemAccessInfo &B, unsigned BIdx,
+ ValueToValueMap &Strides) {
assert (AIdx < BIdx && "Must pass arguments in program order");
Value *APtr = A.getPointer();
@@ -3683,11 +4229,11 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (!AIsWrite && !BIsWrite)
return false;
- const SCEV *AScev = SE->getSCEV(APtr);
- const SCEV *BScev = SE->getSCEV(BPtr);
+ const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
+ const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
- int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop);
- int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop);
+ int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides);
+ int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides);
const SCEV *Src = AScev;
const SCEV *Sink = BScev;
@@ -3721,7 +4267,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
- DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");
+ DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n");
ShouldRetryWithRuntimeCheck = true;
return true;
}
@@ -3792,9 +4338,9 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return false;
}
-bool
-MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps) {
+bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
+ MemAccessInfoSet &CheckDeps,
+ ValueToValueMap &Strides) {
MaxSafeDepDistBytes = -1U;
while (!CheckDeps.empty()) {
@@ -3811,16 +4357,16 @@ MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
// Check every access pair.
while (AI != AE) {
CheckDeps.erase(*AI);
- EquivalenceClasses<MemAccessInfo>::member_iterator OI = llvm::next(AI);
+ EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
while (OI != AE) {
// Check every accessing instruction pair in program order.
for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
- if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2))
+ if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides))
return false;
- if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1))
+ if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides))
return false;
}
++OI;
@@ -3875,6 +4421,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
return false;
}
+ NumLoads++;
Loads.push_back(Ld);
DepChecker.addAccess(Ld);
continue;
@@ -3888,6 +4435,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
return false;
}
+ NumStores++;
Stores.push_back(St);
DepChecker.addAccess(St);
}
@@ -3951,7 +4499,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr) || !isStridedPtr(SE, DL, Ptr, TheLoop)) {
+ if (Seen.insert(Ptr) || !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
++NumReads;
IsReadOnlyPtr = true;
}
@@ -3975,8 +4523,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
unsigned NumComparisons = 0;
bool CanDoRT = false;
if (NeedRTCheck)
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop);
-
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
+ Strides);
DEBUG(dbgs() << "LV: We need to do " << NumComparisons <<
" pointer comparisons.\n");
@@ -4009,8 +4557,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
bool CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
DEBUG(dbgs() << "LV: Checking memory dependencies\n");
- CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
- Accesses.getDependenciesToCheck());
+ CanVecMem = DepChecker.areDepsSafe(
+ DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
@@ -4024,7 +4572,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
PtrRtCheck.Need = true;
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
- TheLoop, true);
+ TheLoop, Strides, true);
// Check that we did not collect too many pointers or found an unsizeable
// pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
@@ -4162,17 +4710,16 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// Check whether we found a reduction operator.
FoundReduxOp |= !IsAPhi;
- // Process users of current instruction. Push non PHI nodes after PHI nodes
+ // Process users of current instruction. Push non-PHI nodes after PHI nodes
// onto the stack. This way we are going to have seen all inputs to PHI
// nodes once we get to them.
SmallVector<Instruction *, 8> NonPHIs;
SmallVector<Instruction *, 8> PHIs;
- for (Value::use_iterator UI = Cur->use_begin(), E = Cur->use_end(); UI != E;
- ++UI) {
- Instruction *Usr = cast<Instruction>(*UI);
+ for (User *U : Cur->users()) {
+ Instruction *UI = cast<Instruction>(U);
// Check if we found the exit user.
- BasicBlock *Parent = Usr->getParent();
+ BasicBlock *Parent = UI->getParent();
if (!TheLoop->contains(Parent)) {
// Exit if you find multiple outside users or if the header phi node is
// being used. In this case the user uses the value of the previous
@@ -4191,15 +4738,24 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
continue;
}
- // Process instructions only once (termination).
- if (VisitedInsts.insert(Usr)) {
- if (isa<PHINode>(Usr))
- PHIs.push_back(Usr);
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ ReductionInstDesc IgnoredVal(false, 0);
+ if (VisitedInsts.insert(UI)) {
+ if (isa<PHINode>(UI))
+ PHIs.push_back(UI);
else
- NonPHIs.push_back(Usr);
- }
+ NonPHIs.push_back(UI);
+ } else if (!isa<PHINode>(UI) &&
+ ((!isa<FCmpInst>(UI) &&
+ !isa<ICmpInst>(UI) &&
+ !isa<SelectInst>(UI)) ||
+ !isMinMaxSelectCmpPattern(UI, IgnoredVal).IsReduction))
+ return false;
+
// Remember that we completed the cycle.
- if (Usr == Phi)
+ if (UI == Phi)
FoundStartPHI = true;
}
Worklist.append(PHIs.begin(), PHIs.end());
@@ -4245,7 +4801,7 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
- if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
return ReductionInstDesc(false, I);
return ReductionInstDesc(Select, Prev.MinMaxKind);
}
@@ -4390,7 +4946,16 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
}
// We don't predicate stores at the moment.
- if (it->mayWriteToMemory() || it->mayThrow())
+ if (it->mayWriteToMemory()) {
+ StoreInst *SI = dyn_cast<StoreInst>(it);
+ // We only support predication of stores in basic blocks with one
+ // predecessor.
+ if (!SI || ++NumPredStores > NumberOfStoresToPredicate ||
+ !SafePtrs.count(SI->getPointerOperand()) ||
+ !SI->getParent()->getSinglePredecessor())
+ return false;
+ }
+ if (it->mayThrow())
return false;
// Check that we don't have a constant expression that can trap as operand.
@@ -4425,6 +4990,11 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
return Factor;
}
+ if (!EnableCondStoresVectorization && Legal->NumPredStores) {
+ DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
+ return Factor;
+ }
+
// Find the trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
@@ -4580,9 +5150,17 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
if (TC > 1 && TC < TinyTripCountUnrollThreshold)
return 1;
- unsigned TargetVectorRegisters = TTI.getNumberOfRegisters(true);
- DEBUG(dbgs() << "LV: The target has " << TargetVectorRegisters <<
- " vector registers\n");
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
+ DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters <<
+ " registers\n");
+
+ if (VF == 1) {
+ if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumScalarRegs;
+ } else {
+ if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumVectorRegs;
+ }
LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage();
// We divide by these constants so assume that we have at least one
@@ -4595,12 +5173,29 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// registers. These registers are used by all of the unrolled instances.
// Next, divide the remaining registers by the number of registers that is
// required by the loop, in order to estimate how many parallel instances
- // fit without causing spills.
- unsigned UF = (TargetVectorRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers;
+ // fit without causing spills. All of this is rounded down if necessary to be
+ // a power of two. We want power of two unroll factors to simplify any
+ // addressing operations or alignment considerations.
+ unsigned UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
+ R.MaxLocalUsers);
+
+ // Don't count the induction variable as unrolled.
+ if (EnableIndVarRegisterHeur)
+ UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) /
+ std::max(1U, (R.MaxLocalUsers - 1)));
// Clamp the unroll factor ranges to reasonable factors.
unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
+ // Check if the user has overridden the unroll max.
+ if (VF == 1) {
+ if (ForceTargetMaxScalarUnrollFactor.getNumOccurrences() > 0)
+ MaxUnrollSize = ForceTargetMaxScalarUnrollFactor;
+ } else {
+ if (ForceTargetMaxVectorUnrollFactor.getNumOccurrences() > 0)
+ MaxUnrollSize = ForceTargetMaxVectorUnrollFactor;
+ }
+
// If we did not calculate the cost for VF (because the user selected the VF)
// then we calculate the cost of VF here.
if (LoopCost == 0)
@@ -4613,32 +5208,40 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
else if (UF < 1)
UF = 1;
- bool HasReductions = Legal->getReductionVars()->size();
-
- // Decide if we want to unroll if we decided that it is legal to vectorize
- // but not profitable.
- if (VF == 1) {
- if (TheLoop->getNumBlocks() > 1 || !HasReductions ||
- LoopCost > SmallLoopCost)
- return 1;
-
- return UF;
- }
-
- if (HasReductions) {
+ // Unroll if we vectorized this loop and there is a reduction that could
+ // benefit from unrolling.
+ if (VF > 1 && Legal->getReductionVars()->size()) {
DEBUG(dbgs() << "LV: Unrolling because of reductions.\n");
return UF;
}
- // We want to unroll tiny loops in order to reduce the loop overhead.
- // We assume that the cost overhead is 1 and we use the cost model
- // to estimate the cost of the loop and unroll until the cost of the
- // loop overhead is about 5% of the cost of the loop.
+ // Note that if we've already vectorized the loop we will have done the
+ // runtime check and so unrolling won't require further checks.
+ bool UnrollingRequiresRuntimePointerCheck =
+ (VF == 1 && Legal->getRuntimePointerCheck()->Need);
+
+ // We want to unroll small loops in order to reduce the loop overhead and
+ // potentially expose ILP opportunities.
DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
- if (LoopCost < SmallLoopCost) {
+ if (!UnrollingRequiresRuntimePointerCheck &&
+ LoopCost < SmallLoopCost) {
+ // We assume that the cost overhead is 1 and we use the cost model
+ // to estimate the cost of the loop and unroll until the cost of the
+ // loop overhead is about 5% of the cost of the loop.
+ unsigned SmallUF = std::min(UF, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost));
+
+ // Unroll until store/load ports (estimated by max unroll factor) are
+ // saturated.
+ unsigned StoresUF = UF / (Legal->NumStores ? Legal->NumStores : 1);
+ unsigned LoadsUF = UF / (Legal->NumLoads ? Legal->NumLoads : 1);
+
+ if (EnableLoadStoreRuntimeUnroll && std::max(StoresUF, LoadsUF) > SmallUF) {
+ DEBUG(dbgs() << "LV: Unrolling to saturate store or load ports.\n");
+ return std::max(StoresUF, LoadsUF);
+ }
+
DEBUG(dbgs() << "LV: Unrolling to reduce branch cost.\n");
- unsigned NewUF = SmallLoopCost / (LoopCost + 1);
- return std::min(NewUF, UF);
+ return SmallUF;
}
DEBUG(dbgs() << "LV: Not Unrolling.\n");
@@ -4774,6 +5377,11 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
continue;
unsigned C = getInstructionCost(it, VF);
+
+ // Check if we should override the cost.
+ if (ForceTargetInstructionCost.getNumOccurrences() > 0)
+ C = ForceTargetInstructionCost;
+
BlockCost += C;
DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " <<
VF << " For instruction: " << *it << '\n');
@@ -4844,6 +5452,12 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
return StepVal > MaxMergeDistance;
}
+static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
+ if (Legal->hasStride(I->getOperand(0)) || Legal->hasStride(I->getOperand(1)))
+ return true;
+ return false;
+}
+
unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// If we know that this instruction will remain uniform, check the cost of
@@ -4886,15 +5500,25 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
+ // Since we will replace the stride by 1 the multiplication should go away.
+ if (I->getOpcode() == Instruction::Mul && isStrideMul(I, Legal))
+ return 0;
// Certain instructions can be cheaper to vectorize if they have a constant
// second vector operand. One example of this are shifts on x86.
TargetTransformInfo::OperandValueKind Op1VK =
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_AnyValue;
+ Value *Op2 = I->getOperand(1);
- if (isa<ConstantInt>(I->getOperand(1)))
+ // Check for a splat of a constant or for a non uniform vector of constants.
+ if (isa<ConstantInt>(Op2))
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+ else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
+ Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
+ if (cast<Constant>(Op2)->getSplatValue() != NULL)
+ Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+ }
return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
}
@@ -5038,7 +5662,8 @@ char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -5046,8 +5671,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
- Pass *createLoopVectorizePass(bool NoUnrolling) {
- return new LoopVectorize(NoUnrolling);
+ Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) {
+ return new LoopVectorize(NoUnrolling, AlwaysVectorize);
}
}
@@ -5064,7 +5689,8 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
}
-void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
+void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
+ bool IfPredicateStore) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// Holds vector parameters or scalars, in case of uniform vals.
SmallVector<VectorParts, 4> Params;
@@ -5109,10 +5735,40 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
+ Instruction *InsertPt = Builder.GetInsertPoint();
+ BasicBlock *IfBlock = Builder.GetInsertBlock();
+ BasicBlock *CondBlock = 0;
+
+ VectorParts Cond;
+ Loop *VectorLp = 0;
+ if (IfPredicateStore) {
+ assert(Instr->getParent()->getSinglePredecessor() &&
+ "Only support single predecessor blocks");
+ Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
+ Instr->getParent());
+ VectorLp = LI->getLoopFor(IfBlock);
+ assert(VectorLp && "Must have a loop for this block");
+ }
+
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
// For each scalar that we create:
+ // Start an "if (pred) a[i] = ..." block.
+ Value *Cmp = 0;
+ if (IfPredicateStore) {
+ if (Cond[Part]->getType()->isVectorTy())
+ Cond[Part] =
+ Builder.CreateExtractElement(Cond[Part], Builder.getInt32(0));
+ Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cond[Part],
+ ConstantInt::get(Cond[Part]->getType(), 1));
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ LoopVectorBody.push_back(CondBlock);
+ VectorLp->addBasicBlockToLoop(CondBlock, LI->getBase());
+ // Update Builder with newly created basic block.
+ Builder.SetInsertPoint(InsertPt);
+ }
+
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
@@ -5129,13 +5785,26 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
// so that future users will be able to use it.
if (!IsVoidRetTy)
VecResults[Part] = Cloned;
+
+ // End if-block.
+ if (IfPredicateStore) {
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ LoopVectorBody.push_back(NewIfBlock);
+ VectorLp->addBasicBlockToLoop(NewIfBlock, LI->getBase());
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
}
}
-void
-InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr,
- LoopVectorizationLegality*) {
- return scalarizeInstruction(Instr);
+void InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr) {
+ StoreInst *SI = dyn_cast<StoreInst>(Instr);
+ bool IfPredicateStore = (SI && Legal->blockNeedsPredication(SI->getParent()));
+
+ return scalarizeInstruction(Instr, IfPredicateStore);
}
Value *InnerLoopUnroller::reverseVector(Value *Vec) {
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c72b51f..ee32227 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -23,19 +23,20 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -343,7 +344,7 @@ public:
typedef SmallPtrSet<Value *, 16> ValueSet;
typedef SmallVector<StoreInst *, 8> StoreList;
- BoUpSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl,
+ BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li,
DominatorTree *Dt) :
F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt),
@@ -442,7 +443,7 @@ private:
/// \returns whether the VectorizableTree is fully vectoriable and will
/// be beneficial even the tree height is tiny.
- bool isFullyVectorizableTinyTree();
+ bool isFullyVectorizableTinyTree();
struct TreeEntry {
TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0),
@@ -521,7 +522,7 @@ private:
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> GatherSeq;
/// A list of blocks that we are going to CSE.
- SmallSet<BasicBlock *, 8> CSEBlocks;
+ SetVector<BasicBlock *> CSEBlocks;
/// Numbers instructions in different blocks.
DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
@@ -532,7 +533,7 @@ private:
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
- DataLayout *DL;
+ const DataLayout *DL;
TargetTransformInfo *TTI;
AliasAnalysis *AA;
LoopInfo *LI;
@@ -560,19 +561,18 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
if (Entry->NeedToGather)
continue;
- for (Value::use_iterator User = Scalar->use_begin(),
- UE = Scalar->use_end(); User != UE; ++User) {
- DEBUG(dbgs() << "SLP: Checking user:" << **User << ".\n");
+ for (User *U : Scalar->users()) {
+ DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
// Skip in-tree scalars that become vectors.
- if (ScalarToTreeEntry.count(*User)) {
+ if (ScalarToTreeEntry.count(U)) {
DEBUG(dbgs() << "SLP: \tInternal user will be removed:" <<
- **User << ".\n");
- int Idx = ScalarToTreeEntry[*User]; (void) Idx;
+ *U << ".\n");
+ int Idx = ScalarToTreeEntry[U]; (void) Idx;
assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
continue;
}
- Instruction *UserInst = dyn_cast<Instruction>(*User);
+ Instruction *UserInst = dyn_cast<Instruction>(U);
if (!UserInst)
continue;
@@ -580,9 +580,9 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
continue;
- DEBUG(dbgs() << "SLP: Need to extract:" << **User << " from lane " <<
+ DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
Lane << " from " << *Scalar << ".\n");
- ExternalUses.push_back(ExternalUser(Scalar, *User, Lane));
+ ExternalUses.push_back(ExternalUser(Scalar, U, Lane));
}
}
}
@@ -669,57 +669,56 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
Instruction *Scalar = cast<Instruction>(VL[i]);
DEBUG(dbgs() << "SLP: Checking users of " << *Scalar << ". \n");
- for (Value::use_iterator U = Scalar->use_begin(), UE = Scalar->use_end();
- U != UE; ++U) {
- DEBUG(dbgs() << "SLP: \tUser " << **U << ". \n");
- Instruction *User = dyn_cast<Instruction>(*U);
- if (!User) {
+ for (User *U : Scalar->users()) {
+ DEBUG(dbgs() << "SLP: \tUser " << *U << ". \n");
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI) {
DEBUG(dbgs() << "SLP: Gathering due unknown user. \n");
newTreeEntry(VL, false);
return;
}
// We don't care if the user is in a different basic block.
- BasicBlock *UserBlock = User->getParent();
+ BasicBlock *UserBlock = UI->getParent();
if (UserBlock != BB) {
DEBUG(dbgs() << "SLP: User from a different basic block "
- << *User << ". \n");
+ << *UI << ". \n");
continue;
}
// If this is a PHINode within this basic block then we can place the
// extract wherever we want.
- if (isa<PHINode>(*User)) {
- DEBUG(dbgs() << "SLP: \tWe can schedule PHIs:" << *User << ". \n");
+ if (isa<PHINode>(*UI)) {
+ DEBUG(dbgs() << "SLP: \tWe can schedule PHIs:" << *UI << ". \n");
continue;
}
// Check if this is a safe in-tree user.
- if (ScalarToTreeEntry.count(User)) {
- int Idx = ScalarToTreeEntry[User];
+ if (ScalarToTreeEntry.count(UI)) {
+ int Idx = ScalarToTreeEntry[UI];
int VecLocation = VectorizableTree[Idx].LastScalarIndex;
if (VecLocation <= MyLastIndex) {
DEBUG(dbgs() << "SLP: Gathering due to unschedulable vector. \n");
newTreeEntry(VL, false);
return;
}
- DEBUG(dbgs() << "SLP: In-tree user (" << *User << ") at #" <<
+ DEBUG(dbgs() << "SLP: In-tree user (" << *UI << ") at #" <<
VecLocation << " vector value (" << *Scalar << ") at #"
<< MyLastIndex << ".\n");
continue;
}
// This user is part of the reduction.
- if (RdxOps && RdxOps->count(User))
+ if (RdxOps && RdxOps->count(UI))
continue;
// Make sure that we can schedule this unknown user.
BlockNumbering &BN = BlocksNumbers[BB];
- int UserIndex = BN.getIndex(User);
+ int UserIndex = BN.getIndex(UI);
if (UserIndex < MyLastIndex) {
DEBUG(dbgs() << "SLP: Can't schedule extractelement for "
- << *User << ". \n");
+ << *UI << ". \n");
newTreeEntry(VL, false);
return;
}
@@ -738,11 +737,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// Check that instructions in this bundle don't reference other instructions.
// The runtime of this check is O(N * N-1 * uses(N)) and a typical N is 4.
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();
- U != UE; ++U) {
+ for (User *U : VL[i]->users()) {
for (unsigned j = 0; j < e; ++j) {
- if (i != j && *U == VL[j]) {
- DEBUG(dbgs() << "SLP: Intra-bundle dependencies!" << **U << ". \n");
+ if (i != j && U == VL[j]) {
+ DEBUG(dbgs() << "SLP: Intra-bundle dependencies!" << *U << ". \n");
newTreeEntry(VL, false);
return;
}
@@ -778,7 +776,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// Check for terminator values (e.g. invoke).
for (unsigned j = 0; j < VL.size(); ++j)
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
- TerminatorInst *Term = dyn_cast<TerminatorInst>(cast<PHINode>(VL[j])->getIncomingValue(i));
+ TerminatorInst *Term = dyn_cast<TerminatorInst>(
+ cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i)));
if (Term) {
DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
newTreeEntry(VL, false);
@@ -793,7 +792,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
ValueList Operands;
// Prepare the operand vector.
for (unsigned j = 0; j < VL.size(); ++j)
- Operands.push_back(cast<PHINode>(VL[j])->getIncomingValue(i));
+ Operands.push_back(cast<PHINode>(VL[j])->getIncomingValueForBlock(
+ PH->getIncomingBlock(i)));
buildTree_rec(Operands, Depth + 1);
}
@@ -930,7 +930,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
newTreeEntry(VL, false);
- DEBUG(dbgs() << "SLP: Non consecutive store.\n");
+ DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
@@ -946,6 +946,39 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
buildTree_rec(Operands, Depth + 1);
return;
}
+ case Instruction::Call: {
+ // Check if the calls are all to the same vectorizable intrinsic.
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
+ if (II==NULL) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
+ return;
+ }
+
+ Function *Int = II->getCalledFunction();
+
+ for (unsigned i = 1, e = VL.size(); i != e; ++i) {
+ IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]);
+ if (!II2 || II2->getCalledFunction() != Int) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: mismatched calls:" << *II << "!=" << *VL[i]
+ << "\n");
+ return;
+ }
+ }
+
+ newTreeEntry(VL, true);
+ for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[j]);
+ Operands.push_back(II2->getArgOperand(i));
+ }
+ buildTree_rec(Operands, Depth + 1);
+ }
+ return;
+ }
default:
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
@@ -979,8 +1012,17 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
return 0;
}
case Instruction::ExtractElement: {
- if (CanReuseExtract(VL))
- return 0;
+ if (CanReuseExtract(VL)) {
+ int DeadCost = 0;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ ExtractElementInst *E = cast<ExtractElementInst>(VL[i]);
+ if (E->hasOneUse())
+ // Take credit for instruction that will become dead.
+ DeadCost +=
+ TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
+ }
+ return -DeadCost;
+ }
return getGatherCost(VecTy);
}
case Instruction::ZExt:
@@ -1043,12 +1085,26 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;
- // Check whether all second operands are constant.
- for (unsigned i = 0; i < VL.size(); ++i)
- if (!isa<ConstantInt>(cast<Instruction>(VL[i])->getOperand(1))) {
+ // If all operands are exactly the same ConstantInt then set the
+ // operand kind to OK_UniformConstantValue.
+ // If instead not all operands are constants, then set the operand kind
+ // to OK_AnyValue. If all operands are constants but not the same,
+ // then set the operand kind to OK_NonUniformConstantValue.
+ ConstantInt *CInt = NULL;
+ for (unsigned i = 0; i < VL.size(); ++i) {
+ const Instruction *I = cast<Instruction>(VL[i]);
+ if (!isa<ConstantInt>(I->getOperand(1))) {
Op2VK = TargetTransformInfo::OK_AnyValue;
break;
}
+ if (i == 0) {
+ CInt = cast<ConstantInt>(I->getOperand(1));
+ continue;
+ }
+ if (Op2VK == TargetTransformInfo::OK_UniformConstantValue &&
+ CInt != cast<ConstantInt>(I->getOperand(1)))
+ Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
+ }
ScalarCost =
VecTy->getNumElements() *
@@ -1071,6 +1127,30 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
return VecStCost - ScalarStCost;
}
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(VL0);
+ IntrinsicInst *II = cast<IntrinsicInst>(CI);
+ Intrinsic::ID ID = II->getIntrinsicID();
+
+ // Calculate the cost of the scalar and vector calls.
+ SmallVector<Type*, 4> ScalarTys, VecTys;
+ for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) {
+ ScalarTys.push_back(CI->getArgOperand(op)->getType());
+ VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
+ VecTy->getNumElements()));
+ }
+
+ int ScalarCallCost = VecTy->getNumElements() *
+ TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys);
+
+ int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys);
+
+ DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
+ << " (" << VecCallCost << "-" << ScalarCallCost << ")"
+ << " for " << *II << "\n");
+
+ return VecCallCost - ScalarCallCost;
+ }
default:
llvm_unreachable("Unknown instruction");
}
@@ -1084,11 +1164,15 @@ bool BoUpSLP::isFullyVectorizableTinyTree() {
if (VectorizableTree.size() != 2)
return false;
+ // Handle splat stores.
+ if (!VectorizableTree[0].NeedToGather && isSplat(VectorizableTree[1].Scalars))
+ return true;
+
// Gathering cost would be too much for tiny trees.
- if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
- return false;
+ if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
+ return false;
- return true;
+ return true;
}
int BoUpSLP::getTreeCost() {
@@ -1113,16 +1197,19 @@ int BoUpSLP::getTreeCost() {
Cost += C;
}
+ SmallSet<Value *, 16> ExtractCostCalculated;
int ExtractCost = 0;
for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end();
I != E; ++I) {
+ // We only add extract cost once for the same scalar.
+ if (!ExtractCostCalculated.insert(I->Scalar))
+ continue;
VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth);
ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
I->Lane);
}
-
DEBUG(dbgs() << "SLP: Total Cost " << Cost + ExtractCost<< ".\n");
return Cost + ExtractCost;
}
@@ -1551,6 +1638,32 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->VectorizedValue = S;
return propagateMetadata(S, E->Scalars);
}
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(VL0);
+
+ setInsertPointAfterBundle(E->Scalars);
+ std::vector<Value *> OpVecs;
+ for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
+ ValueList OpVL;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+ CallInst *CEI = cast<CallInst>(E->Scalars[i]);
+ OpVL.push_back(CEI->getArgOperand(j));
+ }
+
+ Value *OpVec = vectorizeTree(OpVL);
+ DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
+ OpVecs.push_back(OpVec);
+ }
+
+ Module *M = F->getParent();
+ IntrinsicInst *II = cast<IntrinsicInst>(CI);
+ Intrinsic::ID ID = II->getIntrinsicID();
+ Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
+ Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
+ Value *V = Builder.CreateCall(CF, OpVecs);
+ E->VectorizedValue = V;
+ return V;
+ }
default:
llvm_unreachable("unknown inst");
}
@@ -1571,8 +1684,8 @@ Value *BoUpSLP::vectorizeTree() {
// Skip users that we already RAUW. This happens when one instruction
// has multiple uses of the same value.
- if (std::find(Scalar->use_begin(), Scalar->use_end(), User) ==
- Scalar->use_end())
+ if (std::find(Scalar->user_begin(), Scalar->user_end(), User) ==
+ Scalar->user_end())
continue;
assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar");
@@ -1586,12 +1699,7 @@ Value *BoUpSLP::vectorizeTree() {
Value *Lane = Builder.getInt32(it->Lane);
// Generate extracts for out-of-tree users.
// Find the insertion point for the extractelement lane.
- if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
- Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
- Value *Ex = Builder.CreateExtractElement(Vec, Lane);
- CSEBlocks.insert(PN->getParent());
- User->replaceUsesOfWith(Scalar, Ex);
- } else if (isa<Instruction>(Vec)){
+ if (isa<Instruction>(Vec)){
if (PHINode *PH = dyn_cast<PHINode>(User)) {
for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
if (PH->getIncomingValue(i) == Scalar) {
@@ -1633,15 +1741,16 @@ Value *BoUpSLP::vectorizeTree() {
Type *Ty = Scalar->getType();
if (!Ty->isVoidTy()) {
- for (Value::use_iterator User = Scalar->use_begin(),
- UE = Scalar->use_end(); User != UE; ++User) {
- DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n");
+#ifndef NDEBUG
+ for (User *U : Scalar->users()) {
+ DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
- assert((ScalarToTreeEntry.count(*User) ||
+ assert((ScalarToTreeEntry.count(U) ||
// It is legal to replace the reduction users by undef.
- (RdxOps && RdxOps->count(*User))) &&
+ (RdxOps && RdxOps->count(U))) &&
"Replacing out-of-tree value with undef");
}
+#endif
Value *Undef = UndefValue::get(Ty);
Scalar->replaceAllUsesWith(Undef);
}
@@ -1658,16 +1767,6 @@ Value *BoUpSLP::vectorizeTree() {
return VectorizableTree[0].VectorizedValue;
}
-class DTCmp {
- const DominatorTree *DT;
-
-public:
- DTCmp(const DominatorTree *DT) : DT(DT) {}
- bool operator()(const BasicBlock *A, const BasicBlock *B) const {
- return DT->properlyDominates(A, B);
- }
-};
-
void BoUpSLP::optimizeGatherSequence() {
DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
<< " gather sequences instructions.\n");
@@ -1706,7 +1805,10 @@ void BoUpSLP::optimizeGatherSequence() {
// Sort blocks by domination. This ensures we visit a block after all blocks
// dominating it are visited.
SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
- std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT));
+ std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(),
+ [this](const BasicBlock *A, const BasicBlock *B) {
+ return DT->properlyDominates(A, B);
+ });
// Perform O(N^2) search over the gather sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
@@ -1715,7 +1817,7 @@ void BoUpSLP::optimizeGatherSequence() {
for (SmallVectorImpl<BasicBlock *>::iterator I = CSEWorkList.begin(),
E = CSEWorkList.end();
I != E; ++I) {
- assert((I == CSEWorkList.begin() || !DT->dominates(*I, *llvm::prior(I))) &&
+ assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
"Worklist not sorted properly!");
BasicBlock *BB = *I;
// For all instructions in blocks containing gather sequences:
@@ -1760,19 +1862,23 @@ struct SLPVectorizer : public FunctionPass {
}
ScalarEvolution *SE;
- DataLayout *DL;
+ const DataLayout *DL;
TargetTransformInfo *TTI;
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
- virtual bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
+ if (skipOptnoneFunction(F))
+ return false;
+
SE = &getAnalysis<ScalarEvolution>();
- DL = getAnalysisIfAvailable<DataLayout>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : 0;
TTI = &getAnalysis<TargetTransformInfo>();
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
- DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
StoreRefs.clear();
bool Changed = false;
@@ -1793,7 +1899,7 @@ struct SLPVectorizer : public FunctionPass {
DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
- // Use the bollom up slp vectorizer to construct chains that start with
+ // Use the bottom up slp vectorizer to construct chains that start with
// he store instructions.
BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT);
@@ -1821,15 +1927,15 @@ struct SLPVectorizer : public FunctionPass {
return Changed;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<ScalarEvolution>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetTransformInfo>();
AU.addRequired<LoopInfo>();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfo>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
@@ -1867,7 +1973,7 @@ private:
StoreListMap StoreRefs;
};
-/// \brief Check that the Values in the slice in VL array are still existant in
+/// \brief Check that the Values in the slice in VL array are still existent in
/// the WeakVH array.
/// Vectorization of part of the VL array may cause later values in the VL array
/// to become invalid. We track when this has happened in the WeakVH array.
@@ -1894,7 +2000,7 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
if (!isPowerOf2_32(Sz) || VF < 2)
return false;
- // Keep track of values that were delete by vectorizing in the loop below.
+ // Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
bool Changed = false;
@@ -2073,7 +2179,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
int Cost = R.getTreeCost();
if (Cost < -SLPCostThreshold) {
- DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n");
+ DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
R.vectorizeTree();
// Move to the next bundle.
@@ -2207,7 +2313,7 @@ public:
/// \brief Try to find a reduction tree.
bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
- DataLayout *DL) {
+ const DataLayout *DL) {
assert((!Phi ||
std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
"Thi phi needs to use the binary operator");
@@ -2445,7 +2551,7 @@ static bool findBuildVector(InsertElementInst *IE,
if (IE->use_empty())
return false;
- InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->use_back());
+ InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->user_back());
if (!NextUse)
return true;
@@ -2512,7 +2618,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
break;
}
- // Start over at the next instruction of a differnt type (or the end).
+ // Start over at the next instruction of a different type (or the end).
IncIt = SameTypeIt;
}
}
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index a927fe1..d459bcf 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -17,7 +17,7 @@
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/Vectorize.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"