author    Stephen Hines <srhines@google.com>  2014-02-11 20:01:10 -0800
committer Stephen Hines <srhines@google.com>  2014-02-11 20:01:10 -0800
commit    ce9904c6ea8fd669978a8eefb854b330eb9828ff (patch)
tree      2418ee2e96ea220977c8fb74959192036ab5b133 /lib/Transforms
parent    c27b10b198c1d9e9b51f2303994313ec2778edd7 (diff)
parent    dbb832b83351cec97b025b61c26536ef50c3181c (diff)
Merge remote-tracking branch 'upstream/release_34' into merge-20140211
Conflicts:
	lib/Linker/LinkModules.cpp
	lib/Support/Unix/Signals.inc

Change-Id: Ia54f291fa5dc828052d2412736e8495c1282aa64
Diffstat (limited to 'lib/Transforms')
-rw-r--r--  lib/Transforms/Hello/Hello.cpp | 2
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 10
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 11
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 13
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp | 54
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 4
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp | 3
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 270
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp | 2
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp | 4
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp | 118
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 77
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 92
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 2
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 226
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 13
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 184
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 34
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 156
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 119
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 30
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 51
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp | 14
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 26
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 38
-rw-r--r--  lib/Transforms/InstCombine/InstCombineWorklist.h | 7
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 112
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 240
-rw-r--r--  lib/Transforms/Instrumentation/BoundsChecking.cpp | 6
-rw-r--r--  lib/Transforms/Instrumentation/CMakeLists.txt | 5
-rw-r--r--  lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 1397
-rw-r--r--  lib/Transforms/Instrumentation/DebugIR.cpp | 3
-rw-r--r--  lib/Transforms/Instrumentation/EdgeProfiling.cpp | 117
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp | 25
-rw-r--r--  lib/Transforms/Instrumentation/Instrumentation.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp | 443
-rw-r--r--  lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 225
-rw-r--r--  lib/Transforms/Instrumentation/PathProfiling.cpp | 1424
-rw-r--r--  lib/Transforms/Instrumentation/ProfilingUtils.cpp | 169
-rw-r--r--  lib/Transforms/Instrumentation/ProfilingUtils.h | 36
-rw-r--r--  lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 12
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 242
-rw-r--r--  lib/Transforms/Scalar/BasicBlockPlacement.cpp | 152
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 4
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 22
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 197
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 20
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 50
-rw-r--r--  lib/Transforms/Scalar/LoopRerollPass.cpp | 1184
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 21
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 42
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 127
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 156
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 155
-rw-r--r--  lib/Transforms/Scalar/SampleProfile.cpp | 479
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 165
-rw-r--r--  lib/Transforms/Scalar/StructurizeCFG.cpp | 51
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 9
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp | 9
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 3
-rw-r--r--  lib/Transforms/Utils/FlattenCFG.cpp | 6
-rw-r--r--  lib/Transforms/Utils/GlobalStatus.cpp | 183
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 3
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp | 15
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 208
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 8
-rw-r--r--  lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 2
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 1
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 62
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp | 7
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 298
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 6
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 134
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 271
-rw-r--r--  lib/Transforms/Utils/SpecialCaseList.cpp | 97
-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp | 78
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 1119
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 1097
85 files changed, 7933 insertions, 4577 deletions
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index 9f2343b..9251783 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -52,7 +52,7 @@ namespace {
return false;
}
- // We don't modify the program, so we preserve all analyses
+ // We don't modify the program, so we preserve all analyses.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index c42d506..df08091 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -88,7 +88,7 @@ char ArgPromotion::ID = 0;
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
"Promote 'by reference' arguments to scalars", false, false)
@@ -504,7 +504,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// OriginalLoads - Keep track of a representative load instruction from the
// original function so that we can tell the alias analysis implementation
// what the new GEP/Load instructions we are inserting look like.
- std::map<IndicesVector, LoadInst*> OriginalLoads;
+ // We need to keep the original loads for each argument and the elements
+ // of the argument that are accessed.
+ std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
// Attribute - Keep track of the parameter attributes for the arguments
// that we are *not* promoting. For the ones that we do promote, the parameter
@@ -569,7 +571,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
else
// Take any load, we will use it only to update Alias Analysis
OrigLoad = cast<LoadInst>(User->use_back());
- OriginalLoads[Indices] = OrigLoad;
+ OriginalLoads[std::make_pair(I, Indices)] = OrigLoad;
}
// Add a parameter to the function for each element passed in.
@@ -676,7 +678,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[*SI];
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)];
if (!SI->empty()) {
Ops.reserve(SI->size());
Type *ElTy = V->getType();
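
The interesting part of the ArgumentPromotion change is the key type of OriginalLoads: indexing by the index vector alone lets two different promoted arguments with identical GEP indices collide in the map. A minimal standalone sketch of the collision; plain local structs stand in for Argument and LoadInst, nothing here is LLVM API:

#include <cassert>
#include <map>
#include <utility>
#include <vector>

int main() {
  using IndicesVector = std::vector<unsigned>;
  struct Arg {} A, B;           // stand-ins for two promoted Argument*s
  struct Load {} LoadA, LoadB;  // stand-ins for their representative LoadInst*s

  IndicesVector Idx = {0, 1};   // both arguments accessed at element {0, 1}

  // Old scheme: keyed on the indices alone, the second insert clobbers the
  // first argument's representative load.
  std::map<IndicesVector, Load *> Old;
  Old[Idx] = &LoadA;
  Old[Idx] = &LoadB;
  assert(Old.size() == 1);

  // New scheme: the (argument, indices) pair keeps the entries distinct.
  std::map<std::pair<Arg *, IndicesVector>, Load *> New;
  New[std::make_pair(&A, Idx)] = &LoadA;
  New[std::make_pair(&B, Idx)] = &LoadB;
  assert(New.size() == 2);
}
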
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index a7bf188..d94c0f4 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -93,9 +93,12 @@ bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
}
unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
+ unsigned Align = GV->getAlignment();
+ if (Align)
+ return Align;
if (TD)
return TD->getPreferredAlignment(GV);
- return GV->getAlignment();
+ return 0;
}
bool ConstantMerge::runOnModule(Module &M) {
@@ -210,9 +213,9 @@ bool ConstantMerge::runOnModule(Module &M) {
// Bump the alignment if necessary.
if (Replacements[i].first->getAlignment() ||
Replacements[i].second->getAlignment()) {
- Replacements[i].second->setAlignment(std::max(
- Replacements[i].first->getAlignment(),
- Replacements[i].second->getAlignment()));
+ Replacements[i].second->setAlignment(
+ std::max(getAlignment(Replacements[i].first),
+ getAlignment(Replacements[i].second)));
}
// Eliminate any uses of the dead global.
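
The getAlignment() change gives an explicitly requested alignment precedence over the target's preferred alignment, and the merge loop now resolves both sides through it before taking the max. A small standalone sketch of the precedence; the names explicitAlign, preferredAlign and hasTargetData are illustrative assumptions, not LLVM API:

#include <algorithm>
#include <cassert>

unsigned getAlignment(unsigned explicitAlign, bool hasTargetData,
                      unsigned preferredAlign) {
  if (explicitAlign)
    return explicitAlign;          // explicitly requested alignment wins
  if (hasTargetData)
    return preferredAlign;         // fall back to target-preferred alignment
  return 0;                        // unknown
}

int main() {
  // Merging two globals: resolve each side first, then take the max, so a
  // global with no explicit alignment still contributes its preferred one.
  unsigned A = getAlignment(/*explicit*/ 0, true, /*preferred*/ 8);
  unsigned B = getAlignment(/*explicit*/ 4, true, /*preferred*/ 8);
  assert(std::max(A, B) == 8);     // the old code computed max(0, 4) = 4
}
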
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 6ee6162..911c14e 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -357,6 +357,19 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
return false;
+ // If a function seen at compile time is not necessarily the one linked to
+ // the binary being built, it is illegal to change the actual arguments
+ // passed to it. These functions can be captured by isWeakForLinker().
+  // *NOTE* that mayBeOverridden() is insufficient for this purpose as it
+  // doesn't include linkage types like AvailableExternallyLinkage and
+  // LinkOnceODRLinkage. Take linkonce_odr as an example: it indicates a set
+  // of *EQUIVALENT* globals that can be merged at link-time. However, the
+  // semantics of *EQUIVALENT* functions include their parameters. Changing
+  // parameters breaks this assumption.
+ //
+ if (Fn.isWeakForLinker())
+ return false;
+
if (Fn.use_empty())
return false;
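
A rough standalone model of the distinction the new comment draws; the enum and both predicates below are stand-ins, not LLVM's real API. The point is that mayBeOverridden() misses linkages whose definition can still be replaced by an *equivalent* copy at link time, which is exactly what isWeakForLinker() captures:

#include <cassert>

enum Linkage {
  External, Internal,
  WeakAny, WeakODR,
  LinkOnceAny, LinkOnceODR,
  AvailableExternally
};

// A definition with a possibly *different* body may win at link time.
bool mayBeOverridden(Linkage L) {
  return L == WeakAny || L == LinkOnceAny;
}

// A definition may be dropped in favor of some copy from another module,
// even an "equivalent" one -- and equivalence includes the parameter list.
bool isWeakForLinker(Linkage L) {
  return mayBeOverridden(L) || L == WeakODR || L == LinkOnceODR ||
         L == AvailableExternally;
}

int main() {
  // The cases the comment calls out: not overridable by a different body,
  // yet still unsafe for argument removal because another equivalent copy
  // (with the original signature) may be the one that gets linked.
  assert(!mayBeOverridden(LinkOnceODR) && isWeakForLinker(LinkOnceODR));
  assert(!mayBeOverridden(AvailableExternally) &&
         isWeakForLinker(AvailableExternally));
}
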
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index fa3d72d..50fb3e6 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -21,6 +21,38 @@
#include <algorithm>
using namespace llvm;
+/// Make sure GV is visible from both modules. Delete is true if it is
+/// being deleted from this module.
+/// This also makes sure GV cannot be dropped so that references from
+/// the split module remain valid.
+static void makeVisible(GlobalValue &GV, bool Delete) {
+ bool Local = GV.hasLocalLinkage();
+ if (Local)
+ GV.setVisibility(GlobalValue::HiddenVisibility);
+
+ if (Local || Delete) {
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ return;
+ }
+
+ if (!GV.hasLinkOnceLinkage()) {
+ assert(!GV.isDiscardableIfUnused());
+ return;
+ }
+
+ // Map linkonce* to weak* so that llvm doesn't drop this GV.
+ switch(GV.getLinkage()) {
+ default:
+ llvm_unreachable("Unexpected linkage");
+ case GlobalValue::LinkOnceAnyLinkage:
+ GV.setLinkage(GlobalValue::WeakAnyLinkage);
+ return;
+ case GlobalValue::LinkOnceODRLinkage:
+ GV.setLinkage(GlobalValue::WeakODRLinkage);
+ return;
+ }
+}
+
namespace {
/// @brief A pass to extract specific functions and their dependencies.
class GVExtractorPass : public ModulePass {
@@ -60,12 +92,7 @@ namespace {
continue;
}
- bool Local = I->isDiscardableIfUnused();
- if (Local)
- I->setVisibility(GlobalValue::HiddenVisibility);
-
- if (Local || Delete)
- I->setLinkage(GlobalValue::ExternalLinkage);
+ makeVisible(*I, Delete);
if (Delete)
I->setInitializer(0);
@@ -80,12 +107,7 @@ namespace {
continue;
}
- bool Local = I->isDiscardableIfUnused();
- if (Local)
- I->setVisibility(GlobalValue::HiddenVisibility);
-
- if (Local || Delete)
- I->setLinkage(GlobalValue::ExternalLinkage);
+ makeVisible(*I, Delete);
if (Delete)
I->deleteBody();
@@ -97,12 +119,10 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- if (CurI->isDiscardableIfUnused()) {
- CurI->setVisibility(GlobalValue::HiddenVisibility);
- CurI->setLinkage(GlobalValue::ExternalLinkage);
- }
+ bool Delete = deleteStuff == (bool)Named.count(CurI);
+ makeVisible(*CurI, Delete);
- if (deleteStuff == (bool)Named.count(CurI)) {
+ if (Delete) {
Type *Ty = CurI->getType()->getElementType();
CurI->removeFromParent();
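
For reference, a standalone model of makeVisible()'s linkage transitions as implemented above. The enum is a stand-in, and the real function additionally sets hidden visibility on formerly-local values; this only traces the linkage decision:

#include <cassert>

enum Linkage { Internal, External, LinkOnceAny, LinkOnceODR,
               WeakAny, WeakODR };

Linkage makeVisible(Linkage L, bool Delete) {
  bool Local = (L == Internal);
  if (Local || Delete)
    return External;               // must be nameable from the other half
  if (L == LinkOnceAny)
    return WeakAny;                // weak* cannot be dropped as unused
  if (L == LinkOnceODR)
    return WeakODR;
  return L;                        // already strong enough to survive
}

int main() {
  assert(makeVisible(Internal, /*Delete=*/false) == External);
  assert(makeVisible(LinkOnceODR, /*Delete=*/false) == WeakODR);
  assert(makeVisible(LinkOnceODR, /*Delete=*/true) == External);
}
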
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 1366883..60e5f06 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -136,7 +136,8 @@ namespace {
char FunctionAttrs::ID = 0;
INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -366,6 +367,7 @@ namespace {
}
}
assert(Found && "Capturing call-site captured nothing?");
+ (void)Found;
return false;
}
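
The added (void)Found line is the usual idiom for a variable that is only read inside an assert: in -DNDEBUG builds the assert compiles away, leaving the variable otherwise unused and triggering -Wunused-variable. Minimal illustration:

#include <cassert>

int main() {
  bool Found = true;            // only read inside the assert below
  assert(Found && "Capturing call-site captured nothing?");
  (void)Found;                  // keeps -DNDEBUG builds warning-free
}
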
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 201f320..901295d 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -179,6 +179,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
// any globals used will be marked as needed.
Function *F = cast<Function>(G);
+ if (F->hasPrefixData())
+ MarkUsedGlobalsAsNeeded(F->getPrefixData());
+
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
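
The GlobalDCE fix adds one more edge to follow when marking liveness: a needed function keeps the globals referenced from its prefix data alive, just like the operands of its instructions. A simplified standalone sketch of that marking step; the struct and names are illustrative, not LLVM's:

#include <cassert>
#include <set>
#include <vector>

struct Global {
  std::vector<Global *> Refs;    // globals this one references
  Global *PrefixData = nullptr;  // extra constant attached to a function
};

void markNeeded(Global *G, std::set<Global *> &Needed) {
  if (!Needed.insert(G).second)
    return;                              // already visited
  if (G->PrefixData)
    markNeeded(G->PrefixData, Needed);   // the new edge GlobalDCE now follows
  for (Global *R : G->Refs)
    markNeeded(R, Needed);
}

int main() {
  Global Used, Fn;
  Fn.PrefixData = &Used;        // only reachable through prefix data
  std::set<Global *> Needed;
  markNeeded(&Fn, Needed);
  assert(Needed.count(&Used));  // previously GlobalDCE would have dropped it
}
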
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 64cd515..2ea89a1 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -37,7 +37,9 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
using namespace llvm;
@@ -60,7 +62,6 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
namespace {
- struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfo>();
@@ -80,7 +81,6 @@ namespace {
bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
- const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
@@ -98,209 +98,6 @@ INITIALIZE_PASS_END(GlobalOpt, "globalopt",
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
-namespace {
-
-/// GlobalStatus - As we analyze each global, keep track of some information
-/// about it. If we find out that the address of the global is taken, none of
-/// this info will be accurate.
-struct GlobalStatus {
- /// isCompared - True if the global's address is used in a comparison.
- bool isCompared;
-
- /// isLoaded - True if the global is ever loaded. If the global isn't ever
- /// loaded it can be deleted.
- bool isLoaded;
-
- /// StoredType - Keep track of what stores to the global look like.
- ///
- enum StoredType {
- /// NotStored - There is no store to this global. It can thus be marked
- /// constant.
- NotStored,
-
- /// isInitializerStored - This global is stored to, but the only thing
- /// stored is the constant it was initialized with. This is only tracked
- /// for scalar globals.
- isInitializerStored,
-
- /// isStoredOnce - This global is stored to, but only its initializer and
- /// one other value is ever stored to it. If this global isStoredOnce, we
- /// track the value stored to it in StoredOnceValue below. This is only
- /// tracked for scalar globals.
- isStoredOnce,
-
- /// isStored - This global is stored to by multiple values or something else
- /// that we cannot track.
- isStored
- } StoredType;
-
- /// StoredOnceValue - If only one value (besides the initializer constant) is
- /// ever stored to this global, keep track of what value it is.
- Value *StoredOnceValue;
-
- /// AccessingFunction/HasMultipleAccessingFunctions - These start out
- /// null/false. When the first accessing function is noticed, it is recorded.
- /// When a second different accessing function is noticed,
- /// HasMultipleAccessingFunctions is set to true.
- const Function *AccessingFunction;
- bool HasMultipleAccessingFunctions;
-
- /// HasNonInstructionUser - Set to true if this global has a user that is not
- /// an instruction (e.g. a constant expr or GV initializer).
- bool HasNonInstructionUser;
-
- /// AtomicOrdering - Set to the strongest atomic ordering requirement.
- AtomicOrdering Ordering;
-
- GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
- StoredOnceValue(0), AccessingFunction(0),
- HasMultipleAccessingFunctions(false),
- HasNonInstructionUser(false), Ordering(NotAtomic) {}
-};
-
-}
-
-/// StrongerOrdering - Return the stronger of the two ordering. If the two
-/// orderings are acquire and release, then return AcquireRelease.
-///
-static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
- if (X == Acquire && Y == Release) return AcquireRelease;
- if (Y == Acquire && X == Release) return AcquireRelease;
- return (AtomicOrdering)std::max(X, Y);
-}
-
-/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
-/// by constants itself. Note that constants cannot be cyclic, so this test is
-/// pretty easy to implement recursively.
-///
-static bool SafeToDestroyConstant(const Constant *C) {
- if (isa<GlobalValue>(C)) return false;
-
- for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
- ++UI)
- if (const Constant *CU = dyn_cast<Constant>(*UI)) {
- if (!SafeToDestroyConstant(CU)) return false;
- } else
- return false;
- return true;
-}
-
-
-/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus
-/// structure. If the global has its address taken, return true to indicate we
-/// can't do anything with it.
-///
-static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
- SmallPtrSet<const PHINode*, 16> &PHIUsers) {
- for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
- ++UI) {
- const User *U = *UI;
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
- GS.HasNonInstructionUser = true;
-
- // If the result of the constantexpr isn't pointer type, then we won't
- // know to expect it in various places. Just reject early.
- if (!isa<PointerType>(CE->getType())) return true;
-
- if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
- } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
- if (!GS.HasMultipleAccessingFunctions) {
- const Function *F = I->getParent()->getParent();
- if (GS.AccessingFunction == 0)
- GS.AccessingFunction = F;
- else if (GS.AccessingFunction != F)
- GS.HasMultipleAccessingFunctions = true;
- }
- if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
- GS.isLoaded = true;
- // Don't hack on volatile loads.
- if (LI->isVolatile()) return true;
- GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
- // Don't allow a store OF the address, only stores TO the address.
- if (SI->getOperand(0) == V) return true;
-
- // Don't hack on volatile stores.
- if (SI->isVolatile()) return true;
-
- GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
-
- // If this is a direct store to the global (i.e., the global is a scalar
- // value, not an aggregate), keep more specific information about
- // stores.
- if (GS.StoredType != GlobalStatus::isStored) {
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(
- SI->getOperand(1))) {
- Value *StoredVal = SI->getOperand(0);
-
- if (Constant *C = dyn_cast<Constant>(StoredVal)) {
- if (C->isThreadDependent()) {
- // The stored value changes between threads; don't track it.
- return true;
- }
- }
-
- if (StoredVal == GV->getInitializer()) {
- if (GS.StoredType < GlobalStatus::isInitializerStored)
- GS.StoredType = GlobalStatus::isInitializerStored;
- } else if (isa<LoadInst>(StoredVal) &&
- cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
- if (GS.StoredType < GlobalStatus::isInitializerStored)
- GS.StoredType = GlobalStatus::isInitializerStored;
- } else if (GS.StoredType < GlobalStatus::isStoredOnce) {
- GS.StoredType = GlobalStatus::isStoredOnce;
- GS.StoredOnceValue = StoredVal;
- } else if (GS.StoredType == GlobalStatus::isStoredOnce &&
- GS.StoredOnceValue == StoredVal) {
- // noop.
- } else {
- GS.StoredType = GlobalStatus::isStored;
- }
- } else {
- GS.StoredType = GlobalStatus::isStored;
- }
- }
- } else if (isa<BitCastInst>(I)) {
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (isa<GetElementPtrInst>(I)) {
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (isa<SelectInst>(I)) {
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
- // PHI nodes we can check just like select or GEP instructions, but we
- // have to be careful about infinite recursion.
- if (PHIUsers.insert(PN)) // Not already visited.
- if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (isa<CmpInst>(I)) {
- GS.isCompared = true;
- } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
- if (MTI->isVolatile()) return true;
- if (MTI->getArgOperand(0) == V)
- GS.StoredType = GlobalStatus::isStored;
- if (MTI->getArgOperand(1) == V)
- GS.isLoaded = true;
- } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
- assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
- if (MSI->isVolatile()) return true;
- GS.StoredType = GlobalStatus::isStored;
- } else {
- return true; // Any other non-load instruction might take address!
- }
- } else if (const Constant *C = dyn_cast<Constant>(U)) {
- GS.HasNonInstructionUser = true;
- // We might have a dead and dangling constant hanging off of here.
- if (!SafeToDestroyConstant(C))
- return true;
- } else {
- GS.HasNonInstructionUser = true;
- // Otherwise must be some other user.
- return true;
- }
- }
-
- return false;
-}
-
/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
/// as a root? If so, we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
@@ -434,7 +231,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
Changed = true;
}
} else if (Constant *C = dyn_cast<Constant>(U)) {
- if (SafeToDestroyConstant(C)) {
+ if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
Dead.clear();
@@ -471,9 +268,17 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
DataLayout *TD, TargetLibraryInfo *TLI) {
bool Changed = false;
- SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
+ // Note that we need to use a weak value handle for the worklist items. When
+  // we delete a constant array, we may also be holding a pointer to one of its
+ // elements (or an element of one of its elements if we're dealing with an
+ // array of arrays) in the worklist.
+ SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end());
while (!WorkList.empty()) {
- User *U = WorkList.pop_back_val();
+ Value *UV = WorkList.pop_back_val();
+ if (!UV)
+ continue;
+
+ User *U = cast<User>(UV);
if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (Init) {
@@ -534,7 +339,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
} else if (Constant *C = dyn_cast<Constant>(U)) {
// If we have a chain of dead constantexprs or other things dangling from
// us, and if they are all dead, nuke them without remorse.
- if (SafeToDestroyConstant(C)) {
+ if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
CleanupConstantGlobalUsers(V, Init, TD, TLI);
return true;
@@ -549,7 +354,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
static bool isSafeSROAElementUse(Value *V) {
// We might have a dead and dangling constant hanging off of here.
if (Constant *C = dyn_cast<Constant>(V))
- return SafeToDestroyConstant(C);
+ return isSafeToDestroyConstant(C);
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
@@ -1373,8 +1178,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
- StructType *ST =
- cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+ StructType *ST = cast<StructType>(PN->getType()->getPointerElementType());
PHINode *NewPN =
PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
@@ -1505,7 +1309,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, 0,
@@ -1735,7 +1539,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(CI->getType());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
@@ -1917,13 +1721,12 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
if (!GV->hasLocalLinkage())
return false;
- SmallPtrSet<const PHINode*, 16> PHIUsers;
GlobalStatus GS;
- if (AnalyzeGlobal(GV, GS, PHIUsers))
+ if (GlobalStatus::analyzeGlobal(GV, GS))
return false;
- if (!GS.isCompared && !GV->hasUnnamedAddr()) {
+ if (!GS.IsCompared && !GV->hasUnnamedAddr()) {
GV->setUnnamedAddr(true);
NumUnnamed++;
}
@@ -1931,14 +1734,13 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
if (GV->isConstant() || !GV->hasInitializer())
return false;
- return ProcessInternalGlobal(GV, GVI, PHIUsers, GS);
+ return ProcessInternalGlobal(GV, GVI, GS);
}
/// ProcessInternalGlobal - Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
- const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS) {
// If this is a first class global and has only one accessing function
// and this function is main (which we know is not recursive), we replace
@@ -1971,7 +1773,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
- if (!GS.isLoaded) {
+ if (!GS.IsLoaded) {
DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
bool Changed;
@@ -1992,7 +1794,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
}
return Changed;
- } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
+ } else if (GS.StoredType <= GlobalStatus::InitializerStored) {
DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
GV->setConstant(true);
@@ -2015,7 +1817,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GVI = FirstNewGV; // Don't skip the newly produced globals!
return true;
}
- } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
+ } else if (GS.StoredType == GlobalStatus::StoredOnce) {
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
@@ -2048,11 +1850,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
- ++NumShrunkToBool;
- return true;
+ if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
+ if (GS.Ordering == NotAtomic) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
+ }
}
+ }
}
return false;
@@ -2210,8 +2015,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
CSVals[1] = 0;
StructType *StructTy =
- cast <StructType>(
- cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
+ cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
// Create the new init list.
std::vector<Constant*> CAList;
@@ -3041,14 +2845,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
return true;
}
-static int compareNames(const void *A, const void *B) {
- const GlobalValue *VA = *reinterpret_cast<GlobalValue* const*>(A);
- const GlobalValue *VB = *reinterpret_cast<GlobalValue* const*>(B);
- if (VA->getName() < VB->getName())
- return -1;
- if (VB->getName() < VA->getName())
- return 1;
- return 0;
+static int compareNames(Constant *const *A, Constant *const *B) {
+ return (*A)->getName().compare((*B)->getName());
}
static void setUsedInitializer(GlobalVariable &V,
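
The WeakVH worklist change above guards against entries dying while queued: destroying a dead constant array also destroys its elements, which may still be sitting in the worklist as raw pointers. A standalone analogy using std::weak_ptr in place of llvm::WeakVH; illustrative only, the ownership model of LLVM constants is not shared_ptr-based:

#include <cassert>
#include <memory>
#include <vector>

struct User { int id; };

int main() {
  auto A = std::make_shared<User>(User{1});
  auto B = std::make_shared<User>(User{2});

  std::vector<std::weak_ptr<User>> WorkList{A, B};

  // Processing one entry happens to destroy another queued entry (as
  // destroying a dead constant array destroys its elements).
  B.reset();

  int Processed = 0;
  while (!WorkList.empty()) {
    std::shared_ptr<User> U = WorkList.back().lock();
    WorkList.pop_back();
    if (!U)
      continue;                 // entry died while queued: skip, don't touch
    ++Processed;
  }
  assert(Processed == 1);       // only the surviving entry was processed
}
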
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index a0095da..437597e 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -63,7 +63,7 @@ public:
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index a4f7026..57379a3 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
namespace {
-/// \brief Actaul inliner pass implementation.
+/// \brief Actual inliner pass implementation.
///
/// The common implementation of the inlining logic is shared between this
/// inliner pass and the always inliner pass. The two passes use different cost
@@ -61,7 +61,7 @@ public:
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index d56a06f..64e2ced 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -11,6 +11,12 @@
// If the function or variable is not in the list of external names given to
// the pass it is marked as internal.
//
+// This transformation would not be legal in a regular compilation, but it gets
+// extra information from the linker about what is safe.
+//
+// For example: internalizing a function with external linkage is safe only
+// if the linker tells us it is used solely from within this module.
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "internalize"
@@ -23,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <fstream>
#include <set>
@@ -50,10 +57,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit InternalizePass();
- explicit InternalizePass(ArrayRef<const char *> exportList);
+ explicit InternalizePass(ArrayRef<const char *> ExportList);
void LoadFile(const char *Filename);
- void ClearExportList();
- void AddToExportList(const std::string &val);
virtual bool runOnModule(Module &M);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -72,15 +77,14 @@ InternalizePass::InternalizePass()
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
- if (!APIList.empty()) // If a list is specified, use it as well.
- ExternalNames.insert(APIList.begin(), APIList.end());
+ ExternalNames.insert(APIList.begin(), APIList.end());
}
-InternalizePass::InternalizePass(ArrayRef<const char *> exportList)
+InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
: ModulePass(ID){
initializeInternalizePassPass(*PassRegistry::getPassRegistry());
- for(ArrayRef<const char *>::const_iterator itr = exportList.begin();
- itr != exportList.end(); itr++) {
+ for(ArrayRef<const char *>::const_iterator itr = ExportList.begin();
+ itr != ExportList.end(); itr++) {
ExternalNames.insert(*itr);
}
}
@@ -101,12 +105,25 @@ void InternalizePass::LoadFile(const char *Filename) {
}
}
-void InternalizePass::ClearExportList() {
- ExternalNames.clear();
-}
+static bool shouldInternalize(const GlobalValue &GV,
+ const std::set<std::string> &ExternalNames) {
+ // Function must be defined here
+ if (GV.isDeclaration())
+ return false;
+
+ // Available externally is really just a "declaration with a body".
+ if (GV.hasAvailableExternallyLinkage())
+ return false;
-void InternalizePass::AddToExportList(const std::string &val) {
- ExternalNames.insert(val);
+ // Already has internal linkage
+ if (GV.hasLocalLinkage())
+ return false;
+
+ // Marked to keep external?
+ if (ExternalNames.count(GV.getName()))
+ return false;
+
+ return true;
}
bool InternalizePass::runOnModule(Module &M) {
@@ -114,11 +131,6 @@ bool InternalizePass::runOnModule(Module &M) {
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
bool Changed = false;
- // Never internalize functions which code-gen might insert.
- // FIXME: We should probably add this (and the __stack_chk_guard) via some
- // type of call-back in CodeGen.
- ExternalNames.insert("__stack_chk_fail");
-
SmallPtrSet<GlobalValue *, 8> Used;
collectUsedGlobalVariables(M, Used, false);
@@ -139,19 +151,20 @@ bool InternalizePass::runOnModule(Module &M) {
// Mark all functions not in the api as internal.
// FIXME: maybe use private linkage?
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration() && // Function must be defined here
- // Available externally is really just a "declaration with a body".
- !I->hasAvailableExternallyLinkage() &&
- !I->hasLocalLinkage() && // Can't already have internal linkage
- !ExternalNames.count(I->getName())) {// Not marked to keep external?
- I->setLinkage(GlobalValue::InternalLinkage);
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (!shouldInternalize(*I, ExternalNames))
+ continue;
+
+ I->setLinkage(GlobalValue::InternalLinkage);
+
+ if (ExternalNode)
// Remove a callgraph edge from the external node to this function.
- if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
- Changed = true;
- ++NumFunctions;
- DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
- }
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
+
+ Changed = true;
+ ++NumFunctions;
+ DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
+ }
// Never internalize the llvm.used symbol. It is used to implement
// attribute((used)).
@@ -166,35 +179,36 @@ bool InternalizePass::runOnModule(Module &M) {
ExternalNames.insert("llvm.global.annotations");
// Never internalize symbols code-gen inserts.
+ // FIXME: We should probably add this (and the __stack_chk_guard) via some
+ // type of call-back in CodeGen.
+ ExternalNames.insert("__stack_chk_fail");
ExternalNames.insert("__stack_chk_guard");
// Mark all global variables with initializers that are not in the api as
// internal as well.
// FIXME: maybe use private linkage?
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!I->isDeclaration() && !I->hasLocalLinkage() &&
- // Available externally is really just a "declaration with a body".
- !I->hasAvailableExternallyLinkage() &&
- !ExternalNames.count(I->getName())) {
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
- ++NumGlobals;
- DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
- }
+ I != E; ++I) {
+ if (!shouldInternalize(*I, ExternalNames))
+ continue;
+
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumGlobals;
+ DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
+ }
// Mark all aliases that are not in the api as internal as well.
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I)
- if (!I->isDeclaration() && !I->hasInternalLinkage() &&
- // Available externally is really just a "declaration with a body".
- !I->hasAvailableExternallyLinkage() &&
- !ExternalNames.count(I->getName())) {
- I->setLinkage(GlobalValue::InternalLinkage);
- Changed = true;
- ++NumAliases;
- DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
- }
+ I != E; ++I) {
+ if (!shouldInternalize(*I, ExternalNames))
+ continue;
+
+ I->setLinkage(GlobalValue::InternalLinkage);
+ Changed = true;
+ ++NumAliases;
+ DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
+ }
return Changed;
}
@@ -203,6 +217,6 @@ ModulePass *llvm::createInternalizePass() {
return new InternalizePass();
}
-ModulePass *llvm::createInternalizePass(ArrayRef<const char *> el) {
- return new InternalizePass(el);
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
+ return new InternalizePass(ExportList);
}
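
A quick standalone check of the shouldInternalize() rules factored out above, with a plain struct standing in for GlobalValue; illustrative only, not LLVM API:

#include <cassert>
#include <set>
#include <string>

struct GV {
  std::string Name;
  bool IsDeclaration;
  bool AvailableExternally;
  bool LocalLinkage;
};

bool shouldInternalize(const GV &G, const std::set<std::string> &Ext) {
  if (G.IsDeclaration) return false;          // must be defined here
  if (G.AvailableExternally) return false;    // declaration with a body
  if (G.LocalLinkage) return false;           // already internal
  if (Ext.count(G.Name)) return false;        // explicitly kept external
  return true;
}

int main() {
  std::set<std::string> Ext = {"main", "__stack_chk_fail"};
  assert( shouldInternalize({"helper", false, false, false}, Ext));
  assert(!shouldInternalize({"main",   false, false, false}, Ext));
  assert(!shouldInternalize({"decl",   true,  false, false}, Ext));
}
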
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 4ce749c..3861421 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -210,16 +210,20 @@ private:
// Any two pointers in the same address space are equivalent, intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
+
+ PointerType *PTy1 = dyn_cast<PointerType>(Ty1);
+ PointerType *PTy2 = dyn_cast<PointerType>(Ty2);
+
+ if (TD) {
+ if (PTy1 && PTy1->getAddressSpace() == 0) Ty1 = TD->getIntPtrType(Ty1);
+ if (PTy2 && PTy2->getAddressSpace() == 0) Ty2 = TD->getIntPtrType(Ty2);
+ }
+
if (Ty1 == Ty2)
return true;
- if (Ty1->getTypeID() != Ty2->getTypeID()) {
- if (TD) {
- LLVMContext &Ctx = Ty1->getContext();
- if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true;
- if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true;
- }
+
+ if (Ty1->getTypeID() != Ty2->getTypeID())
return false;
- }
switch (Ty1->getTypeID()) {
default:
@@ -241,8 +245,7 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
return true;
case Type::PointerTyID: {
- PointerType *PTy1 = cast<PointerType>(Ty1);
- PointerType *PTy2 = cast<PointerType>(Ty2);
+ assert(PTy1 && PTy2 && "Both types must be pointers here.");
return PTy1->getAddressSpace() == PTy2->getAddressSpace();
}
@@ -352,14 +355,19 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
// Determine whether two GEP operations perform the same underlying arithmetic.
bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
const GEPOperator *GEP2) {
- // When we have target data, we can reduce the GEP down to the value in bytes
- // added to the address.
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 1;
- APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
- if (TD &&
- GEP1->accumulateConstantOffset(*TD, Offset1) &&
- GEP2->accumulateConstantOffset(*TD, Offset2)) {
- return Offset1 == Offset2;
+ unsigned AS = GEP1->getPointerAddressSpace();
+ if (AS != GEP2->getPointerAddressSpace())
+ return false;
+
+ if (TD) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 1;
+ APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
+ if (GEP1->accumulateConstantOffset(*TD, Offset1) &&
+ GEP2->accumulateConstantOffset(*TD, Offset2)) {
+ return Offset1 == Offset2;
+ }
}
if (GEP1->getPointerOperand()->getType() !=
@@ -713,6 +721,19 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
writeThunk(F, G);
}
+// Helper for writeThunk.
+// Selects the proper cast operation,
+// a bit simpler than CastInst::getCastOpcode.
+static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+ Type *SrcTy = V->getType();
+ if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+ return Builder.CreateIntToPtr(V, DestTy);
+ else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+ return Builder.CreatePtrToInt(V, DestTy);
+ else
+ return Builder.CreateBitCast(V, DestTy);
+}
+
// Replace G with a simple tail call to bitcast(F). Also replace direct uses
// of G with bitcast(F). Deletes G.
void MergeFunctions::writeThunk(Function *F, Function *G) {
@@ -738,7 +759,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
FunctionType *FFTy = F->getFunctionType();
for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
AI != AE; ++AI) {
- Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
+ Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i)));
++i;
}
@@ -748,13 +769,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
if (NewG->getReturnType()->isVoidTy()) {
Builder.CreateRetVoid();
} else {
- Type *RetTy = NewG->getReturnType();
- if (CI->getType()->isIntegerTy() && RetTy->isPointerTy())
- Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy));
- else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy())
- Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy));
- else
- Builder.CreateRet(Builder.CreateBitCast(CI, RetTy));
+ Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
}
NewG->copyAttributesFrom(G);
@@ -829,6 +844,18 @@ bool MergeFunctions::insert(ComparableFunction &NewF) {
const ComparableFunction &OldF = *Result.first;
+ // Don't merge tiny functions, since it can just end up making the function
+ // larger.
+ // FIXME: Should still merge them if they are unnamed_addr and produce an
+ // alias.
+ if (NewF.getFunc()->size() == 1) {
+ if (NewF.getFunc()->front().size() <= 2) {
+ DEBUG(dbgs() << NewF.getFunc()->getName()
+ << " is to small to bother merging\n");
+ return false;
+ }
+ }
+
// Never thunk a strong function to a weak function.
assert(!OldF.getFunc()->mayBeOverridden() ||
NewF.getFunc()->mayBeOverridden());
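
createCast() exists because merged functions may differ only in pointer-vs-intptr_t types, where a plain bitcast is invalid IR. A standalone model of its decision; the enums and function name are stand-ins for the IRBuilder calls shown in the diff:

#include <cassert>

enum Kind { Int, Ptr, Other };
enum Op { IntToPtr, PtrToInt, BitCast };

Op pickCast(Kind Src, Kind Dst) {
  if (Src == Int && Dst == Ptr) return IntToPtr;
  if (Src == Ptr && Dst == Int) return PtrToInt;
  return BitCast;                  // same kind, or vector/float/etc.
}

int main() {
  assert(pickCast(Int, Ptr) == IntToPtr);   // thunk arg: intptr_t -> T*
  assert(pickCast(Ptr, Int) == PtrToInt);   // thunk ret: T* -> intptr_t
  assert(pickCast(Ptr, Ptr) == BitCast);    // T* -> U*: bitcast suffices
}
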
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index a6b3f4e..24c5018 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -29,20 +29,20 @@
using namespace llvm;
static cl::opt<bool>
-RunLoopVectorization("vectorize-loops",
+RunLoopVectorization("vectorize-loops", cl::Hidden,
cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
-LateVectorization("late-vectorize", cl::init(false), cl::Hidden,
+LateVectorization("late-vectorize", cl::init(true), cl::Hidden,
cl::desc("Run the vectorization pasess late in the pass "
"pipeline (after the inliner)"));
static cl::opt<bool>
-RunSLPVectorization("vectorize-slp",
+RunSLPVectorization("vectorize-slp", cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
static cl::opt<bool>
-RunBBVectorization("vectorize-slp-aggressive",
+RunBBVectorization("vectorize-slp-aggressive", cl::Hidden,
cl::desc("Run the BB vectorization passes"));
static cl::opt<bool>
@@ -54,6 +54,10 @@ static cl::opt<bool> UseNewSROA("use-new-sroa",
cl::init(true), cl::Hidden,
cl::desc("Enable the new, experimental SROA pass"));
+static cl::opt<bool>
+RunLoopRerolling("reroll-loops", cl::Hidden,
+ cl::desc("Run the loop rerolling pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -65,6 +69,7 @@ PassManagerBuilder::PassManagerBuilder() {
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
LateVectorize = LateVectorization;
+ RerollLoops = RunLoopRerolling;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -195,8 +200,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
- if (!LateVectorize && LoopVectorize && OptLevel > 1 && SizeLevel < 2)
- MPM.add(createLoopVectorizePass());
+ if (!LateVectorize && LoopVectorize)
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops));
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
@@ -216,22 +221,22 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
- if (!LateVectorize) {
- if (SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- MPM.add(createInstructionCombiningPass());
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(createGVNPass()); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
- }
+ if (RerollLoops)
+ MPM.add(createLoopRerollPass());
+ if (SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ if (BBVectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1 && UseGVNAfterVectorization)
+ MPM.add(createGVNPass()); // Remove redundancies
+ else
+ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
}
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
@@ -241,7 +246,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// As an experimental mode, run any vectorization passes in a separate
// pipeline from the CGSCC pass manager that runs iteratively with the
// inliner.
- if (LateVectorize) {
+ if (LateVectorize && LoopVectorize) {
// FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
// pass manager that we are specifically trying to avoid. To prevent this
// we must insert a no-op module pass to reset the pass manager.
@@ -249,35 +254,9 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// Add the various vectorization passes and relevant cleanup passes for
// them since we are no longer in the middle of the main scalar pipeline.
- if (LoopVectorize && OptLevel > 1 && SizeLevel < 2) {
- MPM.add(createLoopVectorizePass());
-
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass()); // Unroll small loops
-
- // FIXME: Is this necessary/useful? Should we also do SimplifyCFG?
- MPM.add(createInstructionCombiningPass());
- }
-
- if (SLPVectorize) {
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
-
- // FIXME: Is this necessary/useful? Should we also do SimplifyCFG?
- MPM.add(createInstructionCombiningPass());
- }
-
- if (BBVectorize) {
- MPM.add(createBBVectorizePass());
- MPM.add(createInstructionCombiningPass());
- if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(createGVNPass()); // Remove redundancies
- else
- MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-
- // BBVectorize may have significantly shortened a loop body; unroll again.
- if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
- }
+ MPM.add(createLoopVectorizePass(DisableUnrollLoops));
+ MPM.add(createInstructionCombiningPass());
+ MPM.add(createCFGSimplificationPass());
}
if (!DisableUnitAtATime) {
@@ -304,11 +283,8 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Now that composite has been compiled, scan through the module, looking
// for a main function. If main is defined, mark all other functions
// internal.
- if (Internalize) {
- std::vector<const char*> E;
- E.push_back("main");
- PM.add(createInternalizePass(E));
- }
+ if (Internalize)
+ PM.add(createInternalizePass("main"));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
@@ -349,6 +325,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// The IPO passes may leave cruft around. Clean up after them.
PM.add(createInstructionCombiningPass());
PM.add(createJumpThreadingPass());
+
// Break up allocas
if (UseNewSROA)
PM.add(createSROAPass());
@@ -362,6 +339,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createLICMPass()); // Hoist loop invariants.
PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
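
For context, the knobs this patch touches are the public PassManagerBuilder fields. A minimal usage sketch against the 3.4-era headers; pipeline setup only, with module creation and error handling omitted:

#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

void buildPipeline(PassManager &PM) {
  PassManagerBuilder PMB;
  PMB.OptLevel = 2;           // -O2
  PMB.SLPVectorize = true;    // vectorize parallel scalar chains
  PMB.LoopVectorize = true;   // now gated only by this flag, not OptLevel
  PMB.RerollLoops = true;     // new in this patch: runs createLoopRerollPass()
  PMB.populateModulePassManager(PM);
}
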
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 89529de..b160913 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -51,7 +51,7 @@ namespace {
char PruneEH::ID = 0;
INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
"Remove unused exception handling info", false, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_END(PruneEH, "prune-eh",
"Remove unused exception handling info", false, false)
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 2791106..c4f5cfc 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -9,7 +9,7 @@
//
// The StripSymbols transformation implements code stripping. Specifically, it
// can delete:
-//
+//
// * names for virtual registers
// * symbols for internal globals and functions
// * debug information
@@ -39,7 +39,7 @@ namespace {
bool OnlyDebugInfo;
public:
static char ID; // Pass identification, replacement for typeid
- explicit StripSymbols(bool ODI = false)
+ explicit StripSymbols(bool ODI = false)
: ModulePass(ID), OnlyDebugInfo(ODI) {
initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
}
@@ -144,7 +144,7 @@ static void RemoveDeadConstant(Constant *C) {
assert(C->use_empty() && "Constant is not dead!");
SmallPtrSet<Constant*, 4> Operands;
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
- if (OnlyUsedBy(C->getOperand(i), C))
+ if (OnlyUsedBy(C->getOperand(i), C))
Operands.insert(cast<Constant>(C->getOperand(i)));
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
@@ -182,7 +182,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *STy = StructTypes[i];
if (STy->isLiteral() || STy->getName().empty()) continue;
-
+
if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
continue;
@@ -199,7 +199,7 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
- if (GlobalValue *GV =
+ if (GlobalValue *GV =
dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
UsedValues.insert(GV);
}
@@ -217,71 +217,20 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
}
-
+
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
I->setName(""); // Internal symbols can't participate in linkage
StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
}
-
+
// Remove all names from types.
StripTypeNames(M, PreserveDbgInfo);
return true;
}
-// StripDebugInfo - Strip debug info in the module if it exists.
-// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
-// llvm.dbg.region.end calls, and any globals they point to if now dead.
-static bool StripDebugInfo(Module &M) {
-
- bool Changed = false;
-
- // Remove all of the calls to the debugger intrinsics, and remove them from
- // the module.
- if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
- while (!Declare->use_empty()) {
- CallInst *CI = cast<CallInst>(Declare->use_back());
- CI->eraseFromParent();
- }
- Declare->eraseFromParent();
- Changed = true;
- }
-
- if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
- while (!DbgVal->use_empty()) {
- CallInst *CI = cast<CallInst>(DbgVal->use_back());
- CI->eraseFromParent();
- }
- DbgVal->eraseFromParent();
- Changed = true;
- }
-
- for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
- NME = M.named_metadata_end(); NMI != NME;) {
- NamedMDNode *NMD = NMI;
- ++NMI;
- if (NMD->getName().startswith("llvm.dbg.")) {
- NMD->eraseFromParent();
- Changed = true;
- }
- }
-
- for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
- for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
- ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
- ++BI) {
- if (!BI->getDebugLoc().isUnknown()) {
- Changed = true;
- BI->setDebugLoc(DebugLoc());
- }
- }
-
- return Changed;
-}
-
bool StripSymbols::runOnModule(Module &M) {
bool Changed = false;
Changed |= StripDebugInfo(M);
@@ -307,13 +256,13 @@ bool StripDebugDeclare::runOnModule(Module &M) {
assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
if (Arg1->use_empty()) {
- if (Constant *C = dyn_cast<Constant>(Arg1))
+ if (Constant *C = dyn_cast<Constant>(Arg1))
DeadConstants.push_back(C);
- else
+ else
RecursivelyDeleteTriviallyDeadInstructions(Arg1);
}
if (Arg2->use_empty())
- if (Constant *C = dyn_cast<Constant>(Arg2))
+ if (Constant *C = dyn_cast<Constant>(Arg2))
DeadConstants.push_back(C);
}
Declare->eraseFromParent();
@@ -332,76 +281,107 @@ bool StripDebugDeclare::runOnModule(Module &M) {
return true;
}
+/// Remove any debug info for global variables/functions in the given module for
+/// which said global variable/function no longer exists (i.e. is null).
+///
+/// Debugging information is encoded in LLVM IR using metadata. This is
+/// designed in such a way that debug info for symbols is preserved even if
+/// the symbols are optimized away by the optimizer. This special pass
+/// removes the debug info for such symbols.
bool StripDeadDebugInfo::runOnModule(Module &M) {
bool Changed = false;
- // Debugging infomration is encoded in llvm IR using metadata. This is designed
- // such a way that debug info for symbols preserved even if symbols are
- // optimized away by the optimizer. This special pass removes debug info for
- // such symbols.
-
- // llvm.dbg.gv keeps track of debug info for global variables.
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
- SmallVector<MDNode *, 8> MDs;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- if (NMD->getOperand(i)) {
- assert(DIGlobalVariable(NMD->getOperand(i)).isGlobalVariable() &&
- "A MDNode in llvm.dbg.gv should be a DIGlobalVariable.");
- MDs.push_back(NMD->getOperand(i));
- }
- else
- Changed = true;
- NMD->eraseFromParent();
- NMD = NULL;
-
- for (SmallVectorImpl<MDNode *>::iterator I = MDs.begin(),
- E = MDs.end(); I != E; ++I) {
- GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
- if (GV && M.getGlobalVariable(GV->getName(), true)) {
- if (!NMD)
- NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
- NMD->addOperand(*I);
- }
+ LLVMContext &C = M.getContext();
+
+  // Find all debug info in the module using a DebugInfoFinder. This is
+  // actually overkill in terms of what we want to do, but we want to be as
+  // resilient as possible in the face of potential debug info changes by
+  // using the formal interfaces given to us as much as possible.
+ DebugInfoFinder F;
+ F.processModule(M);
+
+ // For each compile unit, find the live set of global variables/functions and
+ // replace the current list of potentially dead global variables/functions
+ // with the live list.
+ SmallVector<Value *, 64> LiveGlobalVariables;
+ SmallVector<Value *, 64> LiveSubprograms;
+ DenseSet<const MDNode *> VisitedSet;
+
+ for (DebugInfoFinder::iterator CI = F.compile_unit_begin(),
+ CE = F.compile_unit_end(); CI != CE; ++CI) {
+ // Create our compile unit.
+ DICompileUnit DIC(*CI);
+ assert(DIC.Verify() && "DIC must verify as a DICompileUnit.");
+
+ // Create our live subprogram list.
+ DIArray SPs = DIC.getSubprograms();
+ bool SubprogramChange = false;
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram DISP(SPs.getElement(i));
+ assert(DISP.Verify() && "DISP must verify as a DISubprogram.");
+
+ // Make sure we visit each subprogram only once.
+ if (!VisitedSet.insert(DISP).second)
+ continue;
+
+ // If the function referenced by DISP is not null, the function is live.
+ if (DISP.getFunction())
+ LiveSubprograms.push_back(DISP);
else
- Changed = true;
+ SubprogramChange = true;
}
- }
- // llvm.dbg.sp keeps track of debug info for subprograms.
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
- SmallVector<MDNode *, 8> MDs;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- if (NMD->getOperand(i)) {
- assert(DISubprogram(NMD->getOperand(i)).isSubprogram() &&
- "A MDNode in llvm.dbg.sp should be a DISubprogram.");
- MDs.push_back(NMD->getOperand(i));
- }
+ // Create our live global variable list.
+ DIArray GVs = DIC.getGlobalVariables();
+ bool GlobalVariableChange = false;
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(GVs.getElement(i));
+ assert(DIG.Verify() && "DIG must verify as DIGlobalVariable.");
+
+      // Make sure we visit each global variable only once.
+ if (!VisitedSet.insert(DIG).second)
+ continue;
+
+ // If the global variable referenced by DIG is not null, the global
+ // variable is live.
+ if (DIG.getGlobal())
+ LiveGlobalVariables.push_back(DIG);
else
- Changed = true;
- NMD->eraseFromParent();
- NMD = NULL;
-
- for (SmallVectorImpl<MDNode *>::iterator I = MDs.begin(),
- E = MDs.end(); I != E; ++I) {
- bool FnIsLive = false;
- if (Function *F = DISubprogram(*I).getFunction())
- if (M.getFunction(F->getName()))
- FnIsLive = true;
- if (FnIsLive) {
- if (!NMD)
- NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
- NMD->addOperand(*I);
- } else {
- // Remove llvm.dbg.lv.fnname named mdnode which may have been used
- // to hold debug info for dead function's local variables.
- StringRef FName = DISubprogram(*I).getLinkageName();
- if (FName.empty())
- FName = DISubprogram(*I).getName();
- if (NamedMDNode *LVNMD = M.getNamedMetadata(
- "llvm.dbg.lv." + Function::getRealLinkageName(FName)))
- LVNMD->eraseFromParent();
- }
+ GlobalVariableChange = true;
+ }
+
+ // If we found dead subprograms or global variables, replace the current
+ // subprogram list/global variable list with our new live subprogram/global
+ // variable list.
+ if (SubprogramChange) {
+      // Make sure that 9 is still the index of the subprograms; the assert
+      // fires if the location of the subprogram array ever changes, so that
+      // this code gets updated if such an event occurs.
+ assert(DIC->getNumOperands() >= 10 &&
+ SPs == DIC->getOperand(9) &&
+ "DICompileUnits is expected to store Subprograms in operand "
+ "9.");
+ DIC->replaceOperandWith(9, MDNode::get(C, LiveSubprograms));
+ Changed = true;
}
+
+ if (GlobalVariableChange) {
+      // Make sure that 10 is still the index of the global variables; the
+      // assert fires if the location of the global variable array ever
+      // changes, so that this code gets updated if such an event occurs.
+ assert(DIC->getNumOperands() >= 11 &&
+ GVs == DIC->getOperand(10) &&
+ "DICompileUnits is expected to store Global Variables in operand "
+ "10.");
+ DIC->replaceOperandWith(10, MDNode::get(C, LiveGlobalVariables));
+ Changed = true;
+ }
+
+ // Reset lists for the next iteration.
+ LiveSubprograms.clear();
+ LiveGlobalVariables.clear();
}
return Changed;
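The rewritten pass follows a prune-to-live-list pattern: collect the entries that still reference a live IR object, and rewrite the list operand only when something was actually dropped. A minimal standalone sketch of that pattern in plain C++ (the Entry type and Live flag are hypothetical stand-ins, not the LLVM metadata API):

    #include <cassert>
    #include <vector>

    struct Entry { bool Live; };  // stand-in for a DISubprogram/DIGlobalVariable

    // Returns true if the list changed, mirroring the pass's Changed flag.
    static bool pruneToLiveList(std::vector<Entry*> &List) {
      std::vector<Entry*> LiveList;
      LiveList.reserve(List.size());
      for (size_t i = 0; i != List.size(); ++i)
        if (List[i] && List[i]->Live)     // drop null or dead references
          LiveList.push_back(List[i]);
      if (LiveList.size() == List.size())
        return false;                     // nothing died; keep the original list
      List.swap(LiveList);                // replace the operand with the live list
      return true;
    }

    int main() {
      Entry A = {true}, B = {false};
      std::vector<Entry*> L;
      L.push_back(&A); L.push_back(&B); L.push_back(0);
      assert(pruneToLiveList(L) && L.size() == 1 && L[0] == &A);
      return 0;
    }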
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index b3084cc..a5eddc2 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -158,8 +158,8 @@ public:
ConstantInt *DivRHS);
Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
- Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred, Value *TheAdd);
+ Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
@@ -178,6 +178,7 @@ public:
Instruction *visitPtrToInt(PtrToIntInst &CI);
Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
+ Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI);
Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
@@ -212,8 +213,8 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
- Type *FindElementAtOffset(Type *Ty, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices);
+ Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
@@ -271,7 +272,7 @@ public:
if (&I == V)
V = UndefValue::get(I.getType());
- DEBUG(errs() << "IC: Replacing " << I << "\n"
+ DEBUG(dbgs() << "IC: Replacing " << I << "\n"
" with " << *V << '\n');
I.replaceAllUsesWith(V);
@@ -283,7 +284,7 @@ public:
// instruction. Instead, visit methods should return the value returned by
// this function.
Instruction *EraseInstFromFunction(Instruction &I) {
- DEBUG(errs() << "IC: ERASE " << I << '\n');
+ DEBUG(dbgs() << "IC: ERASE " << I << '\n');
assert(I.use_empty() && "Cannot erase instruction that is used!");
// Make sure that we reprocess all operands now that we reduced their
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b474bd8..88bb69b 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -488,6 +488,26 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
return result;
}
+/// Convert an analysis of a masked ICmp into its equivalent if all boolean
+/// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
+/// is adjacent to the corresponding normal flag (recording ==), this just
+/// involves swapping those bits over.
+static unsigned conjugateICmpMask(unsigned Mask) {
+ unsigned NewMask;
+ NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes |
+ FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed |
+ FoldMskICmp_BMask_Mixed))
+ << 1;
+
+ NewMask |=
+ (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes |
+ FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed |
+ FoldMskICmp_BMask_NotMixed))
+ >> 1;
+
+ return NewMask;
+}
+
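conjugateICmpMask works only because of the flag layout: each "Not" flag (recording !=) sits exactly one bit above its positive (==) counterpart, so negating every comparison is a swap of adjacent bit pairs, and is its own inverse. A standalone sketch with a hypothetical two-pair layout (the real FoldMskICmp_* values are not reproduced here):

    #include <cassert>

    // Hypothetical layout: positive flags on even bits, "Not" flags just above.
    enum {
      AMask_AllOnes  = 1 << 0, AMask_NotAllOnes  = 1 << 1,
      Mask_AllZeroes = 1 << 2, Mask_NotAllZeroes = 1 << 3
    };

    static unsigned conjugate(unsigned Mask) {
      unsigned Positive = AMask_AllOnes | Mask_AllZeroes;  // all positive flags
      return ((Mask & Positive) << 1) | ((Mask & (Positive << 1)) >> 1);
    }

    int main() {
      unsigned M = AMask_AllOnes | Mask_NotAllZeroes;
      assert(conjugate(M) == (AMask_NotAllOnes | Mask_AllZeroes));
      assert(conjugate(conjugate(M)) == M);  // conjugation is an involution
      return 0;
    }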
/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
/// if possible. The returned predicate is either == or !=. Returns false if
/// decomposition fails.
@@ -548,14 +568,22 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
L21 = L22 = L1 = 0;
} else {
// Look for ANDs in the LHS icmp.
- if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
- if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
- L21 = L22 = 0;
- } else {
- if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
- return 0;
- std::swap(L1, L2);
+ if (!L1->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
+ L11 = L12 = 0;
+ } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ // Any icmp can be viewed as being trivially masked; if it allows us to
+ // remove one, it's worth it.
+ L11 = L1;
+ L12 = Constant::getAllOnesValue(L1->getType());
+ }
+
+ if (!L2->getType()->isIntegerTy()) {
+ // You can icmp pointers, for example. They really aren't masks.
L21 = L22 = 0;
+ } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
+ L21 = L2;
+ L22 = Constant::getAllOnesValue(L2->getType());
}
}
@@ -576,7 +604,14 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
return 0;
}
E = R2; R1 = 0; ok = true;
- } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ } else if (R1->getType()->isIntegerTy()) {
+ if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ // As before, model no mask as a trivial mask if it'll let us do an
+ // optimisation.
+ R11 = R1;
+ R12 = Constant::getAllOnesValue(R1->getType());
+ }
+
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
A = R11; D = R12; E = R2; ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
@@ -589,7 +624,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
return 0;
  // Look for ANDs on the right side of the RHS icmp.
- if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ if (!ok && R2->getType()->isIntegerTy()) {
+ if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
+ R11 = R2;
+ R12 = Constant::getAllOnesValue(R2->getType());
+ }
+
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
A = R11; D = R12; E = R1; ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
@@ -618,8 +658,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
/// foldLogOpOfMaskedICmps:
/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y)
-static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
- ICmpInst::Predicate NEWCC,
+static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy* Builder) {
Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -629,8 +668,24 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
"foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
- if (NEWCC == ICmpInst::ICMP_NE)
- mask >>= 1; // treat "Not"-states as normal states
+ // In full generality:
+ // (icmp (A & B) Op C) | (icmp (A & D) Op E)
+ // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
+ //
+ // If the latter can be converted into (icmp (A & X) Op Y) then the former is
+ // equivalent to (icmp (A & X) !Op Y).
+ //
+ // Therefore, we can pretend for the rest of this function that we're dealing
+ // with the conjunction, provided we flip the sense of any comparisons (both
+ // input and output).
+
+ // In most cases we're going to produce an EQ for the "&&" case.
+ ICmpInst::Predicate NEWCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+ if (!IsAnd) {
+ // Convert the masking analysis into its equivalent with negated
+ // comparisons.
+ mask = conjugateICmpMask(mask);
+ }
if (mask & FoldMskICmp_Mask_AllZeroes) {
// (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
@@ -657,6 +712,40 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
Value* newAnd = Builder->CreateAnd(A, newAnd1);
return Builder->CreateICmp(NEWCC, newAnd, A);
}
+
+ // Remaining cases assume at least that B and D are constant, and depend on
+  // their actual values. This isn't strictly necessary, just a "handle the
+ // easy cases for now" decision.
+ ConstantInt *BCst = dyn_cast<ConstantInt>(B);
+ if (BCst == 0) return 0;
+ ConstantInt *DCst = dyn_cast<ConstantInt>(D);
+ if (DCst == 0) return 0;
+
+ if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) {
+ // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
+ // Only valid if one of the masks is a superset of the other (check "B&D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() & DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
+ if (mask & FoldMskICmp_AMask_NotAllOnes) {
+ // (icmp ne (A & B), B) & (icmp ne (A & D), D)
+ // -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
+ // Only valid if one of the masks is a superset of the other (check "B|D" is
+ // the same as either B or D).
+ APInt NewMask = BCst->getValue() | DCst->getValue();
+
+ if (NewMask == BCst->getValue())
+ return LHS;
+ else if (NewMask == DCst->getValue())
+ return RHS;
+ }
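The superset requirement in the two folds above is what makes them sound: when B's bits are contained in D's (B & D == B), (A & B) != 0 already implies (A & D) != 0, so the conjunction collapses to the LHS compare. A brute-force check of that case over all 8-bit values, with arbitrary sample masks:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t B = 0x03, D = 0x0F;  // B & D == B: B is a subset of D
      assert((B & D) == B);
      for (unsigned A = 0; A != 256; ++A) {
        bool Both = ((A & B) != 0) && ((A & D) != 0);
        bool LHS  = (A & B) != 0;        // the fold keeps only this compare
        assert(Both == LHS);
      }
      return 0;
    }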
if (mask & FoldMskICmp_BMask_Mixed) {
// (icmp eq (A & B), C) & (icmp eq (A & D), E)
// We already know that B & C == C && D & E == E.
@@ -665,14 +754,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
// contradict, then we can transform to
// -> (icmp eq (A & (B|D)), (C|E))
// Currently, we only handle the case of B, C, D, and E being constant.
- ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- if (BCst == 0) return 0;
- ConstantInt *DCst = dyn_cast<ConstantInt>(D);
- if (DCst == 0) return 0;
// we can't simply use C and E, because we might actually handle
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D, having a single bit set
-
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
if (CCst == 0) return 0;
if (LHSCC != NEWCC)
@@ -715,7 +799,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
// handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder))
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder))
return V;
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
@@ -849,10 +933,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
return RHS;
case ICmpInst::ICMP_NE:
+ // Special case to get the ordering right when the values wrap around
+ // zero.
+ if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue())
+ std::swap(LHSCst, RHSCst);
if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1));
+ return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1),
+ Val->getName()+".cmp");
}
break; // (X != 13 & X != 15) -> no change
}
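The subtract-and-unsigned-compare trick collapses two inequality tests against consecutive constants into one range test, and the swap added above lets it fire for the pair {0, -1} as well, where the smaller constant wraps around. An exhaustive 8-bit check of both cases:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned X = 0; X != 256; ++X) {
        // (X != 13 && X != 14)  ->  (X - 13) u> 1
        assert(((X != 13) && (X != 14)) == (uint8_t(X - 13) > 1));
        // Wrapping pair: after the swap, LHSCst = 255 = SubOne(0),
        // so (X != 0 && X != 255)  ->  (X + 1) u> 1.
        assert(((X != 0) && (X != 255)) == (uint8_t(X + 1) > 1));
      }
      return 0;
    }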
@@ -1454,10 +1543,60 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return 0;
}
+/// IsOneHotValue - Returns true for "one-hot" values (values where at most
+/// one bit can be set).
+static bool IsOneHotValue(Value *V) {
+ // Match 1<<K.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if (BO->getOpcode() == Instruction::Shl) {
+ ConstantInt *One = dyn_cast<ConstantInt>(BO->getOperand(0));
+ return One && One->isOne();
+ }
+
+ // Check for power of two integer constants.
+ if (ConstantInt *K = dyn_cast<ConstantInt>(V))
+ return K->getValue().isPowerOf2();
+
+ return false;
+}
+
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
+  // if K1 and K2 are one-bit masks.
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+
+ if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() &&
+ RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) {
+
+ BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0));
+ BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0));
+ if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() &&
+ LAnd->getOpcode() == Instruction::And &&
+ RAnd->getOpcode() == Instruction::And) {
+
+ Value *Mask = 0;
+ Value *Masked = 0;
+ if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
+ IsOneHotValue(LAnd->getOperand(1)) &&
+ IsOneHotValue(RAnd->getOperand(1))) {
+ Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
+ Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
+ } else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
+ IsOneHotValue(LAnd->getOperand(0)) &&
+ IsOneHotValue(RAnd->getOperand(0))) {
+ Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
+ Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
+ }
+
+ if (Masked)
+ return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask);
+ }
+ }
+
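The new one-hot fold is easy to validate exhaustively: when K1 and K2 each have at most one bit set, "at least one tested bit is clear" is the same condition as "not all tested bits are set". A brute-force check at 8 bits (the masks are arbitrary samples):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t K1 = 1 << 2, K2 = 1 << 5;  // one-hot masks
      const uint8_t M  = K1 | K2;
      for (unsigned A = 0; A != 256; ++A) {
        bool Or   = ((A & K1) == 0) || ((A & K2) == 0);
        bool Fold = (A & M) != M;              // (A & (K1|K2)) != (K1|K2)
        assert(Or == Fold);
      }
      return 0;
    }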
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (PredicatesFoldable(LHSCC, RHSCC)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
@@ -1474,13 +1613,10 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// handle (roughly):
// (icmp ne (A & B), C) | (icmp ne (A & D), E)
- if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder))
+ if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder))
return V;
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
- ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
-
if (LHS->hasOneUse() || RHS->hasOneUse()) {
// (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
// (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9f74fd6..0cd7b14 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -999,20 +999,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Check to see if we are changing the return type...
if (OldRetTy != NewRetTy) {
- if (Callee->isDeclaration() &&
- // Conversion is ok if changing from one pointer type to another or from
- // a pointer to an integer of the same size.
- !((OldRetTy->isPointerTy() || !TD ||
- OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
- (NewRetTy->isPointerTy() || !TD ||
- NewRetTy == TD->getIntPtrType(Caller->getContext()))))
- return false; // Cannot transform this return value.
+ if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
+ if (Callee->isDeclaration())
+ return false; // Cannot transform this return value.
- if (!Caller->use_empty() &&
- // void -> non-void is handled specially
- !NewRetTy->isVoidTy() &&
- !CastInst::isBitCastable(NewRetTy, OldRetTy))
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ !NewRetTy->isVoidTy())
return false; // Cannot transform this return value.
+ }
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
@@ -1045,9 +1040,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Type *ParamTy = FT->getParamType(i);
Type *ActTy = (*AI)->getType();
- if (!CastInst::isBitCastable(ActTy, ParamTy)) {
+ if (!CastInst::isBitCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
- }
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
hasAttributes(AttributeFuncs::
@@ -1063,21 +1057,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
- Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
+ Type *CurElTy = ActTy->getPointerElementType();
if (TD->getTypeAllocSize(CurElTy) !=
TD->getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
-
- // Converting from one pointer type to another or between a pointer and an
- // integer of the same size is safe even if we do not have a body.
- bool isConvertible = ActTy == ParamTy ||
- (TD && ((ParamTy->isPointerTy() ||
- ParamTy == TD->getIntPtrType(Caller->getContext())) &&
- (ActTy->isPointerTy() ||
- ActTy == TD->getIntPtrType(Caller->getContext()))));
- if (Callee->isDeclaration() && !isConvertible)
- return false;
}
if (Callee->isDeclaration()) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 361acdd..72377dc 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1229,6 +1229,19 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
}
+ // (fptrunc (select cond, R1, Cst)) -->
+ // (select cond, (fptrunc R1), (fptrunc Cst))
+ SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0));
+ if (SI &&
+ (isa<ConstantFP>(SI->getOperand(1)) ||
+ isa<ConstantFP>(SI->getOperand(2)))) {
+ Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1),
+ CI.getType());
+ Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2),
+ CI.getType());
+ return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc);
+ }
+
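Pushing the fptrunc into the select is profitable because the constant arm folds to a narrower constant at compile time, leaving a single runtime truncation. A source-level sketch of the equivalence (for ordinary, non-NaN values; not the IR transform itself):

    #include <cassert>

    static float narrow(bool Cond, double R1) {
      float A = (float)(Cond ? R1 : 2.0);  // fptrunc(select(Cond, R1, 2.0))
      float B = Cond ? (float)R1 : 2.0f;   // select(Cond, fptrunc(R1), 2.0f)
      assert(A == B);
      return B;
    }

    int main() {
      narrow(true, 3.5);
      narrow(false, 3.5);
      return 0;
    }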
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
if (II) {
switch (II->getIntrinsicID()) {
@@ -1249,9 +1262,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // Note that we restrict this transformation based on
+ // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because
+ // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the
+ // single-precision intrinsic can be expanded in the backend.
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
- Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
+ (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) ||
+ Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) &&
Call->getNumArgOperands() == 1 &&
Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
@@ -1262,11 +1280,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Arg->getOperand(0)->getType()->isFloatTy()) {
Function *Callee = Call->getCalledFunction();
Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
- Callee->getAttributes(),
- Builder->getFloatTy(),
- Builder->getFloatTy(),
- NULL);
+ Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ?
+ Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) :
+ M->getOrInsertFunction("sqrtf", Callee->getAttributes(),
+ Builder->getFloatTy(), Builder->getFloatTy(),
+ NULL);
CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
"sqrtfcall");
ret->setAttributes(Callee->getAttributes());
@@ -1338,14 +1356,18 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
- if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
- return new IntToPtrInst(P, CI.getType());
+
+ if (TD) {
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ TD->getPointerSizeInBits(AS)) {
+ Type *Ty = TD->getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
+ }
}
if (Instruction *I = commonCastTransforms(CI))
@@ -1370,25 +1392,32 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return &CI;
}
+ if (!TD)
+ return commonCastTransforms(CI);
+
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
// non-type-safe code.
- APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
- if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
+ unsigned AS = GEP->getPointerAddressSpace();
+ unsigned OffsetBits = TD->getPointerSizeInBits(AS);
+ APInt Offset(OffsetBits, 0);
+ BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
+ if (GEP->hasOneUse() &&
+ BCI &&
GEP->accumulateConstantOffset(*TD, Offset)) {
// Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
- Type *GEPIdxTy =
- cast<PointerType>(OrigBase->getType())->getElementType();
+ Value *OrigBase = BCI->getOperand(0);
SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) {
+ if (FindElementAtOffset(OrigBase->getType(),
+ Offset.getSExtValue(),
+ NewIndices)) {
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
+ Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
+ Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
if (isa<BitCastInst>(CI))
@@ -1406,16 +1435,22 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) {
- Type *Ty = TD->getIntPtrType(CI.getContext());
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
- return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);
- }
+ if (!TD)
+ return commonPointerCastTransforms(CI);
+
+ Type *Ty = CI.getType();
+ unsigned AS = CI.getPointerAddressSpace();
+
+ if (Ty->getScalarSizeInBits() == TD->getPointerSizeInBits(AS))
+ return commonPointerCastTransforms(CI);
- return commonPointerCastTransforms(CI);
+ Type *PtrTy = TD->getIntPtrType(CI.getContext(), AS);
+ if (Ty->isVectorTy()) // Handle vectors of pointers.
+ PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy);
+ return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
/// OptimizeVectorResize - This input value (which is known to have vector type)
@@ -1488,12 +1523,17 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
/// insertions into the vector. See the example in the comment for
/// OptimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
+/// Shift is the number of bits between the lsb of V and the lsb of
+/// the vector.
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
-static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+static bool CollectInsertionElements(Value *V, unsigned Shift,
SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy) {
+ Type *VecEltTy, InstCombiner &IC) {
+ assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
+ "Shift should be a multiple of the element type size");
+
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
@@ -1505,8 +1545,12 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (C->isNullValue())
return true;
+ unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
+ if (IC.getDataLayout()->isBigEndian())
+ ElementIndex = Elements.size() - ElementIndex - 1;
+
// Fail if multiple elements are inserted into this slot.
- if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ if (Elements[ElementIndex] != 0)
return false;
Elements[ElementIndex] = V;
@@ -1522,7 +1566,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
// it to the right type so it gets properly inserted.
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- ElementIndex, Elements, VecEltTy);
+ Shift, Elements, VecEltTy, IC);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
@@ -1533,10 +1577,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned ShiftI = Shift+i*ElementSize;
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
- i*ElementSize));
+ ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
return false;
}
return true;
@@ -1549,29 +1594,28 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy) &&
- CollectInsertionElements(I->getOperand(1), ElementIndex,
- Elements, VecEltTy);
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC) &&
+ CollectInsertionElements(I->getOperand(1), Shift,
+ Elements, VecEltTy, IC);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (CI == 0) return false;
- if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
- unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
- return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
- Elements, VecEltTy);
+ Shift += CI->getZExtValue();
+ if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
+ return CollectInsertionElements(I->getOperand(0), Shift,
+ Elements, VecEltTy, IC);
}
}
@@ -1594,12 +1638,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
+ // We need to know the target byte order to perform this optimization.
+ if (!IC.getDataLayout()) return 0;
+
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType()))
+ DestVecTy->getElementType(), IC))
return 0;
  // If we succeeded, we know that all of the elements are specified by Elements
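Threading the raw bit offset (Shift) instead of a precomputed element index is what lets the element slot be chosen per target: on a little-endian target the byte at bit offset Shift lands in element Shift/EltSize, while a big-endian target uses the mirrored slot. A sketch of that index computation (hypothetical helper mirroring the ElementIndex logic in the hunk above):

    #include <cassert>

    // Map a bit offset within the wide integer to a vector element index.
    static unsigned elementIndex(unsigned Shift, unsigned EltBits,
                                 unsigned NumElts, bool BigEndian) {
      assert(Shift % EltBits == 0 && "Shift must be element-aligned");
      unsigned Index = Shift / EltBits;          // little-endian: LSB-first
      return BigEndian ? NumElts - Index - 1 : Index;
    }

    int main() {
      // A 32-bit integer viewed as <4 x i8>: bits [8,16) are element 1 on
      // little-endian targets and element 2 on big-endian targets.
      assert(elementIndex(8, 8, 4, false) == 1);
      assert(elementIndex(8, 8, 4, true)  == 2);
      return 0;
    }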
@@ -1785,10 +1832,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
if (SVI->hasOneUse() && DestTy->isVectorTy() &&
- cast<VectorType>(DestTy)->getNumElements() ==
- SVI->getType()->getNumElements() &&
+ DestTy->getVectorNumElements() == SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements()) {
+ SVI->getOperand(0)->getType()->getVectorNumElements()) {
BitCastInst *Tmp;
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
@@ -1810,3 +1856,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
}
+
+Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
+ return commonCastTransforms(CI);
+}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c0225ae..9bb65ef 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -227,7 +227,8 @@ Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
// We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && TD == 0) return 0;
+ if (!GEP->isInBounds() && TD == 0)
+ return 0;
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
@@ -393,9 +394,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// If the index is larger than the pointer size of the target, truncate the
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
- if (!GEP->isInBounds() &&
- Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
- Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
+ if (!GEP->isInBounds()) {
+ Type *IntPtrTy = TD->getIntPtrType(GEP->getType());
+ unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
+ if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
+ Idx = Builder->CreateTrunc(Idx, IntPtrTy);
+ }
// If the comparison is only true for one or two elements, emit direct
// comparisons.
@@ -562,16 +566,18 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
}
}
+
+
// Okay, we know we have a single variable index, which must be a
// pointer/array/vector index. If there is no offset, life is simple, return
// the index.
- unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ Type *IntPtrTy = TD.getIntPtrType(GEP->getOperand(0)->getType());
+ unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
if (Offset == 0) {
// Cast to intptrty in case a truncation occurs. If an extension is needed,
// we don't need to bother extending: the extension won't affect where the
// computation crosses zero.
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy);
}
return VariableIdx;
@@ -593,7 +599,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
return 0;
// Okay, we can do this evaluation. Start by converting the index to intptr.
- Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());
if (VariableIdx->getType() != IntPtrTy)
VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy,
true /*Signed*/);
@@ -737,10 +742,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI,
+Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred,
- Value *TheAdd) {
+ ICmpInst::Predicate Pred) {
// If we have X+0, exit early (simplifying logic below) and let it get folded
// elsewhere. icmp X+0, X -> icmp X, X
if (CI->isZero()) {
@@ -1194,11 +1198,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Type *AndTy = AndCST->getType(); // Type of the and.
// We can fold this as long as we can't shift unknown bits
- // into the mask. This can only happen with signed shift
- // rights, as they sign-extend.
+ // into the mask. This can happen with signed shift
+ // rights, as they sign-extend. With logical shifts,
+ // we must still make sure the comparison is not signed
+ // because we are effectively changing the
+ // position of the sign bit (PR17827).
+ // TODO: We can relax these constraints a bit more.
if (ShAmt) {
- bool CanFold = Shift->isLogicalShift();
- if (!CanFold) {
+ bool CanFold = false;
+ unsigned ShiftOpcode = Shift->getOpcode();
+ if (ShiftOpcode == Instruction::AShr) {
// To test for the bad case of the signed shr, see if any
// of the bits shifted in could be tested after the mask.
uint32_t TyBits = Ty->getPrimitiveSizeInBits();
@@ -1208,6 +1217,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
AndCST->getValue()) == 0)
CanFold = true;
+ } else if (ShiftOpcode == Instruction::Shl ||
+ ShiftOpcode == Instruction::LShr) {
+ CanFold = !ICI.isSigned();
}
if (CanFold) {
@@ -1781,8 +1793,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
- TD->getPointerSizeInBits() ==
- cast<IntegerType>(DestTy)->getBitWidth()) {
+ TD->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = 0;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
@@ -2035,14 +2046,59 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
}
+/// \brief Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst
+/// should be swapped.
+/// The decision is based on how many times these two operands are reused
+/// as subtract operands and their positions in those instructions.
+/// The rationale is that several architectures use the same instruction for
+/// both subtract and cmp, so it is better if the order of those operands
+/// matches.
+/// \return true if Op0 and Op1 should be swapped.
+static bool swapMayExposeCSEOpportunities(const Value * Op0,
+ const Value * Op1) {
+  // Filter out pointer values, as those cannot appear directly in a subtract.
+ // FIXME: we may want to go through inttoptrs or bitcasts.
+ if (Op0->getType()->isPointerTy())
+ return false;
+  // Count every use of both Op0 and Op1 in a subtract.
+  // Each time Op0 is the first operand, count -1: swapping is bad, the
+  // subtract already has the same layout as the compare.
+  // Each time Op0 is the second operand, count +1: swapping is good, the
+  // subtract has a different layout from the compare.
+ // At the end, if the benefit is greater than 0, Op0 should come second to
+ // expose more CSE opportunities.
+ int GlobalSwapBenefits = 0;
+ for (Value::const_use_iterator UI = Op0->use_begin(), UIEnd = Op0->use_end(); UI != UIEnd; ++UI) {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(*UI);
+ if (!BinOp || BinOp->getOpcode() != Instruction::Sub)
+ continue;
+    // If Op0 is the first argument, it is not beneficial to swap the
+    // arguments.
+ int LocalSwapBenefits = -1;
+ unsigned Op1Idx = 1;
+ if (BinOp->getOperand(Op1Idx) == Op0) {
+ Op1Idx = 0;
+ LocalSwapBenefits = 1;
+ }
+ if (BinOp->getOperand(Op1Idx) != Op1)
+ continue;
+ GlobalSwapBenefits += LocalSwapBenefits;
+ }
+ return GlobalSwapBenefits > 0;
+}
+
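The heuristic amounts to a vote over the existing subtracts: a sub that already has the compare's operand order votes against swapping, one with the reversed order votes for it. A standalone model of the counting (the pair list stands in for the use-list walk over Op0):

    #include <cassert>
    #include <utility>
    #include <vector>

    // Each pair models one `sub LHS, RHS` using the compared values.
    static bool shouldSwap(int Op0, int Op1,
                           const std::vector<std::pair<int, int> > &Subs) {
      int Benefit = 0;
      for (size_t i = 0; i != Subs.size(); ++i) {
        if (Subs[i].first == Op0 && Subs[i].second == Op1)
          Benefit -= 1;  // already laid out like `icmp Op0, Op1`
        else if (Subs[i].first == Op1 && Subs[i].second == Op0)
          Benefit += 1;  // would match only the swapped compare
      }
      return Benefit > 0;
    }

    int main() {
      // Two `sub b, a` against one `sub a, b`: swapping `icmp a, b` wins.
      std::vector<std::pair<int, int> > Subs;
      Subs.push_back(std::make_pair(2, 1));
      Subs.push_back(std::make_pair(2, 1));
      Subs.push_back(std::make_pair(1, 2));
      assert(shouldSwap(/*a=*/1, /*b=*/2, Subs));
      return 0;
    }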
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
bool Changed = false;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ unsigned Op0Cplxity = getComplexity(Op0);
+ unsigned Op1Cplxity = getComplexity(Op1);
/// Orders the operands of the compare so that they are listed from most
/// complex to least complex. This puts constants before unary operators,
/// before binary operators.
- if (getComplexity(Op0) < getComplexity(Op1)) {
+ if (Op0Cplxity < Op1Cplxity ||
+ (Op0Cplxity == Op1Cplxity &&
+ swapMayExposeCSEOpportunities(Op0, Op1))) {
I.swapOperands();
std::swap(Op0, Op1);
Changed = true;
@@ -2477,7 +2533,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() && TD &&
- TD->getIntPtrType(RHSC->getContext()) ==
+ TD->getIntPtrType(RHSC->getType()) ==
LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
@@ -2900,6 +2956,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Builder->CreateTrunc(B, A->getType()));
}
+ // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
+ // For lshr and ashr pairs.
+ if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
+ (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
+ ? ICmpInst::ICMP_UGE
+ : ICmpInst::ICMP_ULT;
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
+ return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
+ }
+ }
+
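The shift-pair fold compares the surviving high bits directly: two values have equal logical right shifts by C exactly when their XOR is strictly below 1 << C. An exhaustive 8-bit check (the shift amount is a sample from the required 0 < C < width range):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned C = 3;  // 0 < C < 8, as the fold requires
      for (unsigned A = 0; A != 256; ++A)
        for (unsigned B = 0; B != 256; ++B) {
          bool Eq   = (A >> C) == (B >> C);
          bool Fold = uint8_t(A ^ B) < (1u << C);  // (A^B) u< (1 << C)
          assert(Eq == Fold);
        }
      return 0;
    }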
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
@@ -2930,20 +3004,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Value *X; ConstantInt *Cst;
// icmp X+Cst, X
if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0);
+ return FoldICmpAddOpCst(I, X, Cst, I.getPredicate());
// icmp X, X+Cst
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1);
+ return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate());
}
return Changed ? &I : 0;
}
-
-
-
-
-
/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
///
Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e2d7966..4c861b3 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -154,7 +154,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
if (TD) {
- Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ Type *IntPtrTy = TD->getIntPtrType(AI.getType());
if (AI.getArraySize()->getType() != IntPtrTy) {
Value *V = Builder->CreateIntCast(AI.getArraySize(),
IntPtrTy, false);
@@ -180,12 +180,13 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Now that I is pointing to the first non-allocation-inst in the block,
// insert our getelementptr instruction...
//
- Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
- Value *Idx[2];
- Idx[0] = NullIdx;
- Idx[1] = NullIdx;
+ Type *IdxTy = TD
+ ? TD->getIntPtrType(AI.getType())
+ : Type::getInt64Ty(AI.getContext());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = { NullIdx, NullIdx };
Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
+ GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
InsertNewInstBefore(GEP, *It);
// Now make everything use the getelementptr instead of the original
@@ -262,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
EraseInstFromFunction(*ToDelete[i]);
Constant *TheSrc = cast<Constant>(Copy->getSource());
- Instruction *NewI
- = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
- AI.getType()));
+ Constant *Cast
+ = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
+ Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
EraseInstFromFunction(*Copy);
++NumGlobalCopies;
return NewI;
@@ -302,9 +303,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
if (Constant *CSrc = dyn_cast<Constant>(CastOp))
if (ASrcTy->getNumElements() != 0) {
- Value *Idxs[2];
- Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
- Idxs[1] = Idxs[0];
+ Type *IdxTy = TD
+ ? TD->getIntPtrType(SrcTy)
+ : Type::getInt64Ty(SrcTy->getContext());
+ Value *Idx = Constant::getNullValue(IdxTy);
+ Value *Idxs[2] = { Idx, Idx };
CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
SrcTy = cast<PointerType>(CastOp->getType());
SrcPTy = SrcTy->getElementType();
@@ -315,7 +318,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
- (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
+ (SrcPTy->isPtrOrPtrVectorTy() ==
+ LI.getType()->isPtrOrPtrVectorTy()) &&
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cc6a301..a759548 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -374,9 +374,12 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
} else {
if (C0) {
// (C0 / X) * C => (C0 * C) / X
- ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
- if (isNormalFp(F))
- R = BinaryOperator::CreateFDiv(F, Opnd1);
+ if (FMulOrDiv->hasOneUse()) {
+ // It would otherwise introduce another div.
+ ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
+ if (isNormalFp(F))
+ R = BinaryOperator::CreateFDiv(F, Opnd1);
+ }
} else {
// (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));
@@ -460,10 +463,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Swap && FAddSub->getOpcode() == Instruction::FSub)
std::swap(M0, M1);
- Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ?
- BinaryOperator::CreateFAdd(M0, M1) :
- BinaryOperator::CreateFSub(M0, M1);
- Instruction *RI = cast<Instruction>(R);
+ Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd)
+ ? BinaryOperator::CreateFAdd(M0, M1)
+ : BinaryOperator::CreateFSub(M0, M1);
RI->copyFastMathFlags(&I);
return RI;
}
@@ -490,13 +492,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
// if pattern detected emit alternate sequence
if (OpX && OpY) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(Log2->getFastMathFlags());
Log2->setArgOperand(0, OpY);
Value *FMulVal = Builder->CreateFMul(OpX, Log2);
- Instruction *FMul = cast<Instruction>(FMulVal);
- FMul->copyFastMathFlags(Log2);
- Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX);
- FSub->copyFastMathFlags(Log2);
- return FSub;
+ Value *FSub = Builder->CreateFSub(FMulVal, OpX);
+ FSub->takeName(&I);
+ return ReplaceInstUsesWith(I, FSub);
}
}
@@ -506,6 +508,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
for (int i = 0; i < 2; i++) {
bool IgnoreZeroSign = I.hasNoSignedZeros();
if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(I.getFastMathFlags());
+
Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
@@ -516,13 +521,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Opnd0->hasOneUse()) {
// -X * Y => -(X*Y) (Promote negation as high as possible)
Value *T = Builder->CreateFMul(N0, Opnd1);
- cast<Instruction>(T)->setDebugLoc(I.getDebugLoc());
- Instruction *Neg = BinaryOperator::CreateFNeg(T);
- if (I.getFastMathFlags().any()) {
- cast<Instruction>(T)->copyFastMathFlags(&I);
- Neg->copyFastMathFlags(&I);
- }
- return Neg;
+ Value *Neg = Builder->CreateFNeg(T);
+ Neg->takeName(&I);
+ return ReplaceInstUsesWith(I, Neg);
}
}
@@ -545,13 +546,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Y = Opnd0_0;
if (Y) {
- Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1));
- T->copyFastMathFlags(&I);
- T->setDebugLoc(I.getDebugLoc());
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->SetFastMathFlags(I.getFastMathFlags());
+ Value *T = Builder->CreateFMul(Opnd1, Opnd1);
- Instruction *R = BinaryOperator::CreateFMul(T, Y);
- R->copyFastMathFlags(&I);
- return R;
+ Value *R = Builder->CreateFMul(T, Y);
+ R->takeName(&I);
+ return ReplaceInstUsesWith(I, R);
}
}
}
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index bd14e81..4c6d0c4 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -604,8 +604,6 @@ namespace llvm {
LHS.Width == RHS.Width;
}
};
- template <>
- struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
}
@@ -688,10 +686,10 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// extracted out of it. First, sort the users by their offset and size.
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
- DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
- for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
- errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
- );
+ DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ dbgs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n';
+ );
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
@@ -772,7 +770,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
}
PredValues.clear();
- DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": "
<< *EltPHI << '\n');
ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
}
@@ -792,7 +790,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- if (Value *V = SimplifyInstruction(&PN, TD))
+ if (Value *V = SimplifyInstruction(&PN, TD, TLI))
return ReplaceInstUsesWith(PN, V);
// If all PHI operands are the same operation, pull them through the PHI,
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 7581dbe..283bec2 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -367,7 +367,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
- if (!IC || !IC->isEquality())
+ if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
return 0;
Value *CmpLHS = IC->getOperand(0);
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index a7bfe09..c831ddd 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -808,7 +808,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// TODO: Could compute known zero/one bits based on the input.
break;
}
- case Intrinsic::x86_sse42_crc32_64_8:
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
return 0;
@@ -845,21 +844,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
- unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue();
- unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue();
+ const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
+ const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
+ if (!ShlOp1 || !ShrOp1)
+ return 0; // Noop.
+
+ Value *VarX = Shr->getOperand(0);
+ Type *Ty = VarX->getType();
+ unsigned BitWidth = Ty->getIntegerBitWidth();
+ if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
+ return 0; // Undef.
+
+ unsigned ShlAmt = ShlOp1.getZExtValue();
+ unsigned ShrAmt = ShrOp1.getZExtValue();
KnownOne.clearAllBits();
KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
KnownZero &= DemandedMask;
- if (ShlAmt == 0 || ShrAmt == 0)
- return 0;
-
- Value *VarX = Shr->getOperand(0);
- Type *Ty = VarX->getType();
-
- APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
- APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth()));
+ APInt BitMask1(APInt::getAllOnesValue(BitWidth));
+ APInt BitMask2(APInt::getAllOnesValue(BitWidth));
bool isLshr = (Shr->getOpcode() == Instruction::LShr);
BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index f3de6e2..1e72410 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -106,8 +106,8 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
// If we have a PHI node with a vector type that has only 2 uses: feed
-// itself and be an operand of extractelemnt at a constant location,
-// try to replace the PHI of the vector type with a PHI of a scalar type
+// itself and be an operand of extractelement at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
if (!PN->hasNUses(2))
@@ -282,6 +282,38 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (SI->hasOneUse()) {
+ // TODO: For a select on vectors, it might be useful to do this if it
+ // has multiple extractelement uses. For vector select, that seems to
+ // fight the vectorizer.
+
+      // If we are extracting an element from a vector select or a select on
+      // vectors, create a select on the scalars extracted from the vector
+      // arguments.
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ Value *Cond = SI->getCondition();
+ if (Cond->getType()->isVectorTy()) {
+ Cond = Builder->CreateExtractElement(Cond,
+ EI.getIndexOperand(),
+ Cond->getName() + ".elt");
+ }
+
+ Value *V1Elem
+ = Builder->CreateExtractElement(TrueVal,
+ EI.getIndexOperand(),
+ TrueVal->getName() + ".elt");
+
+ Value *V2Elem
+ = Builder->CreateExtractElement(FalseVal,
+ EI.getIndexOperand(),
+ FalseVal->getName() + ".elt");
+ return SelectInst::Create(Cond,
+ V1Elem,
+ V2Elem,
+ SI->getName() + ".elt");
+ }
}
}
return 0;
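When the select has a single use, extracting first and selecting on scalars trades a whole-vector select for a scalar one. A scalar-level sketch of the underlying equivalence, with std::array standing in for the vector operands:

    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 4> V1 = {{1, 2, 3, 4}}, V2 = {{5, 6, 7, 8}};
      for (int Cond = 0; Cond != 2; ++Cond)
        for (unsigned I = 0; I != 4; ++I) {
          int SelectThenExtract = (Cond ? V1 : V2)[I];   // extract(select)
          int ExtractThenSelect = Cond ? V1[I] : V2[I];  // select(extracts)
          assert(SelectThenExtract == ExtractThenSelect);
        }
      return 0;
    }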
@@ -294,7 +326,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<Constant*> &Mask) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+ unsigned NumElts = V->getType()->getVectorNumElements();
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 19959c0..f84db27 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -37,7 +37,7 @@ public:
/// in it.
void Add(Instruction *I) {
if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- DEBUG(errs() << "IC: ADD: " << *I << '\n');
+ DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
Worklist.push_back(I);
}
}
@@ -54,7 +54,7 @@ public:
assert(Worklist.empty() && "Worklist must be empty to add initial group");
Worklist.reserve(NumEntries+16);
WorklistMap.resize(NumEntries);
- DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
+ DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
for (unsigned Idx = 0; NumEntries; --NumEntries) {
Instruction *I = List[NumEntries-1];
WorklistMap.insert(std::make_pair(I, Idx++));
@@ -74,8 +74,7 @@ public:
}
Instruction *RemoveOne() {
- Instruction *I = Worklist.back();
- Worklist.pop_back();
+ Instruction *I = Worklist.pop_back_val();
WorklistMap.erase(I);
return I;
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index b34ae21..191a101 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -699,7 +699,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
Value *InV = 0;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
+    // Beware of ConstantExpr: it may eventually fold to a null value even
+    // though isNullValue() currently returns false.
+ Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (InC && !isa<ConstantExpr>(InC))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
else
InV = Builder->CreateSelect(PN->getIncomingValue(i),
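
A concrete (hypothetical) instance of the hazard noted above: an incoming i1
value such as

  // %in = icmp eq (i32* @a, i32* @b)   ; an unfolded ConstantExpr
  // isNullValue() returns false today, but the expression may later fold to
  // false (null) once @a and @b are resolved, so a select is emitted instead
  // of statically picking TrueVInPred or FalseVInPred.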
@@ -755,19 +758,25 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
return ReplaceInstUsesWith(I, NewPN);
}
-/// FindElementAtOffset - Given a type and a constant offset, determine whether
-/// or not there is a sequence of GEP indices into the type that will land us at
-/// the specified offset. If so, fill them into NewIndices and return the
-/// resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices) {
- if (!TD) return 0;
- if (!Ty->isSized()) return 0;
+/// FindElementAtOffset - Given a pointer type and a constant offset, determine
+/// whether or not there is a sequence of GEP indices into the pointed type that
+/// will land us at the specified offset. If so, fill them into NewIndices and
+/// return the resultant element type, otherwise return null.
+Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value*> &NewIndices) {
+ assert(PtrTy->isPtrOrPtrVectorTy());
+
+ if (!TD)
+ return 0;
+
+ Type *Ty = PtrTy->getPointerElementType();
+ if (!Ty->isSized())
+ return 0;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = TD->getIntPtrType(Ty->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
if (int64_t TySize = TD->getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
@@ -1176,6 +1185,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
}
+ // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y))
+ // The GEP pattern is emitted by the SCEV expander for certain kinds of
+ // pointer arithmetic.
+ if (TD && GEP.getNumIndices() == 1 &&
+ match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) {
+ unsigned AS = GEP.getPointerAddressSpace();
+ if (GEP.getType() == Builder->getInt8PtrTy(AS) &&
+ GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
+ TD->getPointerSizeInBits(AS)) {
+ Operator *Index = cast<Operator>(GEP.getOperand(1));
+ Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType());
+ Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1));
+ return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+ }
+ }
+
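
Sketched in IR, the canonicalization above rewrites (illustrative names):

  // %y.int = ptrtoint i32* %y to i64
  // %neg   = sub i64 0, %y.int
  // %p     = getelementptr i8* %x, i64 %neg
  //   into:
  // %x.int = ptrtoint i8* %x to i64
  // %diff  = sub i64 %x.int, %y.int
  // %p     = inttoptr i64 %diff to i8*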
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
@@ -1231,13 +1256,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
- Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
+ Type *ResElTy = PtrOp->getType()->getPointerElementType();
if (TD && SrcElTy->isArrayTy() &&
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+ TD->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
TD->getTypeAllocSize(ResElTy)) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
- Idx[1] = GEP.getOperand(1);
+ Type *IdxType = TD->getIntPtrType(GEP.getType());
+ Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
Value *NewGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
@@ -1261,7 +1285,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1287,8 +1311,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
uint64_t ResSize = TD->getTypeAllocSize(ResElTy);
- uint64_t ArrayEltSize =
- TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
+ uint64_t ArrayEltSize
+ = TD->getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1296,7 +1320,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) &&
+ assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1304,9 +1328,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
- Value *Off[2];
- Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext()));
- Off[1] = NewIdx;
+ Value *Off[2] = {
+ Constant::getNullValue(TD->getIntPtrType(GEP.getType())),
+ NewIdx
+ };
+
Value *NewGEP = GEP.isInBounds() && NSW ?
Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
@@ -1318,15 +1344,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
+ if (!TD)
+ return 0;
+
/// See if we can simplify:
/// X = bitcast A* to B*
/// Y = gep X, <...constant indices...>
/// into a gep of the original struct. This is important for SROA and alias
/// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
- APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0);
- if (TD &&
- !isa<BitCastInst>(BCI->getOperand(0)) &&
+ Value *Operand = BCI->getOperand(0);
+ PointerType *OpType = cast<PointerType>(Operand->getType());
+ unsigned OffsetBits = TD->getPointerTypeSizeInBits(OpType);
+ APInt Offset(OffsetBits, 0);
+ if (!isa<BitCastInst>(Operand) &&
GEP.accumulateConstantOffset(*TD, Offset) &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -1335,8 +1366,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Offset) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
- if (isa<AllocaInst>(BCI->getOperand(0)) ||
- isAllocationFn(BCI->getOperand(0), TLI)) {
+ if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, TLI)) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
@@ -1347,19 +1377,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return &GEP;
}
}
- return new BitCastInst(BCI->getOperand(0), GEP.getType());
+ return new BitCastInst(Operand, GEP.getType());
}
// Otherwise, if the offset is non-zero, we need to find out if there is a
// field at Offset in 'A's type. If so, we can pull the cast through the
// GEP.
SmallVector<Value*, 8> NewIndices;
- Type *InTy =
- cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
- if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) {
+ if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) :
- Builder->CreateGEP(BCI->getOperand(0), NewIndices);
+ Builder->CreateInBoundsGEP(Operand, NewIndices) :
+ Builder->CreateGEP(Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -1372,8 +1400,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return 0;
}
-
-
static bool
isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
const TargetLibraryInfo *TLI) {
@@ -2209,7 +2235,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// DCE instruction if trivially dead.
if (isInstructionTriviallyDead(Inst, TLI)) {
++NumDeadInst;
- DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
+ DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
Inst->eraseFromParent();
continue;
}
@@ -2217,7 +2243,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
@@ -2293,7 +2319,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = false;
- DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getName() << "\n");
{
@@ -2338,7 +2364,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Check to see if we can DCE the instruction.
if (isInstructionTriviallyDead(I, TLI)) {
- DEBUG(errs() << "IC: DCE: " << *I << '\n');
+ DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
EraseInstFromFunction(*I);
++NumDeadInst;
MadeIRChange = true;
@@ -2348,7 +2374,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
ReplaceInstUsesWith(*I, C);
@@ -2396,13 +2422,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
std::string OrigI;
#endif
DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
- DEBUG(errs() << "IC: Visiting: " << OrigI << '\n');
+ DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
if (Instruction *Result = visit(*I)) {
++NumCombined;
// Should we replace the old instruction with a new one?
if (Result != I) {
- DEBUG(errs() << "IC: Old = " << *I << '\n'
+ DEBUG(dbgs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
if (!I->getDebugLoc().isUnknown())
@@ -2431,7 +2457,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
EraseInstFromFunction(*I);
} else {
#ifndef NDEBUG
- DEBUG(errs() << "IC: Mod = " << OrigI << '\n'
+ DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
<< " New = " << *I << '\n');
#endif
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index d77e20b..d731ec5 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DIBuilder.h"
@@ -59,6 +60,7 @@ static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa8000;
+static const size_t kMinStackMallocSize = 1 << 6; // 64B
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
@@ -75,21 +77,30 @@ static const char *const kAsanUnregisterGlobalsName =
static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *const kAsanInitName = "__asan_init_v3";
+static const char *const kAsanCovName = "__sanitizer_cov";
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *const kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *const kAsanMappingScaleName = "__asan_mapping_scale";
-static const char *const kAsanStackMallocName = "__asan_stack_malloc";
-static const char *const kAsanStackFreeName = "__asan_stack_free";
+static const int kMaxAsanStackMallocSizeClass = 10;
+static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
+static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
static const char *const kAsanGenPrefix = "__asan_gen_";
static const char *const kAsanPoisonStackMemoryName =
"__asan_poison_stack_memory";
static const char *const kAsanUnpoisonStackMemoryName =
"__asan_unpoison_stack_memory";
+static const char *const kAsanOptionDetectUAR =
+ "__asan_option_detect_stack_use_after_return";
+
+// These constants must match the definitions in the run-time library.
static const int kAsanStackLeftRedzoneMagic = 0xf1;
static const int kAsanStackMidRedzoneMagic = 0xf2;
static const int kAsanStackRightRedzoneMagic = 0xf3;
static const int kAsanStackPartialRedzoneMagic = 0xf4;
+#ifndef NDEBUG
+static const int kAsanStackAfterReturnMagic = 0xf5;
+#endif
// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
@@ -124,6 +135,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClCoverage("asan-coverage",
+ cl::desc("ASan coverage"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClInitializers("asan-initialization-order",
cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
@@ -184,6 +197,13 @@ static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
cl::Hidden, cl::init(-1));
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumOptimizedAccessesToGlobalArray,
+ "Number of optimized accesses to global arrays");
+STATISTIC(NumOptimizedAccessesToGlobalVar,
+ "Number of optimized accesses to global vars");
+
namespace {
/// A set of dynamically initialized globals extracted from metadata.
class SetOfDynamicallyInitializedGlobals {
@@ -306,6 +326,8 @@ struct AddressSanitizer : public FunctionPass {
bool ShouldInstrumentGlobal(GlobalVariable *G);
bool LooksLikeCodeInBug11395(Instruction *I);
void FindDynamicInitializers(Module &M);
+ bool GlobalIsLinkerInitialized(GlobalVariable *G);
+ bool InjectCoverage(Function &F);
bool CheckInitOrder;
bool CheckUseAfterReturn;
@@ -321,6 +343,7 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanCtorFunction;
Function *AsanInitFunction;
Function *AsanHandleNoReturnFunc;
+ Function *AsanCovFunction;
OwningPtr<SpecialCaseList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
@@ -396,12 +419,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
uint64_t TotalStackSize;
unsigned StackAlignment;
- Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+ Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
+ *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
// Stores a place and arguments of poisoning/unpoisoning call for alloca.
struct AllocaPoisonCall {
IntrinsicInst *InsBefore;
+ AllocaInst *AI;
uint64_t Size;
bool DoPoison;
};
@@ -480,7 +505,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
if (!AI) return;
bool DoPoison = (ID == Intrinsic::lifetime_end);
- AllocaPoisonCall APC = {&II, SizeValue, DoPoison};
+ AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
AllocaPoisonCallVec.push_back(APC);
}
@@ -488,7 +513,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void initializeCallbacks(Module &M);
// Check if we want (and can) handle this alloca.
- bool isInterestingAlloca(AllocaInst &AI) {
+ bool isInterestingAlloca(AllocaInst &AI) const {
return (!AI.isArrayAllocation() &&
AI.isStaticAlloca() &&
AI.getAlignment() <= RedzoneSize() &&
@@ -498,24 +523,27 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
size_t RedzoneSize() const {
return RedzoneSizeForScale(Mapping.Scale);
}
- uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
Type *Ty = AI->getAllocatedType();
uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty);
return SizeInBytes;
}
- uint64_t getAlignedSize(uint64_t SizeInBytes) {
+ uint64_t getAlignedSize(uint64_t SizeInBytes) const {
size_t RZ = RedzoneSize();
return ((SizeInBytes + RZ - 1) / RZ) * RZ;
}
- uint64_t getAlignedAllocaSize(AllocaInst *AI) {
+ uint64_t getAlignedAllocaSize(AllocaInst *AI) const {
uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
return getAlignedSize(SizeInBytes);
}
  /// Finds the alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
- void poisonRedZones(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
+ void poisonRedZones(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> &IRB,
Value *ShadowBase, bool DoPoison);
- void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, bool DoPoison);
+ void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison);
+
+ void SetShadowToStackAfterReturnInlined(IRBuilder<> &IRB, Value *ShadowBase,
+ int Size);
};
} // namespace
@@ -642,6 +670,13 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
+bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
+  // If a global variable does not have dynamic initialization, we don't
+  // have to instrument it. However, if a global does not have an initializer
+  // at all, we assume it has a dynamic initializer (in another TU).
+ return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G);
+}
+
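
In source terms, a sketch of the distinction (hypothetical C++ globals, not
taken from this patch):

  // int A = 42;      // constant initializer: linker-initialized, check elided
  // int B = Init();  // dynamic initializer: the access stays instrumented
  // In IR, a defined global always carries an initializer; only declarations
  // (defined in another TU) lack one and are conservatively instrumented.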
void AddressSanitizer::instrumentMop(Instruction *I) {
bool IsWrite = false;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
@@ -650,13 +685,19 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
// If initialization order checking is disabled, a simple access to a
// dynamically initialized global is always valid.
- if (!CheckInitOrder)
- return;
- // If a global variable does not have dynamic initialization we don't
- // have to instrument it. However, if a global does not have initailizer
- // at all, we assume it has dynamic initializer (in other TU).
- if (G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G))
+ if (!CheckInitOrder || GlobalIsLinkerInitialized(G)) {
+ NumOptimizedAccessesToGlobalVar++;
return;
+ }
+ }
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr);
+ if (CE && CE->isGEPWithNoNotionalOverIndexing()) {
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
+ if (CE->getOperand(1)->isNullValue() && GlobalIsLinkerInitialized(G)) {
+ NumOptimizedAccessesToGlobalArray++;
+ return;
+ }
+ }
}
}
@@ -668,6 +709,11 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
assert((TypeSize % 8) == 0);
+ if (IsWrite)
+ NumInstrumentedWrites++;
+ else
+ NumInstrumentedReads++;
+
// Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check.
if (TypeSize == 8 || TypeSize == 16 ||
TypeSize == 32 || TypeSize == 64 || TypeSize == 128)
@@ -883,7 +929,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new SpecialCaseList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
if (BL->isIn(M)) return false;
C = &(M.getContext());
int LongSize = TD->getPointerSizeInBits();
@@ -914,8 +960,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy,
IntptrTy, IntptrTy, NULL);
- SmallVector<Constant *, 16> Initializers(n), DynamicInit;
-
+ SmallVector<Constant *, 16> Initializers(n);
Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
assert(CtorFunc);
@@ -1046,6 +1091,8 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+ AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanCovName, IRB.getVoidTy(), IntptrTy, NULL));
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
@@ -1076,7 +1123,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
if (!TD)
return false;
- BL.reset(new SpecialCaseList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
DynamicallyInitializedGlobals.Init(M);
C = &(M.getContext());
@@ -1117,6 +1164,47 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
return false;
}
+// Poor man's coverage that works with ASan.
+// We create a Guard boolean variable with the same linkage
+// as the function and inject this code into the entry block:
+// if (!*Guard) {
+// __sanitizer_cov(&F);
+// *Guard = 1;
+// }
+// The accesses to Guard are atomic. The rest of the logic is
+// in __sanitizer_cov (it's fine to call it more than once).
+//
+// This coverage implementation provides very limited data:
+// it only tells if a given function was ever executed.
+// No counters, no per-basic-block or per-edge data.
+// But for many use cases this is what we need and the added slowdown
+// is negligible. This simple implementation will probably be obsoleted
+// by the upcoming Clang-based coverage implementation.
+// By having it here and now we hope to
+// a) get the functionality to users earlier and
+// b) collect usage statistics to help improve Clang coverage design.
+bool AddressSanitizer::InjectCoverage(Function &F) {
+ if (!ClCoverage) return false;
+ IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt());
+ Type *Int8Ty = IRB.getInt8Ty();
+ GlobalVariable *Guard = new GlobalVariable(
+ *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName());
+ LoadInst *Load = IRB.CreateLoad(Guard);
+ Load->setAtomic(Monotonic);
+ Load->setAlignment(1);
+ Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load);
+ Instruction *Ins = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ IRB.SetInsertPoint(Ins);
+ // We pass &F to __sanitizer_cov. We could avoid this and rely on
+ // GET_CALLER_PC, but having the PC of the first instruction is just nice.
+ IRB.CreateCall(AsanCovFunction, IRB.CreatePointerCast(&F, IntptrTy));
+ StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
+ Store->setAtomic(Monotonic);
+ Store->setAlignment(1);
+ return true;
+}
+
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
@@ -1212,6 +1300,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
}
bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
+
+ if (InjectCoverage(F))
+ res = true;
+
DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n");
if (ClKeepUninstrumented) {
@@ -1271,11 +1363,15 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
- AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
- AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanStackFreeName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy, NULL));
+ for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
+ std::string Suffix = itostr(i);
+ AsanStackMallocFunc[i] = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
+ IntptrTy, IntptrTy, NULL));
+ AsanStackFreeFunc[i] = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy,
+ IntptrTy, IntptrTy, NULL));
+ }
AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
@@ -1283,7 +1379,7 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
}
void FunctionStackPoisoner::poisonRedZones(
- const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase,
+ const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> &IRB, Value *ShadowBase,
bool DoPoison) {
size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale;
assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
@@ -1344,12 +1440,40 @@ void FunctionStackPoisoner::poisonRedZones(
}
}
+// The fake stack allocator (asan_fake_stack.h) has 11 size classes, one for
+// each power of two from kMinStackMallocSize up to kMaxStackMallocSize; the
+// class indices run from 0 to kMaxAsanStackMallocSizeClass.
+static int StackMallocSizeClass(uint64_t LocalStackSize) {
+ assert(LocalStackSize <= kMaxStackMallocSize);
+ uint64_t MaxSize = kMinStackMallocSize;
+ for (int i = 0; ; i++, MaxSize *= 2)
+ if (LocalStackSize <= MaxSize)
+ return i;
+ llvm_unreachable("impossible LocalStackSize");
+}
+
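
A self-contained sketch (not part of the patch) of the mapping that
StackMallocSizeClass above computes, using the constants defined earlier in
this file (kMinStackMallocSize = 64, kMaxStackMallocSize = 64K); the helper
name is hypothetical:

  #include <cassert>
  #include <cstdint>

  // Mirrors StackMallocSizeClass: class i covers sizes up to 64 << i bytes.
  static int sizeClassSketch(uint64_t LocalStackSize) {
    uint64_t MaxSize = 64; // kMinStackMallocSize
    for (int i = 0;; i++, MaxSize *= 2)
      if (LocalStackSize <= MaxSize)
        return i;
  }

  int main() {
    assert(sizeClassSketch(1) == 0);        // anything up to 64B -> class 0
    assert(sizeClassSketch(65) == 1);       // first size needing 128B
    assert(sizeClassSketch(1 << 16) == 10); // 64K -> class 10, the last one
  }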
+// Set Size bytes starting from ShadowBase to kAsanStackAfterReturnMagic.
+// We cannot use the memset intrinsic because it may end up calling the actual
+// memset. Size must be a multiple of 8.
+// Currently this generates 8-byte stores on x86_64; it may be better to
+// generate wider stores.
+void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
+ IRBuilder<> &IRB, Value *ShadowBase, int Size) {
+ assert(!(Size % 8));
+ assert(kAsanStackAfterReturnMagic == 0xf5);
+ for (int i = 0; i < Size; i += 8) {
+ Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
+ IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0xf5f5f5f5f5f5f5f5ULL),
+ IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
+ }
+}
+
void FunctionStackPoisoner::poisonStack() {
uint64_t LocalStackSize = TotalStackSize +
(AllocaVec.size() + 1) * RedzoneSize();
bool DoStackMalloc = ASan.CheckUseAfterReturn
&& LocalStackSize <= kMaxStackMallocSize;
+ int StackMallocIdx = -1;
assert(AllocaVec.size() > 0);
Instruction *InsBefore = AllocaVec[0];
@@ -1367,8 +1491,28 @@ void FunctionStackPoisoner::poisonStack() {
Value *LocalStackBase = OrigStackBase;
if (DoStackMalloc) {
- LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
+ // LocalStackBase = OrigStackBase
+ // if (__asan_option_detect_stack_use_after_return)
+    //   LocalStackBase = __asan_stack_malloc_N(LocalStackSize, OrigStackBase);
+ StackMallocIdx = StackMallocSizeClass(LocalStackSize);
+ assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass);
+ Constant *OptionDetectUAR = F.getParent()->getOrInsertGlobal(
+ kAsanOptionDetectUAR, IRB.getInt32Ty());
+ Value *Cmp = IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUAR),
+ Constant::getNullValue(IRB.getInt32Ty()));
+ Instruction *Term =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ BasicBlock *CmpBlock = cast<Instruction>(Cmp)->getParent();
+ IRBuilder<> IRBIf(Term);
+ LocalStackBase = IRBIf.CreateCall2(
+ AsanStackMallocFunc[StackMallocIdx],
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
+ BasicBlock *SetBlock = cast<Instruction>(LocalStackBase)->getParent();
+ IRB.SetInsertPoint(InsBefore);
+ PHINode *Phi = IRB.CreatePHI(IntptrTy, 2);
+ Phi->addIncoming(OrigStackBase, CmpBlock);
+ Phi->addIncoming(LocalStackBase, SetBlock);
+ LocalStackBase = Phi;
}
// This string will be parsed by the run-time (DescribeAddressIfStack).
@@ -1380,11 +1524,10 @@ void FunctionStackPoisoner::poisonStack() {
bool HavePoisonedAllocas = false;
for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) {
const AllocaPoisonCall &APC = AllocaPoisonCallVec[i];
- IntrinsicInst *II = APC.InsBefore;
- AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
- assert(AI);
- IRBuilder<> IRB(II);
- poisonAlloca(AI, APC.Size, IRB, APC.DoPoison);
+ assert(APC.InsBefore);
+ assert(APC.AI);
+ IRBuilder<> IRB(APC.InsBefore);
+ poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
HavePoisonedAllocas |= APC.DoPoison;
}
@@ -1442,10 +1585,35 @@ void FunctionStackPoisoner::poisonStack() {
// Unpoison the stack.
poisonRedZones(AllocaVec, IRBRet, ShadowBase, false);
if (DoStackMalloc) {
+ assert(StackMallocIdx >= 0);
// In use-after-return mode, mark the whole stack frame unaddressable.
- IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
- ConstantInt::get(IntptrTy, LocalStackSize),
- OrigStackBase);
+ if (StackMallocIdx <= 4) {
+ // For small sizes inline the whole thing:
+ // if LocalStackBase != OrigStackBase:
+ // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize);
+ // **SavedFlagPtr(LocalStackBase) = 0
+ // FIXME: if LocalStackBase != OrigStackBase don't call poisonRedZones.
+ Value *Cmp = IRBRet.CreateICmpNE(LocalStackBase, OrigStackBase);
+ TerminatorInst *PoisonTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+ IRBuilder<> IRBPoison(PoisonTerm);
+ int ClassSize = kMinStackMallocSize << StackMallocIdx;
+ SetShadowToStackAfterReturnInlined(IRBPoison, ShadowBase,
+ ClassSize >> Mapping.Scale);
+ Value *SavedFlagPtrPtr = IRBPoison.CreateAdd(
+ LocalStackBase,
+ ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8));
+ Value *SavedFlagPtr = IRBPoison.CreateLoad(
+ IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
+ IRBPoison.CreateStore(
+ Constant::getNullValue(IRBPoison.getInt8Ty()),
+ IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
+ } else {
+ // For larger frames call __asan_stack_free_*.
+ IRBRet.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase,
+ ConstantInt::get(IntptrTy, LocalStackSize),
+ OrigStackBase);
+ }
} else if (HavePoisonedAllocas) {
// If we poisoned some allocas in llvm.lifetime analysis,
// unpoison whole stack frame now.
@@ -1460,7 +1628,7 @@ void FunctionStackPoisoner::poisonStack() {
}
void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
- IRBuilder<> IRB, bool DoPoison) {
+ IRBuilder<> &IRB, bool DoPoison) {
// For now just insert the call to ASan runtime.
Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
Value *SizeArg = ConstantInt::get(IntptrTy, Size);
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index b094d42..7a9f0f6 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -80,7 +80,7 @@ BasicBlock *BoundsChecking::getTrapBB() {
return TrapBB;
Function *Fn = Inst->getParent()->getParent();
- BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint();
+ IRBuilder<>::InsertPointGuard Guard(*Builder);
TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
Builder->SetInsertPoint(TrapBB);
@@ -91,7 +91,6 @@ BasicBlock *BoundsChecking::getTrapBB() {
TrapCall->setDebugLoc(Inst->getDebugLoc());
Builder->CreateUnreachable();
- Builder->SetInsertPoint(PrevInsertPoint);
return TrapBB;
}
@@ -173,7 +172,8 @@ bool BoundsChecking::runOnFunction(Function &F) {
TrapBB = 0;
BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
Builder = &TheBuilder;
- ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext());
+ ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext(),
+ /*RoundToAlign=*/true);
ObjSizeEval = &TheObjSizeEval;
// check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 5e34863..3563593 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,14 +1,11 @@
add_llvm_library(LLVMInstrumentation
AddressSanitizer.cpp
BoundsChecking.cpp
+ DataFlowSanitizer.cpp
DebugIR.cpp
- EdgeProfiling.cpp
GCOVProfiling.cpp
MemorySanitizer.cpp
Instrumentation.cpp
- OptimalEdgeProfiling.cpp
- PathProfiling.cpp
- ProfilingUtils.cpp
ThreadSanitizer.cpp
)
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
new file mode 100644
index 0000000..9b9e725
--- /dev/null
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -0,0 +1,1397 @@
+//===-- DataFlowSanitizer.cpp - dynamic data flow analysis ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
+/// analysis.
+///
+/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
+/// class of bugs on its own. Instead, it provides a generic dynamic data flow
+/// analysis framework to be used by clients to help detect application-specific
+/// issues within their own code.
+///
+/// The analysis is based on automatic propagation of data flow labels (also
+/// known as taint labels) through a program as it performs computation. Each
+/// byte of application memory is backed by two bytes of shadow memory which
+/// hold the label. On Linux/x86_64, memory is laid out as follows:
+///
+/// +--------------------+ 0x800000000000 (top of memory)
+/// | application memory |
+/// +--------------------+ 0x700000008000 (kAppAddr)
+/// | |
+/// | unused |
+/// | |
+/// +--------------------+ 0x200200000000 (kUnusedAddr)
+/// | union table |
+/// +--------------------+ 0x200000000000 (kUnionTableAddr)
+/// | shadow memory |
+/// +--------------------+ 0x000000010000 (kShadowAddr)
+/// | reserved by kernel |
+/// +--------------------+ 0x000000000000
+///
+/// To derive a shadow memory address from an application memory address,
+/// bits 44-46 are cleared to bring the address into the range
+/// [0x000000008000,0x100000000000). Then the address is shifted left by 1 to
+/// account for the double byte representation of shadow labels and move the
+/// address into the shadow memory range. See the function
+/// DataFlowSanitizer::getShadowAddress below.
+///
+/// For more information, please refer to the design document:
+/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
+
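
A self-contained sketch (not part of the patch) of the address mapping
described above, assuming the Linux/x86_64 constants used by this pass
(mask ~0x700000000000 and a 2x multiplier for the double-byte labels):

  #include <cassert>
  #include <cstdint>

  // Clear bits 44-46, then scale by 2 for the two-byte shadow labels.
  static uint64_t shadowAddressSketch(uint64_t AppAddr) {
    return (AppAddr & ~0x700000000000ULL) * 2;
  }

  int main() {
    assert(shadowAddressSketch(0x700000008000ULL) == 0x10000); // kShadowAddr
    assert(shadowAddressSketch(0x7fffffffffffULL) ==
           0x1ffffffffffeULL);                    // below kUnionTableAddr
  }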
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SpecialCaseList.h"
+#include <iterator>
+
+using namespace llvm;
+
+// The -dfsan-preserve-alignment flag controls whether this pass assumes that
+// alignment requirements provided by the input IR are correct. For example,
+// if the input IR contains a load with alignment 8, this flag will cause
+// the shadow load to have alignment 16. This flag is disabled by default as
+// we have unfortunately encountered too much code (including Clang itself;
+// see PR14291) which performs misaligned access.
+static cl::opt<bool> ClPreserveAlignment(
+ "dfsan-preserve-alignment",
+ cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
+ cl::init(false));
+
+// The ABI list file controls how shadow parameters are passed. The pass treats
+// every function labelled "uninstrumented" in the ABI list file as conforming
+// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
+// additional annotations for those functions, a call to one of those functions
+// will produce a warning message, as the labelling behaviour of the function is
+// unknown. The other supported annotations are "functional" and "discard",
+// which are described below under DataFlowSanitizer::WrapperKind.
+static cl::opt<std::string> ClABIListFile(
+ "dfsan-abilist",
+ cl::desc("File listing native ABI functions and how the pass treats them"),
+ cl::Hidden);
+
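
For reference, a minimal ABI list in the SpecialCaseList format this pass
consumes (entries are illustrative; see the design document linked above for
the authoritative syntax):

  # Hypothetical dfsan ABI list:
  fun:main=uninstrumented      # keep the native ABI for main
  fun:main=discard             # and leave its return value unlabelled
  fun:memcmp=uninstrumented
  fun:memcmp=functional        # return label = union of the argument labels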
+// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
+// functions (see DataFlowSanitizer::InstrumentedABI below).
+static cl::opt<bool> ClArgsABI(
+ "dfsan-args-abi",
+ cl::desc("Use the argument ABI rather than the TLS ABI"),
+ cl::Hidden);
+
+static cl::opt<bool> ClDebugNonzeroLabels(
+ "dfsan-debug-nonzero-labels",
+ cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
+ "load or return with a nonzero label"),
+ cl::Hidden);
+
+namespace {
+
+class DataFlowSanitizer : public ModulePass {
+ friend struct DFSanFunction;
+ friend class DFSanVisitor;
+
+ enum {
+ ShadowWidth = 16
+ };
+
+ /// Which ABI should be used for instrumented functions?
+ enum InstrumentedABI {
+ /// Argument and return value labels are passed through additional
+ /// arguments and by modifying the return type.
+ IA_Args,
+
+ /// Argument and return value labels are passed through TLS variables
+ /// __dfsan_arg_tls and __dfsan_retval_tls.
+ IA_TLS
+ };
+
+ /// How should calls to uninstrumented functions be handled?
+ enum WrapperKind {
+ /// This function is present in an uninstrumented form but we don't know
+ /// how it should be handled. Print a warning and call the function anyway.
+ /// Don't label the return value.
+ WK_Warning,
+
+ /// This function does not write to (user-accessible) memory, and its return
+ /// value is unlabelled.
+ WK_Discard,
+
+ /// This function does not write to (user-accessible) memory, and the label
+ /// of its return value is the union of the label of its arguments.
+ WK_Functional,
+
+ /// Instead of calling the function, a custom wrapper __dfsw_F is called,
+ /// where F is the name of the function. This function may wrap the
+ /// original function or provide its own implementation. This is similar to
+ /// the IA_Args ABI, except that IA_Args uses a struct return type to
+ /// pass the return value shadow in a register, while WK_Custom uses an
+ /// extra pointer argument to return the shadow. This allows the wrapped
+ /// form of the function type to be expressed in C.
+ WK_Custom
+ };
+
+ DataLayout *DL;
+ Module *Mod;
+ LLVMContext *Ctx;
+ IntegerType *ShadowTy;
+ PointerType *ShadowPtrTy;
+ IntegerType *IntptrTy;
+ ConstantInt *ZeroShadow;
+ ConstantInt *ShadowPtrMask;
+ ConstantInt *ShadowPtrMul;
+ Constant *ArgTLS;
+ Constant *RetvalTLS;
+ void *(*GetArgTLSPtr)();
+ void *(*GetRetvalTLSPtr)();
+ Constant *GetArgTLS;
+ Constant *GetRetvalTLS;
+ FunctionType *DFSanUnionFnTy;
+ FunctionType *DFSanUnionLoadFnTy;
+ FunctionType *DFSanUnimplementedFnTy;
+ FunctionType *DFSanSetLabelFnTy;
+ FunctionType *DFSanNonzeroLabelFnTy;
+ Constant *DFSanUnionFn;
+ Constant *DFSanUnionLoadFn;
+ Constant *DFSanUnimplementedFn;
+ Constant *DFSanSetLabelFn;
+ Constant *DFSanNonzeroLabelFn;
+ MDNode *ColdCallWeights;
+ OwningPtr<SpecialCaseList> ABIList;
+ DenseMap<Value *, Function *> UnwrappedFnMap;
+ AttributeSet ReadOnlyNoneAttrs;
+
+ Value *getShadowAddress(Value *Addr, Instruction *Pos);
+ Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
+ bool isInstrumented(const Function *F);
+ bool isInstrumented(const GlobalAlias *GA);
+ FunctionType *getArgsFunctionType(FunctionType *T);
+ FunctionType *getTrampolineFunctionType(FunctionType *T);
+ FunctionType *getCustomFunctionType(FunctionType *T);
+ InstrumentedABI getInstrumentedABI();
+ WrapperKind getWrapperKind(Function *F);
+ void addGlobalNamePrefix(GlobalValue *GV);
+ Function *buildWrapperFunction(Function *F, StringRef NewFName,
+ GlobalValue::LinkageTypes NewFLink,
+ FunctionType *NewFT);
+ Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
+
+ public:
+ DataFlowSanitizer(StringRef ABIListFile = StringRef(),
+ void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0);
+ static char ID;
+ bool doInitialization(Module &M);
+ bool runOnModule(Module &M);
+};
+
+struct DFSanFunction {
+ DataFlowSanitizer &DFS;
+ Function *F;
+ DataFlowSanitizer::InstrumentedABI IA;
+ bool IsNativeABI;
+ Value *ArgTLSPtr;
+ Value *RetvalTLSPtr;
+ AllocaInst *LabelReturnAlloca;
+ DenseMap<Value *, Value *> ValShadowMap;
+ DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
+ std::vector<std::pair<PHINode *, PHINode *> > PHIFixups;
+ DenseSet<Instruction *> SkipInsts;
+ DenseSet<Value *> NonZeroChecks;
+
+ DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
+ : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()),
+ IsNativeABI(IsNativeABI), ArgTLSPtr(0), RetvalTLSPtr(0),
+ LabelReturnAlloca(0) {}
+ Value *getArgTLSPtr();
+ Value *getArgTLS(unsigned Index, Instruction *Pos);
+ Value *getRetvalTLS();
+ Value *getShadow(Value *V);
+ void setShadow(Instruction *I, Value *Shadow);
+ Value *combineOperandShadows(Instruction *Inst);
+ Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
+ Instruction *Pos);
+ void storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow,
+ Instruction *Pos);
+};
+
+class DFSanVisitor : public InstVisitor<DFSanVisitor> {
+ public:
+ DFSanFunction &DFSF;
+ DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
+
+ void visitOperandShadowInst(Instruction &I);
+
+ void visitBinaryOperator(BinaryOperator &BO);
+ void visitCastInst(CastInst &CI);
+ void visitCmpInst(CmpInst &CI);
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void visitReturnInst(ReturnInst &RI);
+ void visitCallSite(CallSite CS);
+ void visitPHINode(PHINode &PN);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &I);
+ void visitInsertValueInst(InsertValueInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitMemSetInst(MemSetInst &I);
+ void visitMemTransferInst(MemTransferInst &I);
+};
+
+}
+
+char DataFlowSanitizer::ID;
+INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
+ "DataFlowSanitizer: dynamic data flow analysis.", false, false)
+
+ModulePass *llvm::createDataFlowSanitizerPass(StringRef ABIListFile,
+ void *(*getArgTLS)(),
+ void *(*getRetValTLS)()) {
+ return new DataFlowSanitizer(ABIListFile, getArgTLS, getRetValTLS);
+}
+
+DataFlowSanitizer::DataFlowSanitizer(StringRef ABIListFile,
+ void *(*getArgTLS)(),
+ void *(*getRetValTLS)())
+ : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS),
+ ABIList(SpecialCaseList::createOrDie(ABIListFile.empty() ? ClABIListFile
+ : ABIListFile)) {
+}
+
+FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
+ llvm::SmallVector<Type *, 4> ArgTypes;
+ std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ if (T->isVarArg())
+ ArgTypes.push_back(ShadowPtrTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ RetType = StructType::get(RetType, ShadowTy, (Type *)0);
+ return FunctionType::get(RetType, ArgTypes, T->isVarArg());
+}
+
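
A sketch of the resulting IA_Args signatures (illustrative, with i16 as the
16-bit shadow type):

  // i32 (i8*, i32)   becomes   { i32, i16 } (i8*, i32, i16, i16)
  // void (i32, ...)  becomes   void (i32, i16, i16*, ...)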
+FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
+ assert(!T->isVarArg());
+ llvm::SmallVector<Type *, 4> ArgTypes;
+ ArgTypes.push_back(T->getPointerTo());
+ std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ ArgTypes.push_back(ShadowPtrTy);
+ return FunctionType::get(T->getReturnType(), ArgTypes, false);
+}
+
+FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
+ assert(!T->isVarArg());
+ llvm::SmallVector<Type *, 4> ArgTypes;
+ for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end();
+ i != e; ++i) {
+ FunctionType *FT;
+ if (isa<PointerType>(*i) && (FT = dyn_cast<FunctionType>(cast<PointerType>(
+ *i)->getElementType()))) {
+ ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
+ ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
+ } else {
+ ArgTypes.push_back(*i);
+ }
+ }
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ ArgTypes.push_back(ShadowPtrTy);
+ return FunctionType::get(T->getReturnType(), ArgTypes, false);
+}
+
+bool DataFlowSanitizer::doInitialization(Module &M) {
+ DL = getAnalysisIfAvailable<DataLayout>();
+ if (!DL)
+ return false;
+
+ Mod = &M;
+ Ctx = &M.getContext();
+ ShadowTy = IntegerType::get(*Ctx, ShadowWidth);
+ ShadowPtrTy = PointerType::getUnqual(ShadowTy);
+ IntptrTy = DL->getIntPtrType(*Ctx);
+ ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
+ ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
+ ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8);
+
+ Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy };
+ DFSanUnionFnTy =
+ FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false);
+ Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy };
+ DFSanUnionLoadFnTy =
+ FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false);
+ DFSanUnimplementedFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+ Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy };
+ DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ DFSanSetLabelArgs, /*isVarArg=*/false);
+ DFSanNonzeroLabelFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), ArrayRef<Type *>(), /*isVarArg=*/false);
+
+ if (GetArgTLSPtr) {
+ Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
+ ArgTLS = 0;
+ GetArgTLS = ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
+ PointerType::getUnqual(
+ FunctionType::get(PointerType::getUnqual(ArgTLSTy), (Type *)0)));
+ }
+ if (GetRetvalTLSPtr) {
+ RetvalTLS = 0;
+ GetRetvalTLS = ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
+ PointerType::getUnqual(
+ FunctionType::get(PointerType::getUnqual(ShadowTy), (Type *)0)));
+ }
+
+ ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
+ return true;
+}
+
+bool DataFlowSanitizer::isInstrumented(const Function *F) {
+ return !ABIList->isIn(*F, "uninstrumented");
+}
+
+bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
+ return !ABIList->isIn(*GA, "uninstrumented");
+}
+
+DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
+ return ClArgsABI ? IA_Args : IA_TLS;
+}
+
+DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
+ if (ABIList->isIn(*F, "functional"))
+ return WK_Functional;
+ if (ABIList->isIn(*F, "discard"))
+ return WK_Discard;
+ if (ABIList->isIn(*F, "custom"))
+ return WK_Custom;
+
+ return WK_Warning;
+}
+
+void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) {
+ std::string GVName = GV->getName(), Prefix = "dfs$";
+ GV->setName(Prefix + GVName);
+
+ // Try to change the name of the function in module inline asm. We only do
+ // this for specific asm directives, currently only ".symver", to try to avoid
+ // corrupting asm which happens to contain the symbol name as a substring.
+ // Note that the substitution for .symver assumes that the versioned symbol
+ // also has an instrumented name.
+ std::string Asm = GV->getParent()->getModuleInlineAsm();
+ std::string SearchStr = ".symver " + GVName + ",";
+ size_t Pos = Asm.find(SearchStr);
+ if (Pos != std::string::npos) {
+ Asm.replace(Pos, SearchStr.size(),
+ ".symver " + Prefix + GVName + "," + Prefix);
+ GV->getParent()->setModuleInlineAsm(Asm);
+ }
+}
+
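
For example, the .symver substitution above rewrites (hypothetical symbol):

  // .symver foo,foo@@VERS_1.0   becomes   .symver dfs$foo,dfs$foo@@VERS_1.0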
+Function *
+DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
+ GlobalValue::LinkageTypes NewFLink,
+ FunctionType *NewFT) {
+ FunctionType *FT = F->getFunctionType();
+ Function *NewF = Function::Create(NewFT, NewFLink, NewFName,
+ F->getParent());
+ NewF->copyAttributesFrom(F);
+ NewF->removeAttributes(
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
+ AttributeSet::ReturnIndex));
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
+ std::vector<Value *> Args;
+ unsigned n = FT->getNumParams();
+ for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
+ Args.push_back(&*ai);
+ CallInst *CI = CallInst::Create(F, Args, "", BB);
+ if (FT->getReturnType()->isVoidTy())
+ ReturnInst::Create(*Ctx, BB);
+ else
+ ReturnInst::Create(*Ctx, CI, BB);
+
+ return NewF;
+}
+
+Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
+ StringRef FName) {
+ FunctionType *FTT = getTrampolineFunctionType(FT);
+ Constant *C = Mod->getOrInsertFunction(FName, FTT);
+ Function *F = dyn_cast<Function>(C);
+ if (F && F->isDeclaration()) {
+ F->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ std::vector<Value *> Args;
+ Function::arg_iterator AI = F->arg_begin(); ++AI;
+ for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
+ Args.push_back(&*AI);
+ CallInst *CI =
+ CallInst::Create(&F->getArgumentList().front(), Args, "", BB);
+ ReturnInst *RI;
+ if (FT->getReturnType()->isVoidTy())
+ RI = ReturnInst::Create(*Ctx, BB);
+ else
+ RI = ReturnInst::Create(*Ctx, CI, BB);
+
+ DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
+ Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
+ for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
+ DFSF.ValShadowMap[ValAI] = ShadowAI;
+ DFSanVisitor(DFSF).visitCallInst(*CI);
+ if (!FT->getReturnType()->isVoidTy())
+ new StoreInst(DFSF.getShadow(RI->getReturnValue()),
+ &F->getArgumentList().back(), RI);
+ }
+
+ return C;
+}
+
+bool DataFlowSanitizer::runOnModule(Module &M) {
+ if (!DL)
+ return false;
+
+ if (ABIList->isIn(M, "skip"))
+ return false;
+
+ if (!GetArgTLSPtr) {
+ Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
+ ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS))
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+ }
+ if (!GetRetvalTLSPtr) {
+ RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS))
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+ }
+
+ DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
+ F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ F->addAttribute(1, Attribute::ZExt);
+ F->addAttribute(2, Attribute::ZExt);
+ }
+ DFSanUnionLoadFn =
+ Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
+ F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ }
+ DFSanUnimplementedFn =
+ Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
+ DFSanSetLabelFn =
+ Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy);
+ if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) {
+ F->addAttribute(1, Attribute::ZExt);
+ }
+ DFSanNonzeroLabelFn =
+ Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
+
+ std::vector<Function *> FnsToInstrument;
+ llvm::SmallPtrSet<Function *, 2> FnsWithNativeABI;
+ for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) {
+ if (!i->isIntrinsic() &&
+ i != DFSanUnionFn &&
+ i != DFSanUnionLoadFn &&
+ i != DFSanUnimplementedFn &&
+ i != DFSanSetLabelFn &&
+ i != DFSanNonzeroLabelFn)
+ FnsToInstrument.push_back(&*i);
+ }
+
+ // Give function aliases prefixes when necessary, and build wrappers where the
+ // instrumentedness is inconsistent.
+ for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) {
+ GlobalAlias *GA = &*i;
+ ++i;
+ // Don't stop on weak. We assume people aren't playing games with the
+ // instrumentedness of overridden weak aliases.
+ if (Function *F = dyn_cast<Function>(
+ GA->resolveAliasedGlobal(/*stopOnWeak=*/false))) {
+ bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
+ if (GAInst && FInst) {
+ addGlobalNamePrefix(GA);
+ } else if (GAInst != FInst) {
+ // Non-instrumented alias of an instrumented function, or vice versa.
+ // Replace the alias with a native-ABI wrapper of the aliasee. The pass
+ // below will take care of instrumenting it.
+ Function *NewF =
+ buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
+ GA->replaceAllUsesWith(NewF);
+ NewF->takeName(GA);
+ GA->eraseFromParent();
+ FnsToInstrument.push_back(NewF);
+ }
+ }
+ }
+
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
+ ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B);
+
+ // First, change the ABI of every function in the module. ABI-listed
+ // functions keep their original ABI and get a wrapper function.
+ for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
+ e = FnsToInstrument.end();
+ i != e; ++i) {
+ Function &F = **i;
+ FunctionType *FT = F.getFunctionType();
+
+ bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
+ FT->getReturnType()->isVoidTy());
+
+ if (isInstrumented(&F)) {
+ // Instrumented functions get a 'dfs$' prefix. This allows us to more
+ // easily identify cases of mismatching ABIs.
+ if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
+ FunctionType *NewFT = getArgsFunctionType(FT);
+ Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M);
+ NewF->copyAttributesFrom(&F);
+ NewF->removeAttributes(
+ AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
+ AttributeSet::ReturnIndex));
+ for (Function::arg_iterator FArg = F.arg_begin(),
+ NewFArg = NewF->arg_begin(),
+ FArgEnd = F.arg_end();
+ FArg != FArgEnd; ++FArg, ++NewFArg) {
+ FArg->replaceAllUsesWith(NewFArg);
+ }
+ NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
+
+ for (Function::use_iterator ui = F.use_begin(), ue = F.use_end();
+ ui != ue;) {
+ BlockAddress *BA = dyn_cast<BlockAddress>(ui.getUse().getUser());
+ ++ui;
+ if (BA) {
+ BA->replaceAllUsesWith(
+ BlockAddress::get(NewF, BA->getBasicBlock()));
+ delete BA;
+ }
+ }
+ F.replaceAllUsesWith(
+ ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
+ NewF->takeName(&F);
+ F.eraseFromParent();
+ *i = NewF;
+ addGlobalNamePrefix(NewF);
+ } else {
+ addGlobalNamePrefix(&F);
+ }
+ // Hopefully, nobody will try to indirectly call a vararg
+ // function... yet.
+ } else if (FT->isVarArg()) {
+ UnwrappedFnMap[&F] = &F;
+ *i = 0;
+ } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
+ // Build a wrapper function for F. The wrapper simply calls F, and is
+ // added to FnsToInstrument so that any instrumentation according to its
+ // WrapperKind is done in the second pass below.
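+ // For example, a call to an uninstrumented declaration such as write would
+ // be redirected to a generated dfsw$write whose body simply calls write
+ // with the original, native ABI.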
+ FunctionType *NewFT = getInstrumentedABI() == IA_Args
+ ? getArgsFunctionType(FT)
+ : FT;
+ Function *NewF = buildWrapperFunction(
+ &F, std::string("dfsw$") + std::string(F.getName()),
+ GlobalValue::LinkOnceODRLinkage, NewFT);
+ if (getInstrumentedABI() == IA_TLS)
+ NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs);
+
+ Value *WrappedFnCst =
+ ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
+ F.replaceAllUsesWith(WrappedFnCst);
+ UnwrappedFnMap[WrappedFnCst] = &F;
+ *i = NewF;
+
+ if (!F.isDeclaration()) {
+ // This function is probably defining an interposition of an
+ // uninstrumented function and hence needs to keep the original ABI.
+ // But any functions it may call need to use the instrumented ABI, so
+ // we instrument it in a mode which preserves the original ABI.
+ FnsWithNativeABI.insert(&F);
+
+ // This code needs to rebuild the iterators, as they may be invalidated
+ // by the push_back, taking care that the new range does not include
+ // any functions added by this code.
+ size_t N = i - FnsToInstrument.begin(),
+ Count = e - FnsToInstrument.begin();
+ FnsToInstrument.push_back(&F);
+ i = FnsToInstrument.begin() + N;
+ e = FnsToInstrument.begin() + Count;
+ }
+ }
+ }
+
+ for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
+ e = FnsToInstrument.end();
+ i != e; ++i) {
+ if (!*i || (*i)->isDeclaration())
+ continue;
+
+ removeUnreachableBlocks(**i);
+
+ DFSanFunction DFSF(*this, *i, FnsWithNativeABI.count(*i));
+
+ // DFSanVisitor may create new basic blocks, which confuses df_iterator.
+ // Build a copy of the list before iterating over it.
+ llvm::SmallVector<BasicBlock *, 4> BBList;
+ std::copy(df_begin(&(*i)->getEntryBlock()), df_end(&(*i)->getEntryBlock()),
+ std::back_inserter(BBList));
+
+ for (llvm::SmallVector<BasicBlock *, 4>::iterator i = BBList.begin(),
+ e = BBList.end();
+ i != e; ++i) {
+ Instruction *Inst = &(*i)->front();
+ while (1) {
+ // DFSanVisitor may split the current basic block, changing the current
+ // instruction's next pointer and moving the next instruction to the
+ // tail block from which we should continue.
+ Instruction *Next = Inst->getNextNode();
+ // DFSanVisitor may delete Inst, so keep track of whether it was a
+ // terminator.
+ bool IsTerminator = isa<TerminatorInst>(Inst);
+ if (!DFSF.SkipInsts.count(Inst))
+ DFSanVisitor(DFSF).visit(Inst);
+ if (IsTerminator)
+ break;
+ Inst = Next;
+ }
+ }
+
+ // We will not necessarily be able to compute the shadow for every phi node
+ // until we have visited every block. Therefore, the code that handles phi
+ // nodes adds them to the PHIFixups list so that they can be properly
+ // handled here.
+ for (std::vector<std::pair<PHINode *, PHINode *> >::iterator
+ i = DFSF.PHIFixups.begin(),
+ e = DFSF.PHIFixups.end();
+ i != e; ++i) {
+ for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n;
+ ++val) {
+ i->second->setIncomingValue(
+ val, DFSF.getShadow(i->first->getIncomingValue(val)));
+ }
+ }
+
+ // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
+ // places (e.g. at instructions in basic blocks we haven't even begun
+ // visiting yet). To make our life easier, do this work in a pass after the main
+ // instrumentation.
+ if (ClDebugNonzeroLabels) {
+ for (DenseSet<Value *>::iterator i = DFSF.NonZeroChecks.begin(),
+ e = DFSF.NonZeroChecks.end();
+ i != e; ++i) {
+ Instruction *Pos;
+ if (Instruction *I = dyn_cast<Instruction>(*i))
+ Pos = I->getNextNode();
+ else
+ Pos = DFSF.F->getEntryBlock().begin();
+ while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
+ Pos = Pos->getNextNode();
+ IRBuilder<> IRB(Pos);
+ Instruction *NeInst = cast<Instruction>(
+ IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow));
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ NeInst, /*Unreachable=*/ false, ColdCallWeights));
+ IRBuilder<> ThenIRB(BI);
+ ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn);
+ }
+ }
+ }
+
+ return false;
+}
+
+Value *DFSanFunction::getArgTLSPtr() {
+ if (ArgTLSPtr)
+ return ArgTLSPtr;
+ if (DFS.ArgTLS)
+ return ArgTLSPtr = DFS.ArgTLS;
+
+ IRBuilder<> IRB(F->getEntryBlock().begin());
+ return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS);
+}
+
+Value *DFSanFunction::getRetvalTLS() {
+ if (RetvalTLSPtr)
+ return RetvalTLSPtr;
+ if (DFS.RetvalTLS)
+ return RetvalTLSPtr = DFS.RetvalTLS;
+
+ IRBuilder<> IRB(F->getEntryBlock().begin());
+ return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS);
+}
+
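+// Returns a pointer to the Idx'th element of the thread-local argument label
+// array maintained by the runtime (presumably __dfsan_arg_tls).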
+Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateConstGEP2_64(getArgTLSPtr(), 0, Idx);
+}
+
+Value *DFSanFunction::getShadow(Value *V) {
+ if (!isa<Argument>(V) && !isa<Instruction>(V))
+ return DFS.ZeroShadow;
+ Value *&Shadow = ValShadowMap[V];
+ if (!Shadow) {
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ if (IsNativeABI)
+ return DFS.ZeroShadow;
+ switch (IA) {
+ case DataFlowSanitizer::IA_TLS: {
+ Value *ArgTLSPtr = getArgTLSPtr();
+ Instruction *ArgTLSPos =
+ DFS.ArgTLS ? &*F->getEntryBlock().begin()
+ : cast<Instruction>(ArgTLSPtr)->getNextNode();
+ IRBuilder<> IRB(ArgTLSPos);
+ Shadow = IRB.CreateLoad(getArgTLS(A->getArgNo(), ArgTLSPos));
+ break;
+ }
+ case DataFlowSanitizer::IA_Args: {
+ unsigned ArgIdx = A->getArgNo() + F->getArgumentList().size() / 2;
+ Function::arg_iterator i = F->arg_begin();
+ while (ArgIdx--)
+ ++i;
+ Shadow = i;
+ assert(Shadow->getType() == DFS.ShadowTy);
+ break;
+ }
+ }
+ NonZeroChecks.insert(Shadow);
+ } else {
+ Shadow = DFS.ZeroShadow;
+ }
+ }
+ return Shadow;
+}
+
+void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
+ assert(!ValShadowMap.count(I));
+ assert(Shadow->getType() == DFS.ShadowTy);
+ ValShadowMap[I] = Shadow;
+}
+
+Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
+ assert(Addr != RetvalTLS && "Reinstrumenting?");
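+ // The shadow for Addr lives at (Addr & ShadowPtrMask) * ShadowPtrMul;
+ // with 16-bit labels ShadowPtrMul is presumably ShadowWidth / 8 == 2, so
+ // each application byte maps to a two-byte label.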
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateIntToPtr(
+ IRB.CreateMul(
+ IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask),
+ ShadowPtrMul),
+ ShadowPtrTy);
+}
+
+// Generates IR to compute the union of the two given shadows, inserting it
+// before Pos. Returns the computed union Value.
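+// The generated IR is roughly (assuming 16-bit labels):
+//   %ne = icmp ne i16 %v1, %v2
+//   br i1 %ne, label %then, label %tail
+// then:
+//   %u = call zeroext i16 @__dfsan_union(i16 zeroext %v1, i16 zeroext %v2)
+//   br label %tail
+// tail:
+//   %s = phi i16 [ %u, %then ], [ %v1, %head ]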
+Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2,
+ Instruction *Pos) {
+ if (V1 == ZeroShadow)
+ return V2;
+ if (V2 == ZeroShadow)
+ return V1;
+ if (V1 == V2)
+ return V1;
+ IRBuilder<> IRB(Pos);
+ BasicBlock *Head = Pos->getParent();
+ Value *Ne = IRB.CreateICmpNE(V1, V2);
+ Instruction *NeInst = dyn_cast<Instruction>(Ne);
+ if (NeInst) {
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ NeInst, /*Unreachable=*/ false, ColdCallWeights));
+ IRBuilder<> ThenIRB(BI);
+ CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2);
+ Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ Call->addAttribute(1, Attribute::ZExt);
+ Call->addAttribute(2, Attribute::ZExt);
+
+ BasicBlock *Tail = BI->getSuccessor(0);
+ PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
+ Phi->addIncoming(Call, Call->getParent());
+ Phi->addIncoming(V1, Head);
+ Pos = Phi;
+ return Phi;
+ } else {
+ assert(0 && "todo");
+ return 0;
+ }
+}
+
+// A convenience function which folds the shadows of each of the operands
+// of the provided instruction Inst, inserting the IR before Inst. Returns
+// the computed union Value.
+Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
+ if (Inst->getNumOperands() == 0)
+ return DFS.ZeroShadow;
+
+ Value *Shadow = getShadow(Inst->getOperand(0));
+ for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) {
+ Shadow = DFS.combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
+ }
+ return Shadow;
+}
+
+void DFSanVisitor::visitOperandShadowInst(Instruction &I) {
+ Value *CombinedShadow = DFSF.combineOperandShadows(&I);
+ DFSF.setShadow(&I, CombinedShadow);
+}
+
+// Generates IR to load the shadow corresponding to bytes [Addr, Addr+Size),
+// where Addr has alignment Align, taking the union of those shadows.
+Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
+ Instruction *Pos) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+ llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i =
+ AllocaShadowMap.find(AI);
+ if (i != AllocaShadowMap.end()) {
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateLoad(i->second);
+ }
+ }
+
+ uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
+ SmallVector<Value *, 2> Objs;
+ GetUnderlyingObjects(Addr, Objs, DFS.DL);
+ bool AllConstants = true;
+ for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end();
+ i != e; ++i) {
+ if (isa<Function>(*i) || isa<BlockAddress>(*i))
+ continue;
+ if (isa<GlobalVariable>(*i) && cast<GlobalVariable>(*i)->isConstant())
+ continue;
+
+ AllConstants = false;
+ break;
+ }
+ if (AllConstants)
+ return DFS.ZeroShadow;
+
+ Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+ switch (Size) {
+ case 0:
+ return DFS.ZeroShadow;
+ case 1: {
+ LoadInst *LI = new LoadInst(ShadowAddr, "", Pos);
+ LI->setAlignment(ShadowAlign);
+ return LI;
+ }
+ case 2: {
+ IRBuilder<> IRB(Pos);
+ Value *ShadowAddr1 =
+ IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ return DFS.combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
+ IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign),
+ Pos);
+ }
+ }
+ if (Size % (64 / DFS.ShadowWidth) == 0) {
+ // Fast path for the common case where each byte has an identical shadow:
+ // load the shadow 64 bits at a time, falling out to a __dfsan_union_load
+ // call if any of the shadows differ.
+ BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
+ IRBuilder<> FallbackIRB(FallbackBB);
+ CallInst *FallbackCall = FallbackIRB.CreateCall2(
+ DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+
+ // Compare each of the shadows stored in the loaded 64 bits to each other,
+ // by computing (WideShadow rotl ShadowWidth) == WideShadow.
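+ // For example, with 16-bit labels a word such as 0xABCD_ABCD_ABCD_ABCD is
+ // unchanged by a 16-bit rotate, whereas any word holding two different
+ // labels compares unequal to its rotation and sends us to the fallback.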
+ IRBuilder<> IRB(Pos);
+ Value *WideAddr =
+ IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
+ Value *WideShadow = IRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
+ Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth);
+ Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth);
+ Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
+ Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
+
+ BasicBlock *Head = Pos->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(Pos);
+ // In the following code, LastBr refers to the previous basic block's
+ // conditional branch instruction, whose true successor is fixed up to point
+ // to the next block during the loop below, or to the tail after the final
+ // iteration.
+ BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
+ ReplaceInstWithInst(Head->getTerminator(), LastBr);
+
+ for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size;
+ Ofs += 64 / DFS.ShadowWidth) {
+ BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
+ IRBuilder<> NextIRB(NextBB);
+ WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1));
+ Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
+ LastBr->setSuccessor(0, NextBB);
+ LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
+ }
+
+ LastBr->setSuccessor(0, Tail);
+ FallbackIRB.CreateBr(Tail);
+ PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
+ Shadow->addIncoming(FallbackCall, FallbackBB);
+ Shadow->addIncoming(TruncShadow, LastBr->getParent());
+ return Shadow;
+ }
+
+ IRBuilder<> IRB(Pos);
+ CallInst *FallbackCall = IRB.CreateCall2(
+ DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size));
+ FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ return FallbackCall;
+}
+
+void DFSanVisitor::visitLoadInst(LoadInst &LI) {
+ uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType());
+ uint64_t Align;
+ if (ClPreserveAlignment) {
+ Align = LI.getAlignment();
+ if (Align == 0)
+ Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType());
+ } else {
+ Align = 1;
+ }
+ IRBuilder<> IRB(&LI);
+ Value *LoadedShadow =
+ DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
+ Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
+ Value *CombinedShadow = DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI);
+ if (CombinedShadow != DFSF.DFS.ZeroShadow)
+ DFSF.NonZeroChecks.insert(CombinedShadow);
+
+ DFSF.setShadow(&LI, CombinedShadow);
+}
+
+void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
+ Value *Shadow, Instruction *Pos) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+ llvm::DenseMap<AllocaInst *, AllocaInst *>::iterator i =
+ AllocaShadowMap.find(AI);
+ if (i != AllocaShadowMap.end()) {
+ IRBuilder<> IRB(Pos);
+ IRB.CreateStore(Shadow, i->second);
+ return;
+ }
+ }
+
+ uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
+ IRBuilder<> IRB(Pos);
+ Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+ if (Shadow == DFS.ZeroShadow) {
+ IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidth);
+ Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
+ Value *ExtShadowAddr =
+ IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
+ IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
+ return;
+ }
+
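+ // Fan the label out into 128-bit vector stores where possible: with 16-bit
+ // labels ShadowVecSize is 8, so e.g. a 20-byte region is covered by two
+ // <8 x i16> splat stores followed by four scalar label stores below.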
+ const unsigned ShadowVecSize = 128 / DFS.ShadowWidth;
+ uint64_t Offset = 0;
+ if (Size >= ShadowVecSize) {
+ VectorType *ShadowVecTy = VectorType::get(DFS.ShadowTy, ShadowVecSize);
+ Value *ShadowVec = UndefValue::get(ShadowVecTy);
+ for (unsigned i = 0; i != ShadowVecSize; ++i) {
+ ShadowVec = IRB.CreateInsertElement(
+ ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
+ }
+ Value *ShadowVecAddr =
+ IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
+ do {
+ Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset);
+ IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
+ Size -= ShadowVecSize;
+ ++Offset;
+ } while (Size >= ShadowVecSize);
+ Offset *= ShadowVecSize;
+ }
+ while (Size > 0) {
+ Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset);
+ IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
+ --Size;
+ ++Offset;
+ }
+}
+
+void DFSanVisitor::visitStoreInst(StoreInst &SI) {
+ uint64_t Size =
+ DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType());
+ uint64_t Align;
+ if (ClPreserveAlignment) {
+ Align = SI.getAlignment();
+ if (Align == 0)
+ Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType());
+ } else {
+ Align = 1;
+ }
+ DFSF.storeShadow(SI.getPointerOperand(), Size, Align,
+ DFSF.getShadow(SI.getValueOperand()), &SI);
+}
+
+void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
+ visitOperandShadowInst(BO);
+}
+
+void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); }
+
+void DFSanVisitor::visitCmpInst(CmpInst &CI) { visitOperandShadowInst(CI); }
+
+void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ visitOperandShadowInst(GEPI);
+}
+
+void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
+ bool AllLoadsStores = true;
+ for (Instruction::use_iterator i = I.use_begin(), e = I.use_end(); i != e;
+ ++i) {
+ if (isa<LoadInst>(*i))
+ continue;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(*i)) {
+ if (SI->getPointerOperand() == &I)
+ continue;
+ }
+
+ AllLoadsStores = false;
+ break;
+ }
+ if (AllLoadsStores) {
+ IRBuilder<> IRB(&I);
+ DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy);
+ }
+ DFSF.setShadow(&I, DFSF.DFS.ZeroShadow);
+}
+
+void DFSanVisitor::visitSelectInst(SelectInst &I) {
+ Value *CondShadow = DFSF.getShadow(I.getCondition());
+ Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
+ Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
+
+ if (isa<VectorType>(I.getCondition()->getType())) {
+ DFSF.setShadow(
+ &I, DFSF.DFS.combineShadows(
+ CondShadow,
+ DFSF.DFS.combineShadows(TrueShadow, FalseShadow, &I), &I));
+ } else {
+ Value *ShadowSel;
+ if (TrueShadow == FalseShadow) {
+ ShadowSel = TrueShadow;
+ } else {
+ ShadowSel =
+ SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
+ }
+ DFSF.setShadow(&I, DFSF.DFS.combineShadows(CondShadow, ShadowSel, &I));
+ }
+}
+
+void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *ValShadow = DFSF.getShadow(I.getValue());
+ IRB.CreateCall3(
+ DFSF.DFS.DFSanSetLabelFn, ValShadow,
+ IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
+ IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy));
+}
+
+void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
+ IRBuilder<> IRB(&I);
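+ // A memcpy/memmove of N application bytes is mirrored by the same
+ // intrinsic copying N * (ShadowWidth / 8) shadow bytes between the
+ // corresponding shadow regions, with the alignment scaled to match.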
+ Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
+ Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
+ Value *LenShadow = IRB.CreateMul(
+ I.getLength(),
+ ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8));
+ Value *AlignShadow;
+ if (ClPreserveAlignment) {
+ AlignShadow = IRB.CreateMul(I.getAlignmentCst(),
+ ConstantInt::get(I.getAlignmentCst()->getType(),
+ DFSF.DFS.ShadowWidth / 8));
+ } else {
+ AlignShadow = ConstantInt::get(I.getAlignmentCst()->getType(),
+ DFSF.DFS.ShadowWidth / 8);
+ }
+ Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
+ DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
+ SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
+ IRB.CreateCall5(I.getCalledValue(), DestShadow, SrcShadow, LenShadow,
+ AlignShadow, I.getVolatileCst());
+}
+
+void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
+ if (!DFSF.IsNativeABI && RI.getReturnValue()) {
+ switch (DFSF.IA) {
+ case DataFlowSanitizer::IA_TLS: {
+ Value *S = DFSF.getShadow(RI.getReturnValue());
+ IRBuilder<> IRB(&RI);
+ IRB.CreateStore(S, DFSF.getRetvalTLS());
+ break;
+ }
+ case DataFlowSanitizer::IA_Args: {
+ IRBuilder<> IRB(&RI);
+ Type *RT = DFSF.F->getFunctionType()->getReturnType();
+ Value *InsVal =
+ IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
+ Value *InsShadow =
+ IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
+ RI.setOperand(0, InsShadow);
+ break;
+ }
+ }
+ }
+}
+
+void DFSanVisitor::visitCallSite(CallSite CS) {
+ Function *F = CS.getCalledFunction();
+ if ((F && F->isIntrinsic()) || isa<InlineAsm>(CS.getCalledValue())) {
+ visitOperandShadowInst(*CS.getInstruction());
+ return;
+ }
+
+ IRBuilder<> IRB(CS.getInstruction());
+
+ DenseMap<Value *, Function *>::iterator i =
+ DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue());
+ if (i != DFSF.DFS.UnwrappedFnMap.end()) {
+ Function *F = i->second;
+ switch (DFSF.DFS.getWrapperKind(F)) {
+ case DataFlowSanitizer::WK_Warning: {
+ CS.setCalledFunction(F);
+ IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
+ IRB.CreateGlobalStringPtr(F->getName()));
+ DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
+ return;
+ }
+ case DataFlowSanitizer::WK_Discard: {
+ CS.setCalledFunction(F);
+ DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
+ return;
+ }
+ case DataFlowSanitizer::WK_Functional: {
+ CS.setCalledFunction(F);
+ visitOperandShadowInst(*CS.getInstruction());
+ return;
+ }
+ case DataFlowSanitizer::WK_Custom: {
+ // Don't try to handle invokes of custom functions; it's too complicated.
+ // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
+ // wrapper.
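+ // For a custom function such as int f(int a), the called __dfsw_ wrapper
+ // has (roughly) the signature
+ //   int __dfsw_f(int a, dfsan_label a_label, dfsan_label *ret_label);
+ // i.e. the original arguments, then one label per argument, then (for
+ // non-void functions) a pointer through which the return label is written.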
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ FunctionType *FT = F->getFunctionType();
+ FunctionType *CustomFT = DFSF.DFS.getCustomFunctionType(FT);
+ std::string CustomFName = "__dfsw_";
+ CustomFName += F->getName();
+ Constant *CustomF =
+ DFSF.DFS.Mod->getOrInsertFunction(CustomFName, CustomFT);
+ if (Function *CustomFn = dyn_cast<Function>(CustomF)) {
+ CustomFn->copyAttributesFrom(F);
+
+ // Custom functions returning non-void will write to the return label.
+ if (!FT->getReturnType()->isVoidTy()) {
+ CustomFn->removeAttributes(AttributeSet::FunctionIndex,
+ DFSF.DFS.ReadOnlyNoneAttrs);
+ }
+ }
+
+ std::vector<Value *> Args;
+
+ CallSite::arg_iterator i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) {
+ Type *T = (*i)->getType();
+ FunctionType *ParamFT;
+ if (isa<PointerType>(T) &&
+ (ParamFT = dyn_cast<FunctionType>(
+ cast<PointerType>(T)->getElementType()))) {
+ std::string TName = "dfst";
+ TName += utostr(FT->getNumParams() - n);
+ TName += "$";
+ TName += F->getName();
+ Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
+ Args.push_back(T);
+ Args.push_back(
+ IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
+ } else {
+ Args.push_back(*i);
+ }
+ }
+
+ i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(DFSF.getShadow(*i));
+
+ if (!FT->getReturnType()->isVoidTy()) {
+ if (!DFSF.LabelReturnAlloca) {
+ DFSF.LabelReturnAlloca =
+ new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn",
+ DFSF.F->getEntryBlock().begin());
+ }
+ Args.push_back(DFSF.LabelReturnAlloca);
+ }
+
+ CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
+ CustomCI->setCallingConv(CI->getCallingConv());
+ CustomCI->setAttributes(CI->getAttributes());
+
+ if (!FT->getReturnType()->isVoidTy()) {
+ LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca);
+ DFSF.setShadow(CustomCI, LabelLoad);
+ }
+
+ CI->replaceAllUsesWith(CustomCI);
+ CI->eraseFromParent();
+ return;
+ }
+ break;
+ }
+ }
+ }
+
+ FunctionType *FT = cast<FunctionType>(
+ CS.getCalledValue()->getType()->getPointerElementType());
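+ // Under IA_TLS the labels travel out of band: each argument's label is
+ // stored to the argument TLS array before the call, and the return label
+ // is loaded from the retval TLS slot immediately after it.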
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
+ for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) {
+ IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)),
+ DFSF.getArgTLS(i, CS.getInstruction()));
+ }
+ }
+
+ Instruction *Next = 0;
+ if (!CS.getType()->isVoidTy()) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ if (II->getNormalDest()->getSinglePredecessor()) {
+ Next = II->getNormalDest()->begin();
+ } else {
+ BasicBlock *NewBB =
+ SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DFS);
+ Next = NewBB->begin();
+ }
+ } else {
+ Next = CS->getNextNode();
+ }
+
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
+ IRBuilder<> NextIRB(Next);
+ LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS());
+ DFSF.SkipInsts.insert(LI);
+ DFSF.setShadow(CS.getInstruction(), LI);
+ DFSF.NonZeroChecks.insert(LI);
+ }
+ }
+
+ // Do all instrumentation for IA_Args down here so that we defer any
+ // tampering with the CFG that SplitEdge may be able to detect.
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
+ FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
+ Value *Func =
+ IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT));
+ std::vector<Value *> Args;
+
+ CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(*i);
+
+ i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(DFSF.getShadow(*i));
+
+ if (FT->isVarArg()) {
+ unsigned VarArgSize = CS.arg_size() - FT->getNumParams();
+ ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
+ AllocaInst *VarArgShadow =
+ new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin());
+ Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0));
+ for (unsigned n = 0; i != e; ++i, ++n) {
+ IRB.CreateStore(DFSF.getShadow(*i),
+ IRB.CreateConstGEP2_32(VarArgShadow, 0, n));
+ Args.push_back(*i);
+ }
+ }
+
+ CallSite NewCS;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ NewCS = IRB.CreateInvoke(Func, II->getNormalDest(), II->getUnwindDest(),
+ Args);
+ } else {
+ NewCS = IRB.CreateCall(Func, Args);
+ }
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(CS.getAttributes().removeAttributes(
+ *DFSF.DFS.Ctx, AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType(),
+ AttributeSet::ReturnIndex)));
+
+ if (Next) {
+ ExtractValueInst *ExVal =
+ ExtractValueInst::Create(NewCS.getInstruction(), 0, "", Next);
+ DFSF.SkipInsts.insert(ExVal);
+ ExtractValueInst *ExShadow =
+ ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next);
+ DFSF.SkipInsts.insert(ExShadow);
+ DFSF.setShadow(ExVal, ExShadow);
+ DFSF.NonZeroChecks.insert(ExShadow);
+
+ CS.getInstruction()->replaceAllUsesWith(ExVal);
+ }
+
+ CS.getInstruction()->eraseFromParent();
+ }
+}
+
+void DFSanVisitor::visitPHINode(PHINode &PN) {
+ PHINode *ShadowPN =
+ PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN);
+
+ // Give the shadow phi node valid predecessors to fool SplitEdge into working.
+ Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy);
+ for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e;
+ ++i) {
+ ShadowPN->addIncoming(UndefShadow, *i);
+ }
+
+ DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN));
+ DFSF.setShadow(&PN, ShadowPN);
+}
diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp
index 651381d..f50a044 100644
--- a/lib/Transforms/Instrumentation/DebugIR.cpp
+++ b/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -25,6 +25,7 @@
#include "llvm/InstVisitor.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -401,7 +402,7 @@ private:
Type *PointeeTy = T->getPointerElementType();
if (!(N = getType(PointeeTy)))
N = Builder.createPointerType(
- getOrCreateType(PointeeTy), Layout.getPointerSizeInBits(),
+ getOrCreateType(PointeeTy), Layout.getPointerTypeSizeInBits(T),
Layout.getPrefTypeAlignment(T), getTypeName(T));
} else if (T->isArrayTy()) {
SmallVector<Value *, 1> Subrange;
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
deleted file mode 100644
index a2459fb..0000000
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments the specified program with counters for edge profiling.
-// Edge profiling can give a reasonable approximation of the hot paths through a
-// program, and is used for a wide variety of program transformations.
-//
-// Note that this implementation is very naive. We insert a counter for *every*
-// edge in the program, instead of using control flow information to prune the
-// number of counters inserted.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "insert-edge-profiling"
-
-#include "llvm/Transforms/Instrumentation.h"
-#include "ProfilingUtils.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumEdgesInserted, "The # of edges inserted.");
-
-namespace {
- class EdgeProfiler : public ModulePass {
- bool runOnModule(Module &M);
- public:
- static char ID; // Pass identification, replacement for typeid
- EdgeProfiler() : ModulePass(ID) {
- initializeEdgeProfilerPass(*PassRegistry::getPassRegistry());
- }
-
- virtual const char *getPassName() const {
- return "Edge Profiler";
- }
- };
-}
-
-char EdgeProfiler::ID = 0;
-INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
- "Insert instrumentation for edge profiling", false, false)
-
-ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
-
-bool EdgeProfiler::runOnModule(Module &M) {
- Function *Main = M.getFunction("main");
- if (Main == 0) {
- errs() << "WARNING: cannot insert edge profiling into a module"
- << " with no main function!\n";
- return false; // No main, no instrumentation!
- }
-
- std::set<BasicBlock*> BlocksToInstrument;
- unsigned NumEdges = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- // Reserve space for (0,entry) edge.
- ++NumEdges;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- // Keep track of which blocks need to be instrumented. We don't want to
- // instrument blocks that are added as the result of breaking critical
- // edges!
- BlocksToInstrument.insert(BB);
- NumEdges += BB->getTerminator()->getNumSuccessors();
- }
- }
-
- Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);
- GlobalVariable *Counters =
- new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "EdgeProfCounters");
- NumEdgesInserted = NumEdges;
-
- // Instrument all of the edges...
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- // Create counter for (0,entry) edge.
- IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks
- // Okay, we have to add a counter of each outgoing edge. If the
- // outgoing edge is not critical don't split it, just insert the counter
- // in the source or destination of the edge.
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- // If the edge is critical, split it.
- SplitCriticalEdge(TI, s, this);
-
- // Okay, we are guaranteed that the edge is no longer critical. If we
- // only have a single successor, insert the counter in this block,
- // otherwise insert it in the successor block.
- if (TI->getNumSuccessors() == 1) {
- // Insert counter at the start of the block
- IncrementCounterInBlock(BB, i++, Counters, false);
- } else {
- // Insert counter at the start of the block
- IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters);
- }
- }
- }
- }
-
- // Add the initialization call to main.
- InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
- return true;
-}
-
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 4c2681f..206bffb 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -17,7 +17,6 @@
#define DEBUG_TYPE "insert-gcov-profiling"
#include "llvm/Transforms/Instrumentation.h"
-#include "ProfilingUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -103,6 +102,7 @@ namespace {
Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
+ Constant *getSummaryInfoFunc();
Constant *getDeleteWriteoutFunctionListFunc();
Constant *getDeleteFlushFunctionListFunc();
Constant *getEndFileFunc();
@@ -519,15 +519,15 @@ bool GCOVProfiler::emitProfileArcs() {
TerminatorInst *TI = BB->getTerminator();
int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
if (Successors) {
- IRBuilder<> Builder(TI);
-
if (Successors == 1) {
+ IRBuilder<> Builder(BB->getFirstInsertionPt());
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge);
Value *Count = Builder.CreateLoad(Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ IRBuilder<> Builder(BI);
Value *Sel = Builder.CreateSelect(BI->getCondition(),
Builder.getInt64(Edge),
Builder.getInt64(Edge + 1));
@@ -543,6 +543,7 @@ bool GCOVProfiler::emitProfileArcs() {
for (int i = 0; i != Successors; ++i)
ComplexEdgeSuccs.insert(TI->getSuccessor(i));
}
+
Edge += Successors;
}
}
@@ -554,14 +555,13 @@ bool GCOVProfiler::emitProfileArcs() {
GlobalVariable *EdgeState = getEdgeStateValue();
for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
- IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+ IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt());
Builder.CreateStore(Builder.getInt32(i), EdgeState);
}
+
for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
- // call runtime to perform increment
- BasicBlock::iterator InsertPt =
- ComplexEdgeSuccs[i+1]->getFirstInsertionPt();
- IRBuilder<> Builder(InsertPt);
+ // Call runtime to perform increment.
+ IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt());
Value *CounterPtrArray =
Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
i * ComplexEdgePreds.size());
@@ -599,7 +599,7 @@ bool GCOVProfiler::emitProfileArcs() {
};
FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
- // Inialize the environment and register the local writeout and flush
+ // Initialize the environment and register the local writeout and flush
// functions.
Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
Builder.CreateCall2(GCOVInit, WriteoutF, FlushF);
@@ -701,6 +701,11 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
+Constant *GCOVProfiler::getSummaryInfoFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
+}
+
Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy);
@@ -747,6 +752,7 @@ Function *GCOVProfiler::insertCounterWriteout(
Constant *StartFile = getStartFileFunc();
Constant *EmitFunction = getEmitFunctionFunc();
Constant *EmitArcs = getEmitArcsFunc();
+ Constant *SummaryInfo = getSummaryInfoFunc();
Constant *EndFile = getEndFileFunc();
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
@@ -773,6 +779,7 @@ Function *GCOVProfiler::insertCounterWriteout(
Builder.getInt32(Arcs),
Builder.CreateConstGEP2_64(GV, 0, 0));
}
+ Builder.CreateCall(SummaryInfo);
Builder.CreateCall(EndFile);
}
}
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 9f35396..b1bea38 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -24,12 +24,10 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeAddressSanitizerPass(Registry);
initializeAddressSanitizerModulePass(Registry);
initializeBoundsCheckingPass(Registry);
- initializeEdgeProfilerPass(Registry);
initializeGCOVProfilerPass(Registry);
- initializeOptimalEdgeProfilerPass(Registry);
- initializePathProfilerPass(Registry);
initializeMemorySanitizerPass(Registry);
initializeThreadSanitizerPass(Registry);
+ initializeDataFlowSanitizerPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 0251f16..d547adc 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -66,6 +66,31 @@
/// avoids storing origin to memory when a fully initialized value is stored.
/// This way it avoids needlessly overwriting the origin of the 4-byte region
/// on a short (i.e. 1-byte) clean store, and it is also good for performance.
+///
+/// Atomic handling.
+///
+/// Ideally, every atomic store of an application value should update the
+/// corresponding shadow location in an atomic way. Unfortunately, an atomic
+/// store to two disjoint locations cannot be done without severe slowdown.
+///
+/// Therefore, we implement an approximation that may err on the safe side.
+/// In this implementation, every atomically accessed location in the program
+/// may only change from (partially) uninitialized to fully initialized, but
+/// not the other way around. We load the shadow _after_ the application load,
+/// and we store the shadow _before_ the app store. Also, we always store clean
+/// shadow (if the application store is atomic). This way, if the store-load
+/// pair constitutes a happens-before arc, the shadow store and load are
+/// correctly ordered so that the load gets either the value that was stored or
+/// some later value (which is always clean).
+///
+/// This does not work very well with Compare-And-Swap (CAS) and
+/// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
+/// must store the new shadow before the app operation, and load the shadow
+/// after the app operation. Computers don't work this way. The current
+/// implementation ignores the load aspect of CAS/RMW, always returning a
+/// clean value. It implements the store part as a simple atomic store of a
+/// clean shadow.
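+///
+/// For example, an application load with monotonic ordering is upgraded to
+/// acquire so that the shadow load emitted after it cannot be reordered
+/// before the application load, and a monotonic store is symmetrically
+/// upgraded to release.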
+
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "msan"
@@ -157,6 +182,18 @@ static cl::opt<std::string> ClBlacklistFile("msan-blacklist",
cl::desc("File containing the list of functions where MemorySanitizer "
"should not report bugs"), cl::Hidden);
+// Experimental. Wraps all indirect calls in the instrumented code with
+// a call to the given function. This is needed to assist the dynamic
+// helper tool (MSanDR) in regaining control on transitions between
+// instrumented and non-instrumented code.
+static cl::opt<std::string> ClWrapIndirectCalls("msan-wrap-indirect-calls",
+ cl::desc("Wrap indirect calls with a given function"),
+ cl::Hidden);
+
+static cl::opt<bool> ClWrapIndirectCallsFast("msan-wrap-indirect-calls-fast",
+ cl::desc("Do not wrap indirect calls with target in the same module"),
+ cl::Hidden, cl::init(true));
+
namespace {
/// \brief An instrumentation pass implementing detection of uninitialized
@@ -168,12 +205,12 @@ class MemorySanitizer : public FunctionPass {
public:
MemorySanitizer(bool TrackOrigins = false,
StringRef BlacklistFile = StringRef())
- : FunctionPass(ID),
- TrackOrigins(TrackOrigins || ClTrackOrigins),
- TD(0),
- WarningFn(0),
- BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile) { }
+ : FunctionPass(ID),
+ TrackOrigins(TrackOrigins || ClTrackOrigins),
+ TD(0),
+ WarningFn(0),
+ BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile),
+ WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {}
const char *getPassName() const { return "MemorySanitizer"; }
bool runOnFunction(Function &F);
bool doInitialization(Module &M);
@@ -207,13 +244,16 @@ class MemorySanitizer : public FunctionPass {
/// function.
GlobalVariable *OriginTLS;
+ GlobalVariable *MsandrModuleStart;
+ GlobalVariable *MsandrModuleEnd;
+
/// \brief The run-time callback to print a warning.
Value *WarningFn;
/// \brief Run-time helper that copies origin info for a memory range.
Value *MsanCopyOriginFn;
/// \brief Run-time helper that generates a new origin value for a stack
/// allocation.
- Value *MsanSetAllocaOriginFn;
+ Value *MsanSetAllocaOrigin4Fn;
/// \brief Run-time helper that poisons stack on function entry.
Value *MsanPoisonStackFn;
/// \brief MSan runtime replacements for memmove, memcpy and memset.
@@ -236,6 +276,12 @@ class MemorySanitizer : public FunctionPass {
/// \brief An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
+ bool WrapIndirectCalls;
+ /// \brief Run-time wrapper for indirect calls.
+ Value *IndirectCallWrapperFn;
+ // Argument and return type of IndirectCallWrapperFn: void (*f)(void).
+ Type *AnyFunctionPtrTy;
+
friend struct MemorySanitizerVisitor;
friend struct VarArgAMD64Helper;
};
@@ -281,9 +327,9 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
MsanCopyOriginFn = M.getOrInsertFunction(
"__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IntptrTy, NULL);
- MsanSetAllocaOriginFn = M.getOrInsertFunction(
- "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
- IRB.getInt8PtrTy(), NULL);
+ MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
+ "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
+ IRB.getInt8PtrTy(), IntptrTy, NULL);
MsanPoisonStackFn = M.getOrInsertFunction(
"__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
MemmoveFn = M.getOrInsertFunction(
@@ -329,6 +375,24 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
+
+ if (WrapIndirectCalls) {
+ AnyFunctionPtrTy =
+ PointerType::getUnqual(FunctionType::get(IRB.getVoidTy(), false));
+ IndirectCallWrapperFn = M.getOrInsertFunction(
+ ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL);
+ }
+
+ if (ClWrapIndirectCallsFast) {
+ MsandrModuleStart = new GlobalVariable(
+ M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
+ 0, "__executable_start");
+ MsandrModuleStart->setVisibility(GlobalVariable::HiddenVisibility);
+ MsandrModuleEnd = new GlobalVariable(
+ M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
+ 0, "_end");
+ MsandrModuleEnd->setVisibility(GlobalVariable::HiddenVisibility);
+ }
}
/// \brief Module-level initialization.
@@ -338,7 +402,7 @@ bool MemorySanitizer::doInitialization(Module &M) {
TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new SpecialCaseList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
C = &(M.getContext());
unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0);
switch (PtrSize) {
@@ -423,22 +487,27 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MemorySanitizer &MS;
SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
ValueMap<Value*, Value*> ShadowMap, OriginMap;
+ OwningPtr<VarArgHelper> VAHelper;
+
+ // The following flags disable parts of MSan instrumentation based on
+ // blacklist contents and command-line options.
bool InsertChecks;
bool LoadShadow;
bool PoisonStack;
bool PoisonUndef;
- OwningPtr<VarArgHelper> VAHelper;
+ bool CheckReturnValue;
struct ShadowOriginAndInsertPoint {
- Instruction *Shadow;
- Instruction *Origin;
+ Value *Shadow;
+ Value *Origin;
Instruction *OrigIns;
- ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I)
+ ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
: Shadow(S), Origin(O), OrigIns(I) { }
ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
};
SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
SmallVector<Instruction*, 16> StoreList;
+ SmallVector<CallSite, 16> IndirectCallList;
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
: F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
@@ -449,6 +518,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
LoadShadow = SanitizeFunction;
PoisonStack = SanitizeFunction && ClPoisonStack;
PoisonUndef = SanitizeFunction && ClPoisonUndef;
+ // FIXME: Consider using SpecialCaseList to specify a list of functions that
+ // must always return fully initialized values. For now, we hardcode "main".
+ CheckReturnValue = SanitizeFunction && (F.getName() == "main");
DEBUG(if (!InsertChecks)
dbgs() << "MemorySanitizer is not inserting checks into '"
@@ -462,7 +534,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Value *Val = I.getValueOperand();
Value *Addr = I.getPointerOperand();
- Value *Shadow = getShadow(Val);
+ Value *Shadow = I.isAtomic() ? getCleanShadow(Val) : getShadow(Val);
Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
StoreInst *NewSI =
@@ -471,7 +543,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
(void)NewSI;
if (ClCheckAccessAddress)
- insertCheck(Addr, &I);
+ insertShadowCheck(Addr, &I);
+
+ if (I.isAtomic())
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
if (MS.TrackOrigins) {
unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
@@ -481,11 +556,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
- Constant *Cst = dyn_cast_or_null<Constant>(ConvertedShadow);
// TODO(eugenis): handle non-zero constant shadow by inserting an
// unconditional check (can not simply fail compilation as this could
// be in the dead code).
- if (Cst)
+ if (isa<Constant>(ConvertedShadow))
continue;
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
@@ -503,12 +577,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void materializeChecks() {
for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
- Instruction *Shadow = InstrumentationList[i].Shadow;
+ Value *Shadow = InstrumentationList[i].Shadow;
Instruction *OrigIns = InstrumentationList[i].OrigIns;
IRBuilder<> IRB(OrigIns);
DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
+ // See the comment in materializeStores().
+ if (isa<Constant>(ConvertedShadow))
+ continue;
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm =
@@ -518,7 +595,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.SetInsertPoint(CheckTerm);
if (MS.TrackOrigins) {
- Instruction *Origin = InstrumentationList[i].Origin;
+ Value *Origin = InstrumentationList[i].Origin;
IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
MS.OriginTLS);
}
@@ -530,6 +607,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << "DONE:\n" << F);
}
+ void materializeIndirectCalls() {
+ for (size_t i = 0, n = IndirectCallList.size(); i < n; i++) {
+ CallSite CS = IndirectCallList[i];
+ Instruction *I = CS.getInstruction();
+ BasicBlock *B = I->getParent();
+ IRBuilder<> IRB(I);
+ Value *Fn0 = CS.getCalledValue();
+ Value *Fn = IRB.CreateBitCast(Fn0, MS.AnyFunctionPtrTy);
+
+ if (ClWrapIndirectCallsFast) {
+ // Check that call target is inside this module limits.
+ Value *Start =
+ IRB.CreateBitCast(MS.MsandrModuleStart, MS.AnyFunctionPtrTy);
+ Value *End = IRB.CreateBitCast(MS.MsandrModuleEnd, MS.AnyFunctionPtrTy);
+
+ Value *NotInThisModule = IRB.CreateOr(IRB.CreateICmpULT(Fn, Start),
+ IRB.CreateICmpUGE(Fn, End));
+
+ PHINode *NewFnPhi =
+ IRB.CreatePHI(Fn0->getType(), 2, "msandr.indirect_target");
+
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ cast<Instruction>(NotInThisModule),
+ /* Unreachable */ false, MS.ColdCallWeights);
+
+ IRB.SetInsertPoint(CheckTerm);
+ // Slow path: call wrapper function to possibly transform the call
+ // target.
+ Value *NewFn = IRB.CreateBitCast(
+ IRB.CreateCall(MS.IndirectCallWrapperFn, Fn), Fn0->getType());
+
+ NewFnPhi->addIncoming(Fn0, B);
+ NewFnPhi->addIncoming(NewFn, dyn_cast<Instruction>(NewFn)->getParent());
+ CS.setCalledFunction(NewFnPhi);
+ } else {
+ Value *NewFn = IRB.CreateBitCast(
+ IRB.CreateCall(MS.IndirectCallWrapperFn, Fn), Fn0->getType());
+ CS.setCalledFunction(NewFn);
+ }
+ }
+ }
+
/// \brief Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
MS.initializeCallbacks(*F.getParent());
@@ -572,6 +691,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Insert shadow value checks.
materializeChecks();
+ // Wrap indirect calls.
+ materializeIndirectCalls();
+
return true;
}
@@ -835,20 +957,63 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Remember the place where a shadow check should be inserted.
///
/// This location will be later instrumented with a check that will print a
- /// UMR warning in runtime if the value is not fully defined.
- void insertCheck(Value *Val, Instruction *OrigIns) {
- assert(Val);
+ /// UMR warning at runtime if the shadow value is not 0.
+ void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
+ assert(Shadow);
if (!InsertChecks) return;
- Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
- if (!Shadow) return;
#ifndef NDEBUG
Type *ShadowTy = Shadow->getType();
assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
"Can only insert checks for integer and vector shadow types");
#endif
- Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
InstrumentationList.push_back(
- ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+ ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+ }
+
+ /// \brief Remember the place where a shadow check should be inserted.
+ ///
+ /// This location will be later instrumented with a check that will print a
+ /// UMR warning at runtime if the value is not fully defined.
+ void insertShadowCheck(Value *Val, Instruction *OrigIns) {
+ assert(Val);
+ Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
+ if (!Shadow) return;
+ Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
+ insertShadowCheck(Shadow, Origin, OrigIns);
+ }
+
+ AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
+ switch (a) {
+ case NotAtomic:
+ return NotAtomic;
+ case Unordered:
+ case Monotonic:
+ case Release:
+ return Release;
+ case Acquire:
+ case AcquireRelease:
+ return AcquireRelease;
+ case SequentiallyConsistent:
+ return SequentiallyConsistent;
+ }
+ llvm_unreachable("Unknown ordering");
+ }
+
+ AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
+ switch (a) {
+ case NotAtomic:
+ return NotAtomic;
+ case Unordered:
+ case Monotonic:
+ case Acquire:
+ return Acquire;
+ case Release:
+ case AcquireRelease:
+ return AcquireRelease;
+ case SequentiallyConsistent:
+ return SequentiallyConsistent;
+ }
+ llvm_unreachable("Unknown ordering");
}
// ------------------- Visitors.
@@ -859,7 +1024,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Optionally, checks that the load address is fully defined.
void visitLoadInst(LoadInst &I) {
assert(I.getType()->isSized() && "Load type must have size");
- IRBuilder<> IRB(&I);
+ IRBuilder<> IRB(I.getNextNode());
Type *ShadowTy = getShadowTy(&I);
Value *Addr = I.getPointerOperand();
if (LoadShadow) {
@@ -871,7 +1036,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
if (ClCheckAccessAddress)
- insertCheck(I.getPointerOperand(), &I);
+ insertShadowCheck(I.getPointerOperand(), &I);
+
+ if (I.isAtomic())
+ I.setOrdering(addAcquireOrdering(I.getOrdering()));
if (MS.TrackOrigins) {
if (LoadShadow) {
@@ -892,9 +1060,40 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
StoreList.push_back(&I);
}
+ void handleCASOrRMW(Instruction &I) {
+ assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
+
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getOperand(0);
+ Value *ShadowPtr = getShadowPtr(Addr, I.getType(), IRB);
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+
+ // Only test the conditional argument of the cmpxchg instruction.
+ // The other argument can potentially be uninitialized, but we cannot
+ // detect this situation reliably without possible false positives.
+ if (isa<AtomicCmpXchgInst>(I))
+ insertShadowCheck(I.getOperand(1), &I);
+
+ IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
+
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &I) {
+ handleCASOrRMW(I);
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ }
+
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
+ handleCASOrRMW(I);
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ }
+
// Vector manipulation.
void visitExtractElementInst(ExtractElementInst &I) {
- insertCheck(I.getOperand(1), &I);
+ insertShadowCheck(I.getOperand(1), &I);
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
"_msprop"));
@@ -902,7 +1101,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitInsertElementInst(InsertElementInst &I) {
- insertCheck(I.getOperand(2), &I);
+ insertShadowCheck(I.getOperand(2), &I);
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
I.getOperand(2), "_msprop"));
@@ -910,7 +1109,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitShuffleVectorInst(ShuffleVectorInst &I) {
- insertCheck(I.getOperand(2), &I);
+ insertShadowCheck(I.getOperand(2), &I);
IRBuilder<> IRB(&I);
setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
I.getOperand(2), "_msprop"));
@@ -1109,18 +1308,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Cast between two shadow types, extending or truncating as
/// necessary.
- Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy) {
+ Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
+ bool Signed = false) {
Type *srcTy = V->getType();
if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
- return IRB.CreateIntCast(V, dstTy, false);
+ return IRB.CreateIntCast(V, dstTy, Signed);
if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
- return IRB.CreateIntCast(V, dstTy, false);
+ return IRB.CreateIntCast(V, dstTy, Signed);
size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
Value *V2 =
- IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), false);
+ IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
return IRB.CreateBitCast(V2, dstTy);
// TODO: handle struct types.
}
@@ -1145,7 +1345,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleDiv(Instruction &I) {
IRBuilder<> IRB(&I);
// Strict on the second argument.
- insertCheck(I.getOperand(1), &I);
+ insertShadowCheck(I.getOperand(1), &I);
setShadow(&I, getShadow(&I, 0));
setOrigin(&I, getOrigin(&I, 0));
}
@@ -1428,7 +1628,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
if (ClCheckAccessAddress)
- insertCheck(Addr, &I);
+ insertShadowCheck(Addr, &I);
// FIXME: use ClStoreCleanOrigin
// FIXME: factor out common code from materializeStores
@@ -1455,9 +1655,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setShadow(&I, getCleanShadow(&I));
}
-
if (ClCheckAccessAddress)
- insertCheck(Addr, &I);
+ insertShadowCheck(Addr, &I);
if (MS.TrackOrigins) {
if (LoadShadow)
@@ -1554,11 +1753,119 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getOrigin(Op));
}
+ // \brief Instrument vector convert intrinsic.
+ //
+ // This function instruments intrinsics like cvtsi2ss:
+ // %Out = int_xxx_cvtyyy(%ConvertOp)
+ // or
+ // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
+ // The intrinsic converts \p NumUsedElements elements of \p ConvertOp to the
+ // same number of \p Out elements, and (if it has 2 arguments) copies the
+ // rest of the elements from \p CopyOp.
+ // In most cases the conversion involves a floating-point value, which may
+ // trigger a hardware exception when not fully initialized. For this reason
+ // we require \p ConvertOp[0:NumUsedElements] to be fully initialized and
+ // trap otherwise.
+ // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
+ // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
+ // return a fully initialized value.
+ void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
+ IRBuilder<> IRB(&I);
+ Value *CopyOp, *ConvertOp;
+
+ switch (I.getNumArgOperands()) {
+ case 2:
+ CopyOp = I.getArgOperand(0);
+ ConvertOp = I.getArgOperand(1);
+ break;
+ case 1:
+ ConvertOp = I.getArgOperand(0);
+ CopyOp = NULL;
+ break;
+ default:
+ llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
+ }
+
+ // The first *NumUsedElements* elements of ConvertOp are converted to the
+ // same number of output elements. The rest of the output is copied from
+ // CopyOp, or (if not available) filled with zeroes.
+ // Combine shadow for elements of ConvertOp that are used in this operation,
+ // and insert a check.
+ // FIXME: consider propagating shadow of ConvertOp, at least in the case of
+ // int->any conversion.
+ Value *ConvertShadow = getShadow(ConvertOp);
+ Value *AggShadow = 0;
+ if (ConvertOp->getType()->isVectorTy()) {
+ AggShadow = IRB.CreateExtractElement(
+ ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+ for (int i = 1; i < NumUsedElements; ++i) {
+ Value *MoreShadow = IRB.CreateExtractElement(
+ ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
+ AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
+ }
+ } else {
+ AggShadow = ConvertShadow;
+ }
+ assert(AggShadow->getType()->isIntegerTy());
+ insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
+
+ // Build result shadow by zero-filling parts of CopyOp shadow that come from
+ // ConvertOp.
+ if (CopyOp) {
+ assert(CopyOp->getType() == I.getType());
+ assert(CopyOp->getType()->isVectorTy());
+ Value *ResultShadow = getShadow(CopyOp);
+ Type *EltTy = ResultShadow->getType()->getVectorElementType();
+ for (int i = 0; i < NumUsedElements; ++i) {
+ ResultShadow = IRB.CreateInsertElement(
+ ResultShadow, ConstantInt::getNullValue(EltTy),
+ ConstantInt::get(IRB.getInt32Ty(), i));
+ }
+ setShadow(&I, ResultShadow);
+ setOrigin(&I, getOrigin(CopyOp));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+ }
+
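To make the shadow aggregation above concrete: the used lanes' shadows are OR-combined into one value that feeds a single check. A plain-array sketch of the same reduction (illustrative C++, arrays standing in for LLVM vectors):

#include <cstdint>

// OR together the shadow of the lanes the conversion actually reads. A
// nonzero result means at least one used lane is partly uninitialized,
// which corresponds to the single insertShadowCheck() emitted above.
uint32_t combineUsedShadow(const uint32_t *Shadow, int NumUsed) {
  uint32_t Agg = Shadow[0];
  for (int i = 1; i < NumUsed; ++i)
    Agg |= Shadow[i];
  return Agg;
}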
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case llvm::Intrinsic::bswap:
handleBswap(I);
break;
+ case llvm::Intrinsic::x86_avx512_cvtsd2usi64:
+ case llvm::Intrinsic::x86_avx512_cvtsd2usi:
+ case llvm::Intrinsic::x86_avx512_cvtss2usi64:
+ case llvm::Intrinsic::x86_avx512_cvtss2usi:
+ case llvm::Intrinsic::x86_avx512_cvttss2usi64:
+ case llvm::Intrinsic::x86_avx512_cvttss2usi:
+ case llvm::Intrinsic::x86_avx512_cvttsd2usi64:
+ case llvm::Intrinsic::x86_avx512_cvttsd2usi:
+ case llvm::Intrinsic::x86_avx512_cvtusi2sd:
+ case llvm::Intrinsic::x86_avx512_cvtusi2ss:
+ case llvm::Intrinsic::x86_avx512_cvtusi642sd:
+ case llvm::Intrinsic::x86_avx512_cvtusi642ss:
+ case llvm::Intrinsic::x86_sse2_cvtsd2si64:
+ case llvm::Intrinsic::x86_sse2_cvtsd2si:
+ case llvm::Intrinsic::x86_sse2_cvtsd2ss:
+ case llvm::Intrinsic::x86_sse2_cvtsi2sd:
+ case llvm::Intrinsic::x86_sse2_cvtsi642sd:
+ case llvm::Intrinsic::x86_sse2_cvtss2sd:
+ case llvm::Intrinsic::x86_sse2_cvttsd2si64:
+ case llvm::Intrinsic::x86_sse2_cvttsd2si:
+ case llvm::Intrinsic::x86_sse_cvtsi2ss:
+ case llvm::Intrinsic::x86_sse_cvtsi642ss:
+ case llvm::Intrinsic::x86_sse_cvtss2si64:
+ case llvm::Intrinsic::x86_sse_cvtss2si:
+ case llvm::Intrinsic::x86_sse_cvttss2si64:
+ case llvm::Intrinsic::x86_sse_cvttss2si:
+ handleVectorConvertIntrinsic(I, 1);
+ break;
+ case llvm::Intrinsic::x86_sse2_cvtdq2pd:
+ case llvm::Intrinsic::x86_sse2_cvtps2pd:
+ case llvm::Intrinsic::x86_sse_cvtps2pi:
+ case llvm::Intrinsic::x86_sse_cvttps2pi:
+ handleVectorConvertIntrinsic(I, 2);
+ break;
default:
if (!handleUnknownIntrinsic(I))
visitInstruction(I);
@@ -1604,6 +1911,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
IRBuilder<> IRB(&I);
+
+ if (MS.WrapIndirectCalls && !CS.getCalledFunction())
+ IndirectCallList.push_back(CS);
+
unsigned ArgOffset = 0;
DEBUG(dbgs() << " CallSite: " << I << "\n");
for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
@@ -1647,7 +1958,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << " done with call args\n");
FunctionType *FT =
- cast<FunctionType>(CS.getCalledValue()->getType()-> getContainedType(0));
+ cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
if (FT->isVarArg()) {
VAHelper->visitCallSite(CS, IRB);
}
@@ -1686,12 +1997,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitReturnInst(ReturnInst &I) {
IRBuilder<> IRB(&I);
- if (Value *RetVal = I.getReturnValue()) {
- // Set the shadow for the RetVal.
+ Value *RetVal = I.getReturnValue();
+ if (!RetVal) return;
+ Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
+ if (CheckReturnValue) {
+ insertShadowCheck(RetVal, &I);
+ Value *Shadow = getCleanShadow(RetVal);
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ } else {
Value *Shadow = getShadow(RetVal);
- Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
- DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n");
IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ // FIXME: make it conditional if ClStoreCleanOrigin==0
if (MS.TrackOrigins)
IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
}
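Return-value shadow travels through a thread-local slot; with CheckReturnValue the callee reports immediately and hands the caller a clean shadow. A simplified model of that split, using a stand-in for the runtime's return-value TLS slot (__msan_retval_tls):

#include <cstdint>

thread_local uint64_t RetvalShadowTLS; // stand-in for __msan_retval_tls

// CheckReturnValue == true: report at the return site (modeled by the
// comment below) and store a clean shadow for the caller to read.
uint64_t modelReturn(uint64_t Shadow, bool CheckReturnValue) {
  if (CheckReturnValue) {
    // insertShadowCheck(RetVal, &I) would fire here if Shadow != 0.
    RetvalShadowTLS = 0;
  } else {
    RetvalShadowTLS = Shadow; // propagate to the caller
  }
  return RetvalShadowTLS;
}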
@@ -1734,18 +2050,34 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Descr =
createPrivateNonConstGlobalForString(*F.getParent(),
StackDescription.str());
- IRB.CreateCall3(MS.MsanSetAllocaOriginFn,
+
+ IRB.CreateCall4(MS.MsanSetAllocaOrigin4Fn,
IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
ConstantInt::get(MS.IntptrTy, Size),
- IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()));
+ IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(&F, MS.IntptrTy));
}
}
void visitSelectInst(SelectInst& I) {
IRBuilder<> IRB(&I);
- setShadow(&I, IRB.CreateSelect(I.getCondition(),
- getShadow(I.getTrueValue()), getShadow(I.getFalseValue()),
- "_msprop"));
+ // a = select b, c, d
+ Value *S = IRB.CreateSelect(I.getCondition(), getShadow(I.getTrueValue()),
+ getShadow(I.getFalseValue()));
+ if (I.getType()->isAggregateType()) {
+ // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
+ // an extra "select". This results in much more compact IR.
+ // Sa = select Sb, poisoned, (select b, Sc, Sd)
+ S = IRB.CreateSelect(getShadow(I.getCondition()),
+ getPoisonedShadow(getShadowTy(I.getType())), S,
+ "_msprop_select_agg");
+ } else {
+ // Sa = (sext Sb) | (select b, Sc, Sd)
+ S = IRB.CreateOr(S, CreateShadowCast(IRB, getShadow(I.getCondition()),
+ S->getType(), true),
+ "_msprop_select");
+ }
+ setShadow(&I, S);
if (MS.TrackOrigins) {
// Origins are always i32, so any vector conditions must be flattened.
// FIXME: consider tracking vector origins for app vectors?
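For the non-aggregate case, the formula in the comment above, Sa = (sext Sb) | (select b, Sc, Sd), can be checked with a scalar sketch (illustrative C++):

#include <cstdint>

// Sa = (sext Sb) | (select b, Sc, Sd): an uninitialized condition
// (Sb != 0) sign-extends to all ones and poisons the whole result.
uint32_t selectShadow(bool b, bool Sb, uint32_t Sc, uint32_t Sd) {
  uint32_t Picked = b ? Sc : Sd;     // select b, Sc, Sd
  uint32_t CondMask = Sb ? ~0u : 0u; // sext of the i1 condition shadow
  return CondMask | Picked;
}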
@@ -1780,7 +2112,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n");
setShadow(&I, ResShadow);
- setOrigin(&I, getCleanOrigin());
+ setOriginForNaryOp(I);
}
void visitInsertValueInst(InsertValueInst &I) {
@@ -1793,7 +2125,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
DEBUG(dbgs() << " Res: " << *Res << "\n");
setShadow(&I, Res);
- setOrigin(&I, getCleanOrigin());
+ setOriginForNaryOp(I);
}
void dumpInst(Instruction &I) {
@@ -1816,7 +2148,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
dumpInst(I);
DEBUG(dbgs() << "DEFAULT: " << I << "\n");
for (size_t i = 0, n = I.getNumOperands(); i < n; i++)
- insertCheck(I.getOperand(i), &I);
+ insertShadowCheck(I.getOperand(i), &I);
setShadow(&I, getCleanShadow(&I));
setOrigin(&I, getCleanOrigin());
}
@@ -1970,8 +2302,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
Value *OverflowArgAreaShadowPtr =
MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
- Value *SrcPtr =
- getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset);
+ Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset);
IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
}
}
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
deleted file mode 100644
index b45aef6..0000000
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments the specified program with counters for edge profiling.
-// Edge profiling can give a reasonable approximation of the hot paths through a
-// program, and is used for a wide variety of program transformations.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "insert-optimal-edge-profiling"
-#include "llvm/Transforms/Instrumentation.h"
-#include "MaximumSpanningTree.h"
-#include "ProfilingUtils.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-using namespace llvm;
-
-STATISTIC(NumEdgesInserted, "The # of edges inserted.");
-
-namespace {
- class OptimalEdgeProfiler : public ModulePass {
- bool runOnModule(Module &M);
- public:
- static char ID; // Pass identification, replacement for typeid
- OptimalEdgeProfiler() : ModulePass(ID) {
- initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(ProfileEstimatorPassID);
- AU.addRequired<ProfileInfo>();
- }
-
- virtual const char *getPassName() const {
- return "Optimal Edge Profiler";
- }
- };
-}
-
-char OptimalEdgeProfiler::ID = 0;
-INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
- "Insert optimal instrumentation for edge profiling",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
- "Insert optimal instrumentation for edge profiling",
- false, false)
-
-ModulePass *llvm::createOptimalEdgeProfilerPass() {
- return new OptimalEdgeProfiler();
-}
-
-inline static void printEdgeCounter(ProfileInfo::Edge e,
- BasicBlock* b,
- unsigned i) {
- DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
- << ((b)?(b)->getName():"0") << " (# " << (i) << ")\n");
-}
-
-bool OptimalEdgeProfiler::runOnModule(Module &M) {
- Function *Main = M.getFunction("main");
- if (Main == 0) {
- errs() << "WARNING: cannot insert edge profiling into a module"
- << " with no main function!\n";
- return false; // No main, no instrumentation!
- }
-
- // NumEdges counts all the edges that may be instrumented. Later on its
- // decided which edges to actually instrument, to achieve optimal profiling.
- // For the entry block a virtual edge (0,entry) is reserved, for each block
- // with no successors an edge (BB,0) is reserved. These edges are necessary
- // to calculate a truly optimal maximum spanning tree and thus an optimal
- // instrumentation.
- unsigned NumEdges = 0;
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- // Reserve space for (0,entry) edge.
- ++NumEdges;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- // Keep track of which blocks need to be instrumented. We don't want to
- // instrument blocks that are added as the result of breaking critical
- // edges!
- if (BB->getTerminator()->getNumSuccessors() == 0) {
- // Reserve space for (BB,0) edge.
- ++NumEdges;
- } else {
- NumEdges += BB->getTerminator()->getNumSuccessors();
- }
- }
- }
-
-  // In the profiling output a counter for each edge is reserved, but only a
-  // few are used. This is done to be able to read back in the profile without
-  // calculating the maximum spanning tree again. Instead, each edge counter
-  // that is not used is initialised with -1 to signal that this edge counter
-  // has to be calculated from other edge counters when reading the profile
-  // info back in.
-
- Type *Int32 = Type::getInt32Ty(M.getContext());
- ArrayType *ATy = ArrayType::get(Int32, NumEdges);
- GlobalVariable *Counters =
- new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
- Constant::getNullValue(ATy), "OptEdgeProfCounters");
- NumEdgesInserted = 0;
-
- std::vector<Constant*> Initializer(NumEdges);
- Constant *Zero = ConstantInt::get(Int32, 0);
- Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
-
- // Instrument all of the edges not in MST...
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
-
- // Calculate a Maximum Spanning Tree with the edge weights determined by
-    // ProfileEstimator. ProfileEstimator also assigns weights to the virtual
-    // edges (0,entry) and (BB,0) (for blocks with no successors), and these
-    // edges also participate in the maximum spanning tree calculation.
-    // The third parameter of MaximumSpanningTree() has the effect that the
-    // edges _not_ in the MST are returned instead of the actual MST.
-
- ProfileInfo::EdgeWeights ECs =
- getAnalysis<ProfileInfo>(*F).getEdgeWeights(F);
- std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
- MaximumSpanningTree<BasicBlock> MST(EdgeVector);
- std::stable_sort(MST.begin(), MST.end());
-
-    // Check if (0,entry) is not in the MST. If not, instrument the edge
-    // (IncrementCounterInBlock()) and set the counter initially to zero; if
-    // the edge is in the MST the counter is initialised to -1.
-
- BasicBlock *entry = &(F->getEntryBlock());
- ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry);
- if (!std::binary_search(MST.begin(), MST.end(), edge)) {
- printEdgeCounter(edge, entry, i);
- IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
- Initializer[i++] = (Zero);
-    } else {
- Initializer[i++] = (Uncounted);
- }
-
-    // InsertedBlocks contains all blocks that were inserted for splitting an
-    // edge; these blocks do not have to be instrumented.
- DenseSet<BasicBlock*> InsertedBlocks;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- // Check if block was not inserted and thus does not have to be
- // instrumented.
- if (InsertedBlocks.count(BB)) continue;
-
-      // Okay, we have to add a counter for each outgoing edge not in the MST. If
- // the outgoing edge is not critical don't split it, just insert the
- // counter in the source or destination of the edge. Also, if the block
- // has no successors, the virtual edge (BB,0) is processed.
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0) {
- ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0);
- if (!std::binary_search(MST.begin(), MST.end(), edge)) {
- printEdgeCounter(edge, BB, i);
- IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
- Initializer[i++] = (Zero);
-        } else {
- Initializer[i++] = (Uncounted);
- }
- }
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- BasicBlock *Succ = TI->getSuccessor(s);
- ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
- if (!std::binary_search(MST.begin(), MST.end(), edge)) {
-
- // If the edge is critical, split it.
- bool wasInserted = SplitCriticalEdge(TI, s, this);
- Succ = TI->getSuccessor(s);
- if (wasInserted)
- InsertedBlocks.insert(Succ);
-
- // Okay, we are guaranteed that the edge is no longer critical. If
- // we only have a single successor, insert the counter in this block,
- // otherwise insert it in the successor block.
- if (TI->getNumSuccessors() == 1) {
- // Insert counter at the start of the block
- printEdgeCounter(edge, BB, i);
- IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
- } else {
- // Insert counter at the start of the block
- printEdgeCounter(edge, Succ, i);
- IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
- }
- Initializer[i++] = (Zero);
- } else {
- Initializer[i++] = (Uncounted);
- }
- }
- }
- }
-
- // Check if the number of edges counted at first was the number of edges we
- // considered for instrumentation.
- assert(i == NumEdges && "the number of edges in counting array is wrong");
-
- // Assign the now completely defined initialiser to the array.
- Constant *init = ConstantArray::get(ATy, Initializer);
- Counters->setInitializer(init);
-
- // Add the initialization call to main.
- InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
- return true;
-}
-
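The pass deleted above relied on flow conservation: only edges outside a maximum spanning tree carry counters, and the tree edges' counts are reconstructed when the profile is read back. A toy sketch of that reconstruction (numbers invented for illustration):

#include <cstdio>

// Flow conservation at a two-way branch: if the virtual (0,entry) count
// and one outgoing chord edge are measured, the uncounted spanning-tree
// edge can be derived instead of instrumented.
int main() {
  long EntryCount = 100; // measured virtual edge (0,entry)
  long TakenCount = 37;  // measured chord edge
  long DerivedCount = EntryCount - TakenCount; // spanning-tree edge
  printf("derived edge count: %ld\n", DerivedCount); // 63
}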
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
deleted file mode 100644
index 7de7326..0000000
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ /dev/null
@@ -1,1424 +0,0 @@
-//===- PathProfiling.cpp - Inserts counters for path profiling ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass instruments functions for Ball-Larus path profiling. Ball-Larus
-// profiling converts the CFG into a DAG by replacing backedges with edges
-// from entry to the start block and from the end block to exit. The paths
-// along the new DAG are enumerated, i.e. each path is given a path number.
-// Edges are instrumented to increment the path number register, such that the
-// path number register will equal the path number of the path taken at the
-// exit.
-//
-// This file defines classes for building a CFG for use with different stages
-// in the Ball-Larus path profiling instrumentation [Ball96]. The
-// requirements are: formatting the llvm CFG into the Ball-Larus DAG, path
-// numbering, finding a spanning tree, and moving increments from the
-// spanning tree to chords.
-//
-// Terms:
-// DAG - Directed Acyclic Graph.
-// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges
-// removed in the following manner. For every backedge
-// v->w, insert edge ENTRY->w and edge v->EXIT.
-// Path Number - The number corresponding to a specific path through a
-// Ball-Larus DAG.
-// Spanning Tree - A subgraph, S, is a spanning tree if S covers all
-// vertices and is a tree.
-// Chord - An edge not in the spanning tree.
-//
-// [Ball96]
-// T. Ball and J. R. Larus. "Efficient Path Profiling."
-// International Symposium on Microarchitecture, pages 46-57, 1996.
-// http://portal.acm.org/citation.cfm?id=243857
-//
-// [Ball94]
-// Thomas Ball. "Efficiently Counting Program Events with Support for
-// On-line queries."
-// ACM Transactions on Programming Languages and Systems, Vol 16, No 5,
-// September 1994, Pages 1399-1410.
-//===----------------------------------------------------------------------===//
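The numbering scheme from [Ball96] assigns each node the count of paths from it to exit, and gives edge v->w_i an increment equal to the number of paths through v's earlier successors, so every entry-to-exit path sums to a unique number. A minimal sketch on a diamond-shaped DAG (hand-built example, not the pass's data structures):

#include <cstdio>
#include <vector>

int main() {
  // Diamond DAG: 0=entry -> {1,2} -> 3=exit.
  std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {3}, {}};
  int NumPaths[4] = {0, 0, 0, 1}; // exit contributes one (empty) path
  for (int V = 2; V >= 0; --V)    // reverse topological order
    for (int W : Succ[V])
      NumPaths[V] += NumPaths[W];
  // Edge increments: entry->1 gets 0; entry->2 gets NumPaths[1] = 1.
  // Path (entry,1,exit) sums to 0, path (entry,2,exit) to 1: unique.
  printf("paths through entry: %d\n", NumPaths[0]); // 2
}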
-#define DEBUG_TYPE "insert-path-profiling"
-
-#include "llvm/Transforms/Instrumentation.h"
-#include "ProfilingUtils.h"
-#include "llvm/Analysis/PathNumbering.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeBuilder.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <vector>
-
-#define HASH_THRESHHOLD 100000
-
-using namespace llvm;
-
-namespace {
-class BLInstrumentationNode;
-class BLInstrumentationEdge;
-class BLInstrumentationDag;
-
-// ---------------------------------------------------------------------------
-// BLInstrumentationNode extends BallLarusNode with members used by the
-// instrumentation algorithms.
-// ---------------------------------------------------------------------------
-class BLInstrumentationNode : public BallLarusNode {
-public:
- // Creates a new BLInstrumentationNode from a BasicBlock.
- BLInstrumentationNode(BasicBlock* BB);
-
-  // Get/set the Value corresponding to the pathNumber register,
- // constant or phinode. Used by the instrumentation code to remember
- // path number Values.
- Value* getStartingPathNumber();
- void setStartingPathNumber(Value* pathNumber);
-
- Value* getEndingPathNumber();
- void setEndingPathNumber(Value* pathNumber);
-
- // Get/set the PHINode Instruction for this node.
- PHINode* getPathPHI();
- void setPathPHI(PHINode* pathPHI);
-
-private:
-
- Value* _startingPathNumber; // The Value for the current pathNumber.
- Value* _endingPathNumber; // The Value for the current pathNumber.
- PHINode* _pathPHI; // The PHINode for current pathNumber.
-};
-
-// --------------------------------------------------------------------------
-// BLInstrumentationEdge extends BallLarusEdge with data about the
-// instrumentation that will end up on each edge.
-// --------------------------------------------------------------------------
-class BLInstrumentationEdge : public BallLarusEdge {
-public:
- BLInstrumentationEdge(BLInstrumentationNode* source,
- BLInstrumentationNode* target);
-
- // Sets the target node of this edge. Required to split edges.
- void setTarget(BallLarusNode* node);
-
- // Get/set whether edge is in the spanning tree.
- bool isInSpanningTree() const;
- void setIsInSpanningTree(bool isInSpanningTree);
-
-  // Get/set whether this edge will be instrumented with a path number
- // initialization.
- bool isInitialization() const;
- void setIsInitialization(bool isInitialization);
-
- // Get/set whether this edge will be instrumented with a path counter
- // increment. Notice this is incrementing the path counter
- // corresponding to the path number register. The path number
- // increment is determined by getIncrement().
- bool isCounterIncrement() const;
- void setIsCounterIncrement(bool isCounterIncrement);
-
- // Get/set the path number increment that this edge will be instrumented
- // with. This is distinct from the path counter increment and the
- // weight. The counter increment counts the number of executions of
- // some path, whereas the path number keeps track of which path number
- // the program is on.
- long getIncrement() const;
- void setIncrement(long increment);
-
- // Get/set whether the edge has been instrumented.
- bool hasInstrumentation();
- void setHasInstrumentation(bool hasInstrumentation);
-
- // Returns the successor number of this edge in the source.
- unsigned getSuccessorNumber();
-
-private:
- // The increment that the code will be instrumented with.
- long long _increment;
-
- // Whether this edge is in the spanning tree.
- bool _isInSpanningTree;
-
-  // Whether this edge is an initialization of the path number.
- bool _isInitialization;
-
- // Whether this edge is a path counter increment.
- bool _isCounterIncrement;
-
- // Whether this edge has been instrumented.
- bool _hasInstrumentation;
-};
-
-// ---------------------------------------------------------------------------
-// BLInstrumentationDag extends BallLarusDag with algorithms that
-// determine where instrumentation should be placed.
-// ---------------------------------------------------------------------------
-class BLInstrumentationDag : public BallLarusDag {
-public:
- BLInstrumentationDag(Function &F);
-
- // Returns the Exit->Root edge. This edge is required for creating
- // directed cycles in the algorithm for moving instrumentation off of
- // the spanning tree
- BallLarusEdge* getExitRootEdge();
-
- // Returns an array of phony edges which mark those nodes
- // with function calls
- BLEdgeVector getCallPhonyEdges();
-
- // Gets/sets the path counter array
- GlobalVariable* getCounterArray();
- void setCounterArray(GlobalVariable* c);
-
- // Calculates the increments for the chords, thereby removing
- // instrumentation from the spanning tree edges. Implementation is based
- // on the algorithm in Figure 4 of [Ball94]
- void calculateChordIncrements();
-
- // Updates the state when an edge has been split
- void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock);
-
- // Calculates a spanning tree of the DAG ignoring cycles. Whichever
- // edges are in the spanning tree will not be instrumented, but this
- // implementation does not try to minimize the instrumentation overhead
- // by trying to find hot edges.
- void calculateSpanningTree();
-
- // Pushes initialization further down in order to group the first
- // increment and initialization.
- void pushInitialization();
-
- // Pushes the path counter increments up in order to group the last path
- // number increment.
- void pushCounters();
-
- // Removes phony edges from the successor list of the source, and the
- // predecessor list of the target.
- void unlinkPhony();
-
- // Generate dot graph for the function
- void generateDotGraph();
-
-protected:
- // BLInstrumentationDag creates BLInstrumentationNode objects in this
- // method overriding the creation of BallLarusNode objects.
- //
- // Allows subclasses to determine which type of Node is created.
- // Override this method to produce subclasses of BallLarusNode if
- // necessary.
- virtual BallLarusNode* createNode(BasicBlock* BB);
-
-  // BLInstrumentationDag creates BLInstrumentationEdges.
- //
- // Allows subclasses to determine which type of Edge is created.
- // Override this method to produce subclasses of BallLarusEdge if
- // necessary. Parameters source and target will have been created by
- // createNode and can be cast to the subclass of BallLarusNode*
- // returned by createNode.
- virtual BallLarusEdge* createEdge(
- BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber);
-
-private:
- BLEdgeVector _treeEdges; // All edges in the spanning tree.
- BLEdgeVector _chordEdges; // All edges not in the spanning tree.
- GlobalVariable* _counterArray; // Array to store path counters
-
- // Removes the edge from the appropriate predecessor and successor lists.
- void unlinkEdge(BallLarusEdge* edge);
-
- // Makes an edge part of the spanning tree.
- void makeEdgeSpanning(BLInstrumentationEdge* edge);
-
- // Pushes initialization and calls itself recursively.
- void pushInitializationFromEdge(BLInstrumentationEdge* edge);
-
- // Pushes path counter increments up recursively.
- void pushCountersFromEdge(BLInstrumentationEdge* edge);
-
-  // Depth first algorithm for determining the chord increments.
- void calculateChordIncrementsDfs(
- long weight, BallLarusNode* v, BallLarusEdge* e);
-
- // Determines the relative direction of two edges.
- int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f);
-};
-
-// ---------------------------------------------------------------------------
-// PathProfiler is a module pass which instruments path profiling instructions
-// ---------------------------------------------------------------------------
-class PathProfiler : public ModulePass {
-private:
- // Current context for multi threading support.
- LLVMContext* Context;
-
- // Which function are we currently instrumenting
- unsigned currentFunctionNumber;
-
- // The function prototype in the profiling runtime for incrementing a
- // single path counter in a hash table.
- Constant* llvmIncrementHashFunction;
- Constant* llvmDecrementHashFunction;
-
- // Instruments each function with path profiling. 'main' is instrumented
- // with code to save the profile to disk.
- bool runOnModule(Module &M);
-
- // Analyzes the function for Ball-Larus path profiling, and inserts code.
- void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M);
-
- // Creates an increment constant representing incr.
- ConstantInt* createIncrementConstant(long incr, int bitsize);
-
- // Creates an increment constant representing the value in
- // edge->getIncrement().
- ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge);
-
- // Finds the insertion point after pathNumber in block. PathNumber may
- // be NULL.
- BasicBlock::iterator getInsertionPoint(
- BasicBlock* block, Value* pathNumber);
-
- // Inserts source's pathNumber Value* into target. Target may or may not
- // have multiple predecessors, and may or may not have its phiNode
-  // initialized.
- void pushValueIntoNode(
- BLInstrumentationNode* source, BLInstrumentationNode* target);
-
- // Inserts source's pathNumber Value* into the appropriate slot of
- // target's phiNode.
- void pushValueIntoPHI(
- BLInstrumentationNode* target, BLInstrumentationNode* source);
-
-  // The Value* in node, oldVal, is updated with a Value* corresponding to
- // oldVal + addition.
- void insertNumberIncrement(BLInstrumentationNode* node, Value* addition,
- bool atBeginning);
-
- // Creates a counter increment in the given node. The Value* in node is
- // taken as the index into a hash table.
- void insertCounterIncrement(
- Value* incValue,
- BasicBlock::iterator insertPoint,
- BLInstrumentationDag* dag,
- bool increment = true);
-
- // A PHINode is created in the node, and its values initialized to -1U.
- void preparePHI(BLInstrumentationNode* node);
-
- // Inserts instrumentation for the given edge
- //
-  // Pre: The edge's source node has pathNumber set if the edge has a
-  // non-zero path number increment.
- //
- // Post: Edge's target node has a pathNumber set to the path number Value
- // corresponding to the value of the path register after edge's
- // execution.
- void insertInstrumentationStartingAt(
- BLInstrumentationEdge* edge,
- BLInstrumentationDag* dag);
-
- // If this edge is a critical edge, then inserts a node at this edge.
- // This edge becomes the first edge, and a new BallLarusEdge is created.
- bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag);
-
- // Inserts instrumentation according to the marked edges in dag. Phony
- // edges must be unlinked from the DAG, but accessible from the
- // backedges. Dag must have initializations, path number increments, and
- // counter increments present.
- //
- // Counter storage is created here.
- void insertInstrumentation( BLInstrumentationDag& dag, Module &M);
-
-public:
- static char ID; // Pass identification, replacement for typeid
- PathProfiler() : ModulePass(ID) {
- initializePathProfilerPass(*PassRegistry::getPassRegistry());
- }
-
- virtual const char *getPassName() const {
- return "Path Profiler";
- }
-};
-} // end anonymous namespace
-
-// Should we print the dot-graphs
-static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden,
- cl::desc("Output the path profiling DAG for each function."));
-
-// Register the path profiler as a pass
-char PathProfiler::ID = 0;
-INITIALIZE_PASS(PathProfiler, "insert-path-profiling",
- "Insert instrumentation for Ball-Larus path profiling",
- false, false)
-
-ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); }
-
-namespace llvm {
- class PathProfilingFunctionTable {};
-
- // Type for global array storing references to hashes or arrays
- template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable,
- xcompile> {
- public:
- static StructType *get(LLVMContext& C) {
- return( StructType::get(
- TypeBuilder<types::i<32>, xcompile>::get(C), // type
- TypeBuilder<types::i<32>, xcompile>::get(C), // array size
- TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
- NULL));
- }
- };
-
- typedef TypeBuilder<PathProfilingFunctionTable, true>
- ftEntryTypeBuilder;
-
- // BallLarusEdge << operator overloading
- raw_ostream& operator<<(raw_ostream& os,
- const BLInstrumentationEdge& edge)
- LLVM_ATTRIBUTE_USED;
- raw_ostream& operator<<(raw_ostream& os,
- const BLInstrumentationEdge& edge) {
- os << "[" << edge.getSource()->getName() << " -> "
- << edge.getTarget()->getName() << "] init: "
- << (edge.isInitialization() ? "yes" : "no")
- << " incr:" << edge.getIncrement() << " cinc: "
- << (edge.isCounterIncrement() ? "yes" : "no");
- return(os);
- }
-}
-
-// Creates a new BLInstrumentationNode from a BasicBlock.
-BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) :
- BallLarusNode(BB),
- _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {}
-
-// Constructor for BLInstrumentationEdge.
-BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source,
- BLInstrumentationNode* target)
- : BallLarusEdge(source, target, 0),
- _increment(0), _isInSpanningTree(false), _isInitialization(false),
- _isCounterIncrement(false), _hasInstrumentation(false) {}
-
-// Sets the target node of this edge. Required to split edges.
-void BLInstrumentationEdge::setTarget(BallLarusNode* node) {
- _target = node;
-}
-
-// Returns whether this edge is in the spanning tree.
-bool BLInstrumentationEdge::isInSpanningTree() const {
- return(_isInSpanningTree);
-}
-
-// Sets whether this edge is in the spanning tree.
-void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) {
- _isInSpanningTree = isInSpanningTree;
-}
-
-// Returns whether this edge will be instrumented with a path number
-// initialization.
-bool BLInstrumentationEdge::isInitialization() const {
- return(_isInitialization);
-}
-
-// Sets whether this edge will be instrumented with a path number
-// initialization.
-void BLInstrumentationEdge::setIsInitialization(bool isInitialization) {
- _isInitialization = isInitialization;
-}
-
-// Returns whether this edge will be instrumented with a path counter
-// increment. Notice this is incrementing the path counter
-// corresponding to the path number register. The path number
-// increment is determined by getIncrement().
-bool BLInstrumentationEdge::isCounterIncrement() const {
- return(_isCounterIncrement);
-}
-
-// Sets whether this edge will be instrumented with a path counter
-// increment.
-void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) {
- _isCounterIncrement = isCounterIncrement;
-}
-
-// Gets the path number increment that this edge will be instrumented
-// with. This is distinct from the path counter increment and the
-// weight. The counter increment counts the number of executions of
-// some path, whereas the path number keeps track of which path number
-// the program is on.
-long BLInstrumentationEdge::getIncrement() const {
- return(_increment);
-}
-
-// Set whether this edge will be instrumented with a path number
-// increment.
-void BLInstrumentationEdge::setIncrement(long increment) {
- _increment = increment;
-}
-
-// True iff the edge has already been instrumented.
-bool BLInstrumentationEdge::hasInstrumentation() {
- return(_hasInstrumentation);
-}
-
-// Set whether this edge has been instrumented.
-void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) {
- _hasInstrumentation = hasInstrumentation;
-}
-
-// Returns the successor number of this edge in the source.
-unsigned BLInstrumentationEdge::getSuccessorNumber() {
- BallLarusNode* sourceNode = getSource();
- BallLarusNode* targetNode = getTarget();
- BasicBlock* source = sourceNode->getBlock();
- BasicBlock* target = targetNode->getBlock();
-
- if(source == NULL || target == NULL)
- return(0);
-
- TerminatorInst* terminator = source->getTerminator();
-
- unsigned i;
- for(i=0; i < terminator->getNumSuccessors(); i++) {
- if(terminator->getSuccessor(i) == target)
- break;
- }
-
- return(i);
-}
-
-// BLInstrumentationDag constructor initializes a DAG for the given Function.
-BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F),
- _counterArray(0) {
-}
-
-// Returns the Exit->Root edge. This edge is required for creating
-// directed cycles in the algorithm for moving instrumentation off of
-// the spanning tree
-BallLarusEdge* BLInstrumentationDag::getExitRootEdge() {
- BLEdgeIterator erEdge = getExit()->succBegin();
- return(*erEdge);
-}
-
-BLEdgeVector BLInstrumentationDag::getCallPhonyEdges() {
- BLEdgeVector callEdges;
-
- for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
- edge != end; edge++ ) {
- if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY )
- callEdges.push_back(*edge);
- }
-
- return callEdges;
-}
-
-// Gets the path counter array
-GlobalVariable* BLInstrumentationDag::getCounterArray() {
- return _counterArray;
-}
-
-void BLInstrumentationDag::setCounterArray(GlobalVariable* c) {
- _counterArray = c;
-}
-
-// Calculates the increment for the chords, thereby removing
-// instrumentation from the spanning tree edges. Implementation is based on
-// the algorithm in Figure 4 of [Ball94]
-void BLInstrumentationDag::calculateChordIncrements() {
- calculateChordIncrementsDfs(0, getRoot(), NULL);
-
- BLInstrumentationEdge* chord;
- for(BLEdgeIterator chordEdge = _chordEdges.begin(),
- end = _chordEdges.end(); chordEdge != end; chordEdge++) {
- chord = (BLInstrumentationEdge*) *chordEdge;
- chord->setIncrement(chord->getIncrement() + chord->getWeight());
- }
-}
-
-// Updates the state when an edge has been split
-void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge,
- BasicBlock* newBlock) {
- BallLarusNode* oldTarget = formerEdge->getTarget();
- BallLarusNode* newNode = addNode(newBlock);
- formerEdge->setTarget(newNode);
- newNode->addPredEdge(formerEdge);
-
- DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n");
-
- oldTarget->removePredEdge(formerEdge);
- BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0);
-
- if( formerEdge->getType() == BallLarusEdge::BACKEDGE ||
- formerEdge->getType() == BallLarusEdge::SPLITEDGE) {
- newEdge->setType(formerEdge->getType());
- newEdge->setPhonyRoot(formerEdge->getPhonyRoot());
- newEdge->setPhonyExit(formerEdge->getPhonyExit());
- formerEdge->setType(BallLarusEdge::NORMAL);
- formerEdge->setPhonyRoot(NULL);
- formerEdge->setPhonyExit(NULL);
- }
-}
-
-// Calculates a spanning tree of the DAG ignoring cycles. Whichever
-// edges are in the spanning tree will not be instrumented, but this
-// implementation does not try to minimize the instrumentation overhead
-// by trying to find hot edges.
-void BLInstrumentationDag::calculateSpanningTree() {
- std::stack<BallLarusNode*> dfsStack;
-
- for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end();
- nodeIt != end; nodeIt++) {
- (*nodeIt)->setColor(BallLarusNode::WHITE);
- }
-
- dfsStack.push(getRoot());
- while(dfsStack.size() > 0) {
- BallLarusNode* node = dfsStack.top();
- dfsStack.pop();
-
- if(node->getColor() == BallLarusNode::WHITE)
- continue;
-
- BallLarusNode* nextNode;
- bool forward = true;
- BLEdgeIterator succEnd = node->succEnd();
-
- node->setColor(BallLarusNode::WHITE);
- // first iterate over successors then predecessors
- for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd();
- edge != predEnd; edge++) {
- if(edge == succEnd) {
- edge = node->predBegin();
- forward = false;
- }
-
- // Ignore split edges
- if ((*edge)->getType() == BallLarusEdge::SPLITEDGE)
- continue;
-
- nextNode = forward? (*edge)->getTarget(): (*edge)->getSource();
- if(nextNode->getColor() != BallLarusNode::WHITE) {
- nextNode->setColor(BallLarusNode::WHITE);
- makeEdgeSpanning((BLInstrumentationEdge*)(*edge));
- }
- }
- }
-
- for(BLEdgeIterator edge = _edges.begin(), end = _edges.end();
- edge != end; edge++) {
- BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge);
-    // safe since createEdge is overridden
- if(!instEdge->isInSpanningTree() && (*edge)->getType()
- != BallLarusEdge::SPLITEDGE)
- _chordEdges.push_back(instEdge);
- }
-}
-
-// Pushes initialization further down in order to group the first
-// increment and initialization.
-void BLInstrumentationDag::pushInitialization() {
- BLInstrumentationEdge* exitRootEdge =
- (BLInstrumentationEdge*) getExitRootEdge();
- exitRootEdge->setIsInitialization(true);
- pushInitializationFromEdge(exitRootEdge);
-}
-
-// Pushes the path counter increments up in order to group the last path
-// number increment.
-void BLInstrumentationDag::pushCounters() {
- BLInstrumentationEdge* exitRootEdge =
- (BLInstrumentationEdge*) getExitRootEdge();
- exitRootEdge->setIsCounterIncrement(true);
- pushCountersFromEdge(exitRootEdge);
-}
-
-// Removes phony edges from the successor list of the source, and the
-// predecessor list of the target.
-void BLInstrumentationDag::unlinkPhony() {
- BallLarusEdge* edge;
-
- for(BLEdgeIterator next = _edges.begin(),
- end = _edges.end(); next != end; next++) {
- edge = (*next);
-
- if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
- edge->getType() == BallLarusEdge::SPLITEDGE_PHONY ||
- edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) {
- unlinkEdge(edge);
- }
- }
-}
-
-// Generate a .dot graph to represent the DAG and pathNumbers
-void BLInstrumentationDag::generateDotGraph() {
- std::string errorInfo;
- std::string functionName = getFunction().getName().str();
- std::string filename = "pathdag." + functionName + ".dot";
-
- DEBUG (dbgs() << "Writing '" << filename << "'...\n");
- raw_fd_ostream dotFile(filename.c_str(), errorInfo);
-
- if (!errorInfo.empty()) {
- errs() << "Error opening '" << filename.c_str() <<"' for writing!";
- errs() << "\n";
- return;
- }
-
- dotFile << "digraph " << functionName << " {\n";
-
- for( BLEdgeIterator edge = _edges.begin(), end = _edges.end();
- edge != end; edge++) {
- std::string sourceName = (*edge)->getSource()->getName();
- std::string targetName = (*edge)->getTarget()->getName();
-
- dotFile << "\t\"" << sourceName.c_str() << "\" -> \""
- << targetName.c_str() << "\" ";
-
- long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
-
- switch( (*edge)->getType() ) {
- case BallLarusEdge::NORMAL:
- dotFile << "[label=" << inc << "] [color=black];\n";
- break;
-
- case BallLarusEdge::BACKEDGE:
- dotFile << "[color=cyan];\n";
- break;
-
- case BallLarusEdge::BACKEDGE_PHONY:
- dotFile << "[label=" << inc
- << "] [color=blue];\n";
- break;
-
- case BallLarusEdge::SPLITEDGE:
- dotFile << "[color=violet];\n";
- break;
-
- case BallLarusEdge::SPLITEDGE_PHONY:
- dotFile << "[label=" << inc << "] [color=red];\n";
- break;
-
- case BallLarusEdge::CALLEDGE_PHONY:
- dotFile << "[label=" << inc << "] [color=green];\n";
- break;
- }
- }
-
- dotFile << "}\n";
-}
-
-// Allows subclasses to determine which type of Node is created.
-// Override this method to produce subclasses of BallLarusNode if
-// necessary. The destructor of BallLarusDag will call free on each pointer
-// created.
-BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) {
- return( new BLInstrumentationNode(BB) );
-}
-
-// Allows subclasses to determine which type of Edge is created.
-// Override this method to produce subclasses of BallLarusEdge if
-// necessary. The destructor of BallLarusDag will call free on each pointer
-// created.
-BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source,
- BallLarusNode* target, unsigned edgeNumber) {
- // One can cast from BallLarusNode to BLInstrumentationNode since createNode
-// is overridden to produce BLInstrumentationNode.
- return( new BLInstrumentationEdge((BLInstrumentationNode*)source,
- (BLInstrumentationNode*)target) );
-}
-
-// Gets the Value corresponding to the pathNumber register, constant,
-// or phinode. Used by the instrumentation code to remember path
-// number Values.
-Value* BLInstrumentationNode::getStartingPathNumber(){
- return(_startingPathNumber);
-}
-
-// Sets the Value of the pathNumber. Used by the instrumentation code.
-void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
- DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
- pathNumber->getName() :
- "unused") << "\n");
- _startingPathNumber = pathNumber;
-}
-
-Value* BLInstrumentationNode::getEndingPathNumber(){
- return(_endingPathNumber);
-}
-
-void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
- DEBUG(dbgs() << " EPN-" << getName() << " <-- "
- << (pathNumber ? pathNumber->getName() : "unused") << "\n");
- _endingPathNumber = pathNumber;
-}
-
-// Get the PHINode Instruction for this node. Used by instrumentation
-// code.
-PHINode* BLInstrumentationNode::getPathPHI() {
- return(_pathPHI);
-}
-
-// Set the PHINode Instruction for this node. Used by instrumentation
-// code.
-void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) {
- _pathPHI = pathPHI;
-}
-
-// Removes the edge from the appropriate predecessor and successor
-// lists.
-void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) {
- if(edge == getExitRootEdge())
- DEBUG(dbgs() << " Removing exit->root edge\n");
-
- edge->getSource()->removeSuccEdge(edge);
- edge->getTarget()->removePredEdge(edge);
-}
-
-// Makes an edge part of the spanning tree.
-void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) {
- edge->setIsInSpanningTree(true);
- _treeEdges.push_back(edge);
-}
-
-// Pushes initialization and calls itself recursively.
-void BLInstrumentationDag::pushInitializationFromEdge(
- BLInstrumentationEdge* edge) {
- BallLarusNode* target;
-
- target = edge->getTarget();
- if( target->getNumberPredEdges() > 1 || target == getExit() ) {
- return;
- } else {
- for(BLEdgeIterator next = target->succBegin(),
- end = target->succEnd(); next != end; next++) {
- BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next;
-
- // Skip split edges
- if (intoEdge->getType() == BallLarusEdge::SPLITEDGE)
- continue;
-
- intoEdge->setIncrement(intoEdge->getIncrement() +
- edge->getIncrement());
- intoEdge->setIsInitialization(true);
- pushInitializationFromEdge(intoEdge);
- }
-
- edge->setIncrement(0);
- edge->setIsInitialization(false);
- }
-}
-
-// Pushes path counter increments up recursively.
-void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) {
- BallLarusNode* source;
-
- source = edge->getSource();
- if(source->getNumberSuccEdges() > 1 || source == getRoot()
- || edge->isInitialization()) {
- return;
- } else {
- for(BLEdgeIterator previous = source->predBegin(),
- end = source->predEnd(); previous != end; previous++) {
- BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous;
-
- // Skip split edges
- if (fromEdge->getType() == BallLarusEdge::SPLITEDGE)
- continue;
-
- fromEdge->setIncrement(fromEdge->getIncrement() +
- edge->getIncrement());
- fromEdge->setIsCounterIncrement(true);
- pushCountersFromEdge(fromEdge);
- }
-
- edge->setIncrement(0);
- edge->setIsCounterIncrement(false);
- }
-}
-
-// Depth first algorithm for determining the chord increments.
-void BLInstrumentationDag::calculateChordIncrementsDfs(long weight,
- BallLarusNode* v, BallLarusEdge* e) {
- BLInstrumentationEdge* f;
-
- for(BLEdgeIterator treeEdge = _treeEdges.begin(),
- end = _treeEdges.end(); treeEdge != end; treeEdge++) {
- f = (BLInstrumentationEdge*) *treeEdge;
- if(e != f && v == f->getTarget()) {
- calculateChordIncrementsDfs(
- calculateChordIncrementsDir(e,f)*(weight) +
- f->getWeight(), f->getSource(), f);
- }
- if(e != f && v == f->getSource()) {
- calculateChordIncrementsDfs(
- calculateChordIncrementsDir(e,f)*(weight) +
- f->getWeight(), f->getTarget(), f);
- }
- }
-
- for(BLEdgeIterator chordEdge = _chordEdges.begin(),
- end = _chordEdges.end(); chordEdge != end; chordEdge++) {
- f = (BLInstrumentationEdge*) *chordEdge;
- if(v == f->getSource() || v == f->getTarget()) {
- f->setIncrement(f->getIncrement() +
- calculateChordIncrementsDir(e,f)*weight);
- }
- }
-}
-
-// Determines the relative direction of two edges.
-int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e,
- BallLarusEdge* f) {
- if( e == NULL)
- return(1);
- else if(e->getSource() == f->getTarget()
- || e->getTarget() == f->getSource())
- return(1);
-
- return(-1);
-}
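Per Figure 4 of [Ball94], each chord closes exactly one cycle with the spanning tree, and its increment is the signed sum of increments accumulated around that cycle; the sign comes from the relative direction of consecutive edges. A standalone restatement of the direction rule (placeholder types, not the pass's classes):

// +1 when the incoming edge e and tree edge f meet head-to-tail (they
// are traversed in the same direction around the cycle), -1 otherwise;
// the DFS root has no incoming edge and counts as +1.
struct EdgeRef { const void *Src, *Dst; };

int edgeDir(const EdgeRef *E, const EdgeRef &F) {
  if (!E)
    return 1; // DFS root: no incoming edge yet
  return (E->Src == F.Dst || E->Dst == F.Src) ? 1 : -1;
}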
-
-// Creates an increment constant representing incr.
-ConstantInt* PathProfiler::createIncrementConstant(long incr,
- int bitsize) {
- return(ConstantInt::get(IntegerType::get(*Context, 32), incr));
-}
-
-// Creates an increment constant representing the value in
-// edge->getIncrement().
-ConstantInt* PathProfiler::createIncrementConstant(
- BLInstrumentationEdge* edge) {
- return(createIncrementConstant(edge->getIncrement(), 32));
-}
-
-// Finds the insertion point after pathNumber in block. PathNumber may
-// be NULL.
-BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value*
- pathNumber) {
- if(pathNumber == NULL || isa<ConstantInt>(pathNumber)
- || (((Instruction*)(pathNumber))->getParent()) != block) {
- return(block->getFirstInsertionPt());
- } else {
- Instruction* pathNumberInst = (Instruction*) (pathNumber);
- BasicBlock::iterator insertPoint;
- BasicBlock::iterator end = block->end();
-
- for(insertPoint = block->begin();
- insertPoint != end; insertPoint++) {
- Instruction* insertInst = &(*insertPoint);
-
- if(insertInst == pathNumberInst)
- return(++insertPoint);
- }
-
- return(insertPoint);
- }
-}
-
-// A PHINode is created in the node, and its values initialized to -1U.
-void PathProfiler::preparePHI(BLInstrumentationNode* node) {
- BasicBlock* block = node->getBlock();
- BasicBlock::iterator insertPoint = block->getFirstInsertionPt();
- pred_iterator PB = pred_begin(node->getBlock()),
- PE = pred_end(node->getBlock());
- PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context),
- std::distance(PB, PE), "pathNumber",
- insertPoint );
- node->setPathPHI(phi);
- node->setStartingPathNumber(phi);
- node->setEndingPathNumber(phi);
-
- for(pred_iterator predIt = PB; predIt != PE; predIt++) {
- BasicBlock* pred = (*predIt);
-
- if(pred != NULL)
- phi->addIncoming(createIncrementConstant((long)-1, 32), pred);
- }
-}
-
-// Inserts source's pathNumber Value* into target. Target may or may not
-// have multiple predecessors, and may or may not have its phiNode
-// initialized.
-void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source,
- BLInstrumentationNode* target) {
- if(target->getBlock() == NULL)
- return;
-
- if(target->getNumberPredEdges() <= 1) {
- assert(target->getStartingPathNumber() == NULL &&
- "Target already has path number");
- target->setStartingPathNumber(source->getEndingPathNumber());
- target->setEndingPathNumber(source->getEndingPathNumber());
- DEBUG(dbgs() << " Passing path number"
- << (source->getEndingPathNumber() ? "" : " (null)")
- << " value through.\n");
- } else {
- if(target->getPathPHI() == NULL) {
- DEBUG(dbgs() << " Initializing PHI node for block '"
- << target->getName() << "'\n");
- preparePHI(target);
- }
- pushValueIntoPHI(target, source);
- DEBUG(dbgs() << " Passing number value into PHI for block '"
- << target->getName() << "'\n");
- }
-}
-
-// Inserts source's pathNumber Value* into the appropriate slot of
-// target's phiNode.
-void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target,
- BLInstrumentationNode* source) {
- PHINode* phi = target->getPathPHI();
- assert(phi != NULL && " Tried to push value into node with PHI, but node"
- " actually had no PHI.");
- phi->removeIncomingValue(source->getBlock(), false);
- phi->addIncoming(source->getEndingPathNumber(), source->getBlock());
-}
-
-// The Value* in node, oldVal, is updated with a Value* corresponding to
-// oldVal + addition.
-void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node,
- Value* addition, bool atBeginning) {
- BasicBlock* block = node->getBlock();
- assert(node->getStartingPathNumber() != NULL);
- assert(node->getEndingPathNumber() != NULL);
-
- BasicBlock::iterator insertPoint;
-
- if( atBeginning )
- insertPoint = block->getFirstInsertionPt();
- else
- insertPoint = block->getTerminator();
-
- DEBUG(errs() << " Creating addition instruction.\n");
- Value* newpn = BinaryOperator::Create(Instruction::Add,
- node->getStartingPathNumber(),
- addition, "pathNumber", insertPoint);
-
- node->setEndingPathNumber(newpn);
-
- if( atBeginning )
- node->setStartingPathNumber(newpn);
-}
-
-// Creates a counter increment in the given node. The Value* in node is
-// taken as the index into an array or hash table. The hash table access
-// is a call to the runtime.
-void PathProfiler::insertCounterIncrement(Value* incValue,
- BasicBlock::iterator insertPoint,
- BLInstrumentationDag* dag,
- bool increment) {
- // Counter increment for array
- if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) {
- // Get pointer to the array location
- std::vector<Value*> gepIndices(2);
- gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context));
- gepIndices[1] = incValue;
-
- GetElementPtrInst* pcPointer =
- GetElementPtrInst::Create(dag->getCounterArray(), gepIndices,
- "counterInc", insertPoint);
-
- // Load from the array - call it oldPC
- LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint);
-
- // Test to see whether adding 1 will overflow the counter
- ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc,
- createIncrementConstant(0xffffffff, 32),
- "isMax");
-
- // Select increment for the path counter based on overflow
- SelectInst* inc =
- SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32),
- createIncrementConstant(0,32),
- "pathInc", insertPoint);
-
- // newPc = oldPc + inc
- BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add,
- oldPc, inc, "newPC",
- insertPoint);
-
- // Store back in to the array
- new StoreInst(newPc, pcPointer, insertPoint);
- } else { // Counter increment for hash
- std::vector<Value*> args(2);
- args[0] = ConstantInt::get(Type::getInt32Ty(*Context),
- currentFunctionNumber);
- args[1] = incValue;
-
- CallInst::Create(
- increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
- args, "", insertPoint);
- }
-}
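The array branch above emits a compare and a select so the 32-bit counter saturates at its maximum value rather than wrapping. A straight-line sketch of the emitted logic (increment case only; the hash-table branch instead calls into the runtime):

#include <cstdint>

// Mirrors the emitted IR: load, compare against the max value, select an
// increment of 1 or 0, add, store back -- so the counter saturates.
void bumpPathCounter(uint32_t *Counters, uint32_t PathNumber) {
  uint32_t OldPC = Counters[PathNumber];
  uint32_t Inc = (OldPC < 0xFFFFFFFFu) ? 1u : 0u; // overflow guard
  Counters[PathNumber] = OldPC + Inc;
}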
-
-// Inserts instrumentation for the given edge
-//
-// Pre: The edge's source node has pathNumber set if the edge has a
-// non-zero path number increment.
-//
-// Post: Edge's target node has a pathNumber set to the path number Value
-// corresponding to the value of the path register after edge's
-// execution.
-//
-// FIXME: This should be reworked so it's not recursive.
-void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge,
- BLInstrumentationDag* dag) {
- // Mark the edge as instrumented
- edge->setHasInstrumentation(true);
- DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n");
-
- // create a new node for this edge's instrumentation
- splitCritical(edge, dag);
-
- BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource();
- BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget();
- BLInstrumentationNode* instrumentNode;
- BLInstrumentationNode* nextSourceNode;
-
- bool atBeginning = false;
-
- // Source node has only 1 successor so any information can be simply
- // inserted in to it without splitting
- if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) {
- DEBUG(dbgs() << " Potential instructions to be placed in: "
- << sourceNode->getName() << " (at end)\n");
- instrumentNode = sourceNode;
- nextSourceNode = targetNode; // ... since we never made any new nodes
- }
-
- // The target node only has one predecessor, so we can safely insert edge
- // instrumentation into it. If there was splitting, it must have been
- // successful.
- else if( targetNode->getNumberPredEdges() == 1 ) {
- DEBUG(dbgs() << " Potential instructions to be placed in: "
- << targetNode->getName() << " (at beginning)\n");
- pushValueIntoNode(sourceNode, targetNode);
- instrumentNode = targetNode;
- nextSourceNode = NULL; // ... otherwise we'll just keep splitting
- atBeginning = true;
- }
-
- // Somehow, splitting must have failed.
- else {
- errs() << "Instrumenting could not split a critical edge.\n";
- DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n");
- return;
- }
-
- // Insert instrumentation if this is a back or split edge
- if( edge->getType() == BallLarusEdge::BACKEDGE ||
- edge->getType() == BallLarusEdge::SPLITEDGE ) {
- BLInstrumentationEdge* top =
- (BLInstrumentationEdge*) edge->getPhonyRoot();
- BLInstrumentationEdge* bottom =
- (BLInstrumentationEdge*) edge->getPhonyExit();
-
- assert( top->isInitialization() && " Top phony edge did not"
- " contain a path number initialization.");
- assert( bottom->isCounterIncrement() && " Bottom phony edge"
- " did not contain a path counter increment.");
-
- // split edge has yet to be initialized
- if( !instrumentNode->getEndingPathNumber() ) {
- instrumentNode->setStartingPathNumber(createIncrementConstant(0,32));
- instrumentNode->setEndingPathNumber(createIncrementConstant(0,32));
- }
-
- BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstInsertionPt() :
- instrumentNode->getBlock()->getTerminator();
-
- // add information from the bottom edge, if it exists
- if( bottom->getIncrement() ) {
- Value* newpn =
- BinaryOperator::Create(Instruction::Add,
- instrumentNode->getStartingPathNumber(),
- createIncrementConstant(bottom),
- "pathNumber", insertPoint);
- instrumentNode->setEndingPathNumber(newpn);
- }
-
- insertCounterIncrement(instrumentNode->getEndingPathNumber(),
- insertPoint, dag);
-
- if( atBeginning )
- instrumentNode->setStartingPathNumber(createIncrementConstant(top));
-
- instrumentNode->setEndingPathNumber(createIncrementConstant(top));
-
- // Check for path counter increments
- if( top->isCounterIncrement() ) {
- insertCounterIncrement(instrumentNode->getEndingPathNumber(),
- instrumentNode->getBlock()->getTerminator(),dag);
- instrumentNode->setEndingPathNumber(0);
- }
- }
-
- // Insert instrumentation if this is a normal edge
- else {
- BasicBlock::iterator insertPoint = atBeginning ?
- instrumentNode->getBlock()->getFirstInsertionPt() :
- instrumentNode->getBlock()->getTerminator();
-
- if( edge->isInitialization() ) { // initialize path number
- instrumentNode->setEndingPathNumber(createIncrementConstant(edge));
- } else if( edge->getIncrement() ) {// increment path number
- Value* newpn =
- BinaryOperator::Create(Instruction::Add,
- instrumentNode->getStartingPathNumber(),
- createIncrementConstant(edge),
- "pathNumber", insertPoint);
- instrumentNode->setEndingPathNumber(newpn);
-
- if( atBeginning )
- instrumentNode->setStartingPathNumber(newpn);
- }
-
- // Check for path counter increments
- if( edge->isCounterIncrement() ) {
- insertCounterIncrement(instrumentNode->getEndingPathNumber(),
- insertPoint, dag);
- instrumentNode->setEndingPathNumber(0);
- }
- }
-
- // Push it along
- if (nextSourceNode && instrumentNode->getEndingPathNumber())
- pushValueIntoNode(instrumentNode, nextSourceNode);
-
- // Add all the successors
- for( BLEdgeIterator next = targetNode->succBegin(),
- end = targetNode->succEnd(); next != end; next++ ) {
- // So long as it is un-instrumented, add it to the list
- if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() )
- insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag);
- else
- DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next)
- << " already instrumented.\n");
- }
-}
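Taken together, the recursion above realizes the Ball-Larus numbering at runtime: a path register is set on an initialization edge, bumped by chord increments along the way, and finally used to index the counter array on a counter-increment edge. A hedged C++ sketch of the shape an instrumented function effectively takes (the increment value and array size are made up for illustration):

#include <cstdint>

static uint32_t pathCounters[16]; // stands in for dag->getCounterArray()

static void instrumentedShape(bool takeBranch) {
  uint32_t r = 0;          // path register, set by an initialization edge
  if (takeBranch)
    r += 3;                // "pathNumber" increment on an instrumented edge
  pathCounters[r]++;       // counter increment edge at the exit
}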
-
-// Inserts instrumentation according to the marked edges in dag. Phony edges
-// must be unlinked from the DAG, but remain accessible from the backedges. The
-// DAG must have initializations, path number increments, and counter
-// increments present.
-//
-// Counter storage is created here.
-void PathProfiler::insertInstrumentation(
- BLInstrumentationDag& dag, Module &M) {
-
- BLInstrumentationEdge* exitRootEdge =
- (BLInstrumentationEdge*) dag.getExitRootEdge();
- insertInstrumentationStartingAt(exitRootEdge, &dag);
-
- // Iterate through each call edge and apply the appropriate hash increment
- // and decrement functions
- BLEdgeVector callEdges = dag.getCallPhonyEdges();
- for( BLEdgeIterator edge = callEdges.begin(),
- end = callEdges.end(); edge != end; edge++ ) {
- BLInstrumentationNode* node =
- (BLInstrumentationNode*)(*edge)->getSource();
- BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt();
-
- // Find the first function call
- while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call )
- insertPoint++;
-
- DEBUG(dbgs() << "\nInstrumenting method call block '"
- << node->getBlock()->getName() << "'\n");
- DEBUG(dbgs() << " Path number initialized: "
- << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
-
- Value* newpn;
- if( node->getStartingPathNumber() ) {
- long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement();
- if ( inc )
- newpn = BinaryOperator::Create(Instruction::Add,
- node->getStartingPathNumber(),
- createIncrementConstant(inc,32),
- "pathNumber", insertPoint);
- else
- newpn = node->getStartingPathNumber();
- } else {
- newpn = (Value*)createIncrementConstant(
- ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32);
- }
-
- insertCounterIncrement(newpn, insertPoint, &dag);
- insertCounterIncrement(newpn, node->getBlock()->getTerminator(),
- &dag, false);
- }
-}
-
-// Entry point of the module
-void PathProfiler::runOnFunction(std::vector<Constant*> &ftInit,
- Function &F, Module &M) {
- // Build DAG from CFG
- BLInstrumentationDag dag = BLInstrumentationDag(F);
- dag.init();
-
- // give each path a unique integer value
- dag.calculatePathNumbers();
-
- // modify path increments to increase the efficiency
- // of instrumentation
- dag.calculateSpanningTree();
- dag.calculateChordIncrements();
- dag.pushInitialization();
- dag.pushCounters();
- dag.unlinkPhony();
-
- // potentially generate .dot graph for the dag
- if (DotPathDag)
- dag.generateDotGraph ();
-
-  // Should we store the information in an array or a hash?
- if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) {
- Type* t = ArrayType::get(Type::getInt32Ty(*Context),
- dag.getNumberOfPaths());
-
- dag.setCounterArray(new GlobalVariable(M, t, false,
- GlobalValue::InternalLinkage,
- Constant::getNullValue(t), ""));
- }
-
- insertInstrumentation(dag, M);
-
- // Add to global function reference table
- unsigned type;
- Type* voidPtr = TypeBuilder<types::i<8>*, true>::get(*Context);
-
- if( dag.getNumberOfPaths() <= HASH_THRESHHOLD )
- type = ProfilingArray;
- else
- type = ProfilingHash;
-
- std::vector<Constant*> entryArray(3);
- entryArray[0] = createIncrementConstant(type,32);
- entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32);
- entryArray[2] = dag.getCounterArray() ?
- ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) :
- Constant::getNullValue(voidPtr);
-
- StructType* at = ftEntryTypeBuilder::get(*Context);
- ConstantStruct* functionEntry =
- (ConstantStruct*)ConstantStruct::get(at, entryArray);
- ftInit.push_back(functionEntry);
-}
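The three-element struct pushed onto ftInit above records, per function, how its counters are stored. A sketch of the equivalent C++ layout (the field names are invented; the real type comes from ftEntryTypeBuilder):

#include <cstdint>

struct FTEntry {
  uint32_t type;      // ProfilingArray or ProfilingHash
  uint32_t numPaths;  // dag.getNumberOfPaths()
  void    *counters;  // bitcast counter array, or null when hashing is used
};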
-
-// Output the bitcode if we want to observe instrumentation changes
-#define PRINT_MODULE dbgs() << \
- "\n\n============= MODULE BEGIN ===============\n" << M << \
- "\n============== MODULE END ================\n"
-
-bool PathProfiler::runOnModule(Module &M) {
- Context = &M.getContext();
-
- DEBUG(dbgs()
- << "****************************************\n"
- << "****************************************\n"
- << "** **\n"
- << "** PATH PROFILING INSTRUMENTATION **\n"
- << "** **\n"
- << "****************************************\n"
- << "****************************************\n");
-
- // No main, no instrumentation!
- Function *Main = M.getFunction("main");
-
-  // Using Fortran? ... this kind of works
- if (!Main)
- Main = M.getFunction("MAIN__");
-
- if (!Main) {
- errs() << "WARNING: cannot insert path profiling into a module"
- << " with no main function!\n";
- return false;
- }
-
- llvmIncrementHashFunction = M.getOrInsertFunction(
- "llvm_increment_path_count",
- Type::getVoidTy(*Context), // return type
- Type::getInt32Ty(*Context), // function number
- Type::getInt32Ty(*Context), // path number
- NULL );
-
- llvmDecrementHashFunction = M.getOrInsertFunction(
- "llvm_decrement_path_count",
- Type::getVoidTy(*Context), // return type
- Type::getInt32Ty(*Context), // function number
- Type::getInt32Ty(*Context), // path number
- NULL );
-
- std::vector<Constant*> ftInit;
- unsigned functionNumber = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
- if (F->isDeclaration())
- continue;
-
- DEBUG(dbgs() << "Function: " << F->getName() << "\n");
- functionNumber++;
-
- // set function number
- currentFunctionNumber = functionNumber;
- runOnFunction(ftInit, *F, M);
- }
-
- Type *t = ftEntryTypeBuilder::get(*Context);
- ArrayType* ftArrayType = ArrayType::get(t, ftInit.size());
- Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit);
-
- DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n");
-
- GlobalVariable* functionTable =
- new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage,
- ftInitConstant, "functionPathTable");
- Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0);
- InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable,
- PointerType::getUnqual(eltType));
-
- DEBUG(PRINT_MODULE);
-
- return true;
-}
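The two getOrInsertFunction calls above only declare the runtime hooks; their definitions live in the profiling runtime, not in this pass. The C-level signatures implied by those declarations would be:

#include <cstdint>

extern "C" void llvm_increment_path_count(uint32_t functionNumber,
                                          uint32_t pathNumber);
extern "C" void llvm_decrement_path_count(uint32_t functionNumber,
                                          uint32_t pathNumber);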
-
-// If this edge is a critical edge, then a node is inserted at this edge.
-// This edge becomes the first edge, and a new BallLarusEdge is created.
-// Returns true if the edge was split.
-bool PathProfiler::splitCritical(BLInstrumentationEdge* edge,
- BLInstrumentationDag* dag) {
- unsigned succNum = edge->getSuccessorNumber();
- BallLarusNode* sourceNode = edge->getSource();
- BallLarusNode* targetNode = edge->getTarget();
- BasicBlock* sourceBlock = sourceNode->getBlock();
- BasicBlock* targetBlock = targetNode->getBlock();
-
- if(sourceBlock == NULL || targetBlock == NULL
- || sourceNode->getNumberSuccEdges() <= 1
- || targetNode->getNumberPredEdges() == 1 ) {
- return(false);
- }
-
- TerminatorInst* terminator = sourceBlock->getTerminator();
-
- if( SplitCriticalEdge(terminator, succNum, this, false)) {
- BasicBlock* newBlock = terminator->getSuccessor(succNum);
- dag->splitUpdate(edge, newBlock);
- return(true);
- } else
- return(false);
-}
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
deleted file mode 100644
index 4b3de6d..0000000
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a few helper functions which are used by profile
-// instrumentation code to instrument the code. This allows the profiler pass
-// to worry about *what* to insert, and these functions take care of *how* to do
-// it.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ProfilingUtils.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-
-void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Array,
- PointerType *arrayType) {
- LLVMContext &Context = MainFn->getContext();
- Type *ArgVTy =
- PointerType::getUnqual(Type::getInt8PtrTy(Context));
- PointerType *UIntPtr = arrayType ? arrayType :
- Type::getInt32PtrTy(Context);
- Module &M = *MainFn->getParent();
- Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
- Type::getInt32Ty(Context),
- ArgVTy, UIntPtr,
- Type::getInt32Ty(Context),
- (Type *)0);
-
- // This could force argc and argv into programs that wouldn't otherwise have
- // them, but instead we just pass null values in.
- std::vector<Value*> Args(4);
- Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Args[1] = Constant::getNullValue(ArgVTy);
-
- // Skip over any allocas in the entry block.
- BasicBlock *Entry = MainFn->begin();
- BasicBlock::iterator InsertPos = Entry->begin();
- while (isa<AllocaInst>(InsertPos)) ++InsertPos;
-
- std::vector<Constant*> GEPIndices(2,
- Constant::getNullValue(Type::getInt32Ty(Context)));
- unsigned NumElements = 0;
- if (Array) {
- Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices);
- NumElements =
- cast<ArrayType>(Array->getType()->getElementType())->getNumElements();
- } else {
- // If this profiling instrumentation doesn't have a constant array, just
- // pass null.
- Args[2] = ConstantPointerNull::get(UIntPtr);
- }
- Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
-
- CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
-
- // If argc or argv are not available in main, just pass null values in.
- Function::arg_iterator AI;
- switch (MainFn->arg_size()) {
- default:
- case 2:
- AI = MainFn->arg_begin(); ++AI;
- if (AI->getType() != ArgVTy) {
- Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
- false);
- InitCall->setArgOperand(1,
- CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
- } else {
- InitCall->setArgOperand(1, AI);
- }
- /* FALL THROUGH */
-
- case 1:
- AI = MainFn->arg_begin();
- // If the program looked at argc, have it look at the return value of the
- // init call instead.
- if (!AI->getType()->isIntegerTy(32)) {
- Instruction::CastOps opcode;
- if (!AI->use_empty()) {
- opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);
- AI->replaceAllUsesWith(
- CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));
- }
- opcode = CastInst::getCastOpcode(AI, true,
- Type::getInt32Ty(Context), true);
- InitCall->setArgOperand(0,
- CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
- "argc.cast", InitCall));
- } else {
- AI->replaceAllUsesWith(InitCall);
- InitCall->setArgOperand(0, AI);
- }
-
- case 0: break;
- }
-}
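In effect, the rewriting above makes main begin with a call to the named init hook, with argv cast as needed and later uses of argc redirected to the call's result. A hedged C++ sketch of the resulting shape (the hook's definition and the table argument are placeholders; for the path profiler the array is the function table built by the caller):

extern "C" int llvm_start_path_profiling(int argc, char **argv,
                                         void *table, unsigned numElements);

int main(int argc, char **argv) {
  // Inserted before the first non-alloca instruction; the return value
  // ("newargc") replaces later uses of argc.
  argc = llvm_start_path_profiling(argc, argv, /*table=*/0, /*numElements=*/0);
  // ... original body of main ...
  return 0;
}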
-
-void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray, bool beginning) {
- // Insert the increment after any alloca or PHI instructions...
- BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() :
- BB->getTerminator();
- while (isa<AllocaInst>(InsertPos))
- ++InsertPos;
-
- LLVMContext &Context = BB->getContext();
-
- // Create the getelementptr constant expression
- std::vector<Constant*> Indices(2);
- Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);
- Constant *ElementPtr =
- ConstantExpr::getGetElementPtr(CounterArray, Indices);
-
- // Load, increment and store the value back.
- Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);
- Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- "NewFuncCounter", InsertPos);
- new StoreInst(NewVal, ElementPtr, InsertPos);
-}
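The load/add/store triple above is just a counter bump through a constant element address. An equivalent in plain C++ (names invented):

#include <cstdint>

static void incrementCounterInBlock(uint32_t *counterArray,
                                    unsigned counterNum) {
  uint32_t oldVal = counterArray[counterNum]; // LoadInst  "OldFuncCounter"
  uint32_t newVal = oldVal + 1;               // Add       "NewFuncCounter"
  counterArray[counterNum] = newVal;          // StoreInst
}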
-
-void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
- // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
- // types.
- Type *GlobalDtorElems[2] = {
- Type::getInt32Ty(Mod->getContext()),
- FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
- };
- StructType *GlobalDtorElemTy =
- StructType::get(Mod->getContext(), GlobalDtorElems, false);
-
- // Construct the new element we'll be adding.
- Constant *Elem[2] = {
- ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535),
- ConstantExpr::getBitCast(Callee, GlobalDtorElems[1])
- };
-
- // If llvm.global_dtors exists, make a copy of the things in its list and
- // delete it, to replace it with one that has a larger array type.
- std::vector<Constant *> dtors;
- if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) {
- if (ConstantArray *InitList =
- dyn_cast<ConstantArray>(GlobalDtors->getInitializer())) {
- for (unsigned i = 0, e = InitList->getType()->getNumElements();
- i != e; ++i)
- dtors.push_back(cast<Constant>(InitList->getOperand(i)));
- }
- GlobalDtors->eraseFromParent();
- }
-
- // Build up llvm.global_dtors with our new item in it.
- GlobalVariable *GlobalDtors = new GlobalVariable(
- *Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
- GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
-
- dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
- GlobalDtors->setInitializer(ConstantArray::get(
- cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
-}
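The rebuilt llvm.global_dtors array makes the callee run at program shutdown with the default priority 65535. At the source level the observable effect is comparable to registering an exit hook, as in this standalone sketch (atexit is an analogy here, not what the pass emits):

#include <cstdio>
#include <cstdlib>

static void shutdownHook() { std::puts("flush profile data here"); }

int main() {
  std::atexit(shutdownHook); // runs at normal program termination
  return 0;
}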
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h
deleted file mode 100644
index 09b2217..0000000
--- a/lib/Transforms/Instrumentation/ProfilingUtils.h
+++ /dev/null
@@ -1,36 +0,0 @@
-//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a few helper functions which are used by profile
-// instrumentation code to instrument the code. This allows the profiler pass
-// to worry about *what* to insert, and these functions take care of *how* to do
-// it.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PROFILINGUTILS_H
-#define PROFILINGUTILS_H
-
-namespace llvm {
- class BasicBlock;
- class Function;
- class GlobalValue;
- class Module;
- class PointerType;
-
- void InsertProfilingInitCall(Function *MainFn, const char *FnName,
- GlobalValue *Arr = 0,
- PointerType *arrayType = 0);
- void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
- GlobalValue *CounterArray,
- bool beginning = true);
- void InsertProfilingShutdownCall(Function *Callee, Module *Mod);
-}
-
-#endif
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index cc971a3..89fb746 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -227,7 +227,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
TD = getAnalysisIfAvailable<DataLayout>();
if (!TD)
return false;
- BL.reset(new SpecialCaseList(BlacklistFile));
+ BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
@@ -240,12 +240,8 @@ bool ThreadSanitizer::doInitialization(Module &M) {
}
static bool isVtableAccess(Instruction *I) {
- if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) {
- if (Tag->getNumOperands() < 1) return false;
- if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
- if (Tag1->getString() == "vtable pointer") return true;
- }
- }
+ if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa))
+ return Tag->isTBAAVtableAccess();
return false;
}
@@ -362,7 +358,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
// (e.g. variables that do not escape, etc).
// Instrument memory accesses.
- if (ClInstrumentMemoryAccesses)
+ if (ClInstrumentMemoryAccesses && F.hasFnAttribute(Attribute::SanitizeThread))
for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
}
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 6f94a7c..2976df6 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -176,91 +176,6 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
return 0;
}
-/// \brief Test whether the given retainable object pointer escapes.
-///
-/// This differs from regular escape analysis in that a use as an
-/// argument to a call is not considered an escape.
-///
-static bool DoesRetainableObjPtrEscape(const User *Ptr) {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n");
-
- // Walk the def-use chains.
- SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(Ptr);
- // If Ptr has any operands add them as well.
- for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E;
- ++I) {
- Worklist.push_back(*I);
- }
-
- // Ensure we do not visit any value twice.
- SmallPtrSet<const Value *, 8> VisitedSet;
-
- do {
- const Value *V = Worklist.pop_back_val();
-
- DEBUG(dbgs() << "Visiting: " << *V << "\n");
-
- for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- const User *UUser = *UI;
-
- DEBUG(dbgs() << "User: " << *UUser << "\n");
-
- // Special - Use by a call (callee or argument) is not considered
- // to be an escape.
- switch (GetBasicInstructionClass(UUser)) {
- case IC_StoreWeak:
- case IC_InitWeak:
- case IC_StoreStrong:
- case IC_Autorelease:
- case IC_AutoreleaseRV: {
- DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n");
- // These special functions make copies of their pointer arguments.
- return true;
- }
- case IC_IntrinsicUser:
- // Use by the use intrinsic is not an escape.
- continue;
- case IC_User:
- case IC_None:
- // Use by an instruction which copies the value is an escape if the
- // result is an escape.
- if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
- isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
-
- if (VisitedSet.insert(UUser)) {
- DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes."
- " Adding to list.\n");
- Worklist.push_back(UUser);
- } else {
- DEBUG(dbgs() << "Already visited node.\n");
- }
- continue;
- }
- // Use by a load is not an escape.
- if (isa<LoadInst>(UUser))
- continue;
- // Use by a store is not an escape if the use is the address.
- if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
- if (V != SI->getValueOperand())
- continue;
- break;
- default:
- // Regular calls and other stuff are not considered escapes.
- continue;
- }
- // Otherwise, conservatively assume an escape.
- DEBUG(dbgs() << "Assuming ptr escapes.\n");
- return true;
- }
- } while (!Worklist.empty());
-
- // No escapes found.
- DEBUG(dbgs() << "Ptr does not escape.\n");
- return false;
-}
-
/// This is a wrapper around getUnderlyingObjCPtr along the lines of
/// GetUnderlyingObjects except that it returns early when it sees the first
/// alloca.
@@ -517,7 +432,7 @@ namespace {
bool Partial;
/// The current position in the sequence.
- Sequence Seq : 8;
+ unsigned char Seq : 8;
/// Unidirectional information about the current sequence.
RRInfo RRI;
@@ -583,7 +498,7 @@ namespace {
}
Sequence GetSeq() const {
- return Seq;
+ return static_cast<Sequence>(Seq);
}
void ClearSequenceProgress() {
@@ -623,7 +538,8 @@ namespace {
void
PtrState::Merge(const PtrState &Other, bool TopDown) {
- Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ Seq = MergeSeqs(static_cast<Sequence>(Seq), static_cast<Sequence>(Other.Seq),
+ TopDown);
KnownPositiveRefCount &= Other.KnownPositiveRefCount;
// If we're not in a sequence (anymore), drop all associated state.
@@ -674,7 +590,9 @@ namespace {
SmallVector<BasicBlock *, 2> Succs;
public:
- BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+ static const unsigned OverflowOccurredValue;
+
+ BBState() : TopDownPathCount(0), BottomUpPathCount(0) { }
typedef MapTy::iterator ptr_iterator;
typedef MapTy::const_iterator ptr_const_iterator;
@@ -745,27 +663,31 @@ namespace {
    /// Returns true if overflow occurred. Returns false if overflow did not
    /// occur.
bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
- assert(TopDownPathCount != 0);
- assert(BottomUpPathCount != 0);
+ if (TopDownPathCount == OverflowOccurredValue ||
+ BottomUpPathCount == OverflowOccurredValue)
+ return true;
unsigned long long Product =
(unsigned long long)TopDownPathCount*BottomUpPathCount;
- PathCount = Product;
- // Overflow occured if any of the upper bits of Product are set.
- return Product >> 32;
+    // Overflow occurred if any of the upper bits of Product are set, or if
+    // all the lower bits of Product are set.
+ return (Product >> 32) ||
+ ((PathCount = Product) == OverflowOccurredValue);
}
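The check above widens the two 32-bit path counts into a 64-bit product and flags overflow either when the high 32 bits are non-zero or when the low 32 bits collide with the 0xffffffff sentinel. A standalone C++ restatement (names invented):

#include <cstdint>

static const uint32_t kOverflowOccurred = 0xffffffffu; // sentinel

static bool getAllPathCountWithOverflow(uint32_t topDown, uint32_t bottomUp,
                                        uint32_t &pathCount) {
  if (topDown == kOverflowOccurred || bottomUp == kOverflowOccurred)
    return true;                          // already saturated earlier
  uint64_t product = (uint64_t)topDown * bottomUp;
  pathCount = (uint32_t)product;
  return (product >> 32) != 0 ||          // true 64-bit overflow
         pathCount == kOverflowOccurred;  // collision with the sentinel
}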
// Specialized CFG utilities.
typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
- edge_iterator pred_begin() { return Preds.begin(); }
- edge_iterator pred_end() { return Preds.end(); }
- edge_iterator succ_begin() { return Succs.begin(); }
- edge_iterator succ_end() { return Succs.end(); }
+ edge_iterator pred_begin() const { return Preds.begin(); }
+ edge_iterator pred_end() const { return Preds.end(); }
+ edge_iterator succ_begin() const { return Succs.begin(); }
+ edge_iterator succ_end() const { return Succs.end(); }
void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
bool isExit() const { return Succs.empty(); }
};
+
+ const unsigned BBState::OverflowOccurredValue = 0xffffffff;
}
void BBState::InitFromPred(const BBState &Other) {
@@ -781,13 +703,25 @@ void BBState::InitFromSucc(const BBState &Other) {
/// The top-down traversal uses this to merge information about predecessors to
/// form the initial state for a new block.
void BBState::MergePred(const BBState &Other) {
+ if (TopDownPathCount == OverflowOccurredValue)
+ return;
+
// Other.TopDownPathCount can be 0, in which case it is either dead or a
// loop backedge. Loop backedges are special.
TopDownPathCount += Other.TopDownPathCount;
+  // In order to be consistent, we clear the top down pointers when the
+  // addition makes TopDownPathCount equal OverflowOccurredValue, even though
+  // "true" overflow has not occurred.
+ if (TopDownPathCount == OverflowOccurredValue) {
+ clearTopDownPointers();
+ return;
+ }
+
// Check for overflow. If we have overflow, fall back to conservative
// behavior.
if (TopDownPathCount < Other.TopDownPathCount) {
+ TopDownPathCount = OverflowOccurredValue;
clearTopDownPointers();
return;
}
@@ -813,13 +747,25 @@ void BBState::MergePred(const BBState &Other) {
/// The bottom-up traversal uses this to merge information about successors to
/// form the initial state for a new block.
void BBState::MergeSucc(const BBState &Other) {
+ if (BottomUpPathCount == OverflowOccurredValue)
+ return;
+
// Other.BottomUpPathCount can be 0, in which case it is either dead or a
// loop backedge. Loop backedges are special.
BottomUpPathCount += Other.BottomUpPathCount;
+  // In order to be consistent, we clear the bottom up pointers when the
+  // addition makes BottomUpPathCount equal OverflowOccurredValue, even though
+  // "true" overflow has not occurred.
+ if (BottomUpPathCount == OverflowOccurredValue) {
+ clearBottomUpPointers();
+ return;
+ }
+
// Check for overflow. If we have overflow, fall back to conservative
// behavior.
if (BottomUpPathCount < Other.BottomUpPathCount) {
+ BottomUpPathCount = OverflowOccurredValue;
clearBottomUpPointers();
return;
}
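Both merge paths rely on the same wraparound test: after an unsigned add, sum < addend holds exactly when the addition wrapped past 2^32. A compact C++ sketch of the saturating merge (clearing the per-pointer state is left to the caller, as in the code above):

#include <cstdint>

static const uint32_t kOverflowOccurred = 0xffffffffu;

static uint32_t mergePathCount(uint32_t count, uint32_t other) {
  if (count == kOverflowOccurred)
    return count;                 // already saturated, nothing to merge
  uint32_t sum = count + other;
  if (sum == kOverflowOccurred)   // landed exactly on the sentinel
    return kOverflowOccurred;
  if (sum < other)                // wrapped around 2^32
    return kOverflowOccurred;
  return sum;
}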
@@ -1158,13 +1104,9 @@ namespace {
unsigned ARCAnnotationProvenanceSourceMDKind;
#endif // ARC_ANNOATIONS
- bool IsRetainBlockOptimizable(const Instruction *Inst);
-
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
InstructionClass &Class);
- bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock,
- InstructionClass &Class);
void OptimizeIndividualCalls(Function &F);
void CheckForCFGHazards(const BasicBlock *BB,
@@ -1253,22 +1195,6 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
-bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
- // Without the magic metadata tag, we have to assume this might be an
- // objc_retainBlock call inserted to convert a block pointer to an id,
- // in which case it really is needed.
- if (!Inst->getMetadata(CopyOnEscapeMDKind))
- return false;
-
- // If the pointer "escapes" (not including being used in a call),
- // the copy may be needed.
- if (DoesRetainableObjPtrEscape(Inst))
- return false;
-
- // Otherwise, it's not needed.
- return true;
-}
-
/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
/// not a return value. Or, if it can be paired with an
/// objc_autoreleaseReturnValue, delete the pair and return true.
@@ -1369,41 +1295,6 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
}
-// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain
-// calls.
-//
-// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and
-// does not escape (following the rules of block escaping), strength reduce the
-// objc_retainBlock to an objc_retain.
-//
-// TODO: If an objc_retainBlock call is dominated by a previous
-// objc_retainBlock call, strength reduce the objc_retainBlock to an
-// objc_retain.
-bool
-ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
- InstructionClass &Class) {
- assert(GetBasicInstructionClass(Inst) == Class);
- assert(IC_RetainBlock == Class);
-
- // If we can not optimize Inst, return false.
- if (!IsRetainBlockOptimizable(Inst))
- return false;
-
- Changed = true;
- ++NumPeeps;
-
- DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n");
- DEBUG(dbgs() << "Old: " << *Inst << "\n");
- CallInst *RetainBlock = cast<CallInst>(Inst);
- Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
- RetainBlock->setCalledFunction(NewDecl);
- // Remove copy_on_escape metadata.
- RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
- Class = IC_Retain;
- DEBUG(dbgs() << "New: " << *Inst << "\n");
- return true;
-}
-
/// Visit each call, one at a time, and make simplifications without doing any
/// additional analysis.
void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
@@ -1480,11 +1371,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
break;
}
- case IC_RetainBlock:
- // If we strength reduce an objc_retainBlock to an objc_retain, continue
- // onto the objc_retain peephole optimizations. Otherwise break.
- OptimizeRetainBlockCall(F, Inst, Class);
- break;
case IC_RetainRV:
if (OptimizeRetainRVCall(F, Inst))
continue;
@@ -2520,15 +2406,26 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (Jt == Releases.end())
return false;
const RRInfo &NewRetainReleaseRRI = Jt->second;
- assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+
+ // If the release does not have a reference to the retain as well,
+ // something happened which is unaccounted for. Do not do anything.
+ //
+ // This can happen if we catch an additive overflow during path count
+ // merging.
+ if (!NewRetainReleaseRRI.Calls.count(NewRetain))
+ return false;
+
if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
// If we overflow when we compute the path count, don't remove/move
// anything.
const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
- unsigned PathCount;
+ unsigned PathCount = BBState::OverflowOccurredValue;
if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
OldDelta -= PathCount;
// Merge the ReleaseMetadata and IsTailCallRelease values.
@@ -2558,8 +2455,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
// If we overflow when we compute the path count, don't
// remove/move anything.
const BBState &RIPBBState = BBStates[RIP->getParent()];
+ PathCount = BBState::OverflowOccurredValue;
if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
NewDelta -= PathCount;
}
}
@@ -2589,15 +2490,25 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (Jt == Retains.end())
return false;
const RRInfo &NewReleaseRetainRRI = Jt->second;
- assert(NewReleaseRetainRRI.Calls.count(NewRelease));
- if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+ // If the retain does not have a reference to the release as well,
+ // something happened which is unaccounted for. Do not do anything.
+ //
+ // This can happen if we catch an additive overflow during path count
+ // merging.
+ if (!NewReleaseRetainRRI.Calls.count(NewRelease))
+ return false;
+
+ if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
// If we overflow when we compute the path count, don't remove/move
// anything.
const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
- unsigned PathCount;
+ unsigned PathCount = BBState::OverflowOccurredValue;
if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
OldDelta += PathCount;
OldCount += PathCount;
@@ -2612,8 +2523,13 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
// If we overflow when we compute the path count, don't
// remove/move anything.
const BBState &RIPBBState = BBStates[RIP->getParent()];
+
+ PathCount = BBState::OverflowOccurredValue;
if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
return false;
+ assert(PathCount != BBState::OverflowOccurredValue &&
+ "PathCount at this point can not be "
+ "OverflowOccurredValue.");
NewDelta += PathCount;
NewCount += PathCount;
}
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
deleted file mode 100644
index e755008..0000000
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a very simple profile guided basic block placement
-// algorithm. The idea is to put frequently executed blocks together at the
-// start of the function, and hopefully increase the number of fall-through
-// conditional branches. If there is no profile information for a particular
-// function, this pass basically orders blocks in depth-first order.
-//
-// The algorithm implemented here is basically "Algo1" from "Profile Guided Code
-// Positioning" by Pettis and Hansen, except that it uses basic block counts
-// instead of edge counts. This should be improved in many ways, but is very
-// simple for now.
-//
-// Basically we "place" the entry block, then loop over all successors in a DFO,
-// placing the most frequently executed successor until we run out of blocks. I
-// told you this was _extremely_ simplistic. :) This is also much slower than it
-// could be. When it becomes important, this pass will be rewritten to use a
-// better algorithm, and then we can worry about efficiency.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "block-placement"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumMoved, "Number of basic blocks moved");
-
-namespace {
- struct BlockPlacement : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- BlockPlacement() : FunctionPass(ID) {
- initializeBlockPlacementPass(*PassRegistry::getPassRegistry());
- }
-
- virtual bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<ProfileInfo>();
- //AU.addPreserved<ProfileInfo>(); // Does this work?
- }
- private:
- /// PI - The profile information that is guiding us.
- ///
- ProfileInfo *PI;
-
- /// NumMovedBlocks - Every time we move a block, increment this counter.
- ///
- unsigned NumMovedBlocks;
-
- /// PlacedBlocks - Every time we place a block, remember it so we don't get
- /// into infinite loops.
- std::set<BasicBlock*> PlacedBlocks;
-
- /// InsertPos - This an iterator to the next place we want to insert a
- /// block.
- Function::iterator InsertPos;
-
- /// PlaceBlocks - Recursively place the specified blocks and any unplaced
- /// successors.
- void PlaceBlocks(BasicBlock *BB);
- };
-}
-
-char BlockPlacement::ID = 0;
-INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement",
- "Profile Guided Basic Block Placement", false, false)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(BlockPlacement, "block-placement",
- "Profile Guided Basic Block Placement", false, false)
-
-FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
-
-bool BlockPlacement::runOnFunction(Function &F) {
- PI = &getAnalysis<ProfileInfo>();
-
- NumMovedBlocks = 0;
- InsertPos = F.begin();
-
- // Recursively place all blocks.
- PlaceBlocks(F.begin());
-
- PlacedBlocks.clear();
- NumMoved += NumMovedBlocks;
- return NumMovedBlocks != 0;
-}
-
-
-/// PlaceBlocks - Recursively place the specified blocks and any unplaced
-/// successors.
-void BlockPlacement::PlaceBlocks(BasicBlock *BB) {
- assert(!PlacedBlocks.count(BB) && "Already placed this block!");
- PlacedBlocks.insert(BB);
-
- // Place the specified block.
- if (&*InsertPos != BB) {
- // Use splice to move the block into the right place. This avoids having to
- // remove the block from the function then readd it, which causes a bunch of
- // symbol table traffic that is entirely pointless.
- Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList();
- Blocks.splice(InsertPos, Blocks, BB);
-
- ++NumMovedBlocks;
- } else {
- // This block is already in the right place, we don't have to do anything.
- ++InsertPos;
- }
-
- // Keep placing successors until we run out of ones to place. Note that this
- // loop is very inefficient (N^2) for blocks with many successors, like switch
- // statements. FIXME!
- while (1) {
- // Okay, now place any unplaced successors.
- succ_iterator SI = succ_begin(BB), E = succ_end(BB);
-
- // Scan for the first unplaced successor.
- for (; SI != E && PlacedBlocks.count(*SI); ++SI)
- /*empty*/;
- if (SI == E) return; // No more successors to place.
-
- double MaxExecutionCount = PI->getExecutionCount(*SI);
- BasicBlock *MaxSuccessor = *SI;
-
- // Scan for more frequently executed successors
- for (; SI != E; ++SI)
- if (!PlacedBlocks.count(*SI)) {
- double Count = PI->getExecutionCount(*SI);
- if (Count > MaxExecutionCount ||
- // Prefer to not disturb the code.
- (Count == MaxExecutionCount && *SI == &*InsertPos)) {
- MaxExecutionCount = Count;
- MaxSuccessor = *SI;
- }
- }
-
- // Now that we picked the maximally executed successor, place it.
- PlaceBlocks(MaxSuccessor);
- }
-}
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index f5d1db1..626c810 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,6 +1,5 @@
add_llvm_library(LLVMScalarOpts
ADCE.cpp
- BasicBlockPlacement.cpp
CodeGenPrepare.cpp
ConstantProp.cpp
CorrelatedValuePropagation.cpp
@@ -17,12 +16,15 @@ add_llvm_library(LLVMScalarOpts
LoopInstSimplify.cpp
LoopRotation.cpp
LoopStrengthReduce.cpp
+ LoopRerollPass.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
LowerAtomic.cpp
MemCpyOptimizer.cpp
+ PartiallyInlineLibCalls.cpp
Reassociate.cpp
Reg2Mem.cpp
+ SampleProfile.cpp
SCCP.cpp
SROA.cpp
Scalar.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 44804a2..007e9b7 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -80,7 +79,6 @@ namespace {
const TargetLowering *TLI;
const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
- ProfileInfo *PFI;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
@@ -111,7 +109,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
- AU.addPreserved<ProfileInfo>();
AU.addRequired<TargetLibraryInfo>();
}
@@ -151,7 +148,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (TM) TLI = TM->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
- PFI = getAnalysisIfAvailable<ProfileInfo>();
OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize);
@@ -442,10 +438,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
DT->changeImmediateDominator(DestBB, NewIDom);
DT->eraseNode(BB);
}
- if (PFI) {
- PFI->replaceAllUses(BB, DestBB);
- PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
- }
BB->eraseFromParent();
++NumBlocksElim;
@@ -840,10 +832,12 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
}
};
+#ifndef NDEBUG
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
AM.print(OS);
return OS;
}
+#endif
void ExtAddrMode::print(raw_ostream &OS) const {
bool NeedPlus = false;
@@ -1035,7 +1029,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::IntToPtr:
// This inttoptr is a no-op if the integer type is pointer sized.
if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
- TLI.getPointerTy())
+ TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::BitCast:
@@ -1418,8 +1412,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
Value *Address = User->getOperand(OpNo);
if (!Address->getType()->isPointerTy())
return false;
- Type *AddressAccessTy =
- cast<PointerType>(Address->getType())->getElementType();
+ Type *AddressAccessTy = Address->getType()->getPointerElementType();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
@@ -1573,9 +1566,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
- Type *IntPtrTy =
- TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());
-
+ Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
Value *Result = 0;
// Start with the base register. Do this first so that subsequent address
@@ -1894,7 +1885,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P)) {
+ if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+ TLInfo, DT)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 3c08634..5266894 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -72,11 +72,6 @@ namespace {
}
namespace llvm {
-// SimpleValue is POD.
-template<> struct isPodLike<SimpleValue> {
- static const bool value = true;
-};
-
template<> struct DenseMapInfo<SimpleValue> {
static inline SimpleValue getEmptyKey() {
return DenseMapInfo<Instruction*>::getEmptyKey();
@@ -220,11 +215,6 @@ namespace {
}
namespace llvm {
- // CallValue is POD.
- template<> struct isPodLike<CallValue> {
- static const bool value = true;
- };
-
template<> struct DenseMapInfo<CallValue> {
static inline CallValue getEmptyKey() {
return DenseMapInfo<Instruction*>::getEmptyKey();
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index bc418af..6af269d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
@@ -507,7 +508,9 @@ namespace {
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
+ MemIntrin, // A memory intrinsic which is loaded from.
+      UndefVal       // An UndefValue representing a value from a dead block
+                     // (which is not yet physically removed from the CFG).
};
/// V - The value that is live out of the block.
@@ -545,10 +548,20 @@ namespace {
Res.Offset = Offset;
return Res;
}
-
+
+ static AvailableValueInBlock getUndef(BasicBlock *BB) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(0);
+ Res.Val.setInt(UndefVal);
+ Res.Offset = 0;
+ return Res;
+ }
+
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+ bool isUndefValue() const { return Val.getInt() == UndefVal; }
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
@@ -576,6 +589,7 @@ namespace {
DominatorTree *DT;
const DataLayout *TD;
const TargetLibraryInfo *TLI;
+ SetVector<BasicBlock *> DeadBlocks;
ValueTable VN;
@@ -698,6 +712,9 @@ namespace {
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
const BasicBlockEdge &Root);
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
+ bool processFoldableCondBr(BranchInst *BI);
+ void addDeadBlock(BasicBlock *BB);
+ void assignValNumForDeadCode();
};
char GVN::ID = 0;
@@ -1071,14 +1088,15 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (Offset == -1)
return Offset;
+ unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
- llvm::Type::getInt8PtrTy(Src->getContext()));
+ Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
if (ConstantFoldLoadFromConstPtr(Src, &TD))
return Offset;
return -1;
@@ -1155,7 +1173,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *DestPTy =
IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
DestPTy = PointerType::get(DestPTy,
- cast<PointerType>(PtrVal->getType())->getAddressSpace());
+ PtrVal->getType()->getPointerAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
@@ -1230,15 +1248,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
Src = ConstantExpr::getBitCast(Src,
- llvm::Type::getInt8PtrTy(Src->getContext()));
+ Type::getInt8PtrTy(Src->getContext(), AS));
Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
@@ -1253,8 +1272,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
- LI->getParent()))
+ LI->getParent())) {
+    assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominates this block");
return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+ }
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
@@ -1324,7 +1345,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
<< *getCoercedLoadValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
- } else {
+ } else if (isMemIntrinValue()) {
const DataLayout *TD = gvn.getDataLayout();
assert(TD && "Need target data to handle type mismatch case");
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
@@ -1332,6 +1353,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
+ } else {
+ assert(isUndefValue() && "Should be UndefVal");
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";);
+ return UndefValue::get(LoadTy);
}
return Res;
}
@@ -1355,6 +1380,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
+ if (DeadBlocks.count(DepBB)) {
+      // A dead dependent mem-op disguises itself as a load evaluating the
+      // same value as the load in question.
+ ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
+ continue;
+ }
+
if (!DepInfo.isDef() && !DepInfo.isClobber()) {
UnavailableBlocks.push_back(DepBB);
continue;
@@ -2191,11 +2223,13 @@ bool GVN::processInstruction(Instruction *I) {
// For conditional branches, we can perform simple conditional propagation on
// the condition value itself.
if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
+ if (!BI->isConditional())
return false;
- Value *BranchCond = BI->getCondition();
+ if (isa<Constant>(BI->getCondition()))
+ return processFoldableCondBr(BI);
+ Value *BranchCond = BI->getCondition();
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
// Avoid multiple edges early.
@@ -2312,6 +2346,9 @@ bool GVN::runOnFunction(Function& F) {
}
if (EnablePRE) {
+ // Fabricate val-num for dead-code in order to suppress assertion in
+ // performPRE().
+ assignValNumForDeadCode();
bool PREChanged = true;
while (PREChanged) {
PREChanged = performPRE(F);
@@ -2325,6 +2362,9 @@ bool GVN::runOnFunction(Function& F) {
// Actually, when this happens, we should just fully integrate PRE into GVN.
cleanupGlobalSets();
+ // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
+ // iteration.
+ DeadBlocks.clear();
return Changed;
}
@@ -2335,6 +2375,9 @@ bool GVN::processBlock(BasicBlock *BB) {
// (and incrementing BI before processing an instruction).
assert(InstrsToErase.empty() &&
"We expect InstrsToErase to be empty across iterations");
+ if (DeadBlocks.count(BB))
+ return false;
+
bool ChangedFunction = false;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
@@ -2628,3 +2671,133 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
}
}
}
+
+// BB is declared dead, which implies other blocks become dead as well. This
+// function adds all these blocks to "DeadBlocks". For the dead blocks'
+// live successors, update their phi nodes by replacing the operands
+// corresponding to dead blocks with UndefVal.
+//
+void GVN::addDeadBlock(BasicBlock *BB) {
+ SmallVector<BasicBlock *, 4> NewDead;
+ SmallSetVector<BasicBlock *, 4> DF;
+
+ NewDead.push_back(BB);
+ while (!NewDead.empty()) {
+ BasicBlock *D = NewDead.pop_back_val();
+ if (DeadBlocks.count(D))
+ continue;
+
+ // All blocks dominated by D are dead.
+ SmallVector<BasicBlock *, 8> Dom;
+ DT->getDescendants(D, Dom);
+ DeadBlocks.insert(Dom.begin(), Dom.end());
+
+ // Figure out the dominance-frontier(D).
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(),
+ E = Dom.end(); I != E; I++) {
+ BasicBlock *B = *I;
+ for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) {
+ BasicBlock *S = *SI;
+ if (DeadBlocks.count(S))
+ continue;
+
+ bool AllPredDead = true;
+ for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++)
+ if (!DeadBlocks.count(*PI)) {
+ AllPredDead = false;
+ break;
+ }
+
+ if (!AllPredDead) {
+ // S could be proved dead later on. That is why we don't update phi
+ // operands at this moment.
+ DF.insert(S);
+ } else {
+          // While S is not dominated by D, it is dead by now. This could take
+          // place if S already had a dead predecessor before D was declared
+          // dead.
+ NewDead.push_back(S);
+ }
+ }
+ }
+ }
+
+ // For the dead blocks' live successors, update their phi nodes by replacing
+ // the operands corresponding to dead blocks with UndefVal.
+ for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end();
+ I != E; I++) {
+ BasicBlock *B = *I;
+ if (DeadBlocks.count(B))
+ continue;
+
+ SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B));
+ for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; PI++) {
+ BasicBlock *P = *PI;
+
+ if (!DeadBlocks.count(P))
+ continue;
+
+ if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) {
+ if (BasicBlock *S = splitCriticalEdges(P, B))
+ DeadBlocks.insert(P = S);
+ }
+
+ for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
+ PHINode &Phi = cast<PHINode>(*II);
+ Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
+ UndefValue::get(Phi.getType()));
+ }
+ }
+ }
+}
+
+// If the given branch is recognized as a foldable branch (i.e. a conditional
+// branch with a constant condition), perform the following analyses and
+// transformations:
+//  1) If the dead outgoing edge is a critical edge, split it. Let
+//     R be the target of the dead outgoing edge.
+//  2) Identify the set of dead blocks implied by the branch's dead outgoing
+//     edge. The result of this step will be {X | X is dominated by R}.
+//  3) Identify those blocks which have at least one dead predecessor. The
+//     result of this step will be dominance-frontier(R).
+//  4) Update the PHIs in DF(R) by replacing the operands corresponding to
+//     dead blocks with "UndefVal", in the hope that these PHIs will be
+//     optimized away.
+//
+// Return true iff *NEW* dead code is found.
+bool GVN::processFoldableCondBr(BranchInst *BI) {
+ if (!BI || BI->isUnconditional())
+ return false;
+
+ ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+ if (!Cond)
+ return false;
+
+ BasicBlock *DeadRoot = Cond->getZExtValue() ?
+ BI->getSuccessor(1) : BI->getSuccessor(0);
+ if (DeadBlocks.count(DeadRoot))
+ return false;
+
+ if (!DeadRoot->getSinglePredecessor())
+ DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
+
+ addDeadBlock(DeadRoot);
+ return true;
+}
+
+// performPRE() will trigger an assert if it comes across an instruction that
+// has no associated val-num. As it normally has far more live instructions
+// than dead instructions, it makes more sense just to "fabricate" a val-number
+// for the dead code than to check whether the instruction involved is dead.
+void GVN::assignValNumForDeadCode() {
+ for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(),
+ E = DeadBlocks.end(); I != E; I++) {
+ BasicBlock *BB = *I;
+ for (BasicBlock::iterator II = BB->begin(), EE = BB->end();
+ II != EE; II++) {
+ Instruction *Inst = &*II;
+ unsigned ValNum = VN.lookup_or_add(Inst);
+ addToLeaderTable(ValNum, Inst, BB);
+ }
+ }
+}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index d51e034..235aaaa 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -532,7 +532,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue))
+ if (!SE->isLoopInvariant(ExitValue, L) ||
+ !isSafeToExpand(ExitValue, *SE))
continue;
// Computing the value outside of the loop brings no benefit if :
@@ -1479,8 +1480,14 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
if (IndVar->getType()->isPointerTy()
&& !IVCount->getType()->isPointerTy()) {
+ // IVOffset will be the new GEP offset that is interpreted by GEP as a
+ // signed value. IVCount on the other hand represents the loop trip count,
+ // which is an unsigned value. FindLoopCounter only allows induction
+ // variables that have a positive unit stride of one. This means we don't
+ // have to handle the case of negative offsets (yet) and just need to zero
+ // extend IVCount.
Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
- const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
+ const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
// Expand the code for the iteration count.
assert(SE->isLoopInvariant(IVOffset, L) &&
@@ -1492,7 +1499,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
// We could handle pointer IVs other than i8*, but we need to compensate for
// gep index scaling. See canExpandBackedgeTakenCount comments.
- assert(SE->getSizeOfExpr(
+ assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
&& "unit stride pointer IV must be i8*");
@@ -1506,9 +1513,10 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
//
// Valid Cases: (1) both integers is most common; (2) both may be pointers
- // for simple memset-style loops; (3) IVInit is an integer and IVCount is a
- // pointer may occur when enable-iv-rewrite generates a canonical IV on top
- // of case #2.
+ // for simple memset-style loops.
+ //
+ // IVInit integer and IVCount pointer would only occur if a canonical IV
+ // were generated on top of case #2, which is not expected.
const SCEV *IVLimit = 0;
// For unit stride, IVCount = Start + BECount with 2's complement overflow.
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 0b8906d..b3ec2fc 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -827,7 +827,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
return false;
}
-
/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
/// load instruction, eliminate it by replacing it with a PHI node. This is an
/// important optimization that encourages jump threading, and needs to be run
@@ -842,6 +841,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (LoadBB->getSinglePredecessor())
return false;
+  // If the load is defined in a landing pad, it can't be partially redundant,
+  // because the edge from the invoke to the landing pad cannot carry any
+  // other instructions.
+ if (LoadBB->isLandingPad())
+ return false;
+
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 07d991b..952b76b 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -314,7 +314,7 @@ bool NclPopcountRecognize::preliminaryScreen() {
if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
return false;
- // Counting population are usually conducted by few arithmetic instrutions.
+  // Counting population is usually conducted by a few arithmetic instructions.
// Such instructions can be easilly "absorbed" by vacant slots in a
// non-compact loop. Therefore, recognizing popcount idiom only makes sense
// in a compact loop.
@@ -953,6 +953,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = 0;
+ unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
+
// If we're allowed to form a memset, and the stored value would be acceptable
// for memset, use it.
if (SplatValue && TLI->has(LibFunc::memset) &&
@@ -961,8 +963,10 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
PatternValue = 0;
- } else if (TLI->has(LibFunc::memset_pattern16) &&
+ } else if (DestAS == 0 &&
+ TLI->has(LibFunc::memset_pattern16) &&
(PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
+    // Don't create memset_pattern16s for non-default address spaces.
// It looks like we can use PatternValue!
SplatValue = 0;
} else {
@@ -978,20 +982,20 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
IRBuilder<> Builder(Preheader->getTerminator());
SCEVExpander Expander(*SE, "loop-idiom");
+ Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
// or write to the aliased location. Check for any overlap by generating the
// base pointer and checking the region.
- unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
Value *BasePtr =
- Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
+ Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
Preheader->getTerminator());
-
if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
CurLoop, BECount,
- StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
+ StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
Expander.clear();
// If we generated new code for the base pointer, clean up.
deleteIfDeadInstruction(BasePtr, *SE, TLI);
@@ -1002,27 +1006,35 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
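+  // For example (hypothetical values): storing i32 elements with a
+  // backedge-taken count of 99 writes (99+1)*4 == 400 bytes.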
- Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
+ Type *IntPtr = Builder.getIntPtrTy(TD, DestAS);
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
SCEV::FlagNUW);
- if (StoreSize != 1)
+ if (StoreSize != 1) {
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
SCEV::FlagNUW);
+ }
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
CallInst *NewCall;
- if (SplatValue)
- NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
- else {
+ if (SplatValue) {
+ NewCall = Builder.CreateMemSet(BasePtr,
+ SplatValue,
+ NumBytes,
+ StoreAlignment);
+ } else {
+    // DestAS is 0 here (checked above), so everything is emitted in the
+    // default address space.
+ Type *Int8PtrTy = DestInt8PtrTy;
+
Module *M = TheStore->getParent()->getParent()->getParent();
Value *MSP = M->getOrInsertFunction("memset_pattern16",
Builder.getVoidTy(),
- Builder.getInt8PtrTy(),
- Builder.getInt8PtrTy(), IntPtr,
+ Int8PtrTy,
+ Int8PtrTy,
+ IntPtr,
(void*)0);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
@@ -1032,7 +1044,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(true); // Ok to merge these.
GV->setAlignment(16);
- Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
+ Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
}
@@ -1108,17 +1120,17 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
- Type *IntPtr = TD->getIntPtrType(SI->getContext());
- BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
+ Type *IntPtrTy = Builder.getIntPtrTy(TD, SI->getPointerAddressSpace());
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
- const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
+ const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
SCEV::FlagNUW);
if (StoreSize != 1)
- NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
SCEV::FlagNUW);
Value *NumBytes =
- Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
+ Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
CallInst *NewCall =
Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
new file mode 100644
index 0000000..335af81
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -0,0 +1,1184 @@
+//===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a simple loop reroller.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-reroll"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+STATISTIC(NumRerolledLoops, "Number of rerolled loops");
+
+static cl::opt<unsigned>
+MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
+ cl::desc("The maximum increment for loop rerolling"));
+
+// This loop re-rolling transformation aims to transform loops like this:
+//
+// int foo(int a);
+// void bar(int *x) {
+// for (int i = 0; i < 500; i += 3) {
+// foo(i);
+// foo(i+1);
+// foo(i+2);
+// }
+// }
+//
+// into a loop like this:
+//
+// void bar(int *x) {
+// for (int i = 0; i < 500; ++i)
+// foo(i);
+// }
+//
+// It does this by looking for loops that, besides the latch code, are composed
+// of isomorphic DAGs of instructions, with each DAG rooted at some increment
+// to the induction variable, and where each DAG is isomorphic to the DAG
+// rooted at the induction variable (excepting the sub-DAGs which root the
+// other induction-variable increments). In other words, we're looking for loop
+// bodies of the form:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// f(%iv)
+// %iv.1 = add %iv, 1 <-- a root increment
+// f(%iv.1)
+// %iv.2 = add %iv, 2 <-- a root increment
+// f(%iv.2)
+// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
+// f(%iv.scale_m_1)
+// ...
+// %iv.next = add %iv, scale
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+//
+// where each f(i) is a set of instructions that, collectively, are a function
+// only of i (and other loop-invariant values).
+//
+// As a special case, we can also reroll loops like this:
+//
+// int foo(int);
+// void bar(int *x) {
+// for (int i = 0; i < 500; ++i) {
+// x[3*i] = foo(0);
+// x[3*i+1] = foo(0);
+// x[3*i+2] = foo(0);
+// }
+// }
+//
+// into this:
+//
+// void bar(int *x) {
+// for (int i = 0; i < 1500; ++i)
+// x[i] = foo(0);
+// }
+//
+// in which case, we're looking for inputs like this:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// %scaled.iv = mul %iv, scale
+// f(%scaled.iv)
+// %scaled.iv.1 = add %scaled.iv, 1
+// f(%scaled.iv.1)
+// %scaled.iv.2 = add %scaled.iv, 2
+// f(%scaled.iv.2)
+// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
+// f(%scaled.iv.scale_m_1)
+// ...
+// %iv.next = add %iv, 1
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+
+namespace {
+ class LoopReroll : public LoopPass {
+ public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopReroll() : LoopPass(ID) {
+ initializeLoopRerollPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetLibraryInfo>();
+ }
+
+protected:
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+
+ typedef SmallVector<Instruction *, 16> SmallInstructionVector;
+ typedef SmallSet<Instruction *, 16> SmallInstructionSet;
+
+  // A chain of isomorphic instructions, identified by a single-use PHI,
+ // representing a reduction. Only the last value may be used outside the
+ // loop.
+ struct SimpleLoopReduction {
+ SimpleLoopReduction(Instruction *P, Loop *L)
+ : Valid(false), Instructions(1, P) {
+ assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
+ add(L);
+ }
+
+ bool valid() const {
+ return Valid;
+ }
+
+ Instruction *getPHI() const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions.front();
+ }
+
+ Instruction *getReducedValue() const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions.back();
+ }
+
+ Instruction *get(size_t i) const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions[i+1];
+ }
+
+ Instruction *operator [] (size_t i) const { return get(i); }
+
+ // The size, ignoring the initial PHI.
+ size_t size() const {
+ assert(Valid && "Using invalid reduction");
+ return Instructions.size()-1;
+ }
+
+ typedef SmallInstructionVector::iterator iterator;
+ typedef SmallInstructionVector::const_iterator const_iterator;
+
+ iterator begin() {
+ assert(Valid && "Using invalid reduction");
+ return llvm::next(Instructions.begin());
+ }
+
+ const_iterator begin() const {
+ assert(Valid && "Using invalid reduction");
+ return llvm::next(Instructions.begin());
+ }
+
+ iterator end() { return Instructions.end(); }
+ const_iterator end() const { return Instructions.end(); }
+
+ protected:
+ bool Valid;
+ SmallInstructionVector Instructions;
+
+ void add(Loop *L);
+ };
+
+ // The set of all reductions, and state tracking of possible reductions
+ // during loop instruction processing.
+ struct ReductionTracker {
+ typedef SmallVector<SimpleLoopReduction, 16> SmallReductionVector;
+
+ // Add a new possible reduction.
+ void addSLR(SimpleLoopReduction &SLR) {
+ PossibleReds.push_back(SLR);
+ }
+
+    // Set up to track possible reductions corresponding to the provided
+    // rerolling scale. Only reductions with a number of non-PHI instructions
+    // that is divisible by the scale are considered. Three instruction sets
+    // are filled in:
+    // - A set of all possible instructions in eligible reductions.
+    // - A set of all PHIs in eligible reductions.
+    // - A set of all reduced values (last instructions) in eligible reductions.
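+    //
+    // For example (hypothetical numbers), with Scale == 2 a reduction whose
+    // chain has four non-PHI instructions is eligible; one with three is not.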
+ void restrictToScale(uint64_t Scale,
+ SmallInstructionSet &PossibleRedSet,
+ SmallInstructionSet &PossibleRedPHISet,
+ SmallInstructionSet &PossibleRedLastSet) {
+ PossibleRedIdx.clear();
+ PossibleRedIter.clear();
+ Reds.clear();
+
+ for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
+ if (PossibleReds[i].size() % Scale == 0) {
+ PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
+ PossibleRedPHISet.insert(PossibleReds[i].getPHI());
+
+ PossibleRedSet.insert(PossibleReds[i].getPHI());
+ PossibleRedIdx[PossibleReds[i].getPHI()] = i;
+ for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
+ JE = PossibleReds[i].end(); J != JE; ++J) {
+ PossibleRedSet.insert(*J);
+ PossibleRedIdx[*J] = i;
+ }
+ }
+ }
+
+ // The functions below are used while processing the loop instructions.
+
+ // Are the two instructions both from reductions, and furthermore, from
+ // the same reduction?
+ bool isPairInSame(Instruction *J1, Instruction *J2) {
+ DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
+ if (J1I != PossibleRedIdx.end()) {
+ DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
+ if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
+ return true;
+ }
+
+ return false;
+ }
+
+ // The two provided instructions, the first from the base iteration, and
+ // the second from iteration i, form a matched pair. If these are part of
+ // a reduction, record that fact.
+ void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
+ if (PossibleRedIdx.count(J1)) {
+ assert(PossibleRedIdx.count(J2) &&
+ "Recording reduction vs. non-reduction instruction?");
+
+ PossibleRedIter[J1] = 0;
+ PossibleRedIter[J2] = i;
+
+ int Idx = PossibleRedIdx[J1];
+ assert(Idx == PossibleRedIdx[J2] &&
+ "Recording pair from different reductions?");
+ Reds.insert(Idx);
+ }
+ }
+
+ // The functions below can be called after we've finished processing all
+ // instructions in the loop, and we know which reductions were selected.
+
+ // Is the provided instruction the PHI of a reduction selected for
+ // rerolling?
+ bool isSelectedPHI(Instruction *J) {
+ if (!isa<PHINode>(J))
+ return false;
+
+ for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
+ RI != RIE; ++RI) {
+ int i = *RI;
+ if (cast<Instruction>(J) == PossibleReds[i].getPHI())
+ return true;
+ }
+
+ return false;
+ }
+
+ bool validateSelected();
+ void replaceSelected();
+
+ protected:
+ // The vector of all possible reductions (for any scale).
+ SmallReductionVector PossibleReds;
+
+ DenseMap<Instruction *, int> PossibleRedIdx;
+ DenseMap<Instruction *, int> PossibleRedIter;
+ DenseSet<int> Reds;
+ };
+
+ void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
+ void collectPossibleReductions(Loop *L,
+ ReductionTracker &Reductions);
+ void collectInLoopUserSet(Loop *L,
+ const SmallInstructionVector &Roots,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+ void collectInLoopUserSet(Loop *L,
+ Instruction * Root,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users);
+ bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
+ Instruction *&IV,
+ SmallInstructionVector &LoopIncs);
+ bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV,
+ SmallVector<SmallInstructionVector, 32> &Roots,
+ SmallInstructionSet &AllRoots,
+ SmallInstructionVector &LoopIncs);
+ bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount,
+ ReductionTracker &Reductions);
+ };
+}
+
+char LoopReroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false)
+
+Pass *llvm::createLoopRerollPass() {
+ return new LoopReroll;
+}
+
+// Returns true if the provided instruction is used outside the given loop.
+// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
+// non-loop blocks to be outside the loop.
+static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
+ for (Value::use_iterator UI = I->use_begin(),
+ UIE = I->use_end(); UI != UIE; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!L->contains(User))
+ return true;
+ }
+
+ return false;
+}
+
+// Collect the list of loop induction variables with respect to which it might
+// be possible to reroll the loop.
+void LoopReroll::collectPossibleIVs(Loop *L,
+ SmallInstructionVector &PossibleIVs) {
+ BasicBlock *Header = L->getHeader();
+ for (BasicBlock::iterator I = Header->begin(),
+ IE = Header->getFirstInsertionPt(); I != IE; ++I) {
+ if (!isa<PHINode>(I))
+ continue;
+ if (!I->getType()->isIntegerTy())
+ continue;
+
+ if (const SCEVAddRecExpr *PHISCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(I))) {
+ if (PHISCEV->getLoop() != L)
+ continue;
+ if (!PHISCEV->isAffine())
+ continue;
+ if (const SCEVConstant *IncSCEV =
+ dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE))) {
+ if (!IncSCEV->getValue()->getValue().isStrictlyPositive())
+ continue;
+ if (IncSCEV->getValue()->uge(MaxInc))
+ continue;
+
+ DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " <<
+ *PHISCEV << "\n");
+ PossibleIVs.push_back(I);
+ }
+ }
+ }
+}
+
+// Add the remainder of the reduction-variable chain to the instruction vector
+// (the initial PHINode has already been added). If successful, the object is
+// marked as valid.
+void LoopReroll::SimpleLoopReduction::add(Loop *L) {
+ assert(!Valid && "Cannot add to an already-valid chain");
+
+ // The reduction variable must be a chain of single-use instructions
+ // (including the PHI), except for the last value (which is used by the PHI
+ // and also outside the loop).
+ Instruction *C = Instructions.front();
+
+ do {
+ C = cast<Instruction>(*C->use_begin());
+ if (C->hasOneUse()) {
+ if (!C->isBinaryOp())
+ return;
+
+ if (!(isa<PHINode>(Instructions.back()) ||
+ C->isSameOperationAs(Instructions.back())))
+ return;
+
+ Instructions.push_back(C);
+ }
+ } while (C->hasOneUse());
+
+ if (Instructions.size() < 2 ||
+ !C->isSameOperationAs(Instructions.back()) ||
+ C->use_begin() == C->use_end())
+ return;
+
+ // C is now the (potential) last instruction in the reduction chain.
+ for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end();
+ UI != UIE; ++UI) {
+ // The only in-loop user can be the initial PHI.
+ if (L->contains(cast<Instruction>(*UI)))
+      if (cast<Instruction>(*UI) != Instructions.front())
+ return;
+ }
+
+ Instructions.push_back(C);
+ Valid = true;
+}
+
+// Collect the vector of possible reduction variables.
+void LoopReroll::collectPossibleReductions(Loop *L,
+ ReductionTracker &Reductions) {
+ BasicBlock *Header = L->getHeader();
+ for (BasicBlock::iterator I = Header->begin(),
+ IE = Header->getFirstInsertionPt(); I != IE; ++I) {
+ if (!isa<PHINode>(I))
+ continue;
+ if (!I->getType()->isSingleValueType())
+ continue;
+
+ SimpleLoopReduction SLR(I, L);
+ if (!SLR.valid())
+ continue;
+
+ DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " <<
+ SLR.size() << " chained instructions)\n");
+ Reductions.addSLR(SLR);
+ }
+}
+
+// Collect the set of all users of the provided root instruction. This set of
+// users contains not only the direct users of the root instruction, but also
+// all users of those users, and so on. There are two exceptions:
+//
+// 1. Instructions in the set of excluded instructions are never added to the
+//    use set (even if they are users). This is used, for example, to keep
+//    root increments out of the use set of the primary IV.
+//
+// 2. Instructions in the set of final instructions are added to the use set
+// if they are users, but their users are not added. This is used, for
+// example, to prevent a reduction update from forcing all later reduction
+// updates into the use set.
+void LoopReroll::collectInLoopUserSet(Loop *L,
+ Instruction *Root, const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users) {
+ SmallInstructionVector Queue(1, Root);
+ while (!Queue.empty()) {
+ Instruction *I = Queue.pop_back_val();
+ if (!Users.insert(I).second)
+ continue;
+
+ if (!Final.count(I))
+ for (Value::use_iterator UI = I->use_begin(),
+ UIE = I->use_end(); UI != UIE; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ // Ignore "wrap-around" uses to PHIs of this loop's header.
+ if (PN->getIncomingBlock(UI) == L->getHeader())
+ continue;
+ }
+
+ if (L->contains(User) && !Exclude.count(User)) {
+ Queue.push_back(User);
+ }
+ }
+
+ // We also want to collect single-user "feeder" values.
+ for (User::op_iterator OI = I->op_begin(),
+ OIE = I->op_end(); OI != OIE; ++OI) {
+ if (Instruction *Op = dyn_cast<Instruction>(*OI))
+ if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
+ !Final.count(Op))
+ Queue.push_back(Op);
+ }
+ }
+}
+
+// Collect all of the users of all of the provided root instructions (combined
+// into a single set).
+void LoopReroll::collectInLoopUserSet(Loop *L,
+ const SmallInstructionVector &Roots,
+ const SmallInstructionSet &Exclude,
+ const SmallInstructionSet &Final,
+ DenseSet<Instruction *> &Users) {
+ for (SmallInstructionVector::const_iterator I = Roots.begin(),
+ IE = Roots.end(); I != IE; ++I)
+ collectInLoopUserSet(L, *I, Exclude, Final, Users);
+}
+
+static bool isSimpleLoadStore(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isSimple();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isSimple();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return !MI->isVolatile();
+ return false;
+}
+
+// Recognize loops that are set up like this:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// %scaled.iv = mul %iv, scale
+// f(%scaled.iv)
+// %scaled.iv.1 = add %scaled.iv, 1
+// f(%scaled.iv.1)
+// %scaled.iv.2 = add %scaled.iv, 2
+// f(%scaled.iv.2)
+// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
+// f(%scaled.iv.scale_m_1)
+// ...
+// %iv.next = add %iv, 1
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+//
+// and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs.
+bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale,
+ Instruction *&IV,
+ SmallInstructionVector &LoopIncs) {
+ // This is a special case: here we're looking for all uses (except for
+ // the increment) to be multiplied by a common factor. The increment must
+ // be by one. This is to capture loops like:
+ // for (int i = 0; i < 500; ++i) {
+ // foo(3*i); foo(3*i+1); foo(3*i+2);
+ // }
+ if (RealIV->getNumUses() != 2)
+ return false;
+ const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(RealIV));
+ Instruction *User1 = cast<Instruction>(*RealIV->use_begin()),
+ *User2 = cast<Instruction>(*llvm::next(RealIV->use_begin()));
+ if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType()))
+ return false;
+ const SCEVAddRecExpr *User1SCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User1)),
+ *User2SCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(User2));
+ if (!User1SCEV || !User1SCEV->isAffine() ||
+ !User2SCEV || !User2SCEV->isAffine())
+ return false;
+
+ // We assume below that User1 is the scale multiply and User2 is the
+  // increment. If it turns out to be the other way around, swap them.
+ if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) {
+ std::swap(User1, User2);
+ std::swap(User1SCEV, User2SCEV);
+ }
+
+ if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE))
+ return false;
+ assert(User2SCEV->getStepRecurrence(*SE)->isOne() &&
+ "Invalid non-unit step for multiplicative scaling");
+ LoopIncs.push_back(User2);
+
+ if (const SCEVConstant *MulScale =
+ dyn_cast<SCEVConstant>(User1SCEV->getStepRecurrence(*SE))) {
+ // Make sure that both the start and step have the same multiplier.
+ if (RealIVSCEV->getStart()->getType() != MulScale->getType())
+ return false;
+ if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) !=
+ User1SCEV->getStart())
+ return false;
+
+ ConstantInt *MulScaleCI = MulScale->getValue();
+ if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc))
+ return false;
+ Scale = MulScaleCI->getZExtValue();
+ IV = User1;
+ } else
+ return false;
+
+ DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n");
+ return true;
+}
+
+// Collect all root increments with respect to the provided induction variable
+// (normally the PHI, but sometimes a multiply). A root increment is an
+// instruction, normally an add of a positive constant less than Scale. In a
+// rerollable loop, each of these increments is the root of an instruction
+// graph isomorphic to the others. Also, we collect the final induction
+// increment (the increment equal to the Scale), and its users in LoopIncs.
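+//
+// For example (a hypothetical scale), with Scale == 3 the adds of 1 and 2 to
+// the IV are root increments, while the add of 3 (the real loop increment)
+// is collected, together with its users, into LoopIncs.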
+bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale,
+ Instruction *IV,
+ SmallVector<SmallInstructionVector, 32> &Roots,
+ SmallInstructionSet &AllRoots,
+ SmallInstructionVector &LoopIncs) {
+ for (Value::use_iterator UI = IV->use_begin(),
+ UIE = IV->use_end(); UI != UIE; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+ if (!SE->isSCEVable(User->getType()))
+ continue;
+ if (User->getType() != IV->getType())
+ continue;
+ if (!L->contains(User))
+ continue;
+ if (hasUsesOutsideLoop(User, L))
+ continue;
+
+ if (const SCEVConstant *Diff = dyn_cast<SCEVConstant>(SE->getMinusSCEV(
+ SE->getSCEV(User), SE->getSCEV(IV)))) {
+ uint64_t Idx = Diff->getValue()->getValue().getZExtValue();
+ if (Idx > 0 && Idx < Scale) {
+ Roots[Idx-1].push_back(User);
+ AllRoots.insert(User);
+ } else if (Idx == Scale && Inc > 1) {
+ LoopIncs.push_back(User);
+ }
+ }
+ }
+
+ if (Roots[0].empty())
+ return false;
+ bool AllSame = true;
+ for (unsigned i = 1; i < Scale-1; ++i)
+ if (Roots[i].size() != Roots[0].size()) {
+ AllSame = false;
+ break;
+ }
+
+ if (!AllSame)
+ return false;
+
+ return true;
+}
+
+// Validate the selected reductions. All iterations must have an isomorphic
+// part of the reduction chain and, for non-associative reductions, the chain
+// entries must appear in order.
+bool LoopReroll::ReductionTracker::validateSelected() {
+ // For a non-associative reduction, the chain entries must appear in order.
+ for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
+ RI != RIE; ++RI) {
+ int i = *RI;
+ int PrevIter = 0, BaseCount = 0, Count = 0;
+ for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(),
+ JE = PossibleReds[i].end(); J != JE; ++J) {
+ // Note that all instructions in the chain must have been found because
+ // all instructions in the function must have been assigned to some
+ // iteration.
+ int Iter = PossibleRedIter[*J];
+ if (Iter != PrevIter && Iter != PrevIter + 1 &&
+ !PossibleReds[i].getReducedValue()->isAssociative()) {
+ DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " <<
+ *J << "\n");
+ return false;
+ }
+
+ if (Iter != PrevIter) {
+ if (Count != BaseCount) {
+ DEBUG(dbgs() << "LRR: Iteration " << PrevIter <<
+ " reduction use count " << Count <<
+ " is not equal to the base use count " <<
+ BaseCount << "\n");
+ return false;
+ }
+
+ Count = 0;
+ }
+
+ ++Count;
+ if (Iter == 0)
+ ++BaseCount;
+
+ PrevIter = Iter;
+ }
+ }
+
+ return true;
+}
+
+// For all selected reductions, remove all parts except those in the first
+// iteration (and the PHI). Replace outside uses of the reduced value with uses
+// of the first-iteration reduced value (in other words, reroll the selected
+// reductions).
+void LoopReroll::ReductionTracker::replaceSelected() {
+  // Fix up reductions to refer to the last instruction associated with the
+ // first iteration (not the last).
+ for (DenseSet<int>::iterator RI = Reds.begin(), RIE = Reds.end();
+ RI != RIE; ++RI) {
+ int i = *RI;
+ int j = 0;
+ for (int e = PossibleReds[i].size(); j != e; ++j)
+ if (PossibleRedIter[PossibleReds[i][j]] != 0) {
+ --j;
+ break;
+ }
+
+ // Replace users with the new end-of-chain value.
+ SmallInstructionVector Users;
+ for (Value::use_iterator UI =
+ PossibleReds[i].getReducedValue()->use_begin(),
+ UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI)
+ Users.push_back(cast<Instruction>(*UI));
+
+ for (SmallInstructionVector::iterator J = Users.begin(),
+ JE = Users.end(); J != JE; ++J)
+ (*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
+ PossibleReds[i][j]);
+ }
+}
+
+// Reroll the provided loop with respect to the provided induction variable.
+// Generally, we're looking for a loop like this:
+//
+// %iv = phi [ (preheader, ...), (body, %iv.next) ]
+// f(%iv)
+// %iv.1 = add %iv, 1 <-- a root increment
+// f(%iv.1)
+// %iv.2 = add %iv, 2 <-- a root increment
+// f(%iv.2)
+// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
+// f(%iv.scale_m_1)
+// ...
+// %iv.next = add %iv, scale
+// %cmp = icmp(%iv, ...)
+// br %cmp, header, exit
+//
+// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
+// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
+// be intermixed with each other. The restriction imposed by this algorithm is
+// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
+// etc. be the same.
+//
+// First, we collect the use set of %iv, excluding the other increment roots.
+// This gives us f(%iv). Then, (scale-1) times, we collect the use set of
+// f(%iv.(i+1)) and iterate over the loop instructions, during which we:
+// - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
+// the next unmatched instruction in f(%iv.(i+1)).
+// - Ensure that both matched instructions don't have any external users
+// (with the exception of last-in-chain reduction instructions).
+// - Track the (aliasing) write set, and other side effects, of all
+// instructions that belong to future iterations that come before the matched
+// instructions. If the matched instructions read from that write set, then
+// f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
+// f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
+//     if any of these future instructions have side effects (could not be
+//     speculatively executed), and so do the matched instructions, then we
+//     cannot reorder those side-effect-producing instructions, and rerolling
+//     fails.
+//
+// Finally, we make sure that every loop instruction is either a loop increment
+// root, part of simple latch code, part of a validated reduction, part of
+// f(%iv), or part of some f(%iv.i). If all of that is true (and all reductions
+// have been validated), then we reroll the loop.
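+//
+// As an illustrative sketch (hypothetical scale): with Scale == 3 the body is
+// partitioned into f(%iv), f(%iv.1) and f(%iv.2); after a successful match,
+// only the f(%iv) copy survives, driven by a new unit-stride IV.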
+bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
+ const SCEV *IterCount,
+ ReductionTracker &Reductions) {
+ const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
+ uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
+ getValue()->getZExtValue();
+ // The collection of loop increment instructions.
+ SmallInstructionVector LoopIncs;
+ uint64_t Scale = Inc;
+
+ // The effective induction variable, IV, is normally also the real induction
+ // variable. When we're dealing with a loop like:
+ // for (int i = 0; i < 500; ++i)
+ // x[3*i] = ...;
+ // x[3*i+1] = ...;
+ // x[3*i+2] = ...;
+ // then the real IV is still i, but the effective IV is (3*i).
+ Instruction *RealIV = IV;
+ if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
+ return false;
+
+ assert(Scale <= MaxInc && "Scale is too large");
+ assert(Scale > 1 && "Scale must be at least 2");
+
+ // The set of increment instructions for each increment value.
+ SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
+ SmallInstructionSet AllRoots;
+ if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
+ return false;
+
+ DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
+ *RealIV << "\n");
+
+ // An array of just the possible reductions for this scale factor. When we
+ // collect the set of all users of some root instructions, these reduction
+ // instructions are treated as 'final' (their uses are not considered).
+ // This is important because we don't want the root use set to search down
+ // the reduction chain.
+ SmallInstructionSet PossibleRedSet;
+ SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
+ Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
+ PossibleRedLastSet);
+
+ // We now need to check for equivalence of the use graph of each root with
+ // that of the primary induction variable (excluding the roots). Our goal
+ // here is not to solve the full graph isomorphism problem, but rather to
+ // catch common cases without a lot of work. As a result, we will assume
+ // that the relative order of the instructions in each unrolled iteration
+ // is the same (although we will not make an assumption about how the
+ // different iterations are intermixed). Note that while the order must be
+ // the same, the instructions may not be in the same basic block.
+ SmallInstructionSet Exclude(AllRoots);
+ Exclude.insert(LoopIncs.begin(), LoopIncs.end());
+
+ DenseSet<Instruction *> BaseUseSet;
+ collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);
+
+ DenseSet<Instruction *> AllRootUses;
+ std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);
+
+ bool MatchFailed = false;
+ for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
+ DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
+ collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
+ PossibleRedSet, RootUseSet);
+
+ DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
+ " vs. iteration increment " << (i+1) <<
+ " use set size: " << RootUseSet.size() << "\n");
+
+ if (BaseUseSet.size() != RootUseSet.size()) {
+ MatchFailed = true;
+ break;
+ }
+
+ // In addition to regular aliasing information, we need to look for
+ // instructions from later (future) iterations that have side effects
+ // preventing us from reordering them past other instructions with side
+ // effects.
+ bool FutureSideEffects = false;
+ AliasSetTracker AST(*AA);
+
+ // The map between instructions in f(%iv.(i+1)) and f(%iv).
+ DenseMap<Value *, Value *> BaseMap;
+
+ assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
+ for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
+ JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
+ if (cast<Instruction>(J1) == RealIV)
+ continue;
+ if (cast<Instruction>(J1) == IV)
+ continue;
+ if (!BaseUseSet.count(J1))
+ continue;
+ if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
+ continue;
+
+ while (J2 != JE && (!RootUseSet.count(J2) ||
+ std::find(Roots[i].begin(), Roots[i].end(), J2) !=
+ Roots[i].end())) {
+ // As we iterate through the instructions, instructions that don't
+          // belong to previous iterations (or the base case) must belong to
+ // future iterations. We want to track the alias set of writes from
+ // previous iterations.
+ if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
+ !AllRootUses.count(J2)) {
+ if (J2->mayWriteToMemory())
+ AST.add(J2);
+
+ // Note: This is specifically guarded by a check on isa<PHINode>,
+          // which, while a valid (somewhat arbitrary) micro-optimization, is
+ // needed because otherwise isSafeToSpeculativelyExecute returns
+ // false on PHI nodes.
+ if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
+ FutureSideEffects = true;
+ }
+
+ ++J2;
+ }
+
+ if (!J1->isSameOperationAs(J2)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << "\n");
+ MatchFailed = true;
+ break;
+ }
+
+ // Make sure that this instruction, which is in the use set of this
+ // root instruction, does not also belong to the base set or the set of
+ // some previous root instruction.
+ if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (prev. case overlap)\n");
+ MatchFailed = true;
+ break;
+ }
+
+ // Make sure that we don't alias with any instruction in the alias set
+ // tracker. If we do, then we depend on a future iteration, and we
+ // can't reroll.
+ if (J2->mayReadFromMemory()) {
+ for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
+ K != KE && !MatchFailed; ++K) {
+ if (K->aliasesUnknownInst(J2, *AA)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (depends on future store)\n");
+ MatchFailed = true;
+ break;
+ }
+ }
+ }
+
+      // If we've passed an instruction from a future iteration that may have
+ // side effects, and this instruction might also, then we can't reorder
+ // them, and this matching fails. As an exception, we allow the alias
+ // set tracker to handle regular (simple) load/store dependencies.
+ if (FutureSideEffects &&
+ ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
+ (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 <<
+ " (side effects prevent reordering)\n");
+ MatchFailed = true;
+ break;
+ }
+
+ // For instructions that are part of a reduction, if the operation is
+ // associative, then don't bother matching the operands (because we
+ // already know that the instructions are isomorphic, and the order
+ // within the iteration does not matter). For non-associative reductions,
+ // we do need to match the operands, because we need to reject
+ // out-of-order instructions within an iteration!
+ // For example (assume floating-point addition), we need to reject this:
+ // x += a[i]; x += b[i];
+ // x += a[i+1]; x += b[i+1];
+ // x += b[i+2]; x += a[i+2];
+ bool InReduction = Reductions.isPairInSame(J1, J2);
+
+ if (!(InReduction && J1->isAssociative())) {
+        bool Swapped = false, SomeOpMatched = false;
+ for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
+ Value *Op2 = J2->getOperand(j);
+
+ // If this is part of a reduction (and the operation is not
+          // associative), then we match all operands, but not those that are
+ // part of the reduction.
+ if (InReduction)
+ if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
+ if (Reductions.isPairInSame(J2, Op2I))
+ continue;
+
+ DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
+ if (BMI != BaseMap.end())
+ Op2 = BMI->second;
+ else if (std::find(Roots[i].begin(), Roots[i].end(),
+ (Instruction*) Op2) != Roots[i].end())
+ Op2 = IV;
+
+ if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
+ // If we've not already decided to swap the matched operands, and
+ // we've not already matched our first operand (note that we could
+ // have skipped matching the first operand because it is part of a
+ // reduction above), and the instruction is commutative, then try
+ // the swapped match.
+ if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
+ J1->getOperand(!j) == Op2) {
+ Swapped = true;
+ } else {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (operand " << j << ")\n");
+ MatchFailed = true;
+ break;
+ }
+ }
+
+ SomeOpMatched = true;
+ }
+ }
+
+ if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
+ (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
+ " vs. " << *J2 << " (uses outside loop)\n");
+ MatchFailed = true;
+ break;
+ }
+
+ if (!MatchFailed)
+ BaseMap.insert(std::pair<Value *, Value *>(J2, J1));
+
+ AllRootUses.insert(J2);
+ Reductions.recordPair(J1, J2, i+1);
+
+ ++J2;
+ }
+ }
+
+ if (MatchFailed)
+ return false;
+
+ DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
+ *RealIV << "\n");
+
+ DenseSet<Instruction *> LoopIncUseSet;
+ collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
+ SmallInstructionSet(), LoopIncUseSet);
+ DEBUG(dbgs() << "LRR: Loop increment set size: " <<
+ LoopIncUseSet.size() << "\n");
+
+ // Make sure that all instructions in the loop have been included in some
+ // use set.
+ for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
+ J != JE; ++J) {
+ if (isa<DbgInfoIntrinsic>(J))
+ continue;
+ if (cast<Instruction>(J) == RealIV)
+ continue;
+ if (cast<Instruction>(J) == IV)
+ continue;
+ if (BaseUseSet.count(J) || AllRootUses.count(J) ||
+ (LoopIncUseSet.count(J) && (J->isTerminator() ||
+ isSafeToSpeculativelyExecute(J, DL))))
+ continue;
+
+ if (AllRoots.count(J))
+ continue;
+
+ if (Reductions.isSelectedPHI(J))
+ continue;
+
+ DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
+ " unprocessed instruction found: " << *J << "\n");
+ MatchFailed = true;
+ break;
+ }
+
+ if (MatchFailed)
+ return false;
+
+ DEBUG(dbgs() << "LRR: all instructions processed from " <<
+ *RealIV << "\n");
+
+ if (!Reductions.validateSelected())
+ return false;
+
+ // At this point, we've validated the rerolling, and we're committed to
+ // making changes!
+
+ Reductions.replaceSelected();
+
+ // Remove instructions associated with non-base iterations.
+ for (BasicBlock::reverse_iterator J = Header->rbegin();
+ J != Header->rend();) {
+ if (AllRootUses.count(&*J)) {
+ Instruction *D = &*J;
+ DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
+ D->eraseFromParent();
+ continue;
+ }
+
+ ++J;
+ }
+
+ // Insert the new induction variable.
+ const SCEV *Start = RealIVSCEV->getStart();
+ if (Inc == 1)
+ Start = SE->getMulExpr(Start,
+ SE->getConstant(Start->getType(), Scale));
+ const SCEVAddRecExpr *H =
+ cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
+ SE->getConstant(RealIVSCEV->getType(), 1),
+ L, SCEV::FlagAnyWrap));
+ { // Limit the lifetime of SCEVExpander.
+ SCEVExpander Expander(*SE, "reroll");
+ PHINode *NewIV =
+ cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
+ Header->begin()));
+ for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
+ JE = BaseUseSet.end(); J != JE; ++J)
+ (*J)->replaceUsesOfWith(IV, NewIV);
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
+ if (LoopIncUseSet.count(BI)) {
+ const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
+ if (Inc == 1)
+ ICSCEV =
+ SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
+ Value *IC;
+ if (isa<SCEVConstant>(ICSCEV)) {
+ IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
+ } else {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ Preheader = InsertPreheaderForLoop(L, this);
+
+ IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
+ Preheader->getTerminator());
+ }
+
+ Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
+ Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
+ "exitcond");
+ BI->setCondition(Cond);
+
+ if (BI->getSuccessor(1) != Header)
+ BI->swapSuccessors();
+ }
+ }
+ }
+
+ SimplifyInstructionsInBlock(Header, DL, TLI);
+ DeleteDeadPHIs(Header, TLI);
+ ++NumRerolledLoops;
+ return true;
+}
+
+bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
+ AA = &getAnalysis<AliasAnalysis>();
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ DT = &getAnalysis<DominatorTree>();
+
+ BasicBlock *Header = L->getHeader();
+ DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() <<
+ "] Loop %" << Header->getName() << " (" <<
+ L->getNumBlocks() << " block(s))\n");
+
+ bool Changed = false;
+
+ // For now, we'll handle only single BB loops.
+ if (L->getNumBlocks() > 1)
+ return Changed;
+
+ if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+ return Changed;
+
+ const SCEV *LIBETC = SE->getBackedgeTakenCount(L);
+ const SCEV *IterCount =
+ SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1));
+ DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n");
+
+ // First, we need to find the induction variable with respect to which we can
+ // reroll (there may be several possible options).
+ SmallInstructionVector PossibleIVs;
+ collectPossibleIVs(L, PossibleIVs);
+
+ if (PossibleIVs.empty()) {
+ DEBUG(dbgs() << "LRR: No possible IVs found\n");
+ return Changed;
+ }
+
+ ReductionTracker Reductions;
+ collectPossibleReductions(L, Reductions);
+
+ // For each possible IV, collect the associated possible set of 'root' nodes
+ // (i+1, i+2, etc.).
+ for (SmallInstructionVector::iterator I = PossibleIVs.begin(),
+ IE = PossibleIVs.end(); I != IE; ++I)
+ if (reroll(*I, L, Header, IterCount, Reductions)) {
+ Changed = true;
+ break;
+ }
+
+ return Changed;
+}
+
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 14cb979..eff5268 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1170,6 +1170,13 @@ public:
/// may be used.
bool AllFixupsOutsideLoop;
+ /// RigidFormula is set to true to guarantee that this use will be associated
+ /// with a single formula--the one that initially matched. Some SCEV
+ /// expressions cannot be expanded. This allows LSR to consider the registers
+ /// used by those expressions without the need to expand them later after
+ /// changing the formula.
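+  /// One case that cannot be expanded (an illustrative example, not an
+  /// exhaustive list) is a SCEV containing a udiv whose divisor is not known
+  /// to be a non-zero constant.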
+ bool RigidFormula;
+
/// WidestFixupType - This records the widest use type for any fixup using
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
@@ -1188,6 +1195,7 @@ public:
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
+ RigidFormula(false),
WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
@@ -1214,6 +1222,9 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
+ if (!Formulae.empty() && RigidFormula)
+ return false;
+
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
@@ -1433,7 +1444,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
}
case LSRUse::ICmpZero:
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
- // Therefore, return 0 in case F.Scale == -1.
+ // Therefore, return 0 in case F.Scale == -1.
return F.Scale != -1;
case LSRUse::Basic:
@@ -2943,7 +2954,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -2986,6 +2997,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
/// and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ // Mark uses whose expressions cannot be expanded.
+ if (!isSafeToExpand(S, SE))
+ LU.RigidFormula = true;
+
Formula F;
F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
@@ -4353,6 +4368,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
+ if (LU.RigidFormula)
+ return LF.OperandValToReplace;
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 80d060b..08ac38d 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -49,12 +49,17 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) {
+ LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
+ CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
+ UserAllowPartial = (P != -1) ||
+ (UnrollAllowPartial.getNumOccurrences() > 0);
+ UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
+ UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -75,7 +80,11 @@ namespace {
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
+ bool CurrentRuntime;
+ bool UserCount; // CurrentCount is user-specified.
bool UserThreshold; // CurrentThreshold is user-specified.
+ bool UserAllowPartial; // CurrentAllowPartial is user-specified.
+ bool UserRuntime; // CurrentRuntime is user-specified.
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -110,8 +119,9 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
- return new LoopUnroll(Threshold, Count, AllowPartial);
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
+ int Runtime) {
+ return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
}
/// ApproximateLoopSize - Approximate the size of the loop.
@@ -145,16 +155,24 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
<< "] Loop %" << Header->getName() << "\n");
(void)Header;
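+  // Seed the unrolling preferences from this pass's current settings, then
+  // let the target override them; explicit user flags take precedence again
+  // in the UserThreshold/UserCount/UserAllowPartial/UserRuntime checks below.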
+ TargetTransformInfo::UnrollingPreferences UP;
+ UP.Threshold = CurrentThreshold;
+ UP.OptSizeThreshold = OptSizeUnrollThreshold;
+ UP.Count = CurrentCount;
+ UP.Partial = CurrentAllowPartial;
+ UP.Runtime = CurrentRuntime;
+ TTI.getUnrollingPreferences(L, UP);
+
// Determine the current unrolling threshold. While this is normally set
// from UnrollThreshold, it is overridden to a smaller value if the current
// function is marked as optimize-for-size, and the unroll threshold was
// not user specified.
- unsigned Threshold = CurrentThreshold;
+ unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
if (!UserThreshold &&
Header->getParent()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize))
- Threshold = OptSizeUnrollThreshold;
+ Threshold = UP.OptSizeThreshold;
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
@@ -167,11 +185,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
+
+ bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime;
+
// Use a default unroll-count if the user doesn't specify a value
// and the trip count is a run-time value. The default is different
// for run-time or compile-time trip count loops.
- unsigned Count = CurrentCount;
- if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ unsigned Count = UserCount ? CurrentCount : UP.Count;
+ if (Runtime && Count == 0 && TripCount == 0)
Count = UnrollRuntimeCount;
if (Count == 0) {
@@ -204,7 +225,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << Threshold << "\n");
- if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
+ bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
+ if (!AllowPartial && !(Runtime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
@@ -215,7 +237,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
while (Count != 0 && TripCount%Count != 0)
Count--;
}
- else if (UnrollRuntime) {
+ else if (Runtime) {
// Reduce unroll count to be a lower power-of-two value
while (Count != 0 && Size > Threshold) {
Count >>= 1;
@@ -231,7 +253,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM))
return false;
return true;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 59aff31..c4ebfd5 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -212,8 +212,6 @@ namespace {
Instruction *InsertPt);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- void RemoveBlockIfDead(BasicBlock *BB,
- std::vector<Instruction*> &Worklist, Loop *l);
void RemoveLoopFromHierarchy(Loop *L);
bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
BasicBlock **LoopExit = 0);
@@ -946,114 +944,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop
-/// information, and remove any dead successors it has.
-///
-void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
- std::vector<Instruction*> &Worklist,
- Loop *L) {
- if (pred_begin(BB) != pred_end(BB)) {
- // This block isn't dead, since an edge to BB was just removed, see if there
- // are any easy simplifications we can do now.
- if (BasicBlock *Pred = BB->getSinglePredecessor()) {
- // If it has one pred, fold phi nodes in BB.
- while (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
- ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
-
- // If this is the header of a loop and the only pred is the latch, we now
- // have an unreachable loop.
- if (Loop *L = LI->getLoopFor(BB))
- if (loopHeader == BB && L->contains(Pred)) {
- // Remove the branch from the latch to the header block, this makes
- // the header dead, which will make the latch dead (because the header
- // dominates the latch).
- LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
- Pred->getTerminator()->eraseFromParent();
- new UnreachableInst(BB->getContext(), Pred);
-
- // The loop is now broken, remove it from LI.
- RemoveLoopFromHierarchy(L);
-
- // Reprocess the header, which now IS dead.
- RemoveBlockIfDead(BB, Worklist, L);
- return;
- }
-
- // If pred ends in a uncond branch, add uncond branch to worklist so that
- // the two blocks will get merged.
- if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
- if (BI->isUnconditional())
- Worklist.push_back(BI);
- }
- return;
- }
-
- DEBUG(dbgs() << "Nuking dead block: " << *BB);
-
- // Remove the instructions in the basic block from the worklist.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- RemoveFromWorklist(I, Worklist);
-
- // Anything that uses the instructions in this basic block should have their
- // uses replaced with undefs.
- // If I is not void type then replaceAllUsesWith undef.
- // This allows ValueHandlers and custom metadata to adjust itself.
- if (!I->getType()->isVoidTy())
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
- }
-
- // If this is the edge to the header block for a loop, remove the loop and
- // promote all subloops.
- if (Loop *BBLoop = LI->getLoopFor(BB)) {
- if (BBLoop->getLoopLatch() == BB) {
- RemoveLoopFromHierarchy(BBLoop);
- if (currentLoop == BBLoop) {
- currentLoop = 0;
- redoLoop = false;
- }
- }
- }
-
- // Remove the block from the loop info, which removes it from any loops it
- // was in.
- LI->removeBlock(BB);
-
- // Remove phi node entries in successors for this block.
- TerminatorInst *TI = BB->getTerminator();
- SmallVector<BasicBlock*, 4> Succs;
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- Succs.push_back(TI->getSuccessor(i));
- TI->getSuccessor(i)->removePredecessor(BB);
- }
-
- // Unique the successors, remove anything with multiple uses.
- array_pod_sort(Succs.begin(), Succs.end());
- Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
-
- // Remove the basic block, including all of the instructions contained in it.
- LPM->deleteSimpleAnalysisValue(BB, L);
- BB->eraseFromParent();
- // Remove successor blocks here that are not dead, so that we know we only
- // have dead blocks in this list. Nondead blocks have a way of becoming dead,
- // then getting removed before we revisit them, which is badness.
- //
- for (unsigned i = 0; i != Succs.size(); ++i)
- if (pred_begin(Succs[i]) != pred_end(Succs[i])) {
- // One exception is loop headers. If this block was the preheader for a
- // loop, then we DO want to visit the loop so the loop gets deleted.
- // We know that if the successor is a loop header, that this loop had to
- // be the preheader: the case where this was the latch block was handled
- // above and headers can only have two predecessors.
- if (!LI->isLoopHeader(Succs[i])) {
- Succs.erase(Succs.begin()+i);
- --i;
- }
- }
-
- for (unsigned i = 0, e = Succs.size(); i != e; ++i)
- RemoveBlockIfDead(Succs[i], Worklist, L);
-}
-
/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
/// become unwrapped, either because the backedge was deleted, or because the
/// edge into the header was removed. If the edge into the header from the
@@ -1262,23 +1152,6 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
continue;
}
- if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
- // Conditional branch. Turn it into an unconditional branch, then
- // remove dead blocks.
- continue; // FIXME: Enable.
-
- DEBUG(dbgs() << "Folded branch: " << *BI);
- BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());
- BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());
- DeadSucc->removePredecessor(BI->getParent(), true);
- Worklist.push_back(BranchInst::Create(LiveSucc, BI));
- LPM->deleteSimpleAnalysisValue(BI, L);
- BI->eraseFromParent();
- RemoveFromWorklist(BI, Worklist);
- ++NumSimplify;
-
- RemoveBlockIfDead(DeadSucc, Worklist, L);
- }
continue;
}
}
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 8f61ffd..9912d3d 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -170,14 +170,17 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// pessimize the llvm optimizer.
//
// Since we don't have perfect knowledge here, make some assumptions: assume
- // the maximum GPR width is the same size as the pointer size and assume that
- // this width can be stored. If so, check to see whether we will end up
- // actually reducing the number of stores used.
+ // the maximum GPR width is the same size as the largest legal integer
+ // size. If so, check to see whether we will end up actually reducing the
+ // number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned NumPointerStores = Bytes/TD.getPointerSize();
+ unsigned MaxIntSize = TD.getLargestLegalIntTypeSize();
+ if (MaxIntSize == 0)
+ MaxIntSize = 1;
+ unsigned NumPointerStores = Bytes / MaxIntSize;
// Assume the remaining bytes if any are done a byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+ unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
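
A quick worked example of the updated heuristic (illustrative values, assuming i64 is the largest legal integer type for the target):

    unsigned Bytes = 16;      // End - Start: size of the candidate range
    unsigned MaxIntSize = 8;  // TD.getLargestLegalIntTypeSize() for i64
    unsigned NumPointerStores = Bytes / MaxIntSize;                 // 2
    unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize; // 0
    // Covering the range takes two wide stores; if the original code used
    // four i32 stores, forming the memset/wide store is a win.
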
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
new file mode 100644
index 0000000..15cee44
--- /dev/null
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -0,0 +1,156 @@
+//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to partially inline the fast path of well-known library
+// functions, such as using square-root instructions for cases where sqrt()
+// does not need to set errno.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "partially-inline-libcalls"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+ class PartiallyInlineLibCalls : public FunctionPass {
+ public:
+ static char ID;
+
+ PartiallyInlineLibCalls() :
+ FunctionPass(ID) {
+ initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnFunction(Function &F);
+
+ private:
+ /// Optimize calls to sqrt.
+ bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
+ BasicBlock &CurrBB, Function::iterator &BB);
+ };
+
+ char PartiallyInlineLibCalls::ID = 0;
+}
+
+INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
+ "Partially inline calls to library functions", false, false)
+
+void PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetTransformInfo>();
+ FunctionPass::getAnalysisUsage(AU);
+}
+
+bool PartiallyInlineLibCalls::runOnFunction(Function &F) {
+ bool Changed = false;
+ Function::iterator CurrBB;
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>();
+ for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
+ CurrBB = BB++;
+
+ for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
+ II != IE; ++II) {
+ CallInst *Call = dyn_cast<CallInst>(&*II);
+ Function *CalledFunc;
+
+ if (!Call || !(CalledFunc = Call->getCalledFunction()))
+ continue;
+
+      // Skip if the function either has local linkage or is not a known
+      // library function.
+ LibFunc::Func LibFunc;
+ if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
+ !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
+ continue;
+
+ switch (LibFunc) {
+ case LibFunc::sqrtf:
+ case LibFunc::sqrt:
+ if (TTI->haveFastSqrt(Call->getType()) &&
+ optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
+ break;
+ continue;
+ default:
+ continue;
+ }
+
+ Changed = true;
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
+ Function *CalledFunc,
+ BasicBlock &CurrBB,
+ Function::iterator &BB) {
+  // There is no need to change the IR, since the backend will emit a sqrt
+  // instruction if the call has already been marked read-only.
+ if (Call->onlyReadsMemory())
+ return false;
+
+ // Do the following transformation:
+ //
+ // (before)
+ // dst = sqrt(src)
+ //
+ // (after)
+ // v0 = sqrt_noreadmem(src) # native sqrt instruction.
+ // if (v0 is a NaN)
+ // v1 = sqrt(src) # library call.
+ // dst = phi(v0, v1)
+ //
+
+ // Move all instructions following Call to newly created block JoinBB.
+ // Create phi and replace all uses.
+ BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this);
+ IRBuilder<> Builder(JoinBB, JoinBB->begin());
+ PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
+ Call->replaceAllUsesWith(Phi);
+
+ // Create basic block LibCallBB and insert a call to library function sqrt.
+ BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt",
+ CurrBB.getParent(), JoinBB);
+ Builder.SetInsertPoint(LibCallBB);
+ Instruction *LibCall = Call->clone();
+ Builder.Insert(LibCall);
+ Builder.CreateBr(JoinBB);
+
+  // Add attribute "readnone" so that the backend can use a native sqrt
+ // for this call. Insert a FP compare instruction and a conditional branch
+ // at the end of CurrBB.
+ Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ CurrBB.getTerminator()->eraseFromParent();
+ Builder.SetInsertPoint(&CurrBB);
+ Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
+ Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);
+
+ // Add phi operands.
+ Phi->addIncoming(Call, &CurrBB);
+ Phi->addIncoming(LibCall, LibCallBB);
+
+ BB = JoinBB;
+ return true;
+}
+
+FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
+ return new PartiallyInlineLibCalls();
+}
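
In source terms, the control flow built by optimizeSQRT corresponds to roughly the following (a sketch; sqrt_native is a hypothetical stand-in for the cloned readnone call that the backend lowers to a hardware instruction):

    double fast_sqrt(double src) {
      double v0 = sqrt_native(src); // readnone clone -> native instruction
      if (v0 != v0)                 // FCmpOEQ fails only when v0 is a NaN
        v0 = sqrt(src);             // slow path: real libm call, sets errno
      return v0;                    // the phi node created in JoinBB
    }
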
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 5c55143..9f3fc83 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -733,9 +733,9 @@ class AllocaPromoter : public LoadAndStorePromoter {
SmallVector<DbgValueInst *, 4> DVIs;
public:
- AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
+ AllocaPromoter(const SmallVectorImpl<Instruction *> &Insts, SSAUpdater &S,
AllocaInst &AI, DIBuilder &DIB)
- : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
+ : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
void run(const SmallVectorImpl<Instruction*> &Insts) {
// Retain the debug information attached to the alloca for use when
@@ -762,9 +762,30 @@ public:
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &Insts) const {
+ Value *Ptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->getOperand(0) == &AI;
- return cast<StoreInst>(I)->getPointerOperand() == &AI;
+ Ptr = LI->getOperand(0);
+ else
+ Ptr = cast<StoreInst>(I)->getPointerOperand();
+
+ // Only used to detect cycles, which will be rare and quickly found as
+ // we're walking up a chain of defs rather than down through uses.
+ SmallPtrSet<Value *, 4> Visited;
+
+ do {
+ if (Ptr == &AI)
+ return true;
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
+ Ptr = BCI->getOperand(0);
+ else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
+ Ptr = GEPI->getPointerOperand();
+ else
+ return false;
+
+ } while (Visited.insert(Ptr));
+
+ return false;
}
virtual void updateDebugInfo(Instruction *Inst) const {
@@ -917,6 +938,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
AllocaSlices::const_iterator E,
uint64_t EndOffset) {
Type *Ty = 0;
+ bool IgnoreNonIntegralTypes = false;
for (AllocaSlices::const_iterator I = B; I != E; ++I) {
Use *U = I->getUse();
if (isa<IntrinsicInst>(*U->getUser()))
@@ -925,29 +947,37 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
- else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
UserTy = SI->getValueOperand()->getType();
- else
- return 0; // Bail if we have weird uses.
+ } else {
+ IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
+ continue;
+ }
if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
// this for split integer operations where we want to use the type of the
- // entity causing the split.
- if (ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
+ // entity causing the split. Also skip if the type is not a byte width
+ // multiple.
+ if (ITy->getBitWidth() % 8 != 0 ||
+ ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
continue;
// If we have found an integer type use covering the alloca, use that
- // regardless of the other types, as integers are often used for a
- // "bucket
- // of bits" type.
+ // regardless of the other types, as integers are often used for
+ // a "bucket of bits" type.
+ //
+ // NB: This *must* be the only return from inside the loop so that the
+ // order of slices doesn't impact the computed type.
return ITy;
+ } else if (IgnoreNonIntegralTypes) {
+ continue;
}
if (Ty && Ty != UserTy)
- return 0;
+ IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
Ty = UserTy;
}
@@ -1431,6 +1461,10 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
return false;
+ // We can convert pointers to integers and vice-versa. Same for vectors
+ // of pointers and integers.
+ OldTy = OldTy->getScalarType();
+ NewTy = NewTy->getScalarType();
if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
if (NewTy->isPointerTy() && OldTy->isPointerTy())
return true;
@@ -1449,21 +1483,53 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
- Type *Ty) {
- assert(canConvertValue(DL, V->getType(), Ty) &&
- "Value not convertable to type");
- if (V->getType() == Ty)
+ Type *NewTy) {
+ Type *OldTy = V->getType();
+ assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
+
+ if (OldTy == NewTy)
return V;
- if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
- if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
if (NewITy->getBitWidth() > OldITy->getBitWidth())
return IRB.CreateZExt(V, NewITy);
- if (V->getType()->isIntegerTy() && Ty->isPointerTy())
- return IRB.CreateIntToPtr(V, Ty);
- if (V->getType()->isPointerTy() && Ty->isIntegerTy())
- return IRB.CreatePtrToInt(V, Ty);
- return IRB.CreateBitCast(V, Ty);
+ // See if we need inttoptr for this type pair. A cast involving both scalars
+  // and vectors requires an additional bitcast.
+ if (OldTy->getScalarType()->isIntegerTy() &&
+ NewTy->getScalarType()->isPointerTy()) {
+ // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
+ if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+ return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+ NewTy);
+
+ // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
+ if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+ return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+ NewTy);
+
+ return IRB.CreateIntToPtr(V, NewTy);
+ }
+
+ // See if we need ptrtoint for this type pair. A cast involving both scalars
+  // and vectors requires an additional bitcast.
+ if (OldTy->getScalarType()->isPointerTy() &&
+ NewTy->getScalarType()->isIntegerTy()) {
+ // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
+ if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+ return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+ NewTy);
+
+ // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
+ if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+ return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+ NewTy);
+
+ return IRB.CreatePtrToInt(V, NewTy);
+ }
+
+ return IRB.CreateBitCast(V, NewTy);
}
/// \brief Test whether the given slice use can be promoted to a vector.
@@ -3364,12 +3430,12 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
}
static void enqueueUsersInWorklist(Instruction &I,
- SmallVectorImpl<Use *> &UseWorklist,
- SmallPtrSet<Use *, 8> &VisitedUses) {
+ SmallVectorImpl<Instruction *> &Worklist,
+ SmallPtrSet<Instruction *, 8> &Visited) {
for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
++UI)
- if (VisitedUses.insert(&UI.getUse()))
- UseWorklist.push_back(&UI.getUse());
+ if (Visited.insert(cast<Instruction>(*UI)))
+ Worklist.push_back(cast<Instruction>(*UI));
}
/// \brief Promote the allocas, using the best available technique.
@@ -3388,7 +3454,7 @@ bool SROA::promoteAllocas(Function &F) {
if (DT && !ForceSSAUpdater) {
DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT, DL);
+ PromoteMemToReg(PromotableAllocas, *DT);
PromotableAllocas.clear();
return true;
}
@@ -3396,29 +3462,29 @@ bool SROA::promoteAllocas(Function &F) {
DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
SSAUpdater SSA;
DIBuilder DIB(*F.getParent());
- SmallVector<Instruction*, 64> Insts;
+ SmallVector<Instruction *, 64> Insts;
// We need a worklist to walk the uses of each alloca.
- SmallVector<Use *, 8> UseWorklist;
- SmallPtrSet<Use *, 8> VisitedUses;
+ SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
SmallVector<Instruction *, 32> DeadInsts;
for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
AllocaInst *AI = PromotableAllocas[Idx];
- UseWorklist.clear();
- VisitedUses.clear();
+ Insts.clear();
+ Worklist.clear();
+ Visited.clear();
- enqueueUsersInWorklist(*AI, UseWorklist, VisitedUses);
+ enqueueUsersInWorklist(*AI, Worklist, Visited);
- while (!UseWorklist.empty()) {
- Use *U = UseWorklist.pop_back_val();
- Instruction &I = *cast<Instruction>(U->getUser());
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
// FIXME: Currently the SSAUpdater infrastructure doesn't reason about
// lifetime intrinsics and so we strip them (and the bitcasts+GEPs
// leading to them) here. Eventually it should use them to optimize the
// scalar values produced.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end);
II->eraseFromParent();
@@ -3428,12 +3494,12 @@ bool SROA::promoteAllocas(Function &F) {
// Push the loads and stores we find onto the list. SROA will already
// have validated that all loads and stores are viable candidates for
// promotion.
- if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
assert(LI->getType() == AI->getAllocatedType());
Insts.push_back(LI);
continue;
}
- if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
Insts.push_back(SI);
continue;
@@ -3442,11 +3508,10 @@ bool SROA::promoteAllocas(Function &F) {
// For everything else, we know that only no-op bitcasts and GEPs will
// make it this far, just recurse through them and recall them for later
// removal.
- DeadInsts.push_back(&I);
- enqueueUsersInWorklist(I, UseWorklist, VisitedUses);
+ DeadInsts.push_back(I);
+ enqueueUsersInWorklist(*I, Worklist, Visited);
}
AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
- Insts.clear();
while (!DeadInsts.empty())
DeadInsts.pop_back_val()->eraseFromParent();
AI->eraseFromParent();
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
new file mode 100644
index 0000000..9bcd702
--- /dev/null
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -0,0 +1,479 @@
+//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SampleProfileLoader transformation. This pass
+// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
+// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
+// profile information in the given profile.
+//
+// This pass generates branch weight annotations on the IR:
+//
+// - prof: Represents branch weights. This annotation is added to branches
+// to indicate the weights of each edge coming out of the branch.
+// The weight of each edge is the weight of the target block for
+// that edge. The weight of a block B is computed as the maximum
+// number of samples found in B.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sample-profile"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+// Command line option to specify the file to read samples from. This is
+// mainly used for debugging.
+static cl::opt<std::string> SampleProfileFile(
+ "sample-profile-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
+
+namespace {
+/// \brief Sample-based profile reader.
+///
+/// Each profile contains sample counts for all the functions
+/// executed. Inside each function, statements are annotated with the
+/// collected samples on all the instructions associated with that
+/// statement.
+///
+/// For this to produce meaningful data, the program needs to be
+/// compiled with some debug information (at minimum, line numbers:
+/// -gline-tables-only). Otherwise, it will be impossible to match IR
+/// instructions to the line numbers collected by the profiler.
+///
+/// From the profile file, we are interested in collecting the
+/// following information:
+///
+/// * A list of functions included in the profile (mangled names).
+///
+/// * For each function F:
+/// 1. The total number of samples collected in F.
+///
+/// 2. The samples collected at each line in F. To provide some
+/// protection against source code shuffling, line numbers should
+/// be relative to the start of the function.
+class SampleProfile {
+public:
+ SampleProfile(StringRef F) : Profiles(0), Filename(F) {}
+
+ void dump();
+ void loadText();
+ void loadNative() { llvm_unreachable("not implemented"); }
+ bool emitAnnotations(Function &F);
+ void printFunctionProfile(raw_ostream &OS, StringRef FName);
+ void dumpFunctionProfile(StringRef FName);
+
+protected:
+ typedef DenseMap<uint32_t, uint32_t> BodySampleMap;
+ typedef DenseMap<BasicBlock *, uint32_t> BlockWeightMap;
+
+ /// \brief Representation of the runtime profile for a function.
+ ///
+ /// This data structure contains the runtime profile for a given
+ /// function. It contains the total number of samples collected
+ /// in the function and a map of samples collected in every statement.
+ struct FunctionProfile {
+ /// \brief Total number of samples collected inside this function.
+ ///
+ /// Samples are cumulative, they include all the samples collected
+ /// inside this function and all its inlined callees.
+ unsigned TotalSamples;
+
+    /// \brief Total number of samples collected at the head of the function.
+ unsigned TotalHeadSamples;
+
+ /// \brief Map line offsets to collected samples.
+ ///
+ /// Each entry in this map contains the number of samples
+ /// collected at the corresponding line offset. All line locations
+ /// are an offset from the start of the function.
+ BodySampleMap BodySamples;
+
+ /// \brief Map basic blocks to their computed weights.
+ ///
+ /// The weight of a basic block is defined to be the maximum
+ /// of all the instruction weights in that block.
+ BlockWeightMap BlockWeights;
+ };
+
+ uint32_t getInstWeight(Instruction &I, unsigned FirstLineno,
+ BodySampleMap &BodySamples);
+ uint32_t computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
+ BodySampleMap &BodySamples);
+
+ /// \brief Map every function to its associated profile.
+ ///
+ /// The profile of every function executed at runtime is collected
+ /// in the structure FunctionProfile. This maps function objects
+ /// to their corresponding profiles.
+ StringMap<FunctionProfile> Profiles;
+
+ /// \brief Path name to the file holding the profile data.
+ ///
+ /// The format of this file is defined by each profiler
+ /// independently. If possible, the profiler should have a text
+ /// version of the profile format to be used in constructing test
+ /// cases and debugging.
+ StringRef Filename;
+};
+
+/// \brief Loader class for text-based profiles.
+///
+/// This class defines a simple interface to read text files containing
+/// profiles. It keeps track of line number information and location of
+/// the file pointer. Users of this class are responsible for actually
+/// parsing the lines returned by the readLine function.
+///
+/// TODO - This does not really belong here. It is a generic text file
+/// reader. It should be moved to the Support library and made more general.
+class ExternalProfileTextLoader {
+public:
+ ExternalProfileTextLoader(StringRef F) : Filename(F) {
+ error_code EC;
+ EC = MemoryBuffer::getFile(Filename, Buffer);
+ if (EC)
+ report_fatal_error("Could not open profile file " + Filename + ": " +
+ EC.message());
+ FP = Buffer->getBufferStart();
+ Lineno = 0;
+ }
+
+ /// \brief Read a line from the mapped file.
+ StringRef readLine() {
+ size_t Length = 0;
+ const char *start = FP;
+ while (FP != Buffer->getBufferEnd() && *FP != '\n') {
+ Length++;
+ FP++;
+ }
+ if (FP != Buffer->getBufferEnd())
+ FP++;
+ Lineno++;
+ return StringRef(start, Length);
+ }
+
+  /// \brief Return true if we've reached EOF.
+ bool atEOF() const { return FP == Buffer->getBufferEnd(); }
+
+ /// \brief Report a parse error message and stop compilation.
+ void reportParseError(Twine Msg) const {
+ report_fatal_error(Filename + ":" + Twine(Lineno) + ": " + Msg + "\n");
+ }
+
+private:
+ /// \brief Memory buffer holding the text file.
+ OwningPtr<MemoryBuffer> Buffer;
+
+ /// \brief Current position into the memory buffer.
+ const char *FP;
+
+ /// \brief Current line number.
+ int64_t Lineno;
+
+  /// \brief Path to the profile file.
+ StringRef Filename;
+};
+
+/// \brief Sample profile pass.
+///
+/// This pass reads profile data from the file specified by
+/// -sample-profile-file and annotates every affected function with the
+/// profile information found in that file.
+class SampleProfileLoader : public FunctionPass {
+public:
+ // Class identification, replacement for typeinfo
+ static char ID;
+
+ SampleProfileLoader(StringRef Name = SampleProfileFile)
+ : FunctionPass(ID), Profiler(0), Filename(Name) {
+ initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool doInitialization(Module &M);
+
+ void dump() { Profiler->dump(); }
+
+ virtual const char *getPassName() const { return "Sample profile pass"; }
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ }
+
+protected:
+ /// \brief Profile reader object.
+ OwningPtr<SampleProfile> Profiler;
+
+ /// \brief Name of the profile file to load.
+ StringRef Filename;
+};
+}
+
+/// \brief Print the function profile for \p FName on stream \p OS.
+///
+/// \param OS Stream to emit the output to.
+/// \param FName Name of the function to print.
+void SampleProfile::printFunctionProfile(raw_ostream &OS, StringRef FName) {
+ FunctionProfile FProfile = Profiles[FName];
+ OS << "Function: " << FName << ", " << FProfile.TotalSamples << ", "
+ << FProfile.TotalHeadSamples << ", " << FProfile.BodySamples.size()
+ << " sampled lines\n";
+ for (BodySampleMap::const_iterator SI = FProfile.BodySamples.begin(),
+ SE = FProfile.BodySamples.end();
+ SI != SE; ++SI)
+ OS << "\tline offset: " << SI->first
+ << ", number of samples: " << SI->second << "\n";
+ OS << "\n";
+}
+
+/// \brief Dump the function profile for \p FName.
+///
+/// \param FName Name of the function to print.
+void SampleProfile::dumpFunctionProfile(StringRef FName) {
+ printFunctionProfile(dbgs(), FName);
+}
+
+/// \brief Dump all the function profiles found.
+void SampleProfile::dump() {
+ for (StringMap<FunctionProfile>::const_iterator I = Profiles.begin(),
+ E = Profiles.end();
+ I != E; ++I)
+ dumpFunctionProfile(I->getKey());
+}
+
+/// \brief Load samples from a text file.
+///
+/// The file is divided into two segments:
+///
+/// Symbol table (represented with the string "symbol table")
+/// Number of symbols in the table
+/// symbol 1
+/// symbol 2
+/// ...
+/// symbol N
+///
+/// Function body profiles
+/// function1:total_samples:total_head_samples:number_of_locations
+/// location_offset_1: number_of_samples
+/// location_offset_2: number_of_samples
+/// ...
+/// location_offset_N: number_of_samples
+///
+/// Function names must be mangled in order for the profile loader to
+/// match them in the current translation unit.
+///
+/// Since this is a flat profile, a function that shows up more than
+/// once gets all its samples aggregated across all its instances.
+/// TODO - flat profiles are too imprecise to provide good optimization
+/// opportunities. Convert them to context-sensitive profile.
+///
+/// This textual representation is useful to generate unit tests and
+/// for debugging purposes, but it should not be used to generate
+/// profiles for large programs, as the representation is extremely
+/// inefficient.
+void SampleProfile::loadText() {
+ ExternalProfileTextLoader Loader(Filename);
+
+ // Read the symbol table.
+ StringRef Line = Loader.readLine();
+ if (Line != "symbol table")
+ Loader.reportParseError("Expected 'symbol table', found " + Line);
+ int NumSymbols;
+ Line = Loader.readLine();
+ if (Line.getAsInteger(10, NumSymbols))
+ Loader.reportParseError("Expected a number, found " + Line);
+ for (int I = 0; I < NumSymbols; I++) {
+ StringRef FName = Loader.readLine();
+ FunctionProfile &FProfile = Profiles[FName];
+ FProfile.BodySamples.clear();
+ FProfile.TotalSamples = 0;
+ FProfile.TotalHeadSamples = 0;
+ }
+
+ // Read the profile of each function. Since each function may be
+ // mentioned more than once, and we are collecting flat profiles,
+ // accumulate samples as we parse them.
+ Regex HeadRE("^([^:]+):([0-9]+):([0-9]+):([0-9]+)$");
+ Regex LineSample("^([0-9]+): ([0-9]+)$");
+ while (!Loader.atEOF()) {
+ SmallVector<StringRef, 4> Matches;
+ Line = Loader.readLine();
+ if (!HeadRE.match(Line, &Matches))
+ Loader.reportParseError("Expected 'mangled_name:NUM:NUM:NUM', found " +
+ Line);
+ assert(Matches.size() == 5);
+ StringRef FName = Matches[1];
+ unsigned NumSamples, NumHeadSamples, NumSampledLines;
+ Matches[2].getAsInteger(10, NumSamples);
+ Matches[3].getAsInteger(10, NumHeadSamples);
+ Matches[4].getAsInteger(10, NumSampledLines);
+ FunctionProfile &FProfile = Profiles[FName];
+ FProfile.TotalSamples += NumSamples;
+ FProfile.TotalHeadSamples += NumHeadSamples;
+ BodySampleMap &SampleMap = FProfile.BodySamples;
+ unsigned I;
+ for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) {
+ Line = Loader.readLine();
+ if (!LineSample.match(Line, &Matches))
+ Loader.reportParseError("Expected 'NUM: NUM', found " + Line);
+ assert(Matches.size() == 3);
+ unsigned LineOffset, NumSamples;
+ Matches[1].getAsInteger(10, LineOffset);
+ Matches[2].getAsInteger(10, NumSamples);
+ SampleMap[LineOffset] += NumSamples;
+ }
+
+ if (I < NumSampledLines)
+ Loader.reportParseError("Unexpected end of file");
+ }
+}
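
A minimal input accepted by this parser (illustrative symbol name and counts) looks like:

    symbol table
    1
    _Z3foov
    _Z3foov:1000:50:2
    1: 600
    3: 400

Here _Z3foov collected 1000 samples in total, 50 at its head, and has two sampled locations: 600 samples at line offset 1 and 400 at offset 3.
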
+
+/// \brief Get the weight for an instruction.
+///
+/// The "weight" of an instruction \p Inst is the number of samples
+/// collected on that instruction at runtime. To retrieve it, we
+/// need to compute the line number of \p Inst relative to the start of its
+/// function. We use \p FirstLineno to compute the offset. We then
+/// look up the samples collected for \p Inst using \p BodySamples.
+///
+/// \param Inst Instruction to query.
+/// \param FirstLineno Line number of the first instruction in the function.
+/// \param BodySamples Map of relative source line locations to samples.
+///
+/// \returns The profiled weight of \p Inst.
+uint32_t SampleProfile::getInstWeight(Instruction &Inst, unsigned FirstLineno,
+ BodySampleMap &BodySamples) {
+ unsigned LOffset = Inst.getDebugLoc().getLine() - FirstLineno + 1;
+ return BodySamples.lookup(LOffset);
+}
+
+/// \brief Compute the weight of a basic block.
+///
+/// The weight of basic block \p B is the maximum weight of all the
+/// instructions in B.
+///
+/// \param B The basic block to query.
+/// \param FirstLineno The line number for the first line in the
+/// function holding B.
+/// \param BodySamples The map containing all the samples collected in that
+/// function.
+///
+/// \returns The computed weight of \p B.
+uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno,
+ BodySampleMap &BodySamples) {
+ // If we've computed B's weight before, return it.
+ Function *F = B->getParent();
+ FunctionProfile &FProfile = Profiles[F->getName()];
+ std::pair<BlockWeightMap::iterator, bool> Entry =
+ FProfile.BlockWeights.insert(std::make_pair(B, 0));
+ if (!Entry.second)
+ return Entry.first->second;
+
+ // Otherwise, compute and cache B's weight.
+ uint32_t Weight = 0;
+ for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
+ uint32_t InstWeight = getInstWeight(*I, FirstLineno, BodySamples);
+ if (InstWeight > Weight)
+ Weight = InstWeight;
+ }
+ Entry.first->second = Weight;
+ return Weight;
+}
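
For example, with the sample map above {1: 600, 3: 400}, a block containing instructions at line offsets 1 and 3 gets weight max(600, 400) = 600, while a block whose lines collected no samples keeps weight 0.
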
+
+/// \brief Generate branch weight metadata for all branches in \p F.
+///
+/// For every branch instruction B in \p F, we compute the weight of the
+/// target block for each of the edges out of B. This is the weight
+/// that we associate with that branch.
+///
+/// TODO - This weight assignment will most likely be wrong if the
+/// target branch has more than two predecessors. This needs to be done
+/// using some form of flow propagation.
+///
+/// Once all the branch weights are computed, we emit the MD_prof
+/// metadata on B using the computed values.
+///
+/// \param F The function to query.
+bool SampleProfile::emitAnnotations(Function &F) {
+ bool Changed = false;
+ FunctionProfile &FProfile = Profiles[F.getName()];
+ unsigned FirstLineno = inst_begin(F)->getDebugLoc().getLine();
+ MDBuilder MDB(F.getContext());
+
+ // Clear the block weights cache.
+ FProfile.BlockWeights.clear();
+
+ // When we find a branch instruction: For each edge E out of the branch,
+ // the weight of E is the weight of the target block.
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *B = I;
+ TerminatorInst *TI = B->getTerminator();
+ if (TI->getNumSuccessors() == 1)
+ continue;
+ if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+ continue;
+
+ SmallVector<uint32_t, 4> Weights;
+ unsigned NSuccs = TI->getNumSuccessors();
+ for (unsigned I = 0; I < NSuccs; ++I) {
+ BasicBlock *Succ = TI->getSuccessor(I);
+ uint32_t Weight =
+ computeBlockWeight(Succ, FirstLineno, FProfile.BodySamples);
+ Weights.push_back(Weight);
+ }
+
+ TI->setMetadata(llvm::LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ Changed = true;
+ }
+
+ return Changed;
+}
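
Continuing that example, a two-way branch whose successor blocks weigh 600 and 400 ends up annotated roughly as follows in this release's textual IR (illustrative):

    br i1 %cmp, label %if.then, label %if.end, !prof !0
    ...
    !0 = metadata !{metadata !"branch_weights", i32 600, i32 400}
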
+
+char SampleProfileLoader::ID = 0;
+INITIALIZE_PASS(SampleProfileLoader, "sample-profile", "Sample Profile loader",
+ false, false)
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
+ return Profiler->emitAnnotations(F);
+}
+
+bool SampleProfileLoader::doInitialization(Module &M) {
+ Profiler.reset(new SampleProfile(Filename));
+ Profiler->loadText();
+ return true;
+}
+
+FunctionPass *llvm::createSampleProfileLoaderPass() {
+ return new SampleProfileLoader(SampleProfileFile);
+}
+
+FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+ return new SampleProfileLoader(Name);
+}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 758334d..857597e 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeADCEPass(Registry);
- initializeBlockPlacementPass(Registry);
+ initializeSampleProfileLoaderPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeConstantPropagationPass(Registry);
initializeCorrelatedValuePropagationPass(Registry);
@@ -44,12 +44,14 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopInstSimplifyPass(Registry);
initializeLoopRotatePass(Registry);
initializeLoopStrengthReducePass(Registry);
+ initializeLoopRerollPass(Registry);
initializeLoopUnrollPass(Registry);
initializeLoopUnswitchPass(Registry);
initializeLoopIdiomRecognizePass(Registry);
initializeLowerAtomicPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
+ initializePartiallyInlineLibCallsPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
initializeSCCPPass(Registry);
@@ -111,6 +113,10 @@ void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopRotatePass());
}
+void LLVMAddLoopRerollPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopRerollPass());
+}
+
void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopUnrollPass());
}
@@ -123,6 +129,10 @@ void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createMemCpyOptPass());
}
+void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createPartiallyInlineLibCallsPass());
+}
+
void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPromoteMemoryToRegisterPass());
}
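
For reference, the two new C API entry points slot into the usual LLVM-C pass-manager lifecycle; a minimal sketch (assumes an LLVMModuleRef Mod obtained elsewhere):

    LLVMPassManagerRef PM = LLVMCreatePassManager();
    LLVMAddLoopRerollPass(PM);
    LLVMAddPartiallyInlineLibCallsPass(PM);
    LLVMRunPassManager(PM, Mod); // returns true if any pass modified Mod
    LLVMDisposePassManager(PM);
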
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 73b2edf..57b290e 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -963,7 +963,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
else if (SV->getType()->isPointerTy())
- SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));
+ SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType()));
// Zero extend or truncate the value if needed.
if (SV->getType() != AllocaType) {
@@ -1426,7 +1426,7 @@ bool SROA::performPromotion(Function &F) {
if (Allocas.empty()) break;
if (HasDomTree)
- PromoteMemToReg(Allocas, *DT, TD);
+ PromoteMemToReg(Allocas, *DT);
else {
SSAUpdater SSA;
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 6d05640..8371f6d 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -66,161 +66,6 @@ FunctionPass *llvm::createCFGSimplificationPass() {
return new CFGSimplifyPass();
}
-/// changeToUnreachable - Insert an unreachable instruction before the specified
-/// instruction, making it and the rest of the code in the block dead.
-static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
- BasicBlock *BB = I->getParent();
- // Loop over all of the successors, removing BB's entry from any PHI
- // nodes.
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- (*SI)->removePredecessor(BB);
-
- // Insert a call to llvm.trap right before this. This turns the undefined
- // behavior into a hard fail instead of falling through into random code.
- if (UseLLVMTrap) {
- Function *TrapFn =
- Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
- CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
- CallTrap->setDebugLoc(I->getDebugLoc());
- }
- new UnreachableInst(I->getContext(), I);
-
- // All instructions after this are dead.
- BasicBlock::iterator BBI = I, BBE = BB->end();
- while (BBI != BBE) {
- if (!BBI->use_empty())
- BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
- BB->getInstList().erase(BBI++);
- }
-}
-
-/// changeToCall - Convert the specified invoke into a normal call.
-static void changeToCall(InvokeInst *II) {
- SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
- NewCall->takeName(II);
- NewCall->setCallingConv(II->getCallingConv());
- NewCall->setAttributes(II->getAttributes());
- NewCall->setDebugLoc(II->getDebugLoc());
- II->replaceAllUsesWith(NewCall);
-
- // Follow the call by a branch to the normal destination.
- BranchInst::Create(II->getNormalDest(), II);
-
- // Update PHI nodes in the unwind destination
- II->getUnwindDest()->removePredecessor(II->getParent());
- II->eraseFromParent();
-}
-
-static bool markAliveBlocks(BasicBlock *BB,
- SmallPtrSet<BasicBlock*, 128> &Reachable) {
-
- SmallVector<BasicBlock*, 128> Worklist;
- Worklist.push_back(BB);
- Reachable.insert(BB);
- bool Changed = false;
- do {
- BB = Worklist.pop_back_val();
-
- // Do a quick scan of the basic block, turning any obviously unreachable
- // instructions into LLVM unreachable insts. The instruction combining pass
- // canonicalizes unreachable insts into stores to null or undef.
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
- if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
- if (CI->doesNotReturn()) {
- // If we found a call to a no-return function, insert an unreachable
- // instruction after it. Make sure there isn't *already* one there
- // though.
- ++BBI;
- if (!isa<UnreachableInst>(BBI)) {
- // Don't insert a call to llvm.trap right before the unreachable.
- changeToUnreachable(BBI, false);
- Changed = true;
- }
- break;
- }
- }
-
- // Store to undef and store to null are undefined and used to signal that
- // they should be changed to unreachable by passes that can't modify the
- // CFG.
- if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- // Don't touch volatile stores.
- if (SI->isVolatile()) continue;
-
- Value *Ptr = SI->getOperand(1);
-
- if (isa<UndefValue>(Ptr) ||
- (isa<ConstantPointerNull>(Ptr) &&
- SI->getPointerAddressSpace() == 0)) {
- changeToUnreachable(SI, true);
- Changed = true;
- break;
- }
- }
- }
-
- // Turn invokes that call 'nounwind' functions into ordinary calls.
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Value *Callee = II->getCalledValue();
- if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- changeToUnreachable(II, true);
- Changed = true;
- } else if (II->doesNotThrow()) {
- if (II->use_empty() && II->onlyReadsMemory()) {
- // jump to the normal destination branch.
- BranchInst::Create(II->getNormalDest(), II);
- II->getUnwindDest()->removePredecessor(II->getParent());
- II->eraseFromParent();
- } else
- changeToCall(II);
- Changed = true;
- }
- }
-
- Changed |= ConstantFoldTerminator(BB, true);
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (Reachable.insert(*SI))
- Worklist.push_back(*SI);
- } while (!Worklist.empty());
- return Changed;
-}
-
-/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
-/// if they are in a dead cycle. Return true if a change was made, false
-/// otherwise.
-static bool removeUnreachableBlocksFromFn(Function &F) {
- SmallPtrSet<BasicBlock*, 128> Reachable;
- bool Changed = markAliveBlocks(F.begin(), Reachable);
-
- // If there are unreachable blocks in the CFG...
- if (Reachable.size() == F.size())
- return Changed;
-
- assert(Reachable.size() < F.size());
- NumSimpl += F.size()-Reachable.size();
-
- // Loop over all of the basic blocks that are not reachable, dropping all of
- // their internal references...
- for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
- if (Reachable.count(BB))
- continue;
-
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
- if (Reachable.count(*SI))
- (*SI)->removePredecessor(BB);
- BB->dropAllReferences();
- }
-
- for (Function::iterator I = ++F.begin(); I != F.end();)
- if (!Reachable.count(I))
- I = F.getBasicBlockList().erase(I);
- else
- ++I;
-
- return true;
-}
-
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return blocks, merge them together to promote recursive block merging.
static bool mergeEmptyReturnBlocks(Function &F) {
@@ -325,7 +170,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
bool CFGSimplifyPass::runOnFunction(Function &F) {
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
- bool EverChanged = removeUnreachableBlocksFromFn(F);
+ bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
EverChanged |= iterativelySimplifyCFG(F, TTI, TD);
@@ -333,16 +178,16 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
if (!EverChanged) return false;
// iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
- // removeUnreachableBlocksFromFn is needed to nuke them, which means we should
+ // removeUnreachableBlocks is needed to nuke them, which means we should
// iterate between the two optimizations. We structure the code like this to
// avoid reruning iterativelySimplifyCFG if the second pass of
- // removeUnreachableBlocksFromFn doesn't do anything.
- if (!removeUnreachableBlocksFromFn(F))
+ // removeUnreachableBlocks doesn't do anything.
+ if (!removeUnreachableBlocks(F))
return true;
do {
EverChanged = iterativelySimplifyCFG(F, TTI, TD);
- EverChanged |= removeUnreachableBlocksFromFn(F);
+ EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
return true;
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index bb6f163..5045ff8 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -231,7 +231,7 @@ public:
StructurizeCFG() :
RegionPass(ID) {
- initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+ initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
}
using Pass::doInitialization;
@@ -244,6 +244,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
RegionPass::getAnalysisUsage(AU);
@@ -256,6 +257,7 @@ char StructurizeCFG::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG",
false, false)
+INITIALIZE_PASS_DEPENDENCY(LowerSwitch)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(RegionInfo)
INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG",
@@ -321,21 +323,32 @@ Value *StructurizeCFG::invert(Value *Condition) {
if (match(Condition, m_Not(m_Value(Condition))))
return Condition;
- // Third: Check all the users for an invert
- BasicBlock *Parent = cast<Instruction>(Condition)->getParent();
- for (Value::use_iterator I = Condition->use_begin(),
- E = Condition->use_end(); I != E; ++I) {
+ if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
+ // Third: Check all the users for an invert
+ BasicBlock *Parent = Inst->getParent();
+ for (Value::use_iterator I = Condition->use_begin(),
+ E = Condition->use_end(); I != E; ++I) {
- Instruction *User = dyn_cast<Instruction>(*I);
- if (!User || User->getParent() != Parent)
- continue;
+ Instruction *User = dyn_cast<Instruction>(*I);
+ if (!User || User->getParent() != Parent)
+ continue;
+
+ if (match(*I, m_Not(m_Specific(Condition))))
+ return *I;
+ }
- if (match(*I, m_Not(m_Specific(Condition))))
- return *I;
+ // Last option: Create a new instruction
+ return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
}
- // Last option: Create a new instruction
- return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
+ if (Argument *Arg = dyn_cast<Argument>(Condition)) {
+ BasicBlock &EntryBlock = Arg->getParent()->getEntryBlock();
+ return BinaryOperator::CreateNot(Condition,
+ Arg->getName() + ".inv",
+ EntryBlock.getTerminator());
+ }
+
+ llvm_unreachable("Unhandled condition to invert");
}
/// \brief Build the condition for one edge
@@ -766,6 +779,20 @@ void StructurizeCFG::handleLoops(bool ExitUseAllowed,
handleLoops(false, LoopEnd);
}
+  // If the start of the loop is the entry block, we can't branch to it, so
+ // insert a new dummy entry block.
+ Function *LoopFunc = LoopStart->getParent();
+ if (LoopStart == &LoopFunc->getEntryBlock()) {
+ LoopStart->setName("entry.orig");
+
+ BasicBlock *NewEntry =
+ BasicBlock::Create(LoopStart->getContext(),
+ "entry",
+ LoopFunc,
+ LoopStart);
+ BranchInst::Create(LoopStart, NewEntry);
+ }
+
// Create an extra loop end node
LoopEnd = needPrefix(false);
BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index e17a416..12de9ee 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -248,7 +248,6 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
// If the edge isn't critical, then BB has a single successor or Succ has a
// single pred. Split the block.
- BasicBlock::iterator SplitPoint;
if (BasicBlock *SP = Succ->getSinglePredecessor()) {
// If the successor only has a single pred, split the top of the successor
// block.
@@ -401,8 +400,12 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
// PHI.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i)
- PN->removeIncomingValue(Preds[i], false);
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+ // Explicitly check the BB index here to handle duplicates in Preds.
+ int Idx = PN->getBasicBlockIndex(Preds[i]);
+ if (Idx >= 0)
+ PN->removeIncomingValue(Idx, false);
+ }
} else {
// If the values coming into the block are not the same, we need a PHI.
// Create the new PHI node, insert it into NewBB at the end of the block
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 8f3ff96..0e7f7f7 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
@@ -45,7 +44,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<LoopInfo>();
- AU.addPreserved<ProfileInfo>();
// No loop canonicalization guarantees are broken by this pass.
AU.addPreservedID(LoopSimplifyID);
@@ -213,10 +211,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
- ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
// If we have nothing to update, just return.
- if (DT == 0 && LI == 0 && PI == 0)
+ if (DT == 0 && LI == 0)
return NewBB;
// Now update analysis information. Since the only predecessor of NewBB is
@@ -369,9 +366,5 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
}
}
- // Update ProfileInfo if it is around.
- if (PI)
- PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges);
-
return NewBB;
}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 3648fd6..5afd6b8 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMTransformUtils
CmpInstAnalysis.cpp
CodeExtractor.cpp
DemoteRegToStack.cpp
+ GlobalStatus.cpp
InlineFunction.cpp
InstructionNamer.cpp
IntegerDivision.cpp
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 82013f9..6f00864 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -665,8 +665,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
TheSwitch->setCondition(call);
TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
// Remove redundant case
- SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1);
- TheSwitch->removeCase(ToBeRemoved);
+ TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
break;
}
}
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 9cbe15d..1da226b 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -266,8 +266,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
BasicBlock *CB;
BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
bool Iteration = true;
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ IRBuilder<>::InsertPointGuard Guard(Builder);
Value *PC = PBI->getCondition();
do {
@@ -298,7 +297,6 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
new UnreachableInst(CB->getContext(), CB);
} while (Iteration);
- Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
return true;
}
@@ -372,7 +370,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
/// Check whether \param BB is the merge block of a if-region. If yes, check
/// whether there exists an adjacent if-region upstream, the two if-regions
-/// contain identical instuctions and can be legally merged. \returns true if
+/// contain identical instructions and can be legally merged. \returns true if
/// the two if-regions are merged.
///
/// From:
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
new file mode 100644
index 0000000..5f0a563
--- /dev/null
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -0,0 +1,183 @@
+//===-- GlobalStatus.cpp - Compute status info for globals ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+
+using namespace llvm;
+
+/// Return the stronger of the two orderings. If the two orderings are acquire
+/// and release, then return AcquireRelease.
+///
+static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
+ if (X == Acquire && Y == Release)
+ return AcquireRelease;
+ if (Y == Acquire && X == Release)
+ return AcquireRelease;
+ return (AtomicOrdering)std::max(X, Y);
+}
+
+/// It is safe to destroy a constant iff it is only used by other constants.
+/// Note that constants cannot be cyclic, so this test is pretty easy to
+/// implement recursively.
+///
+bool llvm::isSafeToDestroyConstant(const Constant *C) {
+ if (isa<GlobalValue>(C))
+ return false;
+
+ for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E;
+ ++UI)
+ if (const Constant *CU = dyn_cast<Constant>(*UI)) {
+ if (!isSafeToDestroyConstant(CU))
+ return false;
+ } else
+ return false;
+ return true;
+}
+
+static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
+ SmallPtrSet<const PHINode *, 16> &PhiUsers) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
+ ++UI) {
+ const User *U = *UI;
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+ GS.HasNonInstructionUser = true;
+
+ // If the result of the constantexpr isn't a pointer type, then we won't
+ // know to expect it in various places. Just reject early.
+ if (!isa<PointerType>(CE->getType()))
+ return true;
+
+ if (analyzeGlobalAux(CE, GS, PhiUsers))
+ return true;
+ } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
+ if (!GS.HasMultipleAccessingFunctions) {
+ const Function *F = I->getParent()->getParent();
+ if (GS.AccessingFunction == 0)
+ GS.AccessingFunction = F;
+ else if (GS.AccessingFunction != F)
+ GS.HasMultipleAccessingFunctions = true;
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ GS.IsLoaded = true;
+ // Don't hack on volatile loads.
+ if (LI->isVolatile())
+ return true;
+ GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering());
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ // Don't allow a store OF the address, only stores TO the address.
+ if (SI->getOperand(0) == V)
+ return true;
+
+ // Don't hack on volatile stores.
+ if (SI->isVolatile())
+ return true;
+
+ GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
+
+ // If this is a direct store to the global (i.e., the global is a scalar
+ // value, not an aggregate), keep more specific information about
+ // stores.
+ if (GS.StoredType != GlobalStatus::Stored) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(SI->getOperand(1))) {
+ Value *StoredVal = SI->getOperand(0);
+
+ if (Constant *C = dyn_cast<Constant>(StoredVal)) {
+ if (C->isThreadDependent()) {
+ // The stored value changes between threads; don't track it.
+ return true;
+ }
+ }
+
+ if (StoredVal == GV->getInitializer()) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (isa<LoadInst>(StoredVal) &&
+ cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
+ if (GS.StoredType < GlobalStatus::InitializerStored)
+ GS.StoredType = GlobalStatus::InitializerStored;
+ } else if (GS.StoredType < GlobalStatus::StoredOnce) {
+ GS.StoredType = GlobalStatus::StoredOnce;
+ GS.StoredOnceValue = StoredVal;
+ } else if (GS.StoredType == GlobalStatus::StoredOnce &&
+ GS.StoredOnceValue == StoredVal) {
+ // noop.
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ } else {
+ GS.StoredType = GlobalStatus::Stored;
+ }
+ }
+ } else if (isa<BitCastInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<GetElementPtrInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<SelectInst>(I)) {
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ // PHI nodes we can check just like select or GEP instructions, but we
+ // have to be careful about infinite recursion.
+ if (PhiUsers.insert(PN)) // Not already visited.
+ if (analyzeGlobalAux(I, GS, PhiUsers))
+ return true;
+ } else if (isa<CmpInst>(I)) {
+ GS.IsCompared = true;
+ } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ if (MTI->isVolatile())
+ return true;
+ if (MTI->getArgOperand(0) == V)
+ GS.StoredType = GlobalStatus::Stored;
+ if (MTI->getArgOperand(1) == V)
+ GS.IsLoaded = true;
+ } else if (const MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!");
+ if (MSI->isVolatile())
+ return true;
+ GS.StoredType = GlobalStatus::Stored;
+ } else if (ImmutableCallSite C = I) {
+ if (!C.isCallee(UI))
+ return true;
+ GS.IsLoaded = true;
+ } else {
+ return true; // Any other non-load instruction might take address!
+ }
+ } else if (const Constant *C = dyn_cast<Constant>(U)) {
+ GS.HasNonInstructionUser = true;
+ // We might have a dead and dangling constant hanging off of here.
+ if (!isSafeToDestroyConstant(C))
+ return true;
+ } else {
+ GS.HasNonInstructionUser = true;
+ // Otherwise must be some other user.
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
+ SmallPtrSet<const PHINode *, 16> PhiUsers;
+ return analyzeGlobalAux(V, GS, PhiUsers);
+}
+
+GlobalStatus::GlobalStatus()
+ : IsCompared(false), IsLoaded(false), StoredType(NotStored),
+ StoredOnceValue(0), AccessingFunction(0),
+ HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
+ Ordering(NotAtomic) {}
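
A sketch of how a client such as GlobalOpt can consume this analysis; the header path mirrors the new source file, and the field and enumerator names are the ones initialized and ordered in the code above (isQuietGlobal itself is a hypothetical helper):

#include "llvm/IR/GlobalVariable.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;

static bool isQuietGlobal(const GlobalVariable *GV) {
  GlobalStatus GS;
  // analyzeGlobal returns true when it loses track of a use (the address
  // escapes, a volatile access, ...); treat that conservatively.
  if (GlobalStatus::analyzeGlobal(GV, GS))
    return false;
  return !GS.IsCompared && GS.StoredType <= GlobalStatus::InitializerStored;
}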
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index dabb67b..d021bce 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -193,7 +193,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
CallInst *CI = dyn_cast<CallInst>(I);
// If this call cannot unwind, don't convert it to an invoke.
- if (!CI || CI->doesNotThrow())
+ // Inline asm calls cannot throw.
+ if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
continue;
// Convert this function call into an invoke instruction. First, split the
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 2d1b166..f15e8d5 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -55,7 +55,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
- std::vector<BasicBlock*> LoopBlocks;
PredIteratorCache PredCache;
Loop *L;
@@ -82,11 +81,6 @@ namespace {
// Check the special guarantees that LCSSA makes.
assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
}
-
- /// inLoop - returns true if the given block is within the current loop
- bool inLoop(BasicBlock *B) const {
- return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
- }
};
}
@@ -129,11 +123,6 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
if (ExitBlocks.empty())
return false;
- // Speed up queries by creating a sorted vector of blocks.
- LoopBlocks.clear();
- LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
- array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
-
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, rewrite those uses.
bool MadeChange = false;
@@ -198,7 +187,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
if (PHINode *PN = dyn_cast<PHINode>(U))
UserBB = PN->getIncomingBlock(UI);
- if (InstBB != UserBB && !inLoop(UserBB))
+ if (InstBB != UserBB && !L->contains(UserBB))
UsesToRewrite.push_back(&UI.getUse());
}
@@ -244,7 +233,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
// If the exit block has a predecessor not within the loop, arrange for
// the incoming value use corresponding to that predecessor to be
// rewritten in terms of a different LCSSA PHI.
- if (!inLoop(*PI))
+ if (!L->contains(*PI))
UsesToRewrite.push_back(
&PN->getOperandUse(
PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
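
Loop::contains already answers block membership through the loop's own block set, so the sorted vector plus binary_search that LCSSA maintained by hand was redundant. A minimal sketch of the replacement query (function name is illustrative):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool useIsOutsideLoop(const Loop *L, const Instruction *Inst,
                             const BasicBlock *UserBB) {
  // contains() consults the loop's block set directly, replacing the
  // locally maintained sorted vector + std::binary_search.
  return Inst->getParent() != UserBB && !L->contains(UserBB);
}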
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 08e1808..2768041 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -16,10 +16,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
@@ -43,6 +43,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+
//===----------------------------------------------------------------------===//
// Local constant propagation.
//
@@ -193,33 +195,28 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
SwitchInst::CaseIt FirstCase = SI->case_begin();
- IntegersSubset& Case = FirstCase.getCaseValueEx();
- if (Case.isSingleNumber()) {
- // FIXME: Currently work with ConstantInt based numbers.
- Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
- Case.getSingleNumber(0).toConstantInt(),
- "cond");
-
- // Insert the new branch.
- BranchInst *NewBr = Builder.CreateCondBr(Cond,
- FirstCase.getCaseSuccessor(),
- SI->getDefaultDest());
- MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
- if (MD && MD->getNumOperands() == 3) {
- ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
- ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
- assert(SICase && SIDef);
- // The TrueWeight should be the weight for the single case of SI.
- NewBr->setMetadata(LLVMContext::MD_prof,
- MDBuilder(BB->getContext()).
- createBranchWeights(SICase->getValue().getZExtValue(),
- SIDef->getValue().getZExtValue()));
- }
+ Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
+ FirstCase.getCaseValue(), "cond");
- // Delete the old switch.
- SI->eraseFromParent();
- return true;
+ // Insert the new branch.
+ BranchInst *NewBr = Builder.CreateCondBr(Cond,
+ FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
+ MDNode* MD = SI->getMetadata(LLVMContext::MD_prof);
+ if (MD && MD->getNumOperands() == 3) {
+ ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2));
+ ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1));
+ assert(SICase && SIDef);
+ // The TrueWeight should be the weight for the single case of SI.
+ NewBr->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(BB->getContext()).
+ createBranchWeights(SICase->getValue().getZExtValue(),
+ SIDef->getValue().getZExtValue()));
}
+
+ // Delete the old switch.
+ SI->eraseFromParent();
+ return true;
}
return false;
}
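
The rewritten path no longer goes through IntegersSubset: a one-case switch now always becomes an icmp eq plus a conditional branch, with !prof branch weights carried over when present. A hedged driver sketch using the public Local.h entry point (foldTrivialSwitches is a hypothetical wrapper):

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static bool foldTrivialSwitches(Function &F) {
  bool Changed = false;
  // For `switch i32 %x, label %def [i32 5, label %five]` this now emits
  // `%cond = icmp eq i32 %x, 5` followed by `br i1 %cond, %five, %def`.
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
  return Changed;
}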
@@ -415,7 +412,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
Instruction *Inst = BI++;
WeakVH BIHandle(BI);
- if (recursivelySimplifyInstruction(Inst, TD)) {
+ if (recursivelySimplifyInstruction(Inst, TD, TLI)) {
MadeChange = true;
if (BIHandle != BI)
BI = BB->begin();
@@ -515,11 +512,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
DT->changeImmediateDominator(DestBB, PredBBIDom);
DT->eraseNode(PredBB);
}
- ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
- if (PI) {
- PI->replaceAllUses(PredBB, DestBB);
- PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
- }
}
// Nuke BB.
PredBB->eraseFromParent();
@@ -533,7 +525,7 @@ static bool CanMergeValues(Value *First, Value *Second) {
}
/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into succ.
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
///
/// Assumption: Succ is the single successor for BB.
///
@@ -1053,7 +1045,11 @@ bool llvm::LowerDbgDeclare(Function &F) {
for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(),
E = Dbgs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
- if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ // If this is an alloca for a scalar variable, insert a dbg.value
+ // at each load and store to the alloca and erase the dbg.declare.
+ if (AI && !AI->isArrayAllocation()) {
+
// We only remove the dbg.declare intrinsic if all uses are
// converted to dbg.value intrinsics.
bool RemoveDDI = true;
@@ -1121,33 +1117,153 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
return true;
}
-bool llvm::removeUnreachableBlocks(Function &F) {
- SmallPtrSet<BasicBlock*, 16> Reachable;
+/// changeToUnreachable - Insert an unreachable instruction before the specified
+/// instruction, making it and the rest of the code in the block dead.
+static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+ BasicBlock *BB = I->getParent();
+ // Loop over all of the successors, removing BB's entry from any PHI
+ // nodes.
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ (*SI)->removePredecessor(BB);
+
+ // Insert a call to llvm.trap right before this. This turns the undefined
+ // behavior into a hard fail instead of falling through into random code.
+ if (UseLLVMTrap) {
+ Function *TrapFn =
+ Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+ CallInst *CallTrap = CallInst::Create(TrapFn, "", I);
+ CallTrap->setDebugLoc(I->getDebugLoc());
+ }
+ new UnreachableInst(I->getContext(), I);
+
+ // All instructions after this are dead.
+ BasicBlock::iterator BBI = I, BBE = BB->end();
+ while (BBI != BBE) {
+ if (!BBI->use_empty())
+ BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
+ BB->getInstList().erase(BBI++);
+ }
+}
+
+/// changeToCall - Convert the specified invoke into a normal call.
+static void changeToCall(InvokeInst *II) {
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setAttributes(II->getAttributes());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ II->replaceAllUsesWith(NewCall);
+
+ // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+
+ // Update PHI nodes in the unwind destination
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+}
+
+static bool markAliveBlocks(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 128> &Reachable) {
+
SmallVector<BasicBlock*, 128> Worklist;
- Worklist.push_back(&F.getEntryBlock());
- Reachable.insert(&F.getEntryBlock());
+ Worklist.push_back(BB);
+ Reachable.insert(BB);
+ bool Changed = false;
do {
- BasicBlock *BB = Worklist.pop_back_val();
+ BB = Worklist.pop_back_val();
+
+ // Do a quick scan of the basic block, turning any obviously unreachable
+ // instructions into LLVM unreachable insts. The instruction combining pass
+ // canonicalizes unreachable insts into stores to null or undef.
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){
+ if (CallInst *CI = dyn_cast<CallInst>(BBI)) {
+ if (CI->doesNotReturn()) {
+ // If we found a call to a no-return function, insert an unreachable
+ // instruction after it. Make sure there isn't *already* one there
+ // though.
+ ++BBI;
+ if (!isa<UnreachableInst>(BBI)) {
+ // Don't insert a call to llvm.trap right before the unreachable.
+ changeToUnreachable(BBI, false);
+ Changed = true;
+ }
+ break;
+ }
+ }
+
+ // Store to undef and store to null are undefined and used to signal that
+ // they should be changed to unreachable by passes that can't modify the
+ // CFG.
+ if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
+ Value *Ptr = SI->getOperand(1);
+
+ if (isa<UndefValue>(Ptr) ||
+ (isa<ConstantPointerNull>(Ptr) &&
+ SI->getPointerAddressSpace() == 0)) {
+ changeToUnreachable(SI, true);
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ // Turn invokes that call 'nounwind' functions into ordinary calls.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+ Value *Callee = II->getCalledValue();
+ if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
+ changeToUnreachable(II, true);
+ Changed = true;
+ } else if (II->doesNotThrow()) {
+ if (II->use_empty() && II->onlyReadsMemory()) {
+ // Branch directly to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->getUnwindDest()->removePredecessor(II->getParent());
+ II->eraseFromParent();
+ } else
+ changeToCall(II);
+ Changed = true;
+ }
+ }
+
+ Changed |= ConstantFoldTerminator(BB, true);
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
if (Reachable.insert(*SI))
Worklist.push_back(*SI);
} while (!Worklist.empty());
+ return Changed;
+}
+
+/// removeUnreachableBlocks - Remove blocks that are not reachable, even
+/// if they are in a dead cycle. Return true if a change was made, false
+/// otherwise.
+bool llvm::removeUnreachableBlocks(Function &F) {
+ SmallPtrSet<BasicBlock*, 128> Reachable;
+ bool Changed = markAliveBlocks(F.begin(), Reachable);
+
+ // If there are unreachable blocks in the CFG...
if (Reachable.size() == F.size())
- return false;
+ return Changed;
assert(Reachable.size() < F.size());
- for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) {
- if (Reachable.count(I))
+ NumRemoved += F.size()-Reachable.size();
+
+ // Loop over all of the basic blocks that are not reachable, dropping all of
+ // their internal references...
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Reachable.count(BB))
continue;
- for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI)
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
if (Reachable.count(*SI))
- (*SI)->removePredecessor(I);
- I->dropAllReferences();
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
}
- for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;)
+ for (Function::iterator I = ++F.begin(); I != F.end();)
if (!Reachable.count(I))
I = F.getBasicBlockList().erase(I);
else
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index cb581b3..162807d 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -90,7 +90,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
// Move all definitions in the successor to the predecessor...
OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
- std::string OldName = BB->getName();
+ // OldName will be valid until erased.
+ StringRef OldName = BB->getName();
// Erase basic block from the function...
@@ -102,12 +103,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
}
}
LI->removeBlock(BB);
- BB->eraseFromParent();
// Inherit predecessor's name if it exists...
if (!OldName.empty() && !OnlyPred->hasName())
OnlyPred->setName(OldName);
+ BB->eraseFromParent();
+
return OnlyPred;
}
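
The reordered eraseFromParent matters because StringRef does not own its bytes: BB->getName() points into the block's own name storage, which dies with the block. A minimal sketch of the hazard the change avoids (function and names are hypothetical):

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

static void inheritNameThenErase(BasicBlock *From, BasicBlock *To) {
  StringRef OldName = From->getName(); // borrows From's name storage
  if (!OldName.empty() && !To->hasName())
    To->setName(OldName);  // must run while From is still alive
  From->eraseFromParent(); // OldName would dangle past this point
}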
@@ -239,8 +241,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
DEBUG(dbgs() << "!\n");
}
- std::vector<BasicBlock*> LoopBlocks = L->getBlocks();
-
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 4aee8ff..e017f50 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -29,7 +29,7 @@
using namespace llvm;
-STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled");
+STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
static cl::opt<uint32_t>
LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index f66b54d..9799a30 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -346,7 +346,6 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
// Scan all of the uses and see if the live range is live across an unwind
// edge. If we find a use live across an invoke edge, create an alloca
// and spill the value.
- std::set<InvokeInst*> InvokesWithStoreInserted;
// Find all of the blocks that this value is live in.
std::set<BasicBlock*> LiveBBs;
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 955b853..2d2a8a5 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -66,6 +66,18 @@ namespace {
BasicBlock* OrigBlock, BasicBlock* Default);
unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
};
+
+ /// The comparison function for sorting the switch case values in the vector.
+ /// WARNING: Case ranges should be disjoint!
+ struct CaseCmp {
+ bool operator () (const LowerSwitch::CaseRange& C1,
+ const LowerSwitch::CaseRange& C2) {
+
+ const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+ const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+ return CI1->getValue().slt(CI2->getValue());
+ }
+ };
}
char LowerSwitch::ID = 0;
@@ -147,7 +159,7 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
Function::iterator FI = OrigBlock;
F->getBasicBlockList().insert(++FI, NewNode);
- ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_ULT,
+ ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT,
Val, Pivot.Low, "Pivot");
NewNode->getInstList().push_back(Comp);
BranchInst::Create(LBranch, RBranch, Comp, NewNode);
@@ -222,34 +234,40 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
// Clusterify - Transform the simple list of Cases into a list of CaseRanges
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
-
- IntegersSubsetToBB TheClusterifier;
+ unsigned numCmps = 0;
// Start with "simple" cases
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- BasicBlock *SuccBB = i.getCaseSuccessor();
- IntegersSubset CaseRanges = i.getCaseValueEx();
- TheClusterifier.add(CaseRanges, SuccBB);
- }
-
- TheClusterifier.optimize();
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
+ i.getCaseSuccessor()));
- size_t numCmps = 0;
- for (IntegersSubsetToBB::RangeIterator i = TheClusterifier.begin(),
- e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
- IntegersSubsetToBB::Cluster &C = *i;
-
- // FIXME: Currently work with ConstantInt based numbers.
- // Changing it to APInt based is a pretty heavy for this commit.
- Cases.push_back(CaseRange(C.first.getLow().toConstantInt(),
- C.first.getHigh().toConstantInt(), C.second));
- if (C.first.isSingleNumber())
+ std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+ // Merge cases into clusters
+ if (Cases.size()>=2)
+ for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
+ int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+ int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+ BasicBlock* nextBB = J->BB;
+ BasicBlock* currentBB = I->BB;
+
+ // If the two neighboring cases go to the same destination, merge them
+ // into a single case.
+ if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+ I->High = J->High;
+ J = Cases.erase(J);
+ } else {
+ I = J++;
+ }
+ }
+
+ for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+ if (I->Low != I->High)
// A range counts double, since it requires two compares.
++numCmps;
}
- return numCmps;
+ return numCmps;
}
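
The rewritten Clusterify is plain sort-and-merge: order the cases by value, then coalesce neighbours that are numerically adjacent and share a destination. A self-contained sketch of the merging rule on plain integers (Range is a hypothetical stand-in for CaseRange/ConstantInt):

#include <algorithm>
#include <vector>

struct Range { long Low, High; int Dest; }; // stand-in for CaseRange

static bool lowerLow(const Range &A, const Range &B) { return A.Low < B.Low; }

static void clusterify(std::vector<Range> &Cases) {
  std::sort(Cases.begin(), Cases.end(), lowerLow);
  for (size_t I = 0; I + 1 < Cases.size();) {
    // Numerically adjacent and same destination: grow the current range.
    if (Cases[I + 1].Low - Cases[I].High == 1 &&
        Cases[I].Dest == Cases[I + 1].Dest) {
      Cases[I].High = Cases[I + 1].High;
      Cases.erase(Cases.begin() + I + 1);
    } else {
      ++I;
    }
  }
}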
// processSwitchInst - Replace the specified switch instruction with a sequence
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index ebd7db6..61b3965 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -16,7 +16,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -28,7 +27,6 @@ STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
struct PromotePass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
-
PromotePass() : FunctionPass(ID) {
initializePromotePassPass(*PassRegistry::getPassRegistry());
}
@@ -64,7 +62,6 @@ bool PromotePass::runOnFunction(Function &F) {
bool Changed = false;
DominatorTree &DT = getAnalysis<DominatorTree>();
- const DataLayout *DL = getAnalysisIfAvailable<DataLayout>();
while (1) {
Allocas.clear();
@@ -73,12 +70,12 @@ bool PromotePass::runOnFunction(Function &F) {
// the entry node
for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca?
- if (isAllocaPromotable(AI, DL))
+ if (isAllocaPromotable(AI))
Allocas.push_back(AI);
if (Allocas.empty()) break;
- PromoteMemToReg(Allocas, DT, DL);
+ PromoteMemToReg(Allocas, DT);
NumPromoted += Allocas.size();
Changed = true;
}
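
After the signature change, promotion needs only the dominator tree; DataLayout is no longer threaded through. A hedged sketch of the driver loop, mirroring PromotePass above (promoteEntryAllocas is illustrative):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
using namespace llvm;

static void promoteEntryAllocas(Function &F, DominatorTree &DT) {
  SmallVector<AllocaInst *, 16> Allocas;
  BasicBlock &Entry = F.getEntryBlock();
  for (BasicBlock::iterator I = Entry.begin(), E = Entry.end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      if (isAllocaPromotable(AI)) // new, DataLayout-free signature
        Allocas.push_back(AI);
  if (!Allocas.empty())
    PromoteMemToReg(Allocas, DT); // AliasSetTracker argument defaults to 0
}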
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 6910180..8f6eee3 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -30,7 +30,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -46,7 +45,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
-#include "llvm/InstVisitor.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -58,16 +56,56 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
-namespace {
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+ // FIXME: If the memory unit is of pointer or integer type, we can permit
+ // assignments to subsections of the memory unit.
+
+ // Only allow direct and non-volatile loads and stores...
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE; ++UI) { // Loop over all of the uses of the alloca
+ const User *U = *UI;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Note that atomic loads can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (LI->isVolatile())
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getOperand(0) == AI)
+ return false; // Don't allow a store OF the AI, only INTO the AI.
+ // Note that atomic stores can be transformed; atomic semantics do
+ // not have any meaning for a local alloca.
+ if (SI->isVolatile())
+ return false;
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!GEPI->hasAllZeroIndices())
+ return false;
+ if (!onlyUsedByLifetimeMarkers(GEPI))
+ return false;
+ } else {
+ return false;
+ }
+ }
-struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
- const DataLayout *DL;
+ return true;
+}
+
+namespace {
+struct AllocaInfo {
SmallVector<BasicBlock *, 32> DefiningBlocks;
SmallVector<BasicBlock *, 32> UsingBlocks;
- SmallVector<Instruction *, 8> DeadInsts;
- Type *AllocaTy;
StoreInst *OnlyStore;
BasicBlock *OnlyBlock;
bool OnlyUsedInOneBlock;
@@ -75,13 +113,9 @@ struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
Value *AllocaPointerVal;
DbgDeclareInst *DbgDeclare;
- AllocaInfo(const DataLayout *DL) : DL(DL) {}
-
void clear() {
DefiningBlocks.clear();
UsingBlocks.clear();
- DeadInsts.clear();
- AllocaTy = 0;
OnlyStore = 0;
OnlyBlock = 0;
OnlyUsedInOneBlock = true;
@@ -91,116 +125,39 @@ struct AllocaInfo : private InstVisitor<AllocaInfo, bool> {
/// Scan the uses of the specified alloca, filling in the AllocaInfo used
/// by the rest of the pass to reason about the uses of this alloca.
- bool analyzeAlloca(AllocaInst &AI) {
+ void AnalyzeAlloca(AllocaInst *AI) {
clear();
- AllocaTy = AI.getAllocatedType();
- enqueueUsers(AI);
-
- // Walk queued up uses in the worklist to handle nested uses.
- while (!UseWorklist.empty()) {
- U = UseWorklist.pop_back_val();
- Instruction &I = *cast<Instruction>(U->getUser());
- if (!visit(I))
- return false; // Propagate failure to promote up.
+ // As we scan the uses of the alloca instruction, keep track of stores,
+ // and decide whether all of the loads and stores to the alloca are within
+ // the same basic block.
+ for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
+ UI != E;) {
+ Instruction *User = cast<Instruction>(*UI++);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+ // Remember the basic blocks which define new values for the alloca
+ DefiningBlocks.push_back(SI->getParent());
+ AllocaPointerVal = SI->getOperand(0);
+ OnlyStore = SI;
+ } else {
+ LoadInst *LI = cast<LoadInst>(User);
+ // Otherwise it must be a load instruction, keep track of variable
+ // reads.
+ UsingBlocks.push_back(LI->getParent());
+ AllocaPointerVal = LI;
+ }
if (OnlyUsedInOneBlock) {
if (OnlyBlock == 0)
- OnlyBlock = I.getParent();
- else if (OnlyBlock != I.getParent())
+ OnlyBlock = User->getParent();
+ else if (OnlyBlock != User->getParent())
OnlyUsedInOneBlock = false;
}
}
- DbgDeclare = FindAllocaDbgDeclare(&AI);
- return true;
- }
-
-private:
- // Befriend the base class so it can call through private visitor methods.
- friend class InstVisitor<AllocaInfo, bool>;
-
- /// \brief A use pointer that is non-null when visiting uses.
- Use *U;
-
- /// \brief A worklist for recursively visiting all uses of an alloca.
- SmallVector<Use *, 8> UseWorklist;
-
- /// \brief A set for preventing cyclic visitation.
- SmallPtrSet<Use *, 8> VisitedUses;
-
- void enqueueUsers(Instruction &I) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;
- ++UI)
- if (VisitedUses.insert(&UI.getUse()))
- UseWorklist.push_back(&UI.getUse());
+ DbgDeclare = FindAllocaDbgDeclare(AI);
}
-
- bool visitLoadInst(LoadInst &LI) {
- if (LI.isVolatile() || LI.getType() != AllocaTy)
- return false;
-
- // Keep track of variable reads.
- UsingBlocks.push_back(LI.getParent());
- AllocaPointerVal = &LI;
- return true;
- }
-
- bool visitStoreInst(StoreInst &SI) {
- if (SI.isVolatile() || SI.getValueOperand() == U->get() ||
- SI.getValueOperand()->getType() != AllocaTy)
- return false;
-
- // Remember the basic blocks which define new values for the alloca
- DefiningBlocks.push_back(SI.getParent());
- AllocaPointerVal = SI.getOperand(0);
- OnlyStore = &SI;
- return true;
- }
-
- bool visitBitCastInst(BitCastInst &BC) {
- if (BC.use_empty())
- DeadInsts.push_back(&BC);
- else
- enqueueUsers(BC);
- return true;
- }
-
- bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
- if (GEPI.use_empty()) {
- DeadInsts.push_back(&GEPI);
- return true;
- }
-
- enqueueUsers(GEPI);
-
- return GEPI.hasAllZeroIndices();
- }
-
- // We can promote through debug info intrinsics as they don't alter the
- // value stored in memory.
- bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) {
- DeadInsts.push_back(&I);
- return true;
- }
-
- bool visitIntrinsicInst(IntrinsicInst &II) {
- switch (II.getIntrinsicID()) {
- default:
- return false;
-
- // Lifetime intrinsics don't preclude promoting the memory to a register.
- // FIXME: We should use these to promote to undef when outside of a valid
- // lifetime.
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- DeadInsts.push_back(&II);
- return true;
- }
- }
-
- // The fallback is that the alloca cannot be promoted.
- bool visitInstruction(Instruction &I) { return false; }
};
// Data package used by RenamePass()
@@ -278,7 +235,6 @@ struct PromoteMem2Reg {
std::vector<AllocaInst *> Allocas;
DominatorTree &DT;
DIBuilder DIB;
- const DataLayout *DL;
/// An AliasSetTracker object to update. If null, don't update it.
AliasSetTracker *AST;
@@ -324,9 +280,9 @@ struct PromoteMem2Reg {
public:
PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- const DataLayout *DL, AliasSetTracker *AST)
+ AliasSetTracker *AST)
: Allocas(Allocas.begin(), Allocas.end()), DT(DT),
- DIB(*DT.getRoot()->getParent()->getParent()), DL(DL), AST(AST) {}
+ DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {}
void run();
@@ -357,39 +313,27 @@ private:
} // end of anonymous namespace
-/// \brief Walk a small vector of dead instructions and recursively remove them
-/// and subsequently dead instructions.
-///
-/// This is only valid to call on dead instructions using an alloca which is
-/// promotable, as we leverage that assumption to delete them faster.
-static void removeDeadInstructions(AllocaInst *AI,
- SmallVectorImpl<Instruction *> &DeadInsts) {
- while (!DeadInsts.empty()) {
- Instruction *I = DeadInsts.pop_back_val();
-
- // Don't delete the alloca itself.
- if (I == AI)
- continue;
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+ // Knowing that this alloca is promotable, we know that it's safe to kill all
+ // instructions except for load and store.
- // Note that we open code the deletion algorithm here because we know
- // apriori that all of the instructions using an alloca that reaches here
- // are trivially dead when their use list becomes empty (The only risk are
- // lifetime markers which we specifically want to nuke). By coding it here
- // we can skip the triviality test and be more efficient.
- //
- // Null out all of the instruction's operands to see if any operand becomes
- // dead as we go.
- for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE;
- ++OI) {
- Instruction *Op = dyn_cast<Instruction>(*OI);
- if (!Op)
- continue;
-
- OI->set(0);
- if (!Op->use_empty())
- continue;
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ continue;
- DeadInsts.push_back(Op);
+ if (!I->getType()->isVoidTy()) {
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+ // Follow the use/def chain to erase them now instead of leaving it for
+ // dead code elimination later.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE;) {
+ Instruction *Inst = cast<Instruction>(*UI);
+ ++UI;
+ Inst->eraseFromParent();
+ }
}
I->eraseFromParent();
}
@@ -474,6 +418,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
DIBuilder DIB(*AI->getParent()->getParent()->getParent());
ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
DDI->eraseFromParent();
+ LBI.deleteValue(DDI);
}
// Remove the (now dead) store and alloca.
Info.OnlyStore->eraseFromParent();
@@ -486,16 +431,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
return true;
}
-namespace {
-/// This is a helper predicate used to search by the first element of a pair.
-struct StoreIndexSearchPredicate {
- bool operator()(const std::pair<unsigned, StoreInst *> &LHS,
- const std::pair<unsigned, StoreInst *> &RHS) {
- return LHS.first < RHS.first;
- }
-};
-}
-
/// Many allocas are only used within a single basic block. If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
@@ -528,8 +463,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Sort the stores by their index, making it efficient to do a lookup with a
// binary search.
- std::sort(StoresByIndex.begin(), StoresByIndex.end(),
- StoreIndexSearchPredicate());
+ std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
@@ -544,7 +478,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
StoresByIndexTy::iterator I =
std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
- StoreIndexSearchPredicate());
+ less_first());
if (I == StoresByIndex.begin())
// If there is no store before this load, the load takes the undef value.
@@ -577,8 +511,10 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LBI.deleteValue(AI);
// The alloca's debuginfo can be removed as well.
- if (DbgDeclareInst *DDI = Info.DbgDeclare)
+ if (DbgDeclareInst *DDI = Info.DbgDeclare) {
DDI->eraseFromParent();
+ LBI.deleteValue(DDI);
+ }
++NumLocalPromoted;
}
@@ -590,23 +526,17 @@ void PromoteMem2Reg::run() {
PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
- AllocaInfo Info(DL);
+ AllocaInfo Info;
LargeBlockInfo LBI;
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
+ assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
- // Calculate the set of read and write-locations for each alloca. This is
- // analogous to finding the 'uses' and 'definitions' of each variable.
- bool Good = Info.analyzeAlloca(*AI);
- (void)Good;
- assert(Good && "Cannot promote non-promotable alloca!");
-
- // Nuke all of the dead instructions.
- removeDeadInstructions(AI, Info.DeadInsts);
+ removeLifetimeIntrinsicUsers(AI);
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
@@ -620,6 +550,10 @@ void PromoteMem2Reg::run() {
continue;
}
+ // Calculate the set of read and write-locations for each alloca. This is
+ // analogous to finding the 'uses' and 'definitions' of each variable.
+ Info.AnalyzeAlloca(AI);
+
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
if (Info.DefiningBlocks.size() == 1) {
@@ -904,16 +838,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
}
}
-namespace {
-typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
-
-struct DomTreeNodeCompare {
- bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
- return LHS.second < RHS.second;
- }
-};
-} // end anonymous namespace
-
/// At this point, we're committed to promoting the alloca using IDF's, and the
/// standard SSA construction algorithm. Determine which blocks need phi nodes
/// and see if we can optimize out some work by avoiding insertion of dead phi
@@ -931,9 +855,9 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
// Use a priority queue keyed on dominator tree level so that inserted nodes
// are handled from the bottom of the dominator tree upwards.
- typedef std::priority_queue<DomTreeNodePair,
- SmallVector<DomTreeNodePair, 32>,
- DomTreeNodeCompare> IDFPriorityQueue;
+ typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ less_second> IDFPriorityQueue;
IDFPriorityQueue PQ;
for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
@@ -1145,19 +1069,11 @@ NextIteration:
goto NextIteration;
}
-bool llvm::isAllocaPromotable(const AllocaInst *AI, const DataLayout *DL) {
- // We cast away constness because we re-use the non-const analysis that the
- // actual promotion routine uses. While it is non-const, it doesn't actually
- // mutate anything at this phase, and we discard the non-const results that
- // promotion uses to mutate the alloca.
- return AllocaInfo(DL).analyzeAlloca(*const_cast<AllocaInst *>(AI));
-}
-
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- const DataLayout *DL, AliasSetTracker *AST) {
+ AliasSetTracker *AST) {
// If there is nothing to do, bail out...
if (Allocas.empty())
return;
- PromoteMem2Reg(Allocas, DT, DL, AST).run();
+ PromoteMem2Reg(Allocas, DT, AST).run();
}
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index fc85ef3..30adbfa 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -63,7 +63,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
}
static bool IsEquivalentPHI(PHINode *PHI,
- DenseMap<BasicBlock*, Value*> &ValueMapping) {
+ SmallDenseMap<BasicBlock*, Value*, 8> &ValueMapping) {
unsigned PHINumValues = PHI->getNumIncomingValues();
if (PHINumValues != ValueMapping.size())
return false;
@@ -136,8 +136,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// Otherwise, we do need a PHI: check to see if we already have one available
// in this block that produces the right value.
if (isa<PHINode>(BB->begin())) {
- DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
- PredValues.end());
+ SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(),
+ PredValues.end());
PHINode *SomePHI;
for (BasicBlock::iterator It = BB->begin();
(SomePHI = dyn_cast<PHINode>(It)); ++It) {
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c4c1423..ff50b12 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -475,9 +476,13 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
CV = ICI->getOperand(0);
// Unwrap any lossless ptrtoint cast.
- if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext()))
- if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV))
- CV = PTII->getOperand(0);
+ if (TD && CV) {
+ if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
+ Value *Ptr = PTII->getPointerOperand();
+ if (PTII->getType() == TD->getIntPtrType(Ptr->getType()))
+ CV = Ptr;
+ }
+ }
return CV;
}
@@ -699,9 +704,10 @@ namespace {
};
}
-static int ConstantIntSortPredicate(const void *P1, const void *P2) {
- const ConstantInt *LHS = *(const ConstantInt*const*)P1;
- const ConstantInt *RHS = *(const ConstantInt*const*)P2;
+static int ConstantIntSortPredicate(ConstantInt *const *P1,
+ ConstantInt *const *P2) {
+ const ConstantInt *LHS = *P1;
+ const ConstantInt *RHS = *P2;
if (LHS->getValue().ult(RHS->getValue()))
return 1;
if (LHS->getValue() == RHS->getValue())
@@ -924,7 +930,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()),
+ CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()),
"magicptr");
}
@@ -1556,6 +1562,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
return true;
}
+/// \returns True if this block contains a CallInst with the NoDuplicate
+/// attribute.
+static bool HasNoDuplicateCall(const BasicBlock *BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ const CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI)
+ continue;
+ if (CI->cannotDuplicate())
+ return true;
+ }
+ return false;
+}
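
noduplicate exists for calls (barrier-style intrinsics, for example) that must not be cloned, and threading a branch through a block clones the block's body, so FoldCondBranchOnPHI has to bail out. A hedged sketch of producing such a call site, assuming the 3.4-era attribute API (markNoDuplicate is a hypothetical helper):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void markNoDuplicate(CallInst *CI) {
  // Afterwards CI->cannotDuplicate() is true, so HasNoDuplicateCall will
  // veto branch folding through CI's block.
  CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoDuplicate);
}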
+
/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
/// across this block.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
@@ -1603,6 +1622,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) {
// Now we know that this block has multiple preds and two succs.
if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+ if (HasNoDuplicateCall(BB)) return false;
+
// Okay, this is a simple enough basic block. See if any phi values are
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -2069,14 +2090,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Ensure that any values used in the bonus instruction are also used
// by the terminator of the predecessor. This means that those values
// must already have been resolved, so we won't be inhibiting the
- // out-of-order core by speculating them earlier.
- if (BonusInst) {
+ // out-of-order core by speculating them earlier. We also allow
+ // instructions that are used by the terminator's condition because it
+ // exposes more merging opportunities.
+ bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() &&
+ *BonusInst->use_begin() == Cond);
+
+ if (BonusInst && !UsedByBranch) {
// Collect the values used by the bonus inst
SmallPtrSet<Value*, 4> UsedValues;
for (Instruction::op_iterator OI = BonusInst->op_begin(),
OE = BonusInst->op_end(); OI != OE; ++OI) {
Value *V = *OI;
- if (!isa<Constant>(V))
+ if (!isa<Constant>(V) && !isa<Argument>(V))
UsedValues.insert(V);
}
@@ -2787,7 +2813,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD,
if (CompVal->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without DataLayout");
CompVal = Builder.CreatePtrToInt(CompVal,
- TD->getIntPtrType(CompVal->getContext()),
+ TD->getIntPtrType(CompVal->getType()),
"magicptr");
}
@@ -3160,7 +3186,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
/// and use it to remove dead cases.
static bool EliminateDeadSwitchCases(SwitchInst *SI) {
Value *Cond = SI->getCondition();
- unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
+ unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
ComputeMaskedBits(Cond, KnownZero, KnownOne);
@@ -3303,28 +3329,10 @@ static Constant *LookupConstant(Value *V,
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
-static Constant *ConstantFold(Instruction *I,
- const SmallDenseMap<Value*, Constant*>& ConstantPool) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- Constant *A = LookupConstant(BO->getOperand(0), ConstantPool);
- if (!A)
- return 0;
- Constant *B = LookupConstant(BO->getOperand(1), ConstantPool);
- if (!B)
- return 0;
- return ConstantExpr::get(BO->getOpcode(), A, B);
- }
-
- if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
- Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
- if (!A)
- return 0;
- Constant *B = LookupConstant(I->getOperand(1), ConstantPool);
- if (!B)
- return 0;
- return ConstantExpr::getCompare(Cmp->getPredicate(), A, B);
- }
-
+static Constant *
+ConstantFold(Instruction *I,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool,
+ const DataLayout *DL) {
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
if (!A)
@@ -3336,14 +3344,19 @@ static Constant *ConstantFold(Instruction *I,
return 0;
}
- if (CastInst *Cast = dyn_cast<CastInst>(I)) {
- Constant *A = LookupConstant(I->getOperand(0), ConstantPool);
- if (!A)
+ SmallVector<Constant *, 4> COps;
+ for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
+ if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
+ COps.push_back(A);
+ else
return 0;
- return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy());
}
- return 0;
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
+ COps[1], DL);
+
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
}
/// GetCaseResults - Try to determine the resulting constant values in phi nodes
@@ -3355,7 +3368,8 @@ GetCaseResults(SwitchInst *SI,
ConstantInt *CaseVal,
BasicBlock *CaseDest,
BasicBlock **CommonDest,
- SmallVectorImpl<std::pair<PHINode*,Constant*> > &Res) {
+ SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res,
+ const DataLayout *DL) {
// The block from which we enter the common destination.
BasicBlock *Pred = SI->getParent();
@@ -3374,7 +3388,7 @@ GetCaseResults(SwitchInst *SI,
} else if (isa<DbgInfoIntrinsic>(I)) {
// Skip debug intrinsic.
continue;
- } else if (Constant *C = ConstantFold(I, ConstantPool)) {
+ } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) {
// Instruction is side-effect free and constant.
ConstantPool.insert(std::make_pair(I, C));
} else {
@@ -3698,7 +3712,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
ResultsTy Results;
if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
- Results))
+ Results, TD))
return false;
// Append the result from this case to the list for each phi.
@@ -3712,7 +3726,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// Get the resulting values for the default case.
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
- DefaultResultsList))
+ DefaultResultsList, TD))
return false;
for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
PHINode *PHI = DefaultResultsList[I].first;
@@ -3733,14 +3747,32 @@ static bool SwitchToLookupTable(SwitchInst *SI,
CommonDest->getParent(),
CommonDest);
- // Check whether the condition value is within the case range, and branch to
- // the new BB.
+ // Compute the table index value.
Builder.SetInsertPoint(SI);
Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
"switch.tableidx");
- Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
- MinCaseVal->getType(), TableSize));
- Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+ // Compute the maximum table size representable by the integer type we are
+ // switching upon.
+ unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+ uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
+ assert(MaxTableSize >= TableSize &&
+ "It is impossible for a switch to have more entries than the max "
+ "representable value of its input integer type's size.");
+
+ // If we have a fully covered lookup table, unconditionally branch to the
+ // lookup table BB. Otherwise, check if the condition value is within the case
+ // range. If so, branch to the new BB; otherwise branch to SI's default
+ // destination.
+ const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize;
+ if (GeneratingCoveredLookupTable) {
+ Builder.CreateBr(LookupBB);
+ SI->getDefaultDest()->removePredecessor(SI->getParent());
+ } else {
+ Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+ MinCaseVal->getType(), TableSize));
+ Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+ }
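
Worked case for the covered-table test: an i2 condition has CaseSize == 2, so MaxTableSize == 4, and a switch with entries for 0..3 branches to the table unconditionally, dropping the default block as a predecessor. A self-contained arithmetic sketch (the two helpers are illustrative):

#include <cassert>
#include <stdint.h>

// Mirrors the guard above: an N-bit condition can index at most 2^N table
// entries; saturate at UINT64_MAX rather than shifting by 64.
static uint64_t maxTableSize(unsigned CaseSize) {
  return CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
}

static bool isCoveredTable(unsigned CaseSize, uint64_t TableSize) {
  assert(maxTableSize(CaseSize) >= TableSize && "table larger than domain");
  return maxTableSize(CaseSize) == TableSize; // e.g. i2 with 4 entries
}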
// Populate the BB that does the lookups.
Builder.SetInsertPoint(LookupBB);
@@ -3769,9 +3801,11 @@ static bool SwitchToLookupTable(SwitchInst *SI,
Builder.CreateBr(CommonDest);
// Remove the switch.
- for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
- if (Succ == SI->getDefaultDest()) continue;
+
+ if (Succ == SI->getDefaultDest())
+ continue;
Succ->removePredecessor(SI->getParent());
}
SI->eraseFromParent();
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 094c201..15b3e66 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -26,11 +27,16 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
using namespace llvm;
+static cl::opt<bool>
+ColdErrorCalls("error-reporting-is-cold", cl::init(true),
+ cl::Hidden, cl::desc("Treat error-reporting calls as cold"));
+
/// This class is the abstract base class for the set of optimizations that
/// corresponds to one library call.
namespace {
@@ -118,6 +124,21 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
return false;
}
+/// \brief Check whether the overloaded unary floating point function
+/// corresponding to \a Ty is available.
+static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
+ LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
+ LibFunc::Func LongDoubleFn) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ return TLI->has(FloatFn);
+ case Type::DoubleTyID:
+ return TLI->has(DoubleFn);
+ default:
+ return TLI->has(LongDoubleFn);
+ }
+}
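
The helper keys off the value's type to pick the right LibFunc enumerator, which lets the pow rewrites below refuse to emit exp2f/sqrtf flavours a target lacks. A standalone sketch of the same dispatch, shown outside the file so the shape is reusable (hasFloatFamilyFn is a hypothetical name):

#include "llvm/IR/Type.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;

// Anything that is not float or double falls through to the long-double
// flavour, matching the switch in SimplifyLibCalls above.
static bool hasFloatFamilyFn(const TargetLibraryInfo *TLI, Type *Ty,
                             LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
                             LibFunc::Func LongDoubleFn) {
  switch (Ty->getTypeID()) {
  case Type::FloatTyID:  return TLI->has(FloatFn);
  case Type::DoubleTyID: return TLI->has(DoubleFn);
  default:               return TLI->has(LongDoubleFn);
  }
}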
+
//===----------------------------------------------------------------------===//
// Fortified Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -477,7 +498,7 @@ struct StrChrOpt : public LibCallOptimization {
// Compute the offset, make sure to handle the case when we're searching for
// zero (a weird way to spell strlen).
- size_t I = CharC->getSExtValue() == 0 ?
+ size_t I = (0xFF & CharC->getSExtValue()) == 0 ?
Str.size() : Str.find(CharC->getSExtValue());
if (I == StringRef::npos) // Didn't find the char. strchr returns null.
return Constant::getNullValue(CI->getType());
@@ -513,7 +534,7 @@ struct StrRChrOpt : public LibCallOptimization {
}
// Compute the offset.
- size_t I = CharC->getSExtValue() == 0 ?
+ size_t I = (0xFF & CharC->getSExtValue()) == 0 ?
Str.size() : Str.rfind(CharC->getSExtValue());
if (I == StringRef::npos) // Didn't find the char. Return null.
return Constant::getNullValue(CI->getType());
@@ -774,7 +795,7 @@ struct StrPBrkOpt : public LibCallOptimization {
// Constant folding.
if (HasS1 && HasS2) {
size_t I = S1.find_first_of(S2);
- if (I == std::string::npos) // No match.
+ if (I == StringRef::npos) // No match.
return Constant::getNullValue(CI->getType());
return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
@@ -912,7 +933,7 @@ struct StrStrOpt : public LibCallOptimization {
// If both strings are known, constant fold it.
if (HasStr1 && HasStr2) {
- std::string::size_type Offset = SearchStr.find(ToFindStr);
+ size_t Offset = SearchStr.find(ToFindStr);
if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
return Constant::getNullValue(CI->getType());
@@ -1031,7 +1052,7 @@ struct MemSetOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
- FT->getParamType(2) != TD->getIntPtrType(*Context))
+ FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)))
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -1133,9 +1154,13 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
+ // pow(1.0, x) -> 1.0
+ if (Op1C->isExactlyValue(1.0))
return Op1C;
- if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x)
+ // pow(2.0, x) -> exp2(x)
+ if (Op1C->isExactlyValue(2.0) &&
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
+ LibFunc::exp2l))
return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
}
@@ -1145,7 +1170,11 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
return ConstantFP::get(CI->getType(), 1.0);
- if (Op2C->isExactlyValue(0.5)) {
+ if (Op2C->isExactlyValue(0.5) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf,
+ LibFunc::sqrtl) &&
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf,
+ LibFunc::fabsl)) {
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
// and negative infinity correctly.
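    // In C terms the expansion is roughly (illustrative, not the emitted IR):
    //   double r = fabs(sqrt(x));
    //   return (x == -INFINITY) ? INFINITY : r;
    // sqrt(-0.0) is -0.0 and fabs() corrects the sign, while the explicit
    // check preserves pow(-inf, 0.5) == +inf, which a bare sqrt would turn
    // into NaN. The TLI queries above guard the sqrt/fabs calls this emits.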
@@ -1178,7 +1207,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
Value *Ret = NULL;
if (UnsafeFPShrink && Callee->getName() == "exp2" &&
- TLI->has(LibFunc::exp2)) {
+ TLI->has(LibFunc::exp2f)) {
UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B);
}
@@ -1229,6 +1258,155 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
}
};
+struct SinCosPiOpt : public LibCallOptimization {
+ SinCosPiOpt() {}
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Make sure the prototype is as expected, otherwise the rest of the
+ // function is probably invalid and likely to abort.
+ if (!isTrigLibCall(CI))
+ return 0;
+
+ Value *Arg = CI->getArgOperand(0);
+ SmallVector<CallInst *, 1> SinCalls;
+ SmallVector<CallInst *, 1> CosCalls;
+ SmallVector<CallInst *, 1> SinCosCalls;
+
+ bool IsFloat = Arg->getType()->isFloatTy();
+
+ // Look for all compatible sinpi, cospi and sincospi calls with the same
+ // argument. If there are enough (in some sense) we can make the
+ // substitution.
+ for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI)
+ classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls,
+ SinCosCalls);
+
+ // It's only worthwhile if both sinpi and cospi are actually used.
+ if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
+ return 0;
+
+ Value *Sin, *Cos, *SinCos;
+ insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
+ SinCos);
+
+ replaceTrigInsts(SinCalls, Sin);
+ replaceTrigInsts(CosCalls, Cos);
+ replaceTrigInsts(SinCosCalls, SinCos);
+
+ return 0;
+ }
+
+ bool isTrigLibCall(CallInst *CI) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+
+ // We can only hope to do anything useful if we can ignore things like errno
+ // and floating-point exceptions.
+ bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) &&
+ CI->hasFnAttr(Attribute::ReadNone);
+
+ // Other than that we need float(float) or double(double)
+ return AttributesSafe && FT->getNumParams() == 1 &&
+ FT->getReturnType() == FT->getParamType(0) &&
+ (FT->getParamType(0)->isFloatTy() ||
+ FT->getParamType(0)->isDoubleTy());
+ }
+
+ void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat,
+ SmallVectorImpl<CallInst *> &SinCalls,
+ SmallVectorImpl<CallInst *> &CosCalls,
+ SmallVectorImpl<CallInst *> &SinCosCalls) {
+ CallInst *CI = dyn_cast<CallInst>(Val);
+
+ if (!CI)
+ return;
+
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+ LibFunc::Func Func;
+ if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) ||
+ !isTrigLibCall(CI))
+ return;
+
+ if (IsFloat) {
+ if (Func == LibFunc::sinpif)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc::cospif)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc::sincospi_stretf)
+ SinCosCalls.push_back(CI);
+ } else {
+ if (Func == LibFunc::sinpi)
+ SinCalls.push_back(CI);
+ else if (Func == LibFunc::cospi)
+ CosCalls.push_back(CI);
+ else if (Func == LibFunc::sincospi_stret)
+ SinCosCalls.push_back(CI);
+ }
+ }
+
+ void replaceTrigInsts(SmallVectorImpl<CallInst*> &Calls, Value *Res) {
+ for (SmallVectorImpl<CallInst*>::iterator I = Calls.begin(),
+ E = Calls.end();
+ I != E; ++I) {
+ LCS->replaceAllUsesWith(*I, Res);
+ }
+ }
+
+ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
+ bool UseFloat, Value *&Sin, Value *&Cos,
+ Value *&SinCos) {
+ Type *ArgTy = Arg->getType();
+ Type *ResTy;
+ StringRef Name;
+
+ Triple T(OrigCallee->getParent()->getTargetTriple());
+ if (UseFloat) {
+ Name = "__sincospi_stretf";
+
+ assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now");
+ // x86_64 can't use {float, float} since that would be returned in both
+ // xmm0 and xmm1, which isn't what a real struct would do.
+ ResTy = T.getArch() == Triple::x86_64
+ ? static_cast<Type *>(VectorType::get(ArgTy, 2))
+ : static_cast<Type *>(StructType::get(ArgTy, ArgTy, NULL));
+ } else {
+ Name = "__sincospi_stret";
+ ResTy = StructType::get(ArgTy, ArgTy, NULL);
+ }
+
+ Module *M = OrigCallee->getParent();
+ Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
+ ResTy, ArgTy, NULL);
+
+ if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
+ // If the argument is an instruction, it must dominate all uses so put our
+ // sincos call there.
+ BasicBlock::iterator Loc = ArgInst;
+ B.SetInsertPoint(ArgInst->getParent(), ++Loc);
+ } else {
+ // Otherwise (e.g. for a constant) the beginning of the function is as
+ // good a place as any.
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock();
+ B.SetInsertPoint(&EntryBB, EntryBB.begin());
+ }
+
+ SinCos = B.CreateCall(Callee, Arg, "sincospi");
+
+ if (SinCos->getType()->isStructTy()) {
+ Sin = B.CreateExtractValue(SinCos, 0, "sinpi");
+ Cos = B.CreateExtractValue(SinCos, 1, "cospi");
+ } else {
+ Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0),
+ "sinpi");
+ Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1),
+ "cospi");
+ }
+ }
+
+};
+
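// Illustrative effect of SinCosPiOpt (sketch, assuming a target that
// provides __sincospi_stret, e.g. Darwin):
//   double s = sinpi(x);
//   double c = cospi(x);
// becomes a single call whose {sin, cos} aggregate feeds both uses:
//   sc = __sincospi_stret(x);  s = sc.sin;  c = sc.cos;
// The rewrite only fires when sinpi and cospi (or an existing sincospi)
// of the same argument are both present, as checked in callOptimizer.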
//===----------------------------------------------------------------------===//
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
@@ -1333,6 +1511,54 @@ struct ToAsciiOpt : public LibCallOptimization {
// Formatting and IO Library Call Optimizations
//===----------------------------------------------------------------------===//
+struct ErrorReportingOpt : public LibCallOptimization {
+ ErrorReportingOpt(int S = -1) : StreamArg(S) {}
+
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) {
+ // Error reporting calls should be cold, mark them as such.
+ // This applies even to non-builtin calls: it is only a hint and applies to
+ // functions that the frontend might not understand as builtins.
+
+ // This heuristic was suggested in:
+ // Improving Static Branch Prediction in a Compiler
+ // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu
+ // Proceedings of PACT'98, Oct. 1998, IEEE
+
+ if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI)) {
+ CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
+ }
+
+ return 0;
+ }
+
+protected:
+ bool isReportingError(Function *Callee, CallInst *CI) {
+ if (!ColdErrorCalls)
+ return false;
+
+ if (!Callee || !Callee->isDeclaration())
+ return false;
+
+ if (StreamArg < 0)
+ return true;
+
+ // These functions might be considered cold, but only if their stream
+ // argument is stderr.
+
+ if (StreamArg >= (int) CI->getNumArgOperands())
+ return false;
+ LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
+ if (!LI)
+ return false;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
+ if (!GV || !GV->isDeclaration())
+ return false;
+ return GV->getName() == "stderr";
+ }
+
+ int StreamArg;
+};
+
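// Illustrative effect (sketch): a call such as
//   fprintf(stderr, "fatal: %s\n", msg);
// gains the 'cold' attribute because its stream operand is a load of the
// 'stderr' global, nudging block placement and inlining away from the
// error path; calls writing to any other stream are left untouched.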
struct PrintFOpt : public LibCallOptimization {
Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
@@ -1361,7 +1587,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("foo\n") --> puts("foo")
if (FormatStr[FormatStr.size()-1] == '\n' &&
- FormatStr.find('%') == std::string::npos) { // no format characters.
+ FormatStr.find('%') == StringRef::npos) { // No format characters.
// Create a string literal with no \n on it. We expect the constant merge
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
@@ -1513,6 +1739,9 @@ struct SPrintFOpt : public LibCallOptimization {
struct FPrintFOpt : public LibCallOptimization {
Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
+ ErrorReportingOpt ER(/* StreamArg = */ 0);
+ (void) ER.callOptimizer(Callee, CI, B);
+
// All the optimizations depend on the format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
@@ -1590,6 +1819,9 @@ struct FPrintFOpt : public LibCallOptimization {
struct FWriteOpt : public LibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ ErrorReportingOpt ER(/* StreamArg = */ 3);
+ (void) ER.callOptimizer(Callee, CI, B);
+
// Require a pointer, an integer, an integer, a pointer, returning integer.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
@@ -1623,6 +1855,9 @@ struct FWriteOpt : public LibCallOptimization {
struct FPutsOpt : public LibCallOptimization {
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ ErrorReportingOpt ER(/* StreamArg = */ 1);
+ (void) ER.callOptimizer(Callee, CI, B);
+
// These optimizations require DataLayout.
if (!TD) return 0;
@@ -1741,6 +1976,7 @@ static MemSetOpt MemSet;
// Math library call optimizations.
static UnaryDoubleFPOpt UnaryDoubleFP(false);
static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+static SinCosPiOpt SinCosPi;
// Integer library call optimizations.
static FFSOpt FFS;
@@ -1750,6 +1986,9 @@ static IsAsciiOpt IsAscii;
static ToAsciiOpt ToAscii;
// Formatting and IO library call optimizations.
+static ErrorReportingOpt ErrorReporting;
+static ErrorReportingOpt ErrorReporting0(0);
+static ErrorReportingOpt ErrorReporting1(1);
static PrintFOpt PrintF;
static SPrintFOpt SPrintF;
static FPrintFOpt FPrintF;
@@ -1825,6 +2064,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case LibFunc::cos:
case LibFunc::cosl:
return &Cos;
+ case LibFunc::sinpif:
+ case LibFunc::sinpi:
+ case LibFunc::cospif:
+ case LibFunc::cospi:
+ return &SinCosPi;
case LibFunc::powf:
case LibFunc::pow:
case LibFunc::powl:
@@ -1859,6 +2103,13 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
return &FPuts;
case LibFunc::puts:
return &Puts;
+ case LibFunc::perror:
+ return &ErrorReporting;
+ case LibFunc::vfprintf:
+ case LibFunc::fiprintf:
+ return &ErrorReporting0;
+ case LibFunc::fputc:
+ return &ErrorReporting1;
case LibFunc::ceil:
case LibFunc::fabs:
case LibFunc::floor:
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp
index b98cb5b..2ef692c 100644
--- a/lib/Transforms/Utils/SpecialCaseList.cpp
+++ b/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -49,29 +49,45 @@ struct SpecialCaseList::Entry {
}
};
-SpecialCaseList::SpecialCaseList(const StringRef Path) {
- // Validate and open blacklist file.
- if (Path.empty()) return;
+SpecialCaseList::SpecialCaseList() : Entries() {}
+
+SpecialCaseList *SpecialCaseList::create(
+ const StringRef Path, std::string &Error) {
+ if (Path.empty())
+ return new SpecialCaseList();
OwningPtr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(Path, File)) {
- report_fatal_error("Can't open blacklist file: " + Path + ": " +
- EC.message());
+ Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
+ return 0;
}
+ return create(File.get(), Error);
+}
- init(File.get());
+SpecialCaseList *SpecialCaseList::create(
+ const MemoryBuffer *MB, std::string &Error) {
+ OwningPtr<SpecialCaseList> SCL(new SpecialCaseList());
+ if (!SCL->parse(MB, Error))
+ return 0;
+ return SCL.take();
}
-SpecialCaseList::SpecialCaseList(const MemoryBuffer *MB) {
- init(MB);
+SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) {
+ std::string Error;
+ if (SpecialCaseList *SCL = create(Path, Error))
+ return SCL;
+ report_fatal_error(Error);
}
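// Illustrative usage of the new factory interface (sketch):
//   std::string Err;
//   if (SpecialCaseList *SCL = SpecialCaseList::create(Path, Err))
//     /* use SCL; the caller owns it */;
//   else
//     /* Err carries the open/parse diagnostic */;
// createOrDie() keeps the old fatal-error behaviour for callers with no
// recovery path.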
-void SpecialCaseList::init(const MemoryBuffer *MB) {
+bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
// Iterate through each line in the blacklist file.
SmallVector<StringRef, 16> Lines;
SplitString(MB->getBuffer(), Lines, "\n\r");
StringMap<StringMap<std::string> > Regexps;
+ assert(Entries.empty() &&
+ "parse() should be called on an empty SpecialCaseList");
+ int LineNo = 1;
for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
- I != E; ++I) {
+ I != E; ++I, ++LineNo) {
// Ignore empty lines and lines starting with "#"
if (I->empty() || I->startswith("#"))
continue;
@@ -80,7 +96,9 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
StringRef Prefix = SplitLine.first;
if (SplitLine.second.empty()) {
// Missing ':' in the line.
- report_fatal_error("malformed blacklist line: " + SplitLine.first);
+ Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" +
+ SplitLine.first + "'").str();
+ return false;
}
std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
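  // Illustrative input accepted by this parser (sketch): each line is
  // "section:regexp", optionally suffixed with "=category", e.g.
  //   fun:.*_unsafe_fn.*
  //   src:third_party/.*
  //   global:g_counter=init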
@@ -113,10 +131,11 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
// Check that the regexp is valid.
Regex CheckRE(Regexp);
- std::string Error;
- if (!CheckRE.isValid(Error)) {
- report_fatal_error("malformed blacklist regex: " + SplitLine.second +
- ": " + Error);
+ std::string REError;
+ if (!CheckRE.isValid(REError)) {
+ Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" +
+ SplitLine.second + "': " + REError).str();
+ return false;
}
// Add this regexp into the proper group by its prefix.
@@ -135,6 +154,7 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
}
}
+ return true;
}
SpecialCaseList::~SpecialCaseList() {
@@ -149,18 +169,12 @@ SpecialCaseList::~SpecialCaseList() {
}
}
-bool SpecialCaseList::findCategory(const Function &F,
- StringRef &Category) const {
- return findCategory(*F.getParent(), Category) ||
- findCategory("fun", F.getName(), Category);
-}
-
bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const {
return isIn(*F.getParent(), Category) ||
inSectionCategory("fun", F.getName(), Category);
}
-static StringRef GetGVTypeString(const GlobalVariable &G) {
+static StringRef GetGlobalTypeString(const GlobalValue &G) {
// Types of GlobalVariables are always pointer types.
Type *GType = G.getType()->getElementType();
// For now we support blacklisting struct types only.
@@ -171,46 +185,29 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
return "<unknown type>";
}
-bool SpecialCaseList::findCategory(const GlobalVariable &G,
- StringRef &Category) const {
- return findCategory(*G.getParent(), Category) ||
- findCategory("global", G.getName(), Category) ||
- findCategory("type", GetGVTypeString(G), Category);
-}
-
bool SpecialCaseList::isIn(const GlobalVariable &G,
const StringRef Category) const {
return isIn(*G.getParent(), Category) ||
inSectionCategory("global", G.getName(), Category) ||
- inSectionCategory("type", GetGVTypeString(G), Category);
+ inSectionCategory("type", GetGlobalTypeString(G), Category);
}
-bool SpecialCaseList::findCategory(const Module &M, StringRef &Category) const {
- return findCategory("src", M.getModuleIdentifier(), Category);
+bool SpecialCaseList::isIn(const GlobalAlias &GA,
+ const StringRef Category) const {
+ if (isIn(*GA.getParent(), Category))
+ return true;
+
+ if (isa<FunctionType>(GA.getType()->getElementType()))
+ return inSectionCategory("fun", GA.getName(), Category);
+
+ return inSectionCategory("global", GA.getName(), Category) ||
+ inSectionCategory("type", GetGlobalTypeString(GA), Category);
}
bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
return inSectionCategory("src", M.getModuleIdentifier(), Category);
}
-bool SpecialCaseList::findCategory(const StringRef Section,
- const StringRef Query,
- StringRef &Category) const {
- StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
- if (I == Entries.end()) return false;
-
- for (StringMap<Entry>::const_iterator II = I->second.begin(),
- IE = I->second.end();
- II != IE; ++II) {
- if (II->getValue().match(Query)) {
- Category = II->first();
- return true;
- }
- }
-
- return false;
-}
-
bool SpecialCaseList::inSectionCategory(const StringRef Section,
const StringRef Query,
const StringRef Category) const {
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index cbc1d63..c5e1dcb 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -533,7 +533,7 @@ namespace {
default: break;
case Instruction::GetElementPtr:
// We mark this instruction as zero-cost because scalar GEPs are usually
- // lowered to the intruction addressing mode. At the moment we don't
+ // lowered to the instruction addressing mode. At the moment we don't
// generate vector GEPs.
return 0;
case Instruction::Br:
@@ -625,10 +625,10 @@ namespace {
ConstantInt *IntOff = ConstOffSCEV->getValue();
int64_t Offset = IntOff->getSExtValue();
- Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
+ Type *VTy = IPtr->getType()->getPointerElementType();
int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
- Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *VTy2 = JPtr->getType()->getPointerElementType();
if (VTy != VTy2 && Offset < 0) {
int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2);
OffsetInElmts = Offset/VTy2TSS;
@@ -1182,6 +1182,8 @@ namespace {
// Look for an instruction with which to pair instruction *I...
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
bool JAfterStart = IAfterStart;
BasicBlock::iterator J = llvm::next(I);
for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
@@ -1403,6 +1405,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) {
(void) trackUsesOfI(Users, WriteSet, I, J);
@@ -2227,11 +2231,12 @@ namespace {
// The pointer value is taken to be the one with the lowest offset.
Value *VPtr = IPtr;
- Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType();
- Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *ArgTypeI = IPtr->getType()->getPointerElementType();
+ Type *ArgTypeJ = JPtr->getType()->getPointerElementType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- Type *VArgPtrType = PointerType::get(VArgType,
- cast<PointerType>(IPtr->getType())->getAddressSpace());
+ Type *VArgPtrType
+ = PointerType::get(VArgType,
+ IPtr->getType()->getPointerAddressSpace());
return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
/* insert before */ I);
}
@@ -2240,7 +2245,7 @@ namespace {
unsigned MaskOffset, unsigned NumInElem,
unsigned NumInElem1, unsigned IdxOffset,
std::vector<Constant*> &Mask) {
- unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements();
+ unsigned NumElem1 = J->getType()->getVectorNumElements();
for (unsigned v = 0; v < NumElem1; ++v) {
int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
if (m < 0) {
@@ -2267,18 +2272,18 @@ namespace {
Type *ArgTypeJ = J->getType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements();
+ unsigned NumElemI = ArgTypeI->getVectorNumElements();
// Get the total number of elements in the fused vector type.
// By definition, this must equal the number of elements in
// the final mask.
- unsigned NumElem = cast<VectorType>(VArgType)->getNumElements();
+ unsigned NumElem = VArgType->getVectorNumElements();
std::vector<Constant*> Mask(NumElem);
Type *OpTypeI = I->getOperand(0)->getType();
- unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements();
+ unsigned NumInElemI = OpTypeI->getVectorNumElements();
Type *OpTypeJ = J->getOperand(0)->getType();
- unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements();
+ unsigned NumInElemJ = OpTypeJ->getVectorNumElements();
// The fused vector will be:
// -----------------------------------------------------
@@ -2340,6 +2345,12 @@ namespace {
return ExpandedIEChain;
}
+ static unsigned getNumScalarElements(Type *Ty) {
+ if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+ return VecTy->getNumElements();
+ return 1;
+ }
+
// Returns the value to be used as the specified operand of the vector
// instruction that fuses I with J.
Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
@@ -2355,17 +2366,8 @@ namespace {
Instruction *L = I, *H = J;
Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
- unsigned numElemL;
- if (ArgTypeL->isVectorTy())
- numElemL = cast<VectorType>(ArgTypeL)->getNumElements();
- else
- numElemL = 1;
-
- unsigned numElemH;
- if (ArgTypeH->isVectorTy())
- numElemH = cast<VectorType>(ArgTypeH)->getNumElements();
- else
- numElemH = 1;
+ unsigned numElemL = getNumScalarElements(ArgTypeL);
+ unsigned numElemH = getNumScalarElements(ArgTypeH);
Value *LOp = L->getOperand(o);
Value *HOp = H->getOperand(o);
@@ -2426,11 +2428,12 @@ namespace {
if (CanUseInputs) {
unsigned LOpElem =
- cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType())
- ->getNumElements();
+ cast<Instruction>(LOp)->getOperand(0)->getType()
+ ->getVectorNumElements();
+
unsigned HOpElem =
- cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType())
- ->getNumElements();
+ cast<Instruction>(HOp)->getOperand(0)->getType()
+ ->getVectorNumElements();
// We have one or two input vectors. We need to map each index of the
// operands to the index of the original vector.
@@ -2646,14 +2649,14 @@ namespace {
getReplacementName(IBeforeJ ? I : J,
true, o, 1));
}
-
+
NHOp->insertBefore(IBeforeJ ? J : I);
HOp = NHOp;
}
}
if (ArgType->isVectorTy()) {
- unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
+ unsigned numElem = VArgType->getVectorNumElements();
std::vector<Constant*> Mask(numElem);
for (unsigned v = 0; v < numElem; ++v) {
unsigned Idx = v;
@@ -2746,16 +2749,8 @@ namespace {
VectorType *VType = getVecTypeForPair(IType, JType);
unsigned numElem = VType->getNumElements();
- unsigned numElemI, numElemJ;
- if (IType->isVectorTy())
- numElemI = cast<VectorType>(IType)->getNumElements();
- else
- numElemI = 1;
-
- if (JType->isVectorTy())
- numElemJ = cast<VectorType>(JType)->getNumElements();
- else
- numElemJ = 1;
+ unsigned numElemI = getNumScalarElements(IType);
+ unsigned numElemJ = getNumScalarElements(JType);
if (IType->isVectorTy()) {
std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
@@ -2804,6 +2799,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (; cast<Instruction>(L) != J; ++L)
(void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
@@ -2824,6 +2821,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (; cast<Instruction>(L) != J;) {
if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
// Move this instruction
@@ -2853,6 +2852,7 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
// Note: We cannot end the loop when we reach J because J could be moved
// farther down the use chain by another instruction pairing. Also, J
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a62fedc..5e75871 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -48,6 +48,7 @@
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -126,6 +127,9 @@ static const unsigned MaxVectorWidth = 64;
/// Maximum vectorization unroll count.
static const unsigned MaxUnrollFactor = 16;
+/// The cost of a loop that is considered 'small' by the unroller.
+static const unsigned SmallLoopCost = 20;
+
namespace {
// Forward declarations.
@@ -167,7 +171,9 @@ public:
updateAnalysis();
}
-private:
+ virtual ~InnerLoopVectorizer() {}
+
+protected:
/// A small list of PHINodes.
typedef SmallVector<PHINode*, 4> PhiVector;
/// When we unroll loops we have multiple vector values for each scalar.
@@ -187,7 +193,13 @@ private:
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
- void vectorizeLoop(LoopVectorizationLegality *Legal);
+ virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
+
+ /// \brief The Loop exit block may have single value PHI nodes where the
+ /// incoming value is 'Undef'. While vectorizing we only handled real values
+ /// that were defined inside the loop. Here we fix the 'undef case'.
+ /// See PR14725.
+ void fixLCSSAPHIs();
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
@@ -201,16 +213,23 @@ private:
void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
PhiVector *PV);
+ /// Vectorize a single PHINode in a block. This method handles the induction
+ /// variable canonicalization. It supports both VF = 1 for unrolled loops and
+ /// arbitrary length vectors.
+ void widenPHIInstruction(Instruction *PN, VectorParts &Entry,
+ LoopVectorizationLegality *Legal,
+ unsigned UF, unsigned VF, PhiVector *PV);
+
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
void updateAnalysis();
/// This instruction is un-vectorizable. Implement it as a sequence
/// of scalars.
- void scalarizeInstruction(Instruction *Instr);
+ virtual void scalarizeInstruction(Instruction *Instr);
/// Vectorize Load and Store instructions,
- void vectorizeMemoryInstruction(Instruction *Instr,
+ virtual void vectorizeMemoryInstruction(Instruction *Instr,
LoopVectorizationLegality *Legal);
/// Create a broadcast instruction. This method generates a broadcast
@@ -218,12 +237,12 @@ private:
/// value. If this is the induction variable then we extend it to N, N+1, ...
/// this is needed because each iteration in the loop corresponds to a SIMD
/// element.
- Value *getBroadcastInstrs(Value *V);
+ virtual Value *getBroadcastInstrs(Value *V);
/// This function adds 0, 1, 2 ... to each vector element, starting at zero.
/// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
/// The sequence starts at StartIndex.
- Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
+ virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
@@ -233,7 +252,7 @@ private:
VectorParts &getVectorValue(Value *V);
/// Generate a shuffle sequence that will reverse the vector Vec.
- Value *reverseVector(Value *Vec);
+ virtual Value *reverseVector(Value *Vec);
/// This is a helper class that holds the vectorizer state. It maps scalar
/// instructions to vector instructions. When the code is 'unrolled' then
@@ -291,6 +310,8 @@ private:
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
unsigned VF;
+
+protected:
/// The vectorization unroll factor to use. Each scalar is vectorized to this
/// many different vector instructions.
unsigned UF;
@@ -326,6 +347,22 @@ private:
EdgeMaskCache MaskCache;
};
+class InnerLoopUnroller : public InnerLoopVectorizer {
+public:
+ InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, DataLayout *DL,
+ const TargetLibraryInfo *TLI, unsigned UnrollFactor) :
+ InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
+
+private:
+ virtual void scalarizeInstruction(Instruction *Instr);
+ virtual void vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality *Legal);
+ virtual Value *getBroadcastInstrs(Value *V);
+ virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
+ virtual Value *reverseVector(Value *Vec);
+};
+
/// \brief Look for a meaningful debug location on the instruction or its
/// operands.
static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
@@ -409,7 +446,7 @@ public:
MRK_FloatMax
};
- /// This POD struct holds information about reduction variables.
+ /// This struct holds information about reduction variables.
struct ReductionDescriptor {
ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
@@ -446,8 +483,8 @@ public:
MinMaxReductionKind MinMaxKind;
};
- // This POD struct holds information about the memory runtime legality
- // check that a group of pointers do not overlap.
+ /// This struct holds information about the memory runtime legality
+ /// check that a group of pointers do not overlap.
struct RuntimePointerCheck {
RuntimePointerCheck() : Need(false) {}
@@ -457,6 +494,8 @@ public:
Pointers.clear();
Starts.clear();
Ends.clear();
+ IsWritePtr.clear();
+ DependencySetId.clear();
}
/// Insert a pointer and calculate the start and end SCEVs.
@@ -478,7 +517,7 @@ public:
SmallVector<unsigned, 2> DependencySetId;
};
- /// A POD for saving information about induction variables.
+ /// A struct for saving information about induction variables.
struct InductionInfo {
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
@@ -725,9 +764,9 @@ struct LoopVectorizeHints {
/// Vectorization unroll factor.
unsigned Unroll;
- LoopVectorizeHints(const Loop *L)
+ LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
: Width(VectorizationFactor)
- , Unroll(VectorizationUnroll)
+ , Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
, LoopID(L->getLoopID()) {
getHints(L);
// The command line options override any loop metadata except for when
@@ -736,6 +775,9 @@ struct LoopVectorizeHints {
Width = VectorizationFactor;
if (VectorizationUnroll.getNumOccurrences() > 0)
Unroll = VectorizationUnroll;
+
+ DEBUG(if (DisableUnrolling && Unroll == 1)
+ dbgs() << "LV: Unrolling disabled by the pass manager\n");
}
/// Return the loop vectorizer metadata prefix.
@@ -762,6 +804,7 @@ struct LoopVectorizeHints {
Vals.push_back(LoopID->getOperand(i));
Vals.push_back(createHint(Context, Twine(Prefix(), "width").str(), Width));
+ Vals.push_back(createHint(Context, Twine(Prefix(), "unroll").str(), 1));
MDNode *NewLoopID = MDNode::get(Context, Vals);
// Set operand 0 to refer to the loop id itself.
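  // Illustrative result (sketch; the exact string prefix comes from
  // Prefix(), and operand 0 is the self-reference set below):
  //   !0 = metadata !{metadata !0, ...existing operands...,
  //                   metadata !{metadata !"<prefix>width",  i32 Width},
  //                   metadata !{metadata !"<prefix>unroll", i32 1}}
  // Recording unroll = 1 marks the loop so it is not transformed again.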
@@ -825,15 +868,18 @@ private:
unsigned Val = C->getZExtValue();
if (Hint == "width") {
- assert(isPowerOf2_32(Val) && Val <= MaxVectorWidth &&
- "Invalid width metadata");
- Width = Val;
+ if (isPowerOf2_32(Val) && Val <= MaxVectorWidth)
+ Width = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid width hint metadata\n");
} else if (Hint == "unroll") {
- assert(isPowerOf2_32(Val) && Val <= MaxUnrollFactor &&
- "Invalid unroll metadata");
- Unroll = Val;
- } else
- DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint);
+ if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor)
+ Unroll = Val;
+ else
+ DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
+ } else {
+ DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
+ }
}
};
@@ -842,7 +888,8 @@ struct LoopVectorize : public LoopPass {
/// Pass identification, replacement for typeid
static char ID;
- explicit LoopVectorize() : LoopPass(ID) {
+ explicit LoopVectorize(bool NoUnrolling = false)
+ : LoopPass(ID), DisableUnrolling(NoUnrolling) {
initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
}
@@ -852,6 +899,7 @@ struct LoopVectorize : public LoopPass {
TargetTransformInfo *TTI;
DominatorTree *DT;
TargetLibraryInfo *TLI;
+ bool DisableUnrolling;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -865,17 +913,22 @@ struct LoopVectorize : public LoopPass {
DT = &getAnalysis<DominatorTree>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ // If the target claims to have no vector registers don't attempt
+ // vectorization.
+ if (!TTI->getNumberOfRegisters(true))
+ return false;
+
if (DL == NULL) {
- DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout");
+ DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout\n");
return false;
}
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
- LoopVectorizeHints Hints(L);
+ LoopVectorizeHints Hints(L, DisableUnrolling);
- if (Hints.Width == 1) {
+ if (Hints.Width == 1 && Hints.Unroll == 1) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
@@ -912,19 +965,23 @@ struct LoopVectorize : public LoopPass {
unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
VF.Cost);
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
+ F->getParent()->getModuleIdentifier() << '\n');
+ DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
+
if (VF.Width == 1) {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
- return false;
+ if (UF == 1)
+ return false;
+ // We decided not to vectorize, but we may want to unroll.
+ InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
+ Unroller.vectorize(&LVL);
+ } else {
+ // If we decided that it is *legal* to vectorize the loop then do it.
+ InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
+ LB.vectorize(&LVL);
}
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
- F->getParent()->getModuleIdentifier()<<"\n");
- DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n");
-
- // If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
- LB.vectorize(&LVL);
-
// Mark the loop as already vectorized to avoid vectorizing again.
Hints.setAlreadyVectorized(L);
@@ -971,25 +1028,19 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
- // Save the current insertion location.
- Instruction *Loc = Builder.GetInsertPoint();
-
// We need to place the broadcast of invariant variables outside the loop.
Instruction *Instr = dyn_cast<Instruction>(V);
bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
// Place the code for broadcasting invariant variables in the new preheader.
+ IRBuilder<>::InsertPointGuard Guard(Builder);
if (Invariant)
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
// Broadcast the scalar into all locations in the vector.
Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
- // Restore the builder insertion point.
- if (Invariant)
- Builder.SetInsertPoint(Loc);
-
return Shuf;
}
@@ -1016,10 +1067,35 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
return Builder.CreateAdd(Val, Cv, "induction");
}
+/// \brief Find the operand of the GEP that should be checked for consecutive
+/// stores. This ignores trailing indices that have no effect on the final
+/// pointer.
+static unsigned getGEPInductionOperand(DataLayout *DL,
+ const GetElementPtrInst *Gep) {
+ unsigned LastOperand = Gep->getNumOperands() - 1;
+ unsigned GEPAllocSize = DL->getTypeAllocSize(
+ cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
+
+ // Walk backwards and try to peel off zeros.
+ while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
+ // Find the type we're currently indexing into.
+ gep_type_iterator GEPTI = gep_type_begin(Gep);
+ std::advance(GEPTI, LastOperand - 1);
+
+ // If it's a type with the same allocation size as the result of the GEP we
+ // can peel off the zero index.
+ if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ break;
+ --LastOperand;
+ }
+
+ return LastOperand;
+}
+
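// Example (illustrative): in
//   %a = getelementptr inbounds [1 x i32]* %base, i64 %i, i64 0
// the trailing zero indexes a [1 x i32] whose alloc size equals that of the
// i32 result, so it does not affect the address; the operand to analyse for
// consecutiveness is %i, not the last operand.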
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
// Make sure that the pointer does not point to structs.
- if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType())
+ if (Ptr->getType()->getPointerElementType()->isAggregateType())
return 0;
// If this value is a pointer induction variable we know it is consecutive.
@@ -1037,8 +1113,6 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return 0;
unsigned NumOperands = Gep->getNumOperands();
- Value *LastIndex = Gep->getOperand(NumOperands - 1);
-
Value *GpPtr = Gep->getPointerOperand();
// If this GEP value is a consecutive pointer induction variable and all of
// the indices are constant then we know it is consecutive. We can
@@ -1062,14 +1136,18 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return -1;
}
- // Check that all of the gep indices are uniform except for the last.
- for (unsigned i = 0; i < NumOperands - 1; ++i)
- if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+
+ // Check that all of the gep indices are uniform except for our induction
+ // operand.
+ for (unsigned i = 0; i != NumOperands; ++i)
+ if (i != InductionOperand &&
+ !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
- // We can emit wide load/stores only if the last index is the induction
- // variable.
- const SCEV *Last = SE->getSCEV(LastIndex);
+ // We can emit wide load/stores only if the last non-zero index is the
+ // induction variable.
+ const SCEV *Last = SE->getSCEV(Gep->getOperand(InductionOperand));
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
const SCEV *Step = AR->getStepRecurrence(*SE);
@@ -1127,6 +1205,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+ // An alignment of 0 means target abi alignment. We need to use the scalar's
+ // target abi alignment in such a case.
+ if (!Alignment)
+ Alignment = DL->getABITypeAlignment(ScalarDataTy);
unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
@@ -1166,7 +1248,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
- unsigned LastOperand = NumOperands - 1;
+ unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
@@ -1175,9 +1257,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
// Update last index or loop invariant instruction anchored in loop.
- if (i == LastOperand ||
+ if (i == InductionOperand ||
(GepOperandInst && OrigLoop->contains(GepOperandInst))) {
- assert((i == LastOperand ||
+ assert((i == InductionOperand ||
SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) &&
"Must be last index or loop invariant");
@@ -1301,7 +1383,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
- // Replace the operands of the cloned instrucions with extracted scalars.
+ // Replace the operands of the cloned instructions with extracted scalars.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
Value *Op = Params[op][Part];
// Param is a vector. Need to extract the right lane.
@@ -1335,11 +1417,9 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
SmallVector<TrackingVH<Value> , 2> Starts;
SmallVector<TrackingVH<Value> , 2> Ends;
+ LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, "induction");
- // Use this type for pointer arithmetic.
- Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0);
-
for (unsigned i = 0; i < NumPointers; ++i) {
Value *Ptr = PtrRtCheck->Pointers[i];
const SCEV *Sc = SE->getSCEV(Ptr);
@@ -1350,7 +1430,11 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
- DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
+ DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
@@ -1372,10 +1456,20 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
continue;
- Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy, "bc");
+ unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
+ unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
+
+ assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
+ (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
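      // In effect the emitted test for each pair is (illustrative):
      //   conflict = (Start0 <= End1) && (Start1 <= End0)
      // i.e. the two accessed ranges intersect; the bitcasts above merely
      // reconcile the address spaces of the pointers being compared.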
@@ -1390,9 +1484,8 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
// We have to do this trickery because the IRBuilder might fold the check to a
// constant expression in which case there is no Instruction anchored in a
// the block.
- LLVMContext &Ctx = Loc->getContext();
- Instruction * Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
- ConstantInt::getTrue(Ctx));
+ Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
+ ConstantInt::getTrue(Ctx));
ChkBuilder.Insert(Check, "memcheck.conflict");
return Check;
}
@@ -1444,6 +1537,16 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
+ // The exit count might have the type of i64 while the phi is i32. This can
+ // happen if we have an induction variable that is sign extended before the
+ // compare. The only way that we get a backedge taken count is that the
+ // induction variable was signed and as such will not overflow. In such a case
+ // truncation is legal.
+ if (ExitCount->getType()->getPrimitiveSizeInBits() >
+ IdxTy->getPrimitiveSizeInBits())
+ ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);
+
+ ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
// Get the total trip count from the count by adding 1.
ExitCount = SE->getAddExpr(ExitCount,
SE->getConstant(ExitCount->getType(), 1));
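  // Illustrative source pattern (sketch): an i32 induction variable whose
  // exit compare sign-extends it first,
  //   for (int i = 0; (long)i < n; ++i) { ... }
  // gives SCEV an i64 backedge-taken count; the IV cannot overflow, so
  // truncating the count back to the i32 induction type is lossless.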
@@ -1496,7 +1599,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
- Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+ Builder.SetInsertPoint(VecBody->getFirstNonPHI());
// Generate the induction variable.
setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
@@ -1724,6 +1827,9 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
LoopExitBlock = ExitBlock;
LoopVectorBody = VecBody;
LoopScalarBody = OldBasicBlock;
+
+ LoopVectorizeHints Hints(Lp, true);
+ Hints.setAlreadyVectorized(Lp);
}
/// This function returns the identity element (or neutral element) for
@@ -1753,6 +1859,31 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
}
}
+static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
+ Intrinsic::ID ValidIntrinsicID) {
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return Intrinsic::not_intrinsic;
+
+ return ValidIntrinsicID;
+}
+
+static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
+ Intrinsic::ID ValidIntrinsicID) {
+ if (I.getNumArgOperands() != 2 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ I.getType() != I.getArgOperand(1)->getType() ||
+ !I.onlyReadsMemory())
+ return Intrinsic::not_intrinsic;
+
+ return ValidIntrinsicID;
+}
+
+
static Intrinsic::ID
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
// If we have an intrinsic call, check if it is trivially vectorizable.
@@ -1767,11 +1898,13 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
+ case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
+ case Intrinsic::round:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
@@ -1789,8 +1922,9 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
LibFunc::Func Func;
Function *F = CI->getCalledFunction();
// We're going to make assumptions on the semantics of the functions, check
- // that the target knows that it's available in this environment.
- if (!F || !TLI->getLibFunc(F->getName(), Func))
+ // that the target knows that it's available in this environment and it does
+ // not have local linkage.
+ if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
return Intrinsic::not_intrinsic;
// Otherwise check if we have a call to a function that can be turned into a
@@ -1801,59 +1935,67 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
case LibFunc::sin:
case LibFunc::sinf:
case LibFunc::sinl:
- return Intrinsic::sin;
+ return checkUnaryFloatSignature(*CI, Intrinsic::sin);
case LibFunc::cos:
case LibFunc::cosf:
case LibFunc::cosl:
- return Intrinsic::cos;
+ return checkUnaryFloatSignature(*CI, Intrinsic::cos);
case LibFunc::exp:
case LibFunc::expf:
case LibFunc::expl:
- return Intrinsic::exp;
+ return checkUnaryFloatSignature(*CI, Intrinsic::exp);
case LibFunc::exp2:
case LibFunc::exp2f:
case LibFunc::exp2l:
- return Intrinsic::exp2;
+ return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
case LibFunc::log:
case LibFunc::logf:
case LibFunc::logl:
- return Intrinsic::log;
+ return checkUnaryFloatSignature(*CI, Intrinsic::log);
case LibFunc::log10:
case LibFunc::log10f:
case LibFunc::log10l:
- return Intrinsic::log10;
+ return checkUnaryFloatSignature(*CI, Intrinsic::log10);
case LibFunc::log2:
case LibFunc::log2f:
case LibFunc::log2l:
- return Intrinsic::log2;
+ return checkUnaryFloatSignature(*CI, Intrinsic::log2);
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
- return Intrinsic::fabs;
+ return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
case LibFunc::floor:
case LibFunc::floorf:
case LibFunc::floorl:
- return Intrinsic::floor;
+ return checkUnaryFloatSignature(*CI, Intrinsic::floor);
case LibFunc::ceil:
case LibFunc::ceilf:
case LibFunc::ceill:
- return Intrinsic::ceil;
+ return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
- return Intrinsic::trunc;
+ return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
case LibFunc::rint:
case LibFunc::rintf:
case LibFunc::rintl:
- return Intrinsic::rint;
+ return checkUnaryFloatSignature(*CI, Intrinsic::rint);
case LibFunc::nearbyint:
case LibFunc::nearbyintf:
case LibFunc::nearbyintl:
- return Intrinsic::nearbyint;
+ return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ return checkUnaryFloatSignature(*CI, Intrinsic::round);
case LibFunc::pow:
case LibFunc::powf:
case LibFunc::powl:
- return Intrinsic::pow;
+ return checkBinaryFloatSignature(*CI, Intrinsic::pow);
}
return Intrinsic::not_intrinsic;
@@ -1925,6 +2067,54 @@ Value *createMinMaxOp(IRBuilder<> &Builder,
return Select;
}
+namespace {
+struct CSEDenseMapInfo {
+ static bool canHandle(Instruction *I) {
+ return isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<GetElementPtrInst>(I);
+ }
+ static inline Instruction *getEmptyKey() {
+ return DenseMapInfo<Instruction *>::getEmptyKey();
+ }
+ static inline Instruction *getTombstoneKey() {
+ return DenseMapInfo<Instruction *>::getTombstoneKey();
+ }
+ static unsigned getHashValue(Instruction *I) {
+ assert(canHandle(I) && "Unknown instruction!");
+ return hash_combine(I->getOpcode(), hash_combine_range(I->value_op_begin(),
+ I->value_op_end()));
+ }
+ static bool isEqual(Instruction *LHS, Instruction *RHS) {
+ if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
+ LHS == getTombstoneKey() || RHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS);
+ }
+};
+}
+
+/// \brief Perform CSE of induction variable instructions.
+static void cse(BasicBlock *BB) {
+ // Perform simple cse.
+ SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *In = I++;
+
+ if (!CSEDenseMapInfo::canHandle(In))
+ continue;
+
+ // Check if we can replace this instruction with any of the
+ // visited instructions.
+ if (Instruction *V = CSEMap.lookup(In)) {
+ In->replaceAllUsesWith(V);
+ In->eraseFromParent();
+ continue;
+ }
+
+ CSEMap[In] = In;
+ }
+}
+
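// Illustrative example (sketch): after unrolling, the vector body often
// carries duplicate address computations, e.g.
//   %g1 = getelementptr i32* %p, i64 %idx
//   %g2 = getelementptr i32* %p, i64 %idx
// cse() forwards all uses of %g2 to %g1 and erases %g2; only the
// side-effect-free instruction kinds listed in canHandle() are considered.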
void
InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
@@ -1995,18 +2185,31 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
// MinMax reductions have the start value as their identity.
- VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
- "minmax.ident");
+ if (VF == 1) {
+ VectorStart = Identity = RdxDesc.StartValue;
+ } else {
+ VectorStart = Identity = Builder.CreateVectorSplat(VF,
+ RdxDesc.StartValue,
+ "minmax.ident");
+ }
} else {
+ // Handle other reduction kinds:
Constant *Iden =
- LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
- VecTy->getScalarType());
- Identity = ConstantVector::getSplat(VF, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+ VecTy->getScalarType());
+ if (VF == 1) {
+ Identity = Iden;
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = RdxDesc.StartValue;
+ } else {
+ Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+ }
}
// Fix the vector-loop phi.
@@ -2062,37 +2265,40 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
ReducedPartRdx, RdxParts[part]);
}
- // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
- // and vector ops, reducing the set of values being computed by half each
- // round.
- assert(isPowerOf2_32(VF) &&
- "Reduction emission only supported for pow2 vectors!");
- Value *TmpVec = ReducedPartRdx;
- SmallVector<Constant*, 32> ShuffleMask(VF, 0);
- for (unsigned i = VF; i != 1; i >>= 1) {
- // Move the upper half of the vector to the lower half.
- for (unsigned j = 0; j != i/2; ++j)
- ShuffleMask[j] = Builder.getInt32(i/2 + j);
-
- // Fill the rest of the mask with undef.
- std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
- UndefValue::get(Builder.getInt32Ty()));
-
- Value *Shuf =
+ if (VF > 1) {
+ // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+ // and vector ops, reducing the set of values being computed by half each
+ // round.
+ assert(isPowerOf2_32(VF) &&
+ "Reduction emission only supported for pow2 vectors!");
+ Value *TmpVec = ReducedPartRdx;
+ SmallVector<Constant*, 32> ShuffleMask(VF, 0);
+ for (unsigned i = VF; i != 1; i >>= 1) {
+ // Move the upper half of the vector to the lower half.
+ for (unsigned j = 0; j != i/2; ++j)
+ ShuffleMask[j] = Builder.getInt32(i/2 + j);
+
+ // Fill the rest of the mask with undef.
+ std::fill(&ShuffleMask[i/2], ShuffleMask.end(),
+ UndefValue::get(Builder.getInt32Ty()));
+
+ Value *Shuf =
Builder.CreateShuffleVector(TmpVec,
UndefValue::get(TmpVec->getType()),
ConstantVector::get(ShuffleMask),
"rdx.shuf");
- if (Op != Instruction::ICmp && Op != Instruction::FCmp)
- TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
- "bin.rdx");
- else
- TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
- }
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+ "bin.rdx");
+ else
+ TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
+ }
- // The result is in the first element of the vector.
- Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+ // The result is in the first element of the vector.
+ ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
+ Builder.getInt32(0));
+ }
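    // Illustrative rounds for VF = 4 with an add reduction (sketch,
    // masks abbreviated, 'u' = undef):
    //   %v   = <a, b, c, d>
    //   %s1  = shufflevector %v,  undef, <2, 3, u, u>  ; upper half down
    //   %v1  = add %v, %s1                             ; <a+c, b+d, u, u>
    //   %s2  = shufflevector %v1, undef, <1, u, u, u>
    //   %v2  = add %v1, %s2                            ; <a+b+c+d, u, u, u>
    //   %red = extractelement %v2, i32 0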
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
@@ -2101,7 +2307,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi) continue;
+ if (!LCSSAPhi) break;
// All PHINodes need to have a single entry edge, or two if
// we already fixed them.
@@ -2111,7 +2317,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// incoming bypass edge.
if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
// Add an edge coming from the bypass.
- LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
+ LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
break;
}
}// end of the LCSSA phi scan.
@@ -2123,23 +2329,26 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
- (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
+ (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
- // The Loop exit block may have single value PHI nodes where the incoming
- // value is 'undef'. While vectorizing we only handled real values that
- // were defined inside the loop. Here we handle the 'undef case'.
- // See PR14725.
+ fixLCSSAPHIs();
+
+ // Remove redundant induction instructions.
+ cse(LoopVectorBody);
+}
+
+void InnerLoopVectorizer::fixLCSSAPHIs() {
for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi) continue;
+ if (!LCSSAPhi) break;
if (LCSSAPhi->getNumIncomingValues() == 1)
LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()),
LoopMiddleBlock);
}
-}
+}
InnerLoopVectorizer::VectorParts
InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
@@ -2200,161 +2409,185 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
return BlockMask;
}
-void
-InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
- BasicBlock *BB, PhiVector *PV) {
- // For each instruction in the old loop.
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- VectorParts &Entry = WidenMap.get(it);
- switch (it->getOpcode()) {
- case Instruction::Br:
- // Nothing to do for PHIs and BR, since we already took care of the
- // loop control flow instructions.
- continue;
- case Instruction::PHI:{
- PHINode* P = cast<PHINode>(it);
- // Handle reduction variables:
- if (Legal->getReductionVars()->count(P)) {
- for (unsigned part = 0; part < UF; ++part) {
- // This is phase one of vectorizing PHIs.
- Type *VecTy = VectorType::get(it->getType(), VF);
- Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
- LoopVectorBody-> getFirstInsertionPt());
- }
- PV->push_back(P);
- continue;
- }
+void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
+ InnerLoopVectorizer::VectorParts &Entry,
+ LoopVectorizationLegality *Legal,
+ unsigned UF, unsigned VF, PhiVector *PV) {
+ PHINode* P = cast<PHINode>(PN);
+ // Handle reduction variables:
+ if (Legal->getReductionVars()->count(P)) {
+ for (unsigned part = 0; part < UF; ++part) {
+ // This is phase one of vectorizing PHIs.
+ Type *VecTy = (VF == 1) ? PN->getType() :
+ VectorType::get(PN->getType(), VF);
+ Entry[part] = PHINode::Create(VecTy, 2, "vec.phi",
+                                    LoopVectorBody->getFirstInsertionPt());
+ }
+ PV->push_back(P);
+ return;
+ }
- setDebugLocFromInst(Builder, P);
- // Check for PHI nodes that are lowered to vector selects.
- if (P->getParent() != OrigLoop->getHeader()) {
- // We know that all PHIs in non header blocks are converted into
- // selects, so we don't have to worry about the insertion order and we
- // can just use the builder.
- // At this point we generate the predication tree. There may be
- // duplications since this is a simple recursive scan, but future
- // optimizations will clean it up.
-
- unsigned NumIncoming = P->getNumIncomingValues();
-
- // Generate a sequence of selects of the form:
- // SELECT(Mask3, In3,
- // SELECT(Mask2, In2,
- // ( ...)))
- for (unsigned In = 0; In < NumIncoming; In++) {
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
- P->getParent());
- VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
-
- for (unsigned part = 0; part < UF; ++part) {
- // We might have single edge PHIs (blocks) - use an identity
- // 'select' for the first PHI operand.
- if (In == 0)
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
- In0[part]);
- else
- // Select between the current value and the previous incoming edge
- // based on the incoming mask.
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
- Entry[part], "predphi");
- }
- }
- continue;
+ setDebugLocFromInst(Builder, P);
+ // Check for PHI nodes that are lowered to vector selects.
+ if (P->getParent() != OrigLoop->getHeader()) {
+ // We know that all PHIs in non header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+
+ unsigned NumIncoming = P->getNumIncomingValues();
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // ( ...)))
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
+ P->getParent());
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+
+ for (unsigned part = 0; part < UF; ++part) {
+ // We might have single edge PHIs (blocks) - use an identity
+ // 'select' for the first PHI operand.
+ if (In == 0)
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ In0[part]);
+ else
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ Entry[part], "predphi");
}
+ }
+ return;
+ }
- // This PHINode must be an induction variable.
- // Make sure that we know about it.
- assert(Legal->getInductionVars()->count(P) &&
- "Not an induction variable");
-
- LoopVectorizationLegality::InductionInfo II =
- Legal->getInductionVars()->lookup(P);
-
- switch (II.IK) {
- case LoopVectorizationLegality::IK_NoInduction:
- llvm_unreachable("Unknown induction");
- case LoopVectorizationLegality::IK_IntInduction: {
- assert(P->getType() == II.StartValue->getType() && "Types must match");
- Type *PhiTy = P->getType();
- Value *Broadcasted;
- if (P == OldInduction) {
- // Handle the canonical induction variable. We might have had to
- // extend the type.
- Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
- } else {
- // Handle other induction variables that are now based on the
- // canonical one.
- Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
- "normalized.idx");
- NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
- Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
- "offset.idx");
- }
- Broadcasted = getBroadcastInstrs(Broadcasted);
- // After broadcasting the induction variable we need to make the vector
- // consecutive by adding 0, 1, 2, etc.
+ // This PHINode must be an induction variable.
+ // Make sure that we know about it.
+ assert(Legal->getInductionVars()->count(P) &&
+ "Not an induction variable");
+
+ LoopVectorizationLegality::InductionInfo II =
+ Legal->getInductionVars()->lookup(P);
+
+ switch (II.IK) {
+ case LoopVectorizationLegality::IK_NoInduction:
+ llvm_unreachable("Unknown induction");
+ case LoopVectorizationLegality::IK_IntInduction: {
+ assert(P->getType() == II.StartValue->getType() && "Types must match");
+ Type *PhiTy = P->getType();
+ Value *Broadcasted;
+ if (P == OldInduction) {
+ // Handle the canonical induction variable. We might have had to
+ // extend the type.
+ Broadcasted = Builder.CreateTrunc(Induction, PhiTy);
+ } else {
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
+ "normalized.idx");
+ NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
+ Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
+ "offset.idx");
+ }
+ Broadcasted = getBroadcastInstrs(Broadcasted);
+ // After broadcasting the induction variable we need to make the vector
+ // consecutive by adding 0, 1, 2, etc.
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
+ return;
+ }
+ case LoopVectorizationLegality::IK_ReverseIntInduction:
+ case LoopVectorizationLegality::IK_PtrInduction:
+ case LoopVectorizationLegality::IK_ReversePtrInduction:
+ // Handle reverse integer and pointer inductions.
+ Value *StartIdx = ExtendedIdx;
+ // This is the normalized GEP that starts counting at zero.
+ Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+ "normalized.idx");
+
+ // Handle the reverse integer induction variable case.
+ if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
+ IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
+ Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
+ "resize.norm.idx");
+ Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
+ "reverse.idx");
+
+ // This is a new value so do not hoist it out.
+ Value *Broadcasted = getBroadcastInstrs(ReverseInd);
+ // After broadcasting the induction variable we need to make the
+ // vector consecutive by adding ... -3, -2, -1, 0.
for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
- continue;
+ Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
+ true);
+ return;
}
- case LoopVectorizationLegality::IK_ReverseIntInduction:
- case LoopVectorizationLegality::IK_PtrInduction:
- case LoopVectorizationLegality::IK_ReversePtrInduction:
- // Handle reverse integer and pointer inductions.
- Value *StartIdx = ExtendedIdx;
- // This is the normalized GEP that starts counting at zero.
- Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
- "normalized.idx");
- // Handle the reverse integer induction variable case.
- if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
- IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
- Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
- "resize.norm.idx");
- Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
- "reverse.idx");
-
- // This is a new value so do not hoist it out.
- Value *Broadcasted = getBroadcastInstrs(ReverseInd);
- // After broadcasting the induction variable we need to make the
- // vector consecutive by adding ... -3, -2, -1, 0.
- for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
- true);
+ // Handle the pointer induction variable case.
+ assert(P->getType()->isPointerTy() && "Unexpected type.");
+
+ // Is this a reverse induction ptr or a consecutive induction ptr.
+ bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
+ II.IK);
+
+ // This is the vector of results. Notice that we don't generate
+ // vector geps because scalar geps result in better code.
+ for (unsigned part = 0; part < UF; ++part) {
+ if (VF == 1) {
+ int EltIndex = (part) * (Reverse ? -1 : 1);
+ Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Value *GlobalIdx;
+ if (Reverse)
+ GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+ else
+ GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+
+ Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
+ "next.gep");
+ Entry[part] = SclrGep;
continue;
}
- // Handle the pointer induction variable case.
- assert(P->getType()->isPointerTy() && "Unexpected type.");
-
- // Is this a reverse induction ptr or a consecutive induction ptr.
- bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
- II.IK);
-
- // This is the vector of results. Notice that we don't generate
- // vector geps because scalar geps result in better code.
- for (unsigned part = 0; part < UF; ++part) {
- Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
- for (unsigned int i = 0; i < VF; ++i) {
- int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
- Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
- Value *GlobalIdx;
- if (!Reverse)
- GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
- else
- GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
-
- Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
- "next.gep");
- VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
- Builder.getInt32(i),
- "insert.gep");
- }
- Entry[part] = VecVal;
+ Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+ for (unsigned int i = 0; i < VF; ++i) {
+ int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
+ Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Value *GlobalIdx;
+ if (!Reverse)
+ GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+ else
+ GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+
+ Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
+ "next.gep");
+ VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+ Builder.getInt32(i),
+ "insert.gep");
}
- continue;
+ Entry[part] = VecVal;
}
+ return;
+ }
+}
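
For PHIs outside the loop header, widenPHIInstruction above lowers the node to a chain of edge-mask selects. A scalar sketch of the resulting dataflow for a three-predecessor PHI, with hypothetical mask and value names:

    // The first incoming value uses an identity select; each later value
    // overwrites the running result where its edge mask is true ("predphi").
    int predPhi(bool M1, bool M2, int In0, int In1, int In2) {
      int R = In0;        // identity select for In == 0
      R = M1 ? In1 : R;
      R = M2 ? In2 : R;
      return R;
    }
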
+void
+InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
+ BasicBlock *BB, PhiVector *PV) {
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ VectorParts &Entry = WidenMap.get(it);
+ switch (it->getOpcode()) {
+ case Instruction::Br:
+ // Nothing to do for PHIs and BR, since we already took care of the
+ // loop control flow instructions.
+ continue;
+ case Instruction::PHI:{
+ // Vectorize PHINodes.
+ widenPHIInstruction(it, Entry, Legal, UF, VF, PV);
+ continue;
}// End of PHI.
case Instruction::Add:
@@ -2413,8 +2646,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
VectorParts &Cond = getVectorValue(it->getOperand(0));
VectorParts &Op0 = getVectorValue(it->getOperand(1));
VectorParts &Op1 = getVectorValue(it->getOperand(2));
- Value *ScalarCond = Builder.CreateExtractElement(Cond[0],
- Builder.getInt32(0));
+
+ Value *ScalarCond = (VF == 1) ? Cond[0] :
+ Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
+
for (unsigned Part = 0; Part < UF; ++Part) {
Entry[Part] = Builder.CreateSelect(
InvariantCond ? ScalarCond : Cond[Part],
@@ -2475,7 +2710,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
break;
}
/// Vectorize casts.
- Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+ Type *DestTy = (VF == 1) ? CI->getType() :
+ VectorType::get(CI->getType(), VF);
VectorParts &A = getVectorValue(it->getOperand(0));
for (unsigned Part = 0; Part < UF; ++Part)
@@ -2505,7 +2741,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
Args.push_back(Arg[Part]);
}
- Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
+ Type *Tys[] = {CI->getType()};
+ if (VF > 1)
+ Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+
Function *F = Intrinsic::getDeclaration(M, ID, Tys);
Entry[Part] = Builder.CreateCall(F, Args);
}
@@ -2542,19 +2781,36 @@ void InnerLoopVectorizer::updateAnalysis() {
DEBUG(DT->verifyAnalysis());
}
+/// \brief Check whether it is safe to if-convert this phi node.
+///
+/// Phi nodes with constant expressions that can trap are not safe to
+/// if-convert.
+static bool canIfConvertPHINodes(BasicBlock *BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (!Phi)
+ return true;
+ for (unsigned p = 0, e = Phi->getNumIncomingValues(); p != e; ++p)
+ if (Constant *C = dyn_cast<Constant>(Phi->getIncomingValue(p)))
+ if (C->canTrap())
+ return false;
+ }
+ return true;
+}
+
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion)
return false;
assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
- std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
// A list of pointers that we can safely read and write to.
SmallPtrSet<Value *, 8> SafePointes;
// Collect safe addresses.
- for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
- BasicBlock *BB = LoopBlocks[i];
+ for (Loop::block_iterator BI = TheLoop->block_begin(),
+ BE = TheLoop->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = *BI;
if (blockNeedsPredication(BB))
continue;
@@ -2568,16 +2824,22 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
}
// Collect the blocks that need predication.
- for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
- BasicBlock *BB = LoopBlocks[i];
+ BasicBlock *Header = TheLoop->getHeader();
+ for (Loop::block_iterator BI = TheLoop->block_begin(),
+ BE = TheLoop->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = *BI;
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator()))
return false;
// We must be able to predicate all blocks that need to be predicated.
- if (blockNeedsPredication(BB) && !blockCanBePredicated(BB, SafePointes))
+ if (blockNeedsPredication(BB)) {
+ if (!blockCanBePredicated(BB, SafePointes))
+ return false;
+ } else if (BB != Header && !canIfConvertPHINodes(BB))
return false;
+
}
// We can if-convert this loop.
@@ -2602,19 +2864,17 @@ bool LoopVectorizationLegality::canVectorize() {
if (!TheLoop->getExitingBlock())
return false;
- unsigned NumBlocks = TheLoop->getNumBlocks();
+ // We need to have a loop header.
+ DEBUG(dbgs() << "LV: Found a loop: " <<
+ TheLoop->getHeader()->getName() << '\n');
// Check if we can if-convert non single-bb loops.
+ unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
return false;
}
- // We need to have a loop header.
- BasicBlock *Latch = TheLoop->getLoopLatch();
- DEBUG(dbgs() << "LV: Found a loop: " <<
- TheLoop->getHeader()->getName() << "\n");
-
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
@@ -2623,6 +2883,7 @@ bool LoopVectorizationLegality::canVectorize() {
}
// Do not loop-vectorize loops with a tiny trip count.
+ BasicBlock *Latch = TheLoop->getLoopLatch();
unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
if (TC > 0u && TC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
@@ -2657,7 +2918,13 @@ bool LoopVectorizationLegality::canVectorize() {
static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
if (Ty->isPointerTy())
- return DL.getIntPtrType(Ty->getContext());
+ return DL.getIntPtrType(Ty);
+
+  // It is possible that chars or shorts overflow when we ask for the loop's
+  // trip count; work around this by changing the type size.
+ if (Ty->getScalarSizeInBits() < 32)
+ return Type::getInt32Ty(Ty->getContext());
+
return Ty;
}
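
The widening above guards against narrow induction arithmetic. A sketch of the kind of loop it has in mind; doing the trip-count math at 8 bits could wrap, so the computation is promoted to i32 (example is illustrative):

    // The comparison value 200 fits an unsigned char, but intermediate
    // trip-count arithmetic performed in i8 could overflow.
    void clearEveryOther(unsigned char *A) {
      for (unsigned char I = 0; I < 200; I += 2)
        A[I] = 0;
    }
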
@@ -2682,7 +2949,7 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
Instruction *U = cast<Instruction>(*I);
// This user may be a reduction exit value.
if (!TheLoop->contains(U)) {
- DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ DEBUG(dbgs() << "LV: Found an outside user for : " << *U << '\n');
return true;
}
}
@@ -2758,6 +3025,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
DEBUG(dbgs() << "LV: Found an induction variable.\n");
Inductions[Phi] = InductionInfo(StartValue, IK);
+
+ // Until we explicitly handle the case of an induction variable with
+ // an outside loop user we have to give up vectorizing this loop.
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ return false;
+
continue;
}
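
The new bail-out fires for loops where an induction value escapes, for example:

    // 'I' has a user outside the loop (the return), so until that case is
    // modeled explicitly the loop is rejected rather than mis-vectorized.
    long fillAndCount(long *A, long N) {
      long I;
      for (I = 0; I < N; ++I)
        A[I] = I;
      return I;
    }
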
@@ -2812,9 +3085,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
// Check that the instruction return type is vectorizable.
- if (!VectorType::isValidElementType(it->getType()) &&
- !it->getType()->isVoidTy()) {
- DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+ // Also, we can't vectorize extractelement instructions.
+ if ((!VectorType::isValidElementType(it->getType()) &&
+ !it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
+ DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
}
@@ -2904,7 +3178,7 @@ public:
/// non-intersection.
bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
unsigned &NumComparisons, ScalarEvolution *SE,
- Loop *TheLoop);
+ Loop *TheLoop, bool ShouldCheckStride = false);
/// \brief Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
@@ -2918,6 +3192,7 @@ public:
bool isRTCheckNeeded() { return IsRTCheckNeeded; }
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
+ void resetDepChecks() { CheckDeps.clear(); }
MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
@@ -2972,10 +3247,15 @@ static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr) {
return AR->isAffine();
}
+/// \brief Check the stride of the pointer and ensure that it does not wrap in
+/// the address space.
+static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr,
+ const Loop *Lp);
+
bool AccessAnalysis::canCheckPtrAtRT(
LoopVectorizationLegality::RuntimePointerCheck &RtCheck,
unsigned &NumComparisons, ScalarEvolution *SE,
- Loop *TheLoop) {
+ Loop *TheLoop, bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
unsigned NumReadPtrChecks = 0;
@@ -3003,7 +3283,10 @@ bool AccessAnalysis::canCheckPtrAtRT(
else
++NumReadPtrChecks;
- if (hasComputableBounds(SE, Ptr)) {
+ if (hasComputableBounds(SE, Ptr) &&
+ // When we run after a failing dependency check we have to make sure we
+ // don't have wrapping pointers.
+ (!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) == 1)) {
// The id of the dependence set.
unsigned DepId;
@@ -3019,7 +3302,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId);
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr <<"\n");
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
} else {
CanDoRT = false;
}
@@ -3027,9 +3310,36 @@ bool AccessAnalysis::canCheckPtrAtRT(
if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
NumComparisons = 0; // Only one dependence set.
- else
+ else {
NumComparisons = (NumWritePtrChecks * (NumReadPtrChecks +
NumWritePtrChecks - 1));
+ }
+
+ // If the pointers that we would use for the bounds comparison have different
+ // address spaces, assume the values aren't directly comparable, so we can't
+ // use them for the runtime check. We also have to assume they could
+ // overlap. In the future there should be metadata for whether address spaces
+ // are disjoint.
+ unsigned NumPointers = RtCheck.Pointers.size();
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i + 1; j < NumPointers; ++j) {
+ // Only need to check pointers between two different dependency sets.
+ if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ continue;
+
+ Value *PtrI = RtCheck.Pointers[i];
+ Value *PtrJ = RtCheck.Pointers[j];
+
+ unsigned ASi = PtrI->getType()->getPointerAddressSpace();
+ unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
+ if (ASi != ASj) {
+ DEBUG(dbgs() << "LV: Runtime check would require comparison between"
+ " different address spaces\n");
+ return false;
+ }
+ }
+ }
+
return CanDoRT;
}
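
The address-space scan exists because the emitted bounds checks compare pointer ranges, and that comparison is only meaningful within a single address space. A sketch of the overlap predicate the runtime check boils down to (hypothetical helper, not the emitted IR):

    // Half-open ranges [BegA, EndA) and [BegB, EndB) intersect iff each
    // begins before the other ends; this ordering is not defined across
    // distinct address spaces, hence the bail-out above.
    bool mayOverlap(const char *BegA, const char *EndA,
                    const char *BegB, const char *EndB) {
      return BegA < EndB && BegB < EndA;
    }
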
@@ -3084,7 +3394,7 @@ void AccessAnalysis::processMemAccesses(bool UseDeferred) {
!isa<Argument>(UnderlyingObj)) &&
!isIdentifiedObject(UnderlyingObj))) {
DEBUG(dbgs() << "LV: Found an unidentified " <<
- (IsWrite ? "write" : "read" ) << " ptr:" << *UnderlyingObj <<
+ (IsWrite ? "write" : "read" ) << " ptr: " << *UnderlyingObj <<
"\n");
IsRTCheckNeeded = (IsRTCheckNeeded ||
!isIdentifiedObject(UnderlyingObj) ||
@@ -3158,8 +3468,9 @@ public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
- MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L) :
- SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0) {}
+ MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L)
+ : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
+ ShouldRetryWithRuntimeCheck(false) {}
/// \brief Register the location (instructions are given increasing numbers)
/// of a write access.
@@ -3189,6 +3500,10 @@ public:
/// the accesses safely with.
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+  /// \brief In some cases when the dependency check fails we can still
+ /// vectorize the loop with a dynamic array access check.
+ bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+
private:
ScalarEvolution *SE;
DataLayout *DL;
@@ -3206,6 +3521,10 @@ private:
// We can access this many bytes in parallel safely.
unsigned MaxSafeDepDistBytes;
+  /// \brief If we see a non-constant dependence distance we can still try to
+ /// vectorize this loop with runtime checks.
+ bool ShouldRetryWithRuntimeCheck;
+
/// \brief Check whether there is a plausible dependence between the two
/// accesses.
///
@@ -3403,6 +3722,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");
+ ShouldRetryWithRuntimeCheck = true;
return true;
}
@@ -3428,7 +3748,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (Val == 0) {
if (ATy == BTy)
return false;
- DEBUG(dbgs() << "LV: Zero dependence difference but different types");
+ DEBUG(dbgs() << "LV: Zero dependence difference but different types\n");
return true;
}
@@ -3437,7 +3757,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Positive distance bigger than max vectorization factor.
if (ATy != BTy) {
DEBUG(dbgs() <<
- "LV: ReadWrite-Write positive dependency with different types");
+ "LV: ReadWrite-Write positive dependency with different types\n");
return false;
}
@@ -3454,7 +3774,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2*TypeByteSize > MaxSafeDepDistBytes ||
Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
DEBUG(dbgs() << "LV: Failure because of Positive distance "
- << Val.getSExtValue() << "\n");
+ << Val.getSExtValue() << '\n');
return true;
}
@@ -3467,7 +3787,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return true;
DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() <<
- " with max VF=" << MaxSafeDepDistBytes/TypeByteSize << "\n");
+ " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
return false;
}
@@ -3571,8 +3891,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Stores.push_back(St);
DepChecker.addAccess(St);
}
- } // next instr.
- } // next block.
+ } // Next instr.
+ } // Next block.
// Now we have two lists that hold the loads and the stores.
// Next, we find the pointers that they use.
@@ -3619,7 +3939,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
- SmallPtrSet<Value *, 16> ReadOnlyPtr;
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
LoadInst *LD = cast<LoadInst>(*I);
Value* Ptr = LD->getPointerOperand();
@@ -3667,7 +3986,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (NumComparisons == 0 && NeedRTCheck)
NeedRTCheck = false;
- // Check that we did not collect too many pointers or found a unsizeable
+ // Check that we did not collect too many pointers or found an unsizeable
// pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
PtrRtCheck.reset();
@@ -3693,9 +4012,32 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
Accesses.getDependenciesToCheck());
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+ if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+ DEBUG(dbgs() << "LV: Retrying with memory checks\n");
+ NeedRTCheck = true;
+
+ // Clear the dependency checks. We assume they are not needed.
+ Accesses.resetDepChecks();
+
+ PtrRtCheck.reset();
+ PtrRtCheck.Need = true;
+
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+ TheLoop, true);
+ // Check that we did not collect too many pointers or found an unsizeable
+ // pointer.
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+ DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
+ PtrRtCheck.reset();
+ return false;
+ }
+
+ CanVecMem = true;
+ }
}
- DEBUG(dbgs() << "LV: We "<< (NeedRTCheck ? "" : "don't") <<
+ DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
" need a runtime memory check.\n");
return CanVecMem;
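
The retry path targets loops whose dependence distance is symbolic rather than provably unsafe, for example (illustrative):

    // The distance between A[I] and A[I + K] is unknown at compile time,
    // so the static dependence check fails; retrying with stride checks
    // lets the loop run under a runtime overlap test instead.
    void scaleShifted(float *A, long K, long N) {
      for (long I = 0; I < N; ++I)
        A[I] = 2.0f * A[I + K];
    }
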
@@ -3839,6 +4181,12 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
if (ExitInstruction != 0 || Cur == Phi)
return false;
+ // The instruction used by an outside user must be the last instruction
+      // before we feed back to the reduction phi. Otherwise, we lose VF-1
+ // operations on the value.
+ if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
+ return false;
+
ExitInstruction = Cur;
continue;
}
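
The check requires the value used outside the loop to be the last operation feeding the reduction PHI; anything earlier would drop the trailing lanes. A reduction that satisfies it:

    // 'Sum += A[I]' is both the value fed back into the PHI and the exit
    // value, so no operations on the reduction are lost.
    float sumArray(const float *A, long N) {
      float Sum = 0.0f;
      for (long I = 0; I < N; ++I)
        Sum += A[I];
      return Sum;
    }
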
@@ -4045,6 +4393,14 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
if (it->mayWriteToMemory() || it->mayThrow())
return false;
+ // Check that we don't have a constant expression that can trap as operand.
+ for (Instruction::op_iterator OI = it->op_begin(), OE = it->op_end();
+ OI != OE; ++OI) {
+ if (Constant *C = dyn_cast<Constant>(*OI))
+ if (C->canTrap())
+ return false;
+ }
+
// The instructions below can trap.
switch (it->getOpcode()) {
default: continue;
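
Predication executes a block's instructions unconditionally and blends the results, so both trapping opcodes (the switch above) and trapping constant-expression operands (the new operand scan) must block if-conversion. A guarded division shows the general hazard:

    // If-converting the 'if' would evaluate A[I] / D even when D == 0,
    // which traps for integer division, so the guard must be preserved.
    long sumRatios(const long *A, long D, long N) {
      long S = 0;
      for (long I = 0; I < N; ++I)
        if (D != 0)
          S += A[I] / D;
      return S;
    }
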
@@ -4071,7 +4427,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
// Find the trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
- DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+ DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
unsigned WidestType = getWidestType();
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
@@ -4082,7 +4438,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
WidestRegister : MaxSafeDepDist);
unsigned MaxVectorSize = WidestRegister / WidestType;
DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
- DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n");
+ DEBUG(dbgs() << "LV: The Widest register is: "
+ << WidestRegister << " bits.\n");
if (MaxVectorSize == 0) {
DEBUG(dbgs() << "LV: The target has no vector registers.\n");
@@ -4118,7 +4475,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
if (UserVF != 0) {
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
- DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
+ DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
Factor.Width = UserVF;
return Factor;
@@ -4126,13 +4483,13 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
float Cost = expectedCost(1);
unsigned Width = 1;
- DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
+ DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n");
for (unsigned i=2; i <= VF; i*=2) {
// Notice that the vector loop needs to be executed less times, so
// we need to divide the cost of the vector loops by the width of
// the vector elements.
float VectorCost = expectedCost(i) / (float)i;
- DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
+ DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
(int)VectorCost << ".\n");
if (VectorCost < Cost) {
Cost = VectorCost;
@@ -4256,8 +4613,20 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
else if (UF < 1)
UF = 1;
- if (Legal->getReductionVars()->size()) {
- DEBUG(dbgs() << "LV: Unrolling because of reductions. \n");
+ bool HasReductions = Legal->getReductionVars()->size();
+
+ // Decide if we want to unroll if we decided that it is legal to vectorize
+ // but not profitable.
+ if (VF == 1) {
+ if (TheLoop->getNumBlocks() > 1 || !HasReductions ||
+ LoopCost > SmallLoopCost)
+ return 1;
+
+ return UF;
+ }
+
+ if (HasReductions) {
+ DEBUG(dbgs() << "LV: Unrolling because of reductions.\n");
return UF;
}
@@ -4265,14 +4634,14 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and unroll until the cost of the
// loop overhead is about 5% of the cost of the loop.
- DEBUG(dbgs() << "LV: Loop cost is "<< LoopCost <<" \n");
- if (LoopCost < 20) {
- DEBUG(dbgs() << "LV: Unrolling to reduce branch cost. \n");
- unsigned NewUF = 20/LoopCost + 1;
+ DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n');
+ if (LoopCost < SmallLoopCost) {
+ DEBUG(dbgs() << "LV: Unrolling to reduce branch cost.\n");
+ unsigned NewUF = SmallLoopCost / (LoopCost + 1);
return std::min(NewUF, UF);
}
- DEBUG(dbgs() << "LV: Not Unrolling. \n");
+ DEBUG(dbgs() << "LV: Not Unrolling.\n");
return 1;
}
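
A worked sketch of the small-loop branch-cost heuristic, assuming SmallLoopCost keeps a default of 20 (the cl::opt is defined elsewhere in the file, so that constant is an assumption here): with LoopCost = 4, NewUF = 20 / (4 + 1) = 4, then clamped by the register-pressure UF.

    #include <algorithm>

    // Mirrors the decision above for the VF > 1, no-reductions path.
    unsigned smallLoopUF(unsigned LoopCost, unsigned UF) {
      const unsigned SmallLoopCost = 20; // assumed default
      if (LoopCost >= SmallLoopCost)
        return 1;                        // body too big: don't unroll
      return std::min(SmallLoopCost / (LoopCost + 1), UF);
    }
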
@@ -4373,16 +4742,16 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
MaxUsage = std::max(MaxUsage, OpenIntervals.size());
DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " <<
- OpenIntervals.size() <<"\n");
+ OpenIntervals.size() << '\n');
// Add the current instruction to the list of open intervals.
OpenIntervals.insert(I);
}
unsigned Invariant = LoopInvariants.size();
- DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << " \n");
- DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << " \n");
- DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << " \n");
+ DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n');
+ DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n');
+ DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n');
R.LoopInvariantRegs = Invariant;
R.MaxLocalUsers = MaxUsage;
@@ -4406,8 +4775,8 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
unsigned C = getInstructionCost(it, VF);
BlockCost += C;
- DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
- VF << " For instruction: "<< *it << "\n");
+ DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " <<
+ VF << " For instruction: " << *it << '\n');
}
// We assume that if-converted blocks have a 50% chance of being executed.
@@ -4669,13 +5038,16 @@ char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
- Pass *createLoopVectorizePass() {
- return new LoopVectorize();
+ Pass *createLoopVectorizePass(bool NoUnrolling) {
+ return new LoopVectorize(NoUnrolling);
}
}
@@ -4690,3 +5062,96 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
return false;
}
+
+
+void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
+ assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+ // Holds vector parameters or scalars, in case of uniform vals.
+ SmallVector<VectorParts, 4> Params;
+
+ setDebugLocFromInst(Builder, Instr);
+
+ // Find all of the vectorized parameters.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *SrcOp = Instr->getOperand(op);
+
+ // If we are accessing the old induction variable, use the new one.
+ if (SrcOp == OldInduction) {
+ Params.push_back(getVectorValue(SrcOp));
+ continue;
+ }
+
+ // Try using previously calculated values.
+ Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
+
+ // If the src is an instruction that appeared earlier in the basic block
+ // then it should already be vectorized.
+ if (SrcInst && OrigLoop->contains(SrcInst)) {
+ assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
+ // The parameter is a vector value from earlier.
+ Params.push_back(WidenMap.get(SrcInst));
+ } else {
+ // The parameter is a scalar from outside the loop. Maybe even a constant.
+ VectorParts Scalars;
+ Scalars.append(UF, SrcOp);
+ Params.push_back(Scalars);
+ }
+ }
+
+ assert(Params.size() == Instr->getNumOperands() &&
+ "Invalid number of operands");
+
+  // Does this instruction return a value?
+ bool IsVoidRetTy = Instr->getType()->isVoidTy();
+
+ Value *UndefVec = IsVoidRetTy ? 0 :
+ UndefValue::get(Instr->getType());
+ // Create a new entry in the WidenMap and initialize it to Undef or Null.
+ VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
+
+ // For each vector unroll 'part':
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // For each scalar that we create:
+
+ Instruction *Cloned = Instr->clone();
+ if (!IsVoidRetTy)
+ Cloned->setName(Instr->getName() + ".cloned");
+ // Replace the operands of the cloned instructions with extracted scalars.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *Op = Params[op][Part];
+ Cloned->setOperand(op, Op);
+ }
+
+ // Place the cloned scalar in the new loop.
+ Builder.Insert(Cloned);
+
+ // If the original scalar returns a value we need to place it in a vector
+ // so that future users will be able to use it.
+ if (!IsVoidRetTy)
+ VecResults[Part] = Cloned;
+ }
+}
+
+void
+InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality*) {
+ return scalarizeInstruction(Instr);
+}
+
+Value *InnerLoopUnroller::reverseVector(Value *Vec) {
+ return Vec;
+}
+
+Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
+ return V;
+}
+
+Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
+ bool Negate) {
+ // When unrolling and the VF is 1, we only need to add a simple scalar.
+ Type *ITy = Val->getType();
+ assert(!ITy->isVectorTy() && "Val must be a scalar");
+ Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
+ return Builder.CreateAdd(Val, C, "induction");
+}
+
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9312b4b..c72b51f 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -25,8 +25,8 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -49,34 +49,24 @@ static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
"number "));
+
+static cl::opt<bool>
+ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
+ cl::desc("Attempt to vectorize horizontal reductions"));
+
+static cl::opt<bool> ShouldStartVectorizeHorAtStore(
+ "slp-vectorize-hor-store", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Attempt to vectorize horizontal reductions feeding into a store"));
+
namespace {
static const unsigned MinVecRegSize = 128;
static const unsigned RecursionMaxDepth = 12;
-/// RAII pattern to save the insertion point of the IR builder.
-class BuilderLocGuard {
-public:
- BuilderLocGuard(IRBuilder<> &B) : Builder(B), Loc(B.GetInsertPoint()),
- DbgLoc(B.getCurrentDebugLocation()) {}
- ~BuilderLocGuard() {
- Builder.SetCurrentDebugLocation(DbgLoc);
- if (Loc)
- Builder.SetInsertPoint(Loc);
- }
-
-private:
- // Prevent copying.
- BuilderLocGuard(const BuilderLocGuard &);
- BuilderLocGuard &operator=(const BuilderLocGuard &);
- IRBuilder<> &Builder;
- AssertingVH<Instruction> Loc;
- DebugLoc DbgLoc;
-};
-
-/// A helper class for numbering instructions in multible blocks.
-/// Numbers starts at zero for each basic block.
+/// A helper class for numbering instructions in multiple blocks.
+/// Numbers start at zero for each basic block.
struct BlockNumbering {
BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {}
@@ -173,6 +163,37 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
return Opcode;
}
+/// \returns \p I after propagating metadata from \p VL.
+static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
+ Instruction *I0 = cast<Instruction>(VL[0]);
+ SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
+ I0->getAllMetadataOtherThanDebugLoc(Metadata);
+
+ for (unsigned i = 0, n = Metadata.size(); i != n; ++i) {
+ unsigned Kind = Metadata[i].first;
+ MDNode *MD = Metadata[i].second;
+
+ for (int i = 1, e = VL.size(); MD && i != e; i++) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ MDNode *IMD = I->getMetadata(Kind);
+
+ switch (Kind) {
+ default:
+ MD = 0; // Remove unknown metadata
+ break;
+ case LLVMContext::MD_tbaa:
+ MD = MDNode::getMostGenericTBAA(MD, IMD);
+ break;
+ case LLVMContext::MD_fpmath:
+ MD = MDNode::getMostGenericFPMath(MD, IMD);
+ break;
+ }
+ }
+ I->setMetadata(Kind, MD);
+ }
+ return I;
+}
+
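
propagateMetadata keeps a metadata kind on the combined instruction only in a form every scalar agrees on: TBAA and fpmath are merged to their most generic variant, and any other kind is dropped. A toy model of the drop-unless-mergeable policy, with std::map standing in for the metadata list:

    #include <map>
    #include <string>

    // Keep a kind only when all instructions carry an identical value; the
    // real code instead merges per kind (getMostGenericTBAA / FPMath) and
    // clears MD to 0 for unknown kinds.
    std::map<std::string, int>
    mergeMetadata(const std::map<std::string, int> &A,
                  const std::map<std::string, int> &B) {
      std::map<std::string, int> Result;
      for (const auto &KV : A) {
        auto It = B.find(KV.first);
        if (It != B.end() && It->second == KV.second)
          Result.insert(KV);
      }
      return Result;
    }
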
/// \returns The type that all of the values in \p VL have or null if there
/// are different types.
static Type* getSameType(ArrayRef<Value *> VL) {
@@ -216,6 +237,104 @@ static bool CanReuseExtract(ArrayRef<Value *> VL) {
return true;
}
+static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right) {
+
+ SmallVector<Value *, 16> OrigLeft, OrigRight;
+
+ bool AllSameOpcodeLeft = true;
+ bool AllSameOpcodeRight = true;
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ Instruction *I = cast<Instruction>(VL[i]);
+ Value *V0 = I->getOperand(0);
+ Value *V1 = I->getOperand(1);
+
+ OrigLeft.push_back(V0);
+ OrigRight.push_back(V1);
+
+ Instruction *I0 = dyn_cast<Instruction>(V0);
+ Instruction *I1 = dyn_cast<Instruction>(V1);
+
+ // Check whether all operands on one side have the same opcode. In this case
+ // we want to preserve the original order and not make things worse by
+ // reordering.
+ AllSameOpcodeLeft = I0;
+ AllSameOpcodeRight = I1;
+
+ if (i && AllSameOpcodeLeft) {
+      if (Instruction *P0 = dyn_cast<Instruction>(OrigLeft[i-1])) {
+        if (P0->getOpcode() != I0->getOpcode())
+ AllSameOpcodeLeft = false;
+ } else
+ AllSameOpcodeLeft = false;
+ }
+ if (i && AllSameOpcodeRight) {
+      if (Instruction *P1 = dyn_cast<Instruction>(OrigRight[i-1])) {
+        if (P1->getOpcode() != I1->getOpcode())
+ AllSameOpcodeRight = false;
+ } else
+ AllSameOpcodeRight = false;
+ }
+
+ // Sort two opcodes. In the code below we try to preserve the ability to use
+ // broadcast of values instead of individual inserts.
+ // vl1 = load
+ // vl2 = phi
+ // vr1 = load
+  // vr2 = vr1
+ // = vl1 x vr1
+ // = vl2 x vr2
+  // If we just sorted according to opcode we would leave the first line
+  // intact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
+ // = vl1 x vr1
+ // = vr2 x vl2
+  // Because vr2 and vr1 are from the same load we lose the opportunity of a
+ // broadcast for the packed right side in the backend: we have [vr1, vl2]
+ // instead of [vr1, vr2=vr1].
+ if (I0 && I1) {
+      if (!i && I0->getOpcode() > I1->getOpcode()) {
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else if (i && I0->getOpcode() > I1->getOpcode() && Right[i-1] != I1) {
+        // Try not to destroy a broadcast for no apparent benefit.
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else if (i && I0->getOpcode() == I1->getOpcode() && Right[i-1] == I0) {
+        // Try to preserve broadcasts.
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else if (i && I0->getOpcode() == I1->getOpcode() && Left[i-1] == I1) {
+        // Try to preserve broadcasts.
+ Left.push_back(I1);
+ Right.push_back(I0);
+ } else {
+ Left.push_back(I0);
+ Right.push_back(I1);
+ }
+ continue;
+ }
+ // One opcode, put the instruction on the right.
+ if (I0) {
+ Left.push_back(V1);
+ Right.push_back(I0);
+ continue;
+ }
+ Left.push_back(V0);
+ Right.push_back(V1);
+ }
+
+ bool LeftBroadcast = isSplat(Left);
+ bool RightBroadcast = isSplat(Right);
+
+  // Don't reorder if the operands were good to begin with.
+ if (!(LeftBroadcast || RightBroadcast) &&
+ (AllSameOpcodeRight || AllSameOpcodeLeft)) {
+ Left = OrigLeft;
+ Right = OrigRight;
+ }
+}
+
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
public:
@@ -238,17 +357,20 @@ public:
}
/// \brief Vectorize the tree that starts with the elements in \p VL.
- void vectorizeTree();
+ /// Returns the vectorized root.
+ Value *vectorizeTree();
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
int getTreeCost();
- /// Construct a vectorizable tree that starts at \p Roots.
- void buildTree(ArrayRef<Value *> Roots);
+ /// Construct a vectorizable tree that starts at \p Roots and is possibly
+ /// used by a reduction of \p RdxOps.
+ void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
+ RdxOps = 0;
VectorizableTree.clear();
ScalarToTreeEntry.clear();
MustGather.clear();
@@ -278,7 +400,7 @@ private:
/// \returns the pointer to the vectorized value if \p VL is already
/// vectorized, or NULL. They may happen in cycles.
- Value *alreadyVectorized(ArrayRef<Value *> VL);
+ Value *alreadyVectorized(ArrayRef<Value *> VL) const;
/// \brief Take the pointer operand from the Load/Store instruction.
/// \returns NULL if this is not a valid Load/Store instruction.
@@ -305,26 +427,31 @@ private:
/// \returns the pointer to the barrier instruction if we can't sink.
Value *getSinkBarrier(Instruction *Src, Instruction *Dst);
- /// \returns the index of the last instrucion in the BB from \p VL.
+ /// \returns the index of the last instruction in the BB from \p VL.
int getLastIndex(ArrayRef<Value *> VL);
- /// \returns the Instrucion in the bundle \p VL.
+ /// \returns the Instruction in the bundle \p VL.
Instruction *getLastInstruction(ArrayRef<Value *> VL);
+ /// \brief Set the Builder insert point to one after the last instruction in
+ /// the bundle
+ void setInsertPointAfterBundle(ArrayRef<Value *> VL);
+
/// \returns a vector from a collection of scalars in \p VL.
Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
+  /// \returns whether the VectorizableTree is fully vectorizable and will
+  /// be beneficial even if the tree height is tiny.
+ bool isFullyVectorizableTinyTree();
+
struct TreeEntry {
TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0),
NeedToGather(0) {}
/// \returns true if the scalars in VL are equal to this entry.
- bool isSame(ArrayRef<Value *> VL) {
+ bool isSame(ArrayRef<Value *> VL) const {
assert(VL.size() == Scalars.size() && "Invalid size");
- for (int i = 0, e = VL.size(); i != e; ++i)
- if (VL[i] != Scalars[i])
- return false;
- return true;
+ return std::equal(VL.begin(), VL.end(), Scalars.begin());
}
/// A vector of scalars.
@@ -393,10 +520,15 @@ private:
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> GatherSeq;
+ /// A list of blocks that we are going to CSE.
+ SmallSet<BasicBlock *, 8> CSEBlocks;
/// Numbers instructions in different blocks.
DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
+ /// Reduction operators.
+ ValueSet *RdxOps;
+
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
@@ -409,8 +541,9 @@ private:
IRBuilder<> Builder;
};
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots) {
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
deleteTree();
+ RdxOps = Rdx;
if (!getSameType(Roots))
return;
buildTree_rec(Roots, 0);
@@ -431,18 +564,20 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots) {
UE = Scalar->use_end(); User != UE; ++User) {
DEBUG(dbgs() << "SLP: Checking user:" << **User << ".\n");
- bool Gathered = MustGather.count(*User);
-
// Skip in-tree scalars that become vectors.
- if (ScalarToTreeEntry.count(*User) && !Gathered) {
+ if (ScalarToTreeEntry.count(*User)) {
DEBUG(dbgs() << "SLP: \tInternal user will be removed:" <<
**User << ".\n");
int Idx = ScalarToTreeEntry[*User]; (void) Idx;
assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
continue;
}
+ Instruction *UserInst = dyn_cast<Instruction>(*User);
+ if (!UserInst)
+ continue;
- if (!isa<Instruction>(*User))
+ // Ignore uses that are part of the reduction.
+ if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
continue;
DEBUG(dbgs() << "SLP: Need to extract:" << **User << " from lane " <<
@@ -574,6 +709,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
continue;
}
+ // This user is part of the reduction.
+ if (RdxOps && RdxOps->count(User))
+ continue;
+
// Make sure that we can schedule this unknown user.
BlockNumbering &BN = BlocksNumbers[BB];
int UserIndex = BN.getIndex(User);
@@ -635,6 +774,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
switch (Opcode) {
case Instruction::PHI: {
PHINode *PH = dyn_cast<PHINode>(VL0);
+
+ // Check for terminator values (e.g. invoke).
+ for (unsigned j = 0; j < VL.size(); ++j)
+ for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
+          TerminatorInst *Term = dyn_cast<TerminatorInst>(
+              cast<PHINode>(VL[j])->getIncomingValue(i));
+ if (Term) {
+ DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
@@ -658,13 +809,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
case Instruction::Load: {
      // Check if the loads are consecutive or if we need to swizzle them.
- for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
+ LoadInst *L = cast<LoadInst>(VL[i]);
+ if (!L->isSimple() || !isConsecutiveAccess(VL[i], VL[i + 1])) {
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Need to swizzle loads.\n");
return;
}
-
+ }
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a vector of loads.\n");
return;
@@ -753,6 +905,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
newTreeEntry(VL, true);
DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
+ // Sort operands of the instructions so that each side is more likely to
+ // have the same opcode.
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
+ ValueList Left, Right;
+ reorderInputsAccordingToOpcode(VL, Left, Right);
+ buildTree_rec(Left, Depth + 1);
+ buildTree_rec(Right, Depth + 1);
+ return;
+ }
+
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@@ -874,9 +1036,24 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
} else {
- ScalarCost = VecTy->getNumElements() *
- TTI->getArithmeticInstrCost(Opcode, ScalarTy);
- VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
+ // Certain instructions can be cheaper to vectorize if they have a
+ // constant second vector operand.
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_UniformConstantValue;
+
+ // Check whether all second operands are constant.
+ for (unsigned i = 0; i < VL.size(); ++i)
+ if (!isa<ConstantInt>(cast<Instruction>(VL[i])->getOperand(1))) {
+ Op2VK = TargetTransformInfo::OK_AnyValue;
+ break;
+ }
+
+ ScalarCost =
+ VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK);
+ VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK);
}
return VecCost - ScalarCost;
}
@@ -884,14 +1061,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// Cost of wide load - cost of scalar loads.
int ScalarLdCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
- int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+ int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
return VecLdCost - ScalarLdCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
int ScalarStCost = VecTy->getNumElements() *
TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
- int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
+ int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
return VecStCost - ScalarStCost;
}
default:
@@ -899,19 +1076,32 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
}
+bool BoUpSLP::isFullyVectorizableTinyTree() {
+ DEBUG(dbgs() << "SLP: Check whether the tree with height " <<
+        VectorizableTree.size() << " is fully vectorizable.\n");
+
+ // We only handle trees of height 2.
+ if (VectorizableTree.size() != 2)
+ return false;
+
+ // Gathering cost would be too much for tiny trees.
+ if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
+ return false;
+
+ return true;
+}
+
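
With this predicate, getTreeCost below can accept a height-2 tree when neither entry needs gathering, instead of unconditionally punting small trees to the SelectionDAG store merger. A typical tree it now vectorizes:

    // Two consecutive stores fed by two consecutive loads: the stores are
    // the roots, the loads are the single operand bundle, and no gather is
    // required.
    void copyPair(double *D, const double *S) {
      D[0] = S[0];
      D[1] = S[1];
    }
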
int BoUpSLP::getTreeCost() {
int Cost = 0;
DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
VectorizableTree.size() << ".\n");
- // Don't vectorize tiny trees. Small load/store chains or consecutive stores
- // of constants will be vectoried in SelectionDAG in MergeConsecutiveStores.
- // The SelectionDAG vectorizer can only handle pairs (trees of height = 2).
- if (VectorizableTree.size() < 3) {
+  // We only vectorize tiny trees if they are fully vectorizable.
+ if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
if (!VectorizableTree.size()) {
assert(!ExternalUses.size() && "We should not have any external users");
}
- return 0;
+ return INT_MAX;
}
unsigned BundleWidth = VectorizableTree[0].Scalars.size();
@@ -992,63 +1182,29 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
return false;
- // Calculate a constant offset from the base pointer without using SCEV
- // in the supported cases.
- // TODO: Add support for the case where one of the pointers is a GEP that
- // uses the other pointer.
- GetElementPtrInst *GepA = dyn_cast<GetElementPtrInst>(PtrA);
- GetElementPtrInst *GepB = dyn_cast<GetElementPtrInst>(PtrB);
-
- unsigned BW = DL->getPointerSizeInBits(ASA);
+ unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- int64_t Sz = DL->getTypeStoreSize(Ty);
+ APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
- // Check if PtrA is the base and PtrB is a constant offset.
- if (GepB && GepB->getPointerOperand() == PtrA) {
- APInt Offset(BW, 0);
- if (GepB->accumulateConstantOffset(*DL, Offset))
- return Offset.getSExtValue() == Sz;
- return false;
- }
+ APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB);
- // Check if PtrB is the base and PtrA is a constant offset.
- if (GepA && GepA->getPointerOperand() == PtrB) {
- APInt Offset(BW, 0);
- if (GepA->accumulateConstantOffset(*DL, Offset))
- return Offset.getSExtValue() == -Sz;
- return false;
- }
+ APInt OffsetDelta = OffsetB - OffsetA;
- // If both pointers are GEPs:
- if (GepA && GepB) {
- // Check that they have the same base pointer and number of indices.
- if (GepA->getPointerOperand() != GepB->getPointerOperand() ||
- GepA->getNumIndices() != GepB->getNumIndices())
- return false;
+ // Check if they are based on the same pointer. That makes the offsets
+ // sufficient.
+ if (PtrA == PtrB)
+ return OffsetDelta == Size;
- // Try to strip the geps. This makes SCEV faster.
- // Make sure that all of the indices except for the last are identical.
- int LastIdx = GepA->getNumIndices();
- for (int i = 0; i < LastIdx - 1; i++) {
- if (GepA->getOperand(i+1) != GepB->getOperand(i+1))
- return false;
- }
-
- PtrA = GepA->getOperand(LastIdx);
- PtrB = GepB->getOperand(LastIdx);
- Sz = 1;
- }
-
- ConstantInt *CA = dyn_cast<ConstantInt>(PtrA);
- ConstantInt *CB = dyn_cast<ConstantInt>(PtrB);
- if (CA && CB) {
- return (CA->getSExtValue() + Sz == CB->getSExtValue());
- }
+ // Compute the necessary base pointer delta to have the necessary final delta
+ // equal to the size.
+ APInt BaseDelta = Size - OffsetDelta;
- // Calculate the distance.
+ // Otherwise compute the distance with SCEV between the base pointers.
const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
- const SCEV *C = SE->getConstant(PtrSCEVA->getType(), Sz);
+ const SCEV *C = SE->getConstant(BaseDelta);
const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
return X == PtrSCEVB;
}
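
The rewrite folds each pointer to a base plus an accumulated in-bounds constant offset and only hands the residual base difference to SCEV. The arithmetic in standalone form (names illustrative):

    #include <cstdint>

    // A immediately precedes B iff (BaseB + OffB) - (BaseA + OffA) == Size,
    // i.e. BaseB - BaseA == Size - (OffB - OffA), the BaseDelta above.
    bool isConsecutive(std::intptr_t BaseA, std::intptr_t OffA,
                       std::intptr_t BaseB, std::intptr_t OffB,
                       std::intptr_t Size) {
      std::intptr_t OffsetDelta = OffB - OffA;
      if (BaseA == BaseB)
        return OffsetDelta == Size;
      return BaseB - BaseA == Size - OffsetDelta;
    }
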
@@ -1102,6 +1258,15 @@ Instruction *BoUpSLP::getLastInstruction(ArrayRef<Value *> VL) {
return I;
}
+void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
+ Instruction *VL0 = cast<Instruction>(VL[0]);
+ Instruction *LastInst = getLastInstruction(VL);
+ BasicBlock::iterator NextInst = LastInst;
+ ++NextInst;
+ Builder.SetInsertPoint(VL0->getParent(), NextInst);
+ Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+}
+
Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
Value *Vec = UndefValue::get(Ty);
// Generate the 'InsertElement' instruction.
@@ -1109,6 +1274,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
GatherSeq.insert(Insrt);
+ CSEBlocks.insert(Insrt->getParent());
// Add to our 'need-to-extract' list.
if (ScalarToTreeEntry.count(VL[i])) {
@@ -1132,10 +1298,12 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
return Vec;
}
-Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) {
- if (ScalarToTreeEntry.count(VL[0])) {
- int Idx = ScalarToTreeEntry[VL[0]];
- TreeEntry *En = &VectorizableTree[Idx];
+Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
+ SmallDenseMap<Value*, int>::const_iterator Entry
+ = ScalarToTreeEntry.find(VL[0]);
+ if (Entry != ScalarToTreeEntry.end()) {
+ int Idx = Entry->second;
+ const TreeEntry *En = &VectorizableTree[Idx];
if (En->isSame(VL) && En->VectorizedValue)
return En->VectorizedValue;
}
@@ -1159,38 +1327,48 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
}
Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
- BuilderLocGuard Guard(Builder);
+ IRBuilder<>::InsertPointGuard Guard(Builder);
if (E->VectorizedValue) {
DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
return E->VectorizedValue;
}
- Type *ScalarTy = E->Scalars[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(E->Scalars[0]))
+ Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
+ Type *ScalarTy = VL0->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
ScalarTy = SI->getValueOperand()->getType();
VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
if (E->NeedToGather) {
+ setInsertPointAfterBundle(E->Scalars);
return Gather(E->Scalars, VecTy);
}
- Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
unsigned Opcode = VL0->getOpcode();
assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode");
switch (Opcode) {
case Instruction::PHI: {
PHINode *PH = dyn_cast<PHINode>(VL0);
- Builder.SetInsertPoint(PH->getParent()->getFirstInsertionPt());
+ Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
E->VectorizedValue = NewPhi;
+ // PHINodes may have multiple entries from the same block. We want to
+ // visit every block once.
+ SmallSet<BasicBlock*, 4> VisitedBBs;
+
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
ValueList Operands;
BasicBlock *IBB = PH->getIncomingBlock(i);
+ if (!VisitedBBs.insert(IBB)) {
+ NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
+ continue;
+ }
+
// Prepare the operand vector.
for (unsigned j = 0; j < E->Scalars.size(); ++j)
Operands.push_back(cast<PHINode>(E->Scalars[j])->
@@ -1231,8 +1409,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *InVec = vectorizeTree(INVL);
@@ -1252,8 +1429,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
}
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *L = vectorizeTree(LHSV);
Value *R = vectorizeTree(RHSV);
@@ -1279,8 +1455,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2));
}
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *Cond = vectorizeTree(CondVec);
Value *True = vectorizeTree(TrueVec);
@@ -1288,7 +1463,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (Value *V = alreadyVectorized(E->Scalars))
return V;
-
+
Value *V = Builder.CreateSelect(Cond, True, False);
E->VectorizedValue = V;
return V;
@@ -1312,13 +1487,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Or:
case Instruction::Xor: {
ValueList LHSVL, RHSVL;
- for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
- LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
- RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
- }
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
+ reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL);
+ else
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+ LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+ RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+ }
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *LHS = vectorizeTree(LHSVL);
Value *RHS = vectorizeTree(RHSVL);
@@ -1333,41 +1510,46 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
E->VectorizedValue = V;
+
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return propagateMetadata(I, E->Scalars);
+
return V;
}
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
LoadInst *LI = cast<LoadInst>(VL0);
- Value *VecPtr =
- Builder.CreateBitCast(LI->getPointerOperand(), VecTy->getPointerTo());
+ unsigned AS = LI->getPointerAddressSpace();
+
+ Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
+ VecTy->getPointerTo(AS));
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
- return LI;
+ return propagateMetadata(LI, E->Scalars);
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(VL0);
unsigned Alignment = SI->getAlignment();
+ unsigned AS = SI->getPointerAddressSpace();
ValueList ValueOp;
for (int i = 0, e = E->Scalars.size(); i < e; ++i)
ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand());
- Builder.SetInsertPoint(getLastInstruction(E->Scalars));
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+ setInsertPointAfterBundle(E->Scalars);
Value *VecValue = vectorizeTree(ValueOp);
- Value *VecPtr =
- Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo());
+ Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
+ VecTy->getPointerTo(AS));
StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
S->setAlignment(Alignment);
E->VectorizedValue = S;
- return S;
+ return propagateMetadata(S, E->Scalars);
}
default:
llvm_unreachable("unknown inst");
@@ -1375,7 +1557,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return 0;
}
-void BoUpSLP::vectorizeTree() {
+Value *BoUpSLP::vectorizeTree() {
Builder.SetInsertPoint(F->getEntryBlock().begin());
vectorizeTree(&VectorizableTree[0]);
@@ -1407,6 +1589,7 @@ void BoUpSLP::vectorizeTree() {
if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(PN->getParent());
User->replaceUsesOfWith(Scalar, Ex);
} else if (isa<Instruction>(Vec)){
if (PHINode *PH = dyn_cast<PHINode>(User)) {
@@ -1414,17 +1597,20 @@ void BoUpSLP::vectorizeTree() {
if (PH->getIncomingValue(i) == Scalar) {
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(PH->getIncomingBlock(i));
PH->setOperand(i, Ex);
}
}
} else {
Builder.SetInsertPoint(cast<Instruction>(User));
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(cast<Instruction>(User)->getParent());
User->replaceUsesOfWith(Scalar, Ex);
}
} else {
Builder.SetInsertPoint(F->getEntryBlock().begin());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ CSEBlocks.insert(&F->getEntryBlock());
User->replaceUsesOfWith(Scalar, Ex);
}
@@ -1450,9 +1636,10 @@ void BoUpSLP::vectorizeTree() {
for (Value::use_iterator User = Scalar->use_begin(),
UE = Scalar->use_end(); User != UE; ++User) {
DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n");
- assert(!MustGather.count(*User) &&
- "Replacing gathered value with undef");
- assert(ScalarToTreeEntry.count(*User) &&
+
+ assert((ScalarToTreeEntry.count(*User) ||
+ // It is legal to replace the reduction users by undef.
+ (RdxOps && RdxOps->count(*User))) &&
"Replacing out-of-tree value with undef");
}
Value *Undef = UndefValue::get(Ty);
@@ -1467,8 +1654,20 @@ void BoUpSLP::vectorizeTree() {
BlocksNumbers[it].forget();
}
Builder.ClearInsertionPoint();
+
+ return VectorizableTree[0].VectorizedValue;
}
+class DTCmp {
+ const DominatorTree *DT;
+
+public:
+ DTCmp(const DominatorTree *DT) : DT(DT) {}
+ bool operator()(const BasicBlock *A, const BasicBlock *B) const {
+ return DT->properlyDominates(A, B);
+ }
+};
+
void BoUpSLP::optimizeGatherSequence() {
DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
<< " gather sequences instructions.\n");
@@ -1504,45 +1703,48 @@ void BoUpSLP::optimizeGatherSequence() {
Insert->moveBefore(PreHeader->getTerminator());
}
+ // Sort blocks by domination. This ensures we visit a block after all blocks
+ // dominating it are visited.
+ SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
+ std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT));
+
// Perform O(N^2) search over the gather sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
// instructions into different buckets based on the insert lane.
- SmallPtrSet<Instruction*, 16> Visited;
- SmallVector<Instruction*, 16> ToRemove;
- ReversePostOrderTraversal<Function*> RPOT(F);
- for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
- E = RPOT.end(); I != E; ++I) {
+ SmallVector<Instruction *, 16> Visited;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = CSEWorkList.begin(),
+ E = CSEWorkList.end();
+ I != E; ++I) {
+ assert((I == CSEWorkList.begin() || !DT->dominates(*I, *llvm::prior(I))) &&
+ "Worklist not sorted properly!");
BasicBlock *BB = *I;
- // For all instructions in the function:
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
- Instruction *In = it;
- if ((!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) ||
- !GatherSeq.count(In))
+ // For all instructions in blocks containing gather sequences:
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
+ Instruction *In = it++;
+ if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
- for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
- ve = Visited.end(); v != ve; ++v) {
+ for (SmallVectorImpl<Instruction *>::iterator v = Visited.begin(),
+ ve = Visited.end();
+ v != ve; ++v) {
if (In->isIdenticalTo(*v) &&
DT->dominates((*v)->getParent(), In->getParent())) {
In->replaceAllUsesWith(*v);
- ToRemove.push_back(In);
+ In->eraseFromParent();
In = 0;
break;
}
}
- if (In)
- Visited.insert(In);
+ if (In) {
+ assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end());
+ Visited.push_back(In);
+ }
}
}
-
- // Erase all of the instructions that we RAUWed.
- for (SmallVectorImpl<Instruction *>::iterator v = ToRemove.begin(),
- ve = ToRemove.end(); v != ve; ++v) {
- assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses");
- (*v)->eraseFromParent();
- }
+ CSEBlocks.clear();
+ GatherSeq.clear();
}
/// The SLPVectorizer Pass.
@@ -1575,14 +1777,18 @@ struct SLPVectorizer : public FunctionPass {
StoreRefs.clear();
bool Changed = false;
+ // If the target claims to have no vector registers, don't attempt
+ // vectorization.
+ if (!TTI->getNumberOfRegisters(true))
+ return false;
+
// Must have DataLayout. We can't require it because some tests run w/o
// triple.
if (!DL)
return false;
// Don't vectorize when the attribute NoImplicitFloat is used.
- if (F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat))
+ if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
@@ -1661,6 +1867,21 @@ private:
StoreListMap StoreRefs;
};
+/// \brief Check that the Values in the slice of the VL array still exist in
+/// the WeakVH array.
+/// Vectorization of part of the VL array may cause later values in the VL array
+/// to become invalid. We track when this has happened in the WeakVH array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> &VL,
+ SmallVectorImpl<WeakVH> &VH,
+ unsigned SliceBegin,
+ unsigned SliceSize) {
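+ // A WeakVH follows RAUW and nulls itself out when its value is deleted,
+ // so any slot that no longer compares equal to the original entry in VL
+ // was replaced or erased by an earlier vectorization step.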
+ for (unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i)
+ if (VH[i] != VL[i])
+ return true;
+
+ return false;
+}
+
bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
int CostThreshold, BoUpSLP &R) {
unsigned ChainLen = Chain.size();
@@ -1673,11 +1894,19 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
if (!isPowerOf2_32(Sz) || VF < 2)
return false;
+ // Keep track of values that were deleted by vectorizing in the loop below.
+ SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
+
bool Changed = false;
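+ // E.g. a chain of six i32 stores with VF = 4 tries the windows [0,4),
+ // [1,5) and [2,6); windows whose stores were already erased by a
+ // previous successful vectorization are skipped via the WeakVH check.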
// Look for profitable vectorizable trees at all offsets, starting at zero.
for (unsigned i = 0, e = ChainLen; i < e; ++i) {
if (i + VF > e)
break;
+
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
+ continue;
+
DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
<< "\n");
ArrayRef<Value *> Operands = Chain.slice(i, VF);
@@ -1697,7 +1926,7 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
}
}
- return Changed;
+ return Changed;
}
bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
@@ -1764,15 +1993,17 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
if (!SI)
continue;
+ // Don't touch volatile stores.
+ if (!SI->isSimple())
+ continue;
+
// Check that the pointer points to scalars.
Type *Ty = SI->getValueOperand()->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
return 0;
- // Find the base of the GEP.
- Value *Ptr = SI->getPointerOperand();
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
- Ptr = GEP->getPointerOperand();
+ // Find the base pointer.
+ Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
// Save the store locations.
StoreRefs[Ptr].push_back(SI);
@@ -1797,28 +2028,61 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
// Check that all of the parts are scalar instructions of the same type.
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)
- return 0;
+ return false;
unsigned Opcode0 = I0->getOpcode();
+ Type *Ty0 = I0->getType();
+ unsigned Sz = DL->getTypeSizeInBits(Ty0);
+ unsigned VF = MinVecRegSize / Sz;
+
for (int i = 0, e = VL.size(); i < e; ++i) {
Type *Ty = VL[i]->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
- return 0;
+ return false;
Instruction *Inst = dyn_cast<Instruction>(VL[i]);
if (!Inst || Inst->getOpcode() != Opcode0)
- return 0;
+ return false;
}
- R.buildTree(VL);
- int Cost = R.getTreeCost();
+ bool Changed = false;
- if (Cost >= -SLPCostThreshold)
- return false;
+ // Keep track of values that were deleted by vectorizing in the loop below.
+ SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
- DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n");
- R.vectorizeTree();
- return true;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ unsigned OpsWidth = 0;
+
+ if (i + VF > e)
+ OpsWidth = e - i;
+ else
+ OpsWidth = VF;
+
+ if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
+ break;
+
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
+ continue;
+
+ DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+ << "\n");
+ ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
+
+ R.buildTree(Ops);
+ int Cost = R.getTreeCost();
+
+ if (Cost < -SLPCostThreshold) {
+ DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n");
+ R.vectorizeTree();
+
+ // Move to the next bundle.
+ i += VF - 1;
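+ // Together with the loop increment this advances i by VF, i.e. to
+ // the first scalar after the bundle that was just vectorized.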
+ Changed = true;
+ }
+ }
+
+ return Changed;
}
bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
@@ -1861,30 +2125,405 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
return 0;
}
+/// \brief Generate a shuffle mask to be used in a reduction tree.
+///
+/// \param VecLen The length of the vector to be reduced.
+/// \param NumEltsToRdx The number of elements that should be reduced in the
+/// vector.
+/// \param IsPairwise Whether the reduction is a pairwise or splitting
+/// reduction. A pairwise reduction will generate a mask of
+/// <0,2,...> or <1,3,..> while a splitting reduction will generate
+/// <2,3, undef,undef> for a vector of 4 and NumElts = 2.
+/// \param IsLeft True will generate a mask of even elements, odd otherwise.
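+///
+/// For example, with VecLen = 8 and NumEltsToRdx = 4, a pairwise left mask
+/// is <0,2,4,6,undef,undef,undef,undef> and a splitting mask is
+/// <4,5,6,7,undef,undef,undef,undef>.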
+static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
+ bool IsPairwise, bool IsLeft,
+ IRBuilder<> &Builder) {
+ assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask");
+
+ SmallVector<Constant *, 32> ShuffleMask(
+ VecLen, UndefValue::get(Builder.getInt32Ty()));
+
+ if (IsPairwise)
+ // Build a mask of 0, 2, ... (left) or 1, 3, ... (right).
+ for (unsigned i = 0; i != NumEltsToRdx; ++i)
+ ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft);
+ else
+ // Move the upper half of the vector to the lower half.
+ for (unsigned i = 0; i != NumEltsToRdx; ++i)
+ ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i);
+
+ return ConstantVector::get(ShuffleMask);
+}
+
+
+/// Model horizontal reductions.
+///
+/// A horizontal reduction is a tree of reduction operations (currently add and
+/// fadd) that has operations that can be put into a vector as its leaf.
+/// For example, this tree:
+///
+/// mul mul mul mul
+/// \ / \ /
+/// + +
+/// \ /
+/// +
+/// This tree has "mul" as its reduced values and "+" as its reduction
+/// operations. A reduction might be feeding into a store or a binary operation
+/// feeding a phi.
+/// ...
+/// \ /
+/// +
+/// |
+/// phi +=
+///
+/// Or:
+/// ...
+/// \ /
+/// +
+/// |
+/// *p =
+///
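+/// A minimal IR sketch of the first tree above (names are illustrative):
+///   %m0 = mul i32 %a0, %b0
+///   %m1 = mul i32 %a1, %b1
+///   %m2 = mul i32 %a2, %b2
+///   %m3 = mul i32 %a3, %b3
+///   %s0 = add i32 %m0, %m1
+///   %s1 = add i32 %m2, %m3
+///   %rdx = add i32 %s0, %s1
+///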
+class HorizontalReduction {
+ SmallPtrSet<Value *, 16> ReductionOps;
+ SmallVector<Value *, 32> ReducedVals;
+
+ BinaryOperator *ReductionRoot;
+ PHINode *ReductionPHI;
+
+ /// The opcode of the reduction.
+ unsigned ReductionOpcode;
+ /// The opcode of the values we perform a reduction on.
+ unsigned ReducedValueOpcode;
+ /// The width of one full horizontal reduction operation.
+ unsigned ReduxWidth;
+ /// Should we model this reduction as a pairwise reduction tree or a tree that
+ /// splits the vector in halves and adds those halves.
+ bool IsPairwiseReduction;
+
+public:
+ HorizontalReduction()
+ : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0),
+ ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
+
+ /// \brief Try to find a reduction tree.
+ bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
+ DataLayout *DL) {
+ assert((!Phi ||
+ std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
+ "Thi phi needs to use the binary operator");
+
+ // We could have an initial reduction that is not an add.
+ //   r *= v1 + v2 + v3 + v4
+ // In such a case, start looking for a tree rooted at the first '+'.
+ if (Phi) {
+ if (B->getOperand(0) == Phi) {
+ Phi = 0;
+ B = dyn_cast<BinaryOperator>(B->getOperand(1));
+ } else if (B->getOperand(1) == Phi) {
+ Phi = 0;
+ B = dyn_cast<BinaryOperator>(B->getOperand(0));
+ }
+ }
+
+ if (!B)
+ return false;
+
+ Type *Ty = B->getType();
+ if (Ty->isVectorTy())
+ return false;
+
+ ReductionOpcode = B->getOpcode();
+ ReducedValueOpcode = 0;
+ ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
+ ReductionRoot = B;
+ ReductionPHI = Phi;
+
+ if (ReduxWidth < 4)
+ return false;
+
+ // We currently only support adds.
+ if (ReductionOpcode != Instruction::Add &&
+ ReductionOpcode != Instruction::FAdd)
+ return false;
+
+ // Post-order traverse the reduction tree starting at B. We only handle true
+ // trees containing only binary operators.
+ SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack;
+ Stack.push_back(std::make_pair(B, 0));
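+ // Each stack entry pairs a node with the index of the next operand edge
+ // to visit: 0 and 1 select an operand, 2 means both edges are done.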
+ while (!Stack.empty()) {
+ BinaryOperator *TreeN = Stack.back().first;
+ unsigned EdgeToVist = Stack.back().second++;
+ bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
+
+ // Only handle trees in the current basic block.
+ if (TreeN->getParent() != B->getParent())
+ return false;
+
+ // Each tree node needs to have one user except for the ultimate
+ // reduction.
+ if (!TreeN->hasOneUse() && TreeN != B)
+ return false;
+
+ // Post-order visit.
+ if (EdgeToVist == 2 || IsReducedValue) {
+ if (IsReducedValue) {
+ // Make sure that the opcodes of the operations that we are going to
+ // reduce match.
+ if (!ReducedValueOpcode)
+ ReducedValueOpcode = TreeN->getOpcode();
+ else if (ReducedValueOpcode != TreeN->getOpcode())
+ return false;
+ ReducedVals.push_back(TreeN);
+ } else {
+ // We need to be able to reassociate the adds.
+ if (!TreeN->isAssociative())
+ return false;
+ ReductionOps.insert(TreeN);
+ }
+ // Retract.
+ Stack.pop_back();
+ continue;
+ }
+
+ // Visit left or right.
+ Value *NextV = TreeN->getOperand(EdgeToVist);
+ BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV);
+ if (Next)
+ Stack.push_back(std::make_pair(Next, 0));
+ else if (NextV != Phi)
+ return false;
+ }
+ return true;
+ }
+
+ /// \brief Attempt to vectorize the tree found by
+ /// matchAssociativeReduction.
+ bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
+ if (ReducedVals.empty())
+ return false;
+
+ unsigned NumReducedVals = ReducedVals.size();
+ if (NumReducedVals < ReduxWidth)
+ return false;
+
+ Value *VectorizedTree = 0;
+ IRBuilder<> Builder(ReductionRoot);
+ FastMathFlags Unsafe;
+ Unsafe.setUnsafeAlgebra();
+ Builder.SetFastMathFlags(Unsafe);
+ unsigned i = 0;
+
+ for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
+ ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
+ V.buildTree(ValsToReduce, &ReductionOps);
+
+ // Estimate cost.
+ int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
+ if (Cost >= -SLPCostThreshold)
+ break;
+
+ DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
+ << ". (HorRdx)\n");
+
+ // Vectorize a tree.
+ DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
+ Value *VectorizedRoot = V.vectorizeTree();
+
+ // Emit a reduction.
+ Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder);
+ if (VectorizedTree) {
+ Builder.SetCurrentDebugLocation(Loc);
+ VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
+ ReducedSubTree, "bin.rdx");
+ } else
+ VectorizedTree = ReducedSubTree;
+ }
+
+ if (VectorizedTree) {
+ // Finish the reduction.
+ for (; i < NumReducedVals; ++i) {
+ Builder.SetCurrentDebugLocation(
+ cast<Instruction>(ReducedVals[i])->getDebugLoc());
+ VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
+ ReducedVals[i]);
+ }
+ // Update users.
+ if (ReductionPHI) {
+ assert(ReductionRoot != NULL && "Need a reduction operation");
+ ReductionRoot->setOperand(0, VectorizedTree);
+ ReductionRoot->setOperand(1, ReductionPHI);
+ } else
+ ReductionRoot->replaceAllUsesWith(VectorizedTree);
+ }
+ return VectorizedTree != 0;
+ }
+
+private:
+
+ /// \brief Calculate the cost of a reduction.
+ int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
+ Type *ScalarTy = FirstReducedVal->getType();
+ Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
+
+ int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true);
+ int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false);
+
+ IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
+ int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
+
+ int ScalarReduxCost =
+ ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy);
+
+ DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
+ << " for reduction that starts with " << *FirstReducedVal
+ << " (It is a "
+ << (IsPairwiseReduction ? "pairwise" : "splitting")
+ << " reduction)\n");
+
+ return VecReduxCost - ScalarReduxCost;
+ }
+
+ static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L,
+ Value *R, const Twine &Name = "") {
+ if (Opcode == Instruction::FAdd)
+ return Builder.CreateFAdd(L, R, Name);
+ return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name);
+ }
+
+ /// \brief Emit a horizontal reduction of the vectorized value.
+ Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) {
+ assert(VectorizedValue && "Need to have a vectorized tree node");
+ Instruction *ValToReduce = dyn_cast<Instruction>(VectorizedValue);
+ assert(isPowerOf2_32(ReduxWidth) &&
+ "We only handle power-of-two reductions for now");
+
+ Value *TmpVec = ValToReduce;
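+ // E.g. a splitting reduction of a <4 x i32> vector: add the upper half
+ // onto the lower half (shuffle mask <2,3,undef,undef>), then repeat with
+ // <1,undef,undef,undef>; the scalar result ends up in lane 0.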
+ for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
+ if (IsPairwiseReduction) {
+ Value *LeftMask =
+ createRdxShuffleMask(ReduxWidth, i, true, true, Builder);
+ Value *RightMask =
+ createRdxShuffleMask(ReduxWidth, i, true, false, Builder);
+
+ Value *LeftShuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l");
+ Value *RightShuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()), RightMask,
+ "rdx.shuf.r");
+ TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf,
+ "bin.rdx");
+ } else {
+ Value *UpperHalf =
+ createRdxShuffleMask(ReduxWidth, i, false, false, Builder);
+ Value *Shuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf");
+ TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx");
+ }
+ }
+
+ // The result is in the first element of the vector.
+ return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+ }
+};
+
+/// \brief Recognize construction of vectors like
+/// %ra = insertelement <4 x float> undef, float %s0, i32 0
+/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
+/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
+/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
+///
+/// \returns true if it matches.
+///
+static bool findBuildVector(InsertElementInst *IE,
+ SmallVectorImpl<Value *> &Ops) {
+ if (!isa<UndefValue>(IE->getOperand(0)))
+ return false;
+
+ while (true) {
+ Ops.push_back(IE->getOperand(1));
+
+ if (IE->use_empty())
+ return false;
+
+ InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->use_back());
+ if (!NextUse)
+ return true;
+
+ // If this isn't the final use, make sure the next insertelement is the only
+ // use. It's OK if the final constructed vector is used multiple times.
+ if (!IE->hasOneUse())
+ return false;
+
+ IE = NextUse;
+ }
+
+ return false;
+}
+
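+/// \brief Order values by the address of their type, purely to group
+/// values of the same type next to each other; the relative order of
+/// distinct types is arbitrary.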
+static bool PhiTypeSorterFunc(Value *V, Value *V2) {
+ return V->getType() < V2->getType();
+}
+
bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
- // Collect the incoming values from the PHIs.
- for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
- ++instr) {
- PHINode *P = dyn_cast<PHINode>(instr);
-
- if (!P)
- break;
+ SmallSet<Value *, 16> VisitedInstrs;
+
+ bool HaveVectorizedPhiNodes = true;
+ while (HaveVectorizedPhiNodes) {
+ HaveVectorizedPhiNodes = false;
+
+ // Collect the incoming values from the PHIs.
+ Incoming.clear();
+ for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
+ ++instr) {
+ PHINode *P = dyn_cast<PHINode>(instr);
+ if (!P)
+ break;
- // Stop constructing the list when you reach a different type.
- if (Incoming.size() && P->getType() != Incoming[0]->getType()) {
- Changed |= tryToVectorizeList(Incoming, R);
- Incoming.clear();
+ if (!VisitedInstrs.count(P))
+ Incoming.push_back(P);
}
- Incoming.push_back(P);
+ // Sort by type.
+ std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc);
+
+ // Try to vectorize elements based on their type.
+ for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
+ E = Incoming.end();
+ IncIt != E;) {
+
+ // Look for the next elements with the same type.
+ SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
+ while (SameTypeIt != E &&
+ (*SameTypeIt)->getType() == (*IncIt)->getType()) {
+ VisitedInstrs.insert(*SameTypeIt);
+ ++SameTypeIt;
+ }
+
+ // Try to vectorize them.
+ unsigned NumElts = (SameTypeIt - IncIt);
+ DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs ("
+ << NumElts << ")\n");
+ if (NumElts > 1 &&
+ tryToVectorizeList(ArrayRef<Value *>(IncIt, NumElts), R)) {
+ // Success: start over, because instructions might have been changed.
+ HaveVectorizedPhiNodes = true;
+ Changed = true;
+ break;
+ }
+
+ // Start over at the next instruction of a different type (or the end).
+ IncIt = SameTypeIt;
+ }
}
- if (Incoming.size() > 1)
- Changed |= tryToVectorizeList(Incoming, R);
+ VisitedInstrs.clear();
+
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
+ // We may go through BB multiple times, so skip the ones we have
+ // already checked.
+ if (!VisitedInstrs.insert(it))
+ continue;
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
if (isa<DbgInfoIntrinsic>(it))
continue;
@@ -1902,24 +2541,86 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (!BI)
continue;
- Value *Inst = BI->getOperand(0);
+ // Try to match and vectorize a horizontal reduction.
+ HorizontalReduction HorRdx;
+ if (ShouldVectorizeHor &&
+ HorRdx.matchAssociativeReduction(P, BI, DL) &&
+ HorRdx.tryToReduce(R, TTI)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+
+ Value *Inst = BI->getOperand(0);
if (Inst == P)
Inst = BI->getOperand(1);
- Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
+ if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid.
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+
continue;
}
+ // Try to vectorize horizontal reductions feeding into a store.
+ if (ShouldStartVectorizeHorAtStore)
+ if (StoreInst *SI = dyn_cast<StoreInst>(it))
+ if (BinaryOperator *BinOp =
+ dyn_cast<BinaryOperator>(SI->getValueOperand())) {
+ HorizontalReduction HorRdx;
+ if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
+ HorRdx.tryToReduce(R, TTI)) ||
+ tryToVectorize(BinOp, R))) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+ }
+
// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
- Changed |= true;
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid.
+ it = BB->begin();
+ e = BB->end();
+ continue;
+ }
+
+ for (int i = 0; i < 2; ++i) {
+ if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
+ if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid.
+ it = BB->begin();
+ e = BB->end();
+ }
+ }
+ }
+ continue;
+ }
+
+ // Try to vectorize trees that start at insertelement instructions.
+ if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
+ SmallVector<Value *, 8> Ops;
+ if (!findBuildVector(IE, Ops))
continue;
+
+ if (tryToVectorizeList(Ops, R)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
}
- for (int i = 0; i < 2; ++i)
- if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
- Changed |=
- tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
+
continue;
}
}