Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--  lib/Transforms/IPO/Android.mk                      1
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp           6
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt                  5
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp   161
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp               8
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp                   3
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp                   8
-rw-r--r--  lib/Transforms/IPO/IPO.cpp                         3
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp                4
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp                4
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp                    53
-rw-r--r--  lib/Transforms/IPO/LLVMBuild.txt                   2
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp               2
-rw-r--r--  lib/Transforms/IPO/LowerBitSets.cpp              612
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp            10
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp         53
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp                     8
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp                4
18 files changed, 794 insertions, 153 deletions
diff --git a/lib/Transforms/IPO/Android.mk b/lib/Transforms/IPO/Android.mk
index 1fe7d63..f08b0ad 100644
--- a/lib/Transforms/IPO/Android.mk
+++ b/lib/Transforms/IPO/Android.mk
@@ -16,6 +16,7 @@ transforms_ipo_SRC_FILES := \
Inliner.cpp \
Internalize.cpp \
LoopExtractor.cpp \
+ LowerBitSets.cpp \
MergeFunctions.cpp \
PartialInlining.cpp \
PassManagerBuilder.cpp \
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index c4706e8..7e48ce3 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -554,14 +554,14 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
BasicBlock *BB = Load->getParent();
AliasAnalysis::Location Loc = AA.getLocation(Load);
- if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
+ if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc,
+ AliasAnalysis::Mod))
return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
// To do this, we perform a depth first search on the inverse CFG from the
// loading block.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
if (AA.canBasicBlockModify(*TranspBB, Loc))
return false;
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 90c1c33..3df17b9 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -14,12 +14,17 @@ add_llvm_library(LLVMipo
Inliner.cpp
Internalize.cpp
LoopExtractor.cpp
+ LowerBitSets.cpp
MergeFunctions.cpp
PartialInlining.cpp
PassManagerBuilder.cpp
PruneEH.cpp
StripDeadPrototypes.cpp
StripSymbols.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/IPO
)
add_dependencies(LLVMipo intrinsics_gen)
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4045c09..4431311 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -146,7 +146,7 @@ namespace {
private:
Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
- unsigned RetValNum = 0);
+ unsigned RetValNum = -1U);
Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
void SurveyFunction(const Function &F);
@@ -387,14 +387,32 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
static unsigned NumRetVals(const Function *F) {
- if (F->getReturnType()->isVoidTy())
+ Type *RetTy = F->getReturnType();
+ if (RetTy->isVoidTy())
return 0;
- else if (StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+ else if (StructType *STy = dyn_cast<StructType>(RetTy))
return STy->getNumElements();
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getNumElements();
else
return 1;
}
+/// Returns the sub-type a function will return at a given Idx. Should
+/// correspond to the result type of an ExtractValue instruction executed with
+/// just that one Idx (i.e. only top-level structure is considered).
+static Type *getRetComponentType(const Function *F, unsigned Idx) {
+ Type *RetTy = F->getReturnType();
+ assert(!RetTy->isVoidTy() && "void type has no subtype");
+
+ if (StructType *STy = dyn_cast<StructType>(RetTy))
+ return STy->getElementType(Idx);
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+ return ATy->getElementType();
+ else
+ return RetTy;
+}
+
/// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
/// live, it adds Use to the MaybeLiveUses argument. Returns the determined
/// liveness of Use.
@@ -425,9 +443,24 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
// function's return value is live. We use RetValNum here, for the case
// that U is really a use of an insertvalue instruction that uses the
// original Use.
- RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
- // We might be live, depending on the liveness of Use.
- return MarkIfNotLive(Use, MaybeLiveUses);
+ const Function *F = RI->getParent()->getParent();
+ if (RetValNum != -1U) {
+ RetOrArg Use = CreateRet(F, RetValNum);
+ // We might be live, depending on the liveness of Use.
+ return MarkIfNotLive(Use, MaybeLiveUses);
+ } else {
+ DAE::Liveness Result = MaybeLive;
+ for (unsigned i = 0; i < NumRetVals(F); ++i) {
+ RetOrArg Use = CreateRet(F, i);
+ // We might be live, depending on the liveness of Use. If any
+ // sub-value is live, then the entire value is considered live. This
+ // is a conservative choice, and better tracking is possible.
+ DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses);
+ if (Result != Live)
+ Result = SubResult;
+ }
+ return Result;
+ }
}
if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
@@ -541,7 +574,6 @@ void DAE::SurveyFunction(const Function &F) {
// Keep track of the number of live retvals, so we can skip checks once all
// of them turn out to be live.
unsigned NumLiveRetVals = 0;
- Type *STy = dyn_cast<StructType>(F.getReturnType());
// Loop all uses of the function.
for (const Use &U : F.uses()) {
// If the function is PASSED IN as an argument, its address has been
@@ -563,34 +595,35 @@ void DAE::SurveyFunction(const Function &F) {
// Now, check how our return value(s) is/are used in this caller. Don't
// bother checking return values if all of them are live already.
- if (NumLiveRetVals != RetCount) {
- if (STy) {
- // Check all uses of the return value.
- for (const User *U : TheCall->users()) {
- const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U);
- if (Ext && Ext->hasIndices()) {
- // This use uses a part of our return value, survey the uses of
- // that part and store the results for this index only.
- unsigned Idx = *Ext->idx_begin();
- if (RetValLiveness[Idx] != Live) {
- RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
- if (RetValLiveness[Idx] == Live)
- NumLiveRetVals++;
- }
- } else {
- // Used by something else than extractvalue. Mark all return
- // values as live.
- for (unsigned i = 0; i != RetCount; ++i )
- RetValLiveness[i] = Live;
- NumLiveRetVals = RetCount;
- break;
- }
+ if (NumLiveRetVals == RetCount)
+ continue;
+
+ // Check all uses of the return value.
+ for (const Use &U : TheCall->uses()) {
+ if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+ // This use uses a part of our return value, survey the uses of
+ // that part and store the results for this index only.
+ unsigned Idx = *Ext->idx_begin();
+ if (RetValLiveness[Idx] != Live) {
+ RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+ if (RetValLiveness[Idx] == Live)
+ NumLiveRetVals++;
}
} else {
- // Single return value
- RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
- if (RetValLiveness[0] == Live)
+ // Used by something else than extractvalue. Survey, but assume that the
+ // result applies to all sub-values.
+ UseVector MaybeLiveAggregateUses;
+ if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
NumLiveRetVals = RetCount;
+ RetValLiveness.assign(RetCount, Live);
+ break;
+ } else {
+ for (unsigned i = 0; i != RetCount; ++i) {
+ if (RetValLiveness[i] != Live)
+ MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(),
+ MaybeLiveAggregateUses.end());
+ }
+ }
}
}
}
@@ -775,39 +808,29 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (RetTy->isVoidTy() || HasLiveReturnedArg) {
NRetTy = RetTy;
} else {
- StructType *STy = dyn_cast<StructType>(RetTy);
- if (STy)
- // Look at each of the original return values individually.
- for (unsigned i = 0; i != RetCount; ++i) {
- RetOrArg Ret = CreateRet(F, i);
- if (LiveValues.erase(Ret)) {
- RetTypes.push_back(STy->getElementType(i));
- NewRetIdxs[i] = RetTypes.size() - 1;
- } else {
- ++NumRetValsEliminated;
- DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
- << F->getName() << "\n");
- }
- }
- else
- // We used to return a single value.
- if (LiveValues.erase(CreateRet(F, 0))) {
- RetTypes.push_back(RetTy);
- NewRetIdxs[0] = 0;
+ // Look at each of the original return values individually.
+ for (unsigned i = 0; i != RetCount; ++i) {
+ RetOrArg Ret = CreateRet(F, i);
+ if (LiveValues.erase(Ret)) {
+ RetTypes.push_back(getRetComponentType(F, i));
+ NewRetIdxs[i] = RetTypes.size() - 1;
} else {
- DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
- << "\n");
++NumRetValsEliminated;
+ DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+ << F->getName() << "\n");
+ }
+ }
+ if (RetTypes.size() > 1) {
+ // More than one return type? Reduce it down to size.
+ if (StructType *STy = dyn_cast<StructType>(RetTy)) {
+ // Make the new struct packed if we used to return a packed struct
+ // already.
+ NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+ } else {
+ assert(isa<ArrayType>(RetTy) && "unexpected multi-value return");
+ NRetTy = ArrayType::get(RetTypes[0], RetTypes.size());
}
- if (RetTypes.size() > 1)
- // More than one return type? Return a struct with them. Also, if we used
- // to return a struct and didn't change the number of return values,
- // return a struct again. This prevents changing {something} into
- // something and {} into void.
- // Make the new struct packed if we used to return a packed struct
- // already.
- NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
- else if (RetTypes.size() == 1)
+ } else if (RetTypes.size() == 1)
// One return type? Just a simple value then, but only if we didn't use to
// return a struct with that simple value before.
NRetTy = RetTypes.front();
@@ -959,9 +982,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (!Call->getType()->isX86_MMXTy())
Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
} else {
- assert(RetTy->isStructTy() &&
+ assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
"Return type changed, but not into a void. The old return type"
- " must have been a struct!");
+ " must have been a struct or an array!");
Instruction *InsertPt = Call;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
BasicBlock::iterator IP = II->getNormalDest()->begin();
@@ -969,9 +992,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
InsertPt = IP;
}
- // We used to return a struct. Instead of doing smart stuff with all the
- // uses of this struct, we will just rebuild it using
- // extract/insertvalue chaining and let instcombine clean that up.
+ // We used to return a struct or array. Instead of doing smart stuff
+ // with all the uses, we will just rebuild it using extract/insertvalue
+ // chaining and let instcombine clean that up.
//
// Start out building up our return value from undef
Value *RetVal = UndefValue::get(RetTy);
@@ -1034,8 +1057,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (NFTy->getReturnType()->isVoidTy()) {
RetVal = nullptr;
} else {
- assert (RetTy->isStructTy());
- // The original return value was a struct, insert
+ assert(RetTy->isStructTy() || RetTy->isArrayTy());
+ // The original return value was a struct or array, insert
// extractvalue/insertvalue chains to extract only the values we need
// to return and insert them into our new result.
// This does generate messy code, but we'll let it to instcombine to
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 823ae53..8925e4c 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -31,7 +31,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
#define DEBUG_TYPE "functionattrs"
@@ -124,7 +124,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -139,7 +139,7 @@ INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
@@ -1702,7 +1702,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
AA = &getAnalysis<AliasAnalysis>();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = annotateLibraryCalls(SCC);
Changed |= AddReadAttrs(SCC);
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 705e929..0c844fe 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -219,6 +219,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
if (F->hasPrefixData())
MarkUsedGlobalsAsNeeded(F->getPrefixData());
+ if (F->hasPrologueData())
+ MarkUsedGlobalsAsNeeded(F->getPrologueData());
+
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 6e0ae83..45e04f1 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -38,7 +38,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -68,7 +68,7 @@ STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
namespace {
struct GlobalOpt : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -95,7 +95,7 @@ namespace {
char GlobalOpt::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
@@ -3042,7 +3042,7 @@ bool GlobalOpt::runOnModule(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool LocalChange = true;
while (LocalChange) {
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index b4d31d8..fcacec3 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -16,7 +16,7 @@
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/IPO.h"
#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
@@ -36,6 +36,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLoopExtractorPass(Registry);
initializeBlockExtractorPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
+ initializeLowerBitSetsPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
initializePruneEHPass(Registry);
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 819b2e0..dc56a02 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -15,7 +15,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
@@ -68,7 +68,7 @@ char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index d9a2b9e..9b01d81 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -13,7 +13,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
@@ -76,7 +76,7 @@ char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(SimpleInliner, "inline",
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 3abe7a8..305ad7a 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -17,7 +17,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
@@ -29,7 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -77,7 +77,7 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
- AU.addRequired<AssumptionTracker>();
+ AU.addRequired<AssumptionCacheTracker>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
@@ -97,25 +97,17 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
AttributeSet::FunctionIndex,
B);
- AttributeSet CallerAttr = Caller->getAttributes(),
- CalleeAttr = Callee->getAttributes();
- if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ if (Callee->hasFnAttribute(Attribute::StackProtectReq)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectReq);
- } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq)) {
+ } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectReq)) {
Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
Caller->addFnAttr(Attribute::StackProtectStrong);
- } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtect) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq) &&
- !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectStrong))
+ } else if (Callee->hasFnAttribute(Attribute::StackProtect) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectReq) &&
+ !Caller->hasFnAttribute(Attribute::StackProtectStrong))
Caller->addFnAttr(Attribute::StackProtect);
}
@@ -273,8 +265,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// would decrease the threshold.
Function *Caller = CS.getCaller();
bool OptSize = Caller && !Caller->isDeclaration() &&
- Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize);
+ Caller->hasFnAttribute(Attribute::OptimizeForSize);
if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
OptSizeThreshold < thres)
thres = OptSizeThreshold;
@@ -283,17 +274,14 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
// and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
bool InlineHint = Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::InlineHint);
- if (InlineHint && HintThreshold > thres
- && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize))
+ Callee->hasFnAttribute(Attribute::InlineHint);
+ if (InlineHint && HintThreshold > thres &&
+ !Caller->hasFnAttribute(Attribute::MinSize))
thres = HintThreshold;
// Listen to the cold attribute when it would decrease the threshold.
bool ColdCallee = Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Cold);
+ Callee->hasFnAttribute(Attribute::Cold);
// Command line argument for InlineLimit will override the default
// ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
// do not use the default cold threshold even if it is smaller.
@@ -443,10 +431,11 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
+ AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
SmallPtrSet<Function*, 8> SCCFunctions;
@@ -506,8 +495,8 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, DL, AA, AT);
-
+ InlineFunctionInfo InlineInfo(&CG, DL, AA, ACT);
+
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
bool Changed = false;
@@ -658,9 +647,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// Handle the case when this function is called and we only want to care
// about always-inline functions. This is a bit of a hack to share code
// between here and the InlineAlways pass.
- if (AlwaysInlineOnly &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline))
+ if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
continue;
// If the only remaining users of the function are dead constants, remove
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index 77e0b22..575dce4 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils Vectorize
+required_libraries = Analysis Core IPA InstCombine Scalar Support TransformUtils Vectorize
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 20414aa..41334ca 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -242,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
if (!Split) continue;
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs);
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
}
}
diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp
new file mode 100644
index 0000000..0a22a80
--- /dev/null
+++ b/lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,612 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
+// See http://llvm.org/docs/LangRef.html#bitsets for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/LowerBitSets.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lowerbitsets"
+
+STATISTIC(NumBitSetsCreated, "Number of bitsets created");
+STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
+STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");
+
+bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
+ if (Offset < ByteOffset)
+ return false;
+
+ if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
+ return false;
+
+ uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
+ if (BitOffset >= BitSize)
+ return false;
+
+ return (Bits[BitOffset / 8] >> (BitOffset % 8)) & 1;
+}
+
+bool BitSetInfo::containsValue(
+ const DataLayout *DL,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
+ uint64_t COffset) const {
+ if (auto GV = dyn_cast<GlobalVariable>(V)) {
+ auto I = GlobalLayout.find(GV);
+ if (I == GlobalLayout.end())
+ return false;
+ return containsGlobalOffset(I->second + COffset);
+ }
+
+ if (auto GEP = dyn_cast<GEPOperator>(V)) {
+ APInt APOffset(DL->getPointerSizeInBits(0), 0);
+ bool Result = GEP->accumulateConstantOffset(*DL, APOffset);
+ if (!Result)
+ return false;
+ COffset += APOffset.getZExtValue();
+ return containsValue(DL, GlobalLayout, GEP->getPointerOperand(),
+ COffset);
+ }
+
+ if (auto Op = dyn_cast<Operator>(V)) {
+ if (Op->getOpcode() == Instruction::BitCast)
+ return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
+
+ if (Op->getOpcode() == Instruction::Select)
+ return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
+ containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
+ }
+
+ return false;
+}
+
+BitSetInfo BitSetBuilder::build() {
+ if (Min > Max)
+ Min = 0;
+
+ // Normalize each offset against the minimum observed offset, and compute
+ // the bitwise OR of each of the offsets. The number of trailing zeros
+ // in the mask gives us the log2 of the alignment of all offsets, which
+ // allows us to compress the bitset by only storing one bit per aligned
+ // address.
+ uint64_t Mask = 0;
+ for (uint64_t &Offset : Offsets) {
+ Offset -= Min;
+ Mask |= Offset;
+ }
+
+ BitSetInfo BSI;
+ BSI.ByteOffset = Min;
+
+ BSI.AlignLog2 = 0;
+ // FIXME: Can probably do something smarter if all offsets are 0.
+ if (Mask != 0)
+ BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+
+ // Build the compressed bitset while normalizing the offsets against the
+ // computed alignment.
+ BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
+ uint64_t ByteSize = (BSI.BitSize + 7) / 8;
+ BSI.Bits.resize(ByteSize);
+ for (uint64_t Offset : Offsets) {
+ Offset >>= BSI.AlignLog2;
+ BSI.Bits[Offset / 8] |= 1 << (Offset % 8);
+ }
+
+ return BSI;
+}
+
+void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
+ // Create a new fragment to hold the layout for F.
+ Fragments.emplace_back();
+ std::vector<uint64_t> &Fragment = Fragments.back();
+ uint64_t FragmentIndex = Fragments.size() - 1;
+
+ for (auto ObjIndex : F) {
+ uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
+ if (OldFragmentIndex == 0) {
+ // We haven't seen this object index before, so just add it to the current
+ // fragment.
+ Fragment.push_back(ObjIndex);
+ } else {
+ // This index belongs to an existing fragment. Copy the elements of the
+ // old fragment into this one and clear the old fragment. We don't update
+ // the fragment map just yet, this ensures that any further references to
+ // indices from the old fragment in this fragment do not insert any more
+ // indices.
+ std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
+ Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
+ OldFragment.clear();
+ }
+ }
+
+ // Update the fragment map to point our object indices to this fragment.
+ for (uint64_t ObjIndex : Fragment)
+ FragmentMap[ObjIndex] = FragmentIndex;
+}
+
+namespace {
+
+struct LowerBitSets : public ModulePass {
+ static char ID;
+ LowerBitSets() : ModulePass(ID) {
+ initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ const DataLayout *DL;
+ IntegerType *Int1Ty;
+ IntegerType *Int8Ty;
+ IntegerType *Int32Ty;
+ Type *Int32PtrTy;
+ IntegerType *Int64Ty;
+ Type *IntPtrTy;
+
+ // The llvm.bitsets named metadata.
+ NamedMDNode *BitSetNM;
+
+ // Mapping from bitset mdstrings to the call sites that test them.
+ DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+
+ BitSetInfo
+ buildBitSet(MDString *BitSet,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,
+ GlobalVariable *BitSetGlobal, Value *BitOffset);
+ Value *
+ lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI,
+ GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+ void buildBitSetsFromGlobals(Module &M,
+ const std::vector<MDString *> &BitSets,
+ const std::vector<GlobalVariable *> &Globals);
+ bool buildBitSets(Module &M);
+ bool eraseBitSetMetadata(Module &M);
+
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
+};
+
+} // namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+ "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::doInitialization(Module &M) {
+ DL = M.getDataLayout();
+ if (!DL)
+ report_fatal_error("Data layout required");
+
+ Int1Ty = Type::getInt1Ty(M.getContext());
+ Int8Ty = Type::getInt8Ty(M.getContext());
+ Int32Ty = Type::getInt32Ty(M.getContext());
+ Int32PtrTy = PointerType::getUnqual(Int32Ty);
+ Int64Ty = Type::getInt64Ty(M.getContext());
+ IntPtrTy = DL->getIntPtrType(M.getContext(), 0);
+
+ BitSetNM = M.getNamedMetadata("llvm.bitsets");
+
+ BitSetTestCallSites.clear();
+
+ return false;
+}
+
+/// Build a bit set for BitSet using the object layouts in
+/// GlobalLayout.
+BitSetInfo LowerBitSets::buildBitSet(
+ MDString *BitSet,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ BitSetBuilder BSB;
+
+ // Compute the byte offset of each element of this bitset.
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+ auto OpGlobal = cast<GlobalVariable>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ uint64_t Offset =
+ cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+ ->getValue())->getZExtValue();
+
+ Offset += GlobalLayout.find(OpGlobal)->second;
+
+ BSB.addOffset(Offset);
+ }
+ }
+
+ return BSB.build();
+}
+
+/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
+/// Bits. This pattern matches to the bt instruction on x86.
+static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
+ Value *BitOffset) {
+ auto BitsType = cast<IntegerType>(Bits->getType());
+ unsigned BitWidth = BitsType->getBitWidth();
+
+ BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
+ Value *BitIndex =
+ B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
+ Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
+ Value *MaskedBits = B.CreateAnd(Bits, BitMask);
+ return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
+}
+
+/// Build a test that bit BitOffset is set in BSI, where
+/// BitSetGlobal is a global containing the bits in BSI.
+Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,
+ GlobalVariable *BitSetGlobal,
+ Value *BitOffset) {
+ if (BSI.Bits.size() <= 8) {
+ // If the bit set is sufficiently small, we can avoid a load by bit testing
+ // a constant.
+ IntegerType *BitsTy;
+ if (BSI.Bits.size() <= 4)
+ BitsTy = Int32Ty;
+ else
+ BitsTy = Int64Ty;
+
+ uint64_t Bits = 0;
+ for (auto I = BSI.Bits.rbegin(), E = BSI.Bits.rend(); I != E; ++I) {
+ Bits <<= 8;
+ Bits |= *I;
+ }
+ Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
+ return createMaskedBitTest(B, BitsConst, BitOffset);
+ } else {
+ // TODO: We might want to use the memory variant of the bt instruction
+ // with the previously computed bit offset at -Os. This instruction does
+ // exactly what we want but has been benchmarked as being slower than open
+ // coding the load+bt.
+ Value *BitSetGlobalOffset =
+ B.CreateLShr(BitOffset, ConstantInt::get(IntPtrTy, 5));
+ Value *BitSetEntryAddr = B.CreateGEP(
+ ConstantExpr::getBitCast(BitSetGlobal, Int32PtrTy), BitSetGlobalOffset);
+ Value *BitSetEntry = B.CreateLoad(BitSetEntryAddr);
+
+ return createMaskedBitTest(B, BitSetEntry, BitOffset);
+ }
+}
+
+/// Lower a llvm.bitset.test call to its implementation. Returns the value to
+/// replace the call with.
+Value *LowerBitSets::lowerBitSetCall(
+ CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal,
+ GlobalVariable *CombinedGlobal,
+ const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+ Value *Ptr = CI->getArgOperand(0);
+
+ if (BSI.containsValue(DL, GlobalLayout, Ptr))
+ return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext());
+
+ Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
+ Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
+ GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+
+ BasicBlock *InitialBB = CI->getParent();
+
+ IRBuilder<> B(CI);
+
+ Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
+
+ if (BSI.isSingleOffset())
+ return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
+
+ Value *BitOffset;
+ if (BSI.AlignLog2 == 0) {
+ BitOffset = PtrOffset;
+ } else {
+ // We need to check that the offset both falls within our range and is
+ // suitably aligned. We can check both properties at the same time by
+ // performing a right rotate by log2(alignment) followed by an integer
+ // comparison against the bitset size. The rotate will move the lower
+ // order bits that need to be zero into the higher order bits of the
+ // result, causing the comparison to fail if they are nonzero. The rotate
+ // also conveniently gives us a bit offset to use during the load from
+ // the bitset.
+ Value *OffsetSHR =
+ B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
+ Value *OffsetSHL = B.CreateShl(
+ PtrOffset, ConstantInt::get(IntPtrTy, DL->getPointerSizeInBits(0) -
+ BSI.AlignLog2));
+ BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+ }
+
+ Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+ Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+
+ // If the bit set is all ones, testing against it is unnecessary.
+ if (BSI.isAllOnes())
+ return OffsetInRange;
+
+ TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
+ IRBuilder<> ThenB(Term);
+
+ // Now that we know that the offset is in range and aligned, load the
+ // appropriate bit from the bitset.
+ Value *Bit = createBitSetTest(ThenB, BSI, BitSetGlobal, BitOffset);
+
+ // The value we want is 0 if we came directly from the initial block
+ // (having failed the range or alignment checks), or the loaded bit if
+ // we came from the block in which we loaded it.
+ B.SetInsertPoint(CI);
+ PHINode *P = B.CreatePHI(Int1Ty, 2);
+ P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+ P->addIncoming(Bit, ThenB.GetInsertBlock());
+ return P;
+}
+
+/// Given a disjoint set of bitsets and globals, layout the globals, build the
+/// bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromGlobals(
+ Module &M,
+ const std::vector<MDString *> &BitSets,
+ const std::vector<GlobalVariable *> &Globals) {
+ // Build a new global with the combined contents of the referenced globals.
+ std::vector<Constant *> GlobalInits;
+ for (GlobalVariable *G : Globals) {
+ GlobalInits.push_back(G->getInitializer());
+ uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType());
+
+ // Compute the amount of padding required to align the next element to the
+ // next power of 2.
+ uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+ // Cap at 128 was found experimentally to have a good data/instruction
+ // overhead tradeoff.
+ if (Padding > 128)
+ Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+ GlobalInits.push_back(
+ ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ }
+ if (!GlobalInits.empty())
+ GlobalInits.pop_back();
+ Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
+ auto CombinedGlobal =
+ new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, NewInit);
+
+ const StructLayout *CombinedGlobalLayout =
+ DL->getStructLayout(cast<StructType>(NewInit->getType()));
+
+ // Compute the offsets of the original globals within the new global.
+ DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+ for (unsigned I = 0; I != Globals.size(); ++I)
+ // Multiply by 2 to account for padding elements.
+ GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
+
+ // For each bitset in this disjoint set...
+ for (MDString *BS : BitSets) {
+ // Build the bitset.
+ BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+
+ // Create a global in which to store it.
+ ++NumBitSetsCreated;
+ Constant *BitsConst = ConstantDataArray::get(M.getContext(), BSI.Bits);
+ auto BitSetGlobal = new GlobalVariable(
+ M, BitsConst->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, BitsConst, BS->getString() + ".bits");
+
+ // Lower each call to llvm.bitset.test for this bitset.
+ for (CallInst *CI : BitSetTestCallSites[BS]) {
+ ++NumBitSetCallsLowered;
+ Value *Lowered =
+ lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+ }
+ }
+
+ // Build aliases pointing to offsets into the combined global for each
+ // global from which we built the combined global, and replace references
+ // to the original globals with references to the aliases.
+ for (unsigned I = 0; I != Globals.size(); ++I) {
+ // Multiply by 2 to account for padding elements.
+ Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, I * 2)};
+ Constant *CombinedGlobalElemPtr =
+ ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);
+ GlobalAlias *GAlias = GlobalAlias::create(
+ Globals[I]->getType()->getElementType(),
+ Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),
+ "", CombinedGlobalElemPtr, &M);
+ GAlias->takeName(Globals[I]);
+ Globals[I]->replaceAllUsesWith(GAlias);
+ Globals[I]->eraseFromParent();
+ }
+}
+
+/// Lower all bit sets in this module.
+bool LowerBitSets::buildBitSets(Module &M) {
+ Function *BitSetTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+ if (!BitSetTestFunc)
+ return false;
+
+ // Equivalence class set containing bitsets and the globals they reference.
+ // This is used to partition the set of bitsets in the module into disjoint
+ // sets.
+ typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+ GlobalClassesTy;
+ GlobalClassesTy GlobalClasses;
+
+ for (const Use &U : BitSetTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+
+ auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+ report_fatal_error(
+ "Second argument of llvm.bitset.test must be metadata string");
+ auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+
+ // Add the call site to the list of call sites for this bit set. We also use
+ // BitSetTestCallSites to keep track of whether we have seen this bit set
+ // before. If we have, we don't need to re-add the referenced globals to the
+ // equivalence class.
+ std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+ bool> Ins =
+ BitSetTestCallSites.insert(
+ std::make_pair(BitSet, std::vector<CallInst *>()));
+ Ins.first->second.push_back(CI);
+ if (!Ins.second)
+ continue;
+
+ // Add the bitset to the equivalence class.
+ GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
+ GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+ if (!BitSetNM)
+ continue;
+
+ // Verify the bitset metadata and add the referenced globals to the bitset's
+ // equivalence class.
+ for (MDNode *Op : BitSetNM->operands()) {
+ if (Op->getNumOperands() != 3)
+ report_fatal_error(
+ "All operands of llvm.bitsets metadata must have 3 elements");
+
+ if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+ continue;
+
+ auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+ if (!OpConstMD)
+ report_fatal_error("Bit set element must be a constant");
+ auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+ if (!OpGlobal)
+ report_fatal_error("Bit set element must refer to global");
+
+ auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+ if (!OffsetConstMD)
+ report_fatal_error("Bit set element offset must be a constant");
+ auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+ if (!OffsetInt)
+ report_fatal_error(
+ "Bit set element offset must be an integer constant");
+
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+ }
+ }
+
+ if (GlobalClasses.empty())
+ return false;
+
+ // For each disjoint set we found...
+ for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
+ E = GlobalClasses.end();
+ I != E; ++I) {
+ if (!I->isLeader()) continue;
+
+ ++NumBitSetDisjointSets;
+
+ // Build the list of bitsets and referenced globals in this disjoint set.
+ std::vector<MDString *> BitSets;
+ std::vector<GlobalVariable *> Globals;
+ llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
+ llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
+ for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
+ MI != GlobalClasses.member_end(); ++MI) {
+ if ((*MI).is<MDString *>()) {
+ BitSetIndices[MI->get<MDString *>()] = BitSets.size();
+ BitSets.push_back(MI->get<MDString *>());
+ } else {
+ GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
+ Globals.push_back(MI->get<GlobalVariable *>());
+ }
+ }
+
+ // For each bitset, build a set of indices that refer to globals referenced
+ // by the bitset.
+ std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+ if (BitSetNM) {
+ for (MDNode *Op : BitSetNM->operands()) {
+ // Op = { bitset name, global, offset }
+ if (!Op->getOperand(1))
+ continue;
+ auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
+ if (I == BitSetIndices.end())
+ continue;
+
+ auto OpGlobal = cast<GlobalVariable>(
+ cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+ BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+ }
+ }
+
+ // Order the sets of indices by size. The GlobalLayoutBuilder works best
+ // when given small index sets first.
+ std::stable_sort(
+ BitSetMembers.begin(), BitSetMembers.end(),
+ [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
+
+ // Create a GlobalLayoutBuilder and provide it with index sets as layout
+ // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
+ // as close together as possible.
+ GlobalLayoutBuilder GLB(Globals.size());
+ for (auto &&MemSet : BitSetMembers)
+ GLB.addFragment(MemSet);
+
+ // Build a vector of globals with the computed layout.
+ std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
+ auto OGI = OrderedGlobals.begin();
+ for (auto &&F : GLB.Fragments)
+ for (auto &&Offset : F)
+ *OGI++ = Globals[Offset];
+
+ // Order bitsets by name for determinism.
+ std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
+ return S1->getString() < S2->getString();
+ });
+
+ // Build the bitsets from this disjoint set.
+ buildBitSetsFromGlobals(M, BitSets, OrderedGlobals);
+ }
+
+ return true;
+}
+
+bool LowerBitSets::eraseBitSetMetadata(Module &M) {
+ if (!BitSetNM)
+ return false;
+
+ M.eraseNamedMetadata(BitSetNM);
+ return true;
+}
+
+bool LowerBitSets::runOnModule(Module &M) {
+ bool Changed = buildBitSets(M);
+ Changed |= eraseBitSetMetadata(M);
+ return Changed;
+}
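The core of the new pass is the compressed bit set built by BitSetBuilder and queried via the rotate trick in lowerBitSetCall: offsets are normalized to the smallest member, the common alignment is taken from the trailing zeros of the OR of all offsets, and a single unsigned comparison after a right rotate rejects both misaligned and out-of-range pointers. The following is a minimal standalone sketch of that arithmetic in plain C++ over 64-bit byte offsets rather than LLVM IR; the names BitSet, buildBitSet and containsOffset are illustrative only and assume 64-bit pointers.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative re-implementation of the compression scheme from
// LowerBitSets.cpp, operating on raw offsets instead of IR values.
struct BitSet {
  uint64_t ByteOffset;        // smallest member offset
  uint64_t AlignLog2;         // log2 of the common alignment of members
  uint64_t BitSize;           // number of bits in the compressed set
  std::vector<uint8_t> Bits;  // one bit per aligned offset in range

  bool containsOffset(uint64_t Offset) const {
    if (Offset < ByteOffset)
      return false;
    // Combined alignment + range check: rotate the low (alignment) bits
    // into the high bits so one unsigned comparison rejects offsets that
    // are misaligned or past the end of the set.
    uint64_t Diff = Offset - ByteOffset;
    uint64_t BitOffset =
        (Diff >> AlignLog2) | (AlignLog2 ? Diff << (64 - AlignLog2) : 0);
    if (BitOffset >= BitSize)
      return false;
    return (Bits[BitOffset / 8] >> (BitOffset % 8)) & 1;
  }
};

// Build a compressed bit set from the byte offsets of the members.
static BitSet buildBitSet(std::vector<uint64_t> Offsets) {
  uint64_t Min = UINT64_MAX, Max = 0, Mask = 0;
  for (uint64_t O : Offsets) {
    Min = std::min(Min, O);
    Max = std::max(Max, O);
  }
  if (Min > Max)
    Min = Max = 0;  // empty set
  for (uint64_t &O : Offsets) {
    O -= Min;   // normalize against the smallest offset
    Mask |= O;  // trailing zeros of the OR give the common alignment
  }
  BitSet BS;
  BS.ByteOffset = Min;
  BS.AlignLog2 = Mask ? __builtin_ctzll(Mask) : 0;  // GCC/Clang builtin
  BS.BitSize = ((Max - Min) >> BS.AlignLog2) + 1;
  BS.Bits.assign((BS.BitSize + 7) / 8, 0);
  for (uint64_t O : Offsets) {
    O >>= BS.AlignLog2;
    BS.Bits[O / 8] |= 1 << (O % 8);
  }
  return BS;
}

int main() {
  // Members at offsets 0, 8 and 24 of a combined global: common alignment
  // is 8, so the set stores only four bits (offsets 0, 8, 16, 24).
  BitSet BS = buildBitSet({0, 8, 24});
  assert(BS.containsOffset(8));
  assert(!BS.containsOffset(16));  // aligned and in range, but not a member
  assert(!BS.containsOffset(12));  // misaligned
  assert(!BS.containsOffset(32));  // out of range
}

In the pass itself, sets of at most eight bytes skip the load entirely and test against an immediate constant (see createBitSetTest above); the sketch only shows the general load-based path.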
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 76d6dfa..4a7cb7b 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -58,13 +58,13 @@ Function* PartialInliner::unswitchFunction(Function* F) {
BasicBlock* returnBlock = nullptr;
BasicBlock* nonReturnBlock = nullptr;
unsigned returnCount = 0;
- for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
- SI != SE; ++SI)
- if (isa<ReturnInst>((*SI)->getTerminator())) {
- returnBlock = *SI;
+ for (BasicBlock *BB : successors(entryBlock)) {
+ if (isa<ReturnInst>(BB->getTerminator())) {
+ returnBlock = BB;
returnCount++;
} else
- nonReturnBlock = *SI;
+ nonReturnBlock = BB;
+ }
if (returnCount != 1)
return nullptr;
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index da85a91..9a75050 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -19,12 +19,11 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Vectorize.h"
@@ -118,7 +117,7 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
}
void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
- PassManagerBase &PM) const {
+ legacy::PassManagerBase &PM) const {
for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
if ((*GlobalExtensions)[i].first == ETy)
(*GlobalExtensions)[i].second(*this, PM);
@@ -127,8 +126,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
Extensions[i].second(*this, PM);
}
-void
-PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
+void PassManagerBuilder::addInitialAliasAnalysisPasses(
+ legacy::PassManagerBase &PM) const {
// Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
// BasicAliasAnalysis wins if they disagree. This is intended to help
// support "obvious" type-punning idioms.
@@ -139,11 +138,13 @@ PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
PM.add(createBasicAliasAnalysisPass());
}
-void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
+void PassManagerBuilder::populateFunctionPassManager(
+ legacy::FunctionPassManager &FPM) {
addExtensionsToPM(EP_EarlyAsPossible, FPM);
// Add LibraryInfo if we have some.
- if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo));
+ if (LibraryInfo)
+ FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
if (OptLevel == 0) return;
@@ -158,7 +159,8 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
FPM.add(createLowerExpectIntrinsicPass());
}
-void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
+void PassManagerBuilder::populateModulePassManager(
+ legacy::PassManagerBase &MPM) {
// If all optimizations are disabled, just run the always-inline pass and,
// if enabled, the function merging pass.
if (OptLevel == 0) {
@@ -182,7 +184,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
}
// Add LibraryInfo if we have some.
- if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo));
+ if (LibraryInfo)
+ MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
addInitialAliasAnalysisPasses(MPM);
@@ -228,7 +231,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
- MPM.add(createLoopRotatePass()); // Rotate Loop
+ // Rotate Loop - disable header duplication at -Oz
+ MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
MPM.add(createInstructionCombiningPass());
@@ -248,6 +252,11 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
+
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
MPM.add(createInstructionCombiningPass());
@@ -255,6 +264,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+ MPM.add(createLICMPass());
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -373,7 +383,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_OptimizerLast, MPM);
}
-void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
+void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
@@ -464,6 +474,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
PM.add(createJumpThreadingPass());
+ // Lower bitset metadata to bitsets.
+ PM.add(createLowerBitSetsPass());
+
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
@@ -476,15 +489,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
PM.add(createMergeFunctionsPass());
}
-void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
- TargetMachine *TM) {
- if (TM) {
- PM.add(new DataLayoutPass());
- TM->addAnalysisPasses(PM);
- }
-
+void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (LibraryInfo)
- PM.add(new TargetLibraryInfo(*LibraryInfo));
+ PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
if (VerifyInput)
PM.add(createVerifierPass());
@@ -567,7 +574,7 @@ void
LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM) {
PassManagerBuilder *Builder = unwrap(PMB);
- FunctionPassManager *FPM = unwrap<FunctionPassManager>(PM);
+ legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
Builder->populateFunctionPassManager(*FPM);
}
@@ -575,7 +582,7 @@ void
LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM) {
PassManagerBuilder *Builder = unwrap(PMB);
- PassManagerBase *MPM = unwrap(PM);
+ legacy::PassManagerBase *MPM = unwrap(PM);
Builder->populateModulePassManager(*MPM);
}
@@ -584,7 +591,7 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMBool Internalize,
LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
- PassManagerBase *LPM = unwrap(PM);
+ legacy::PassManagerBase *LPM = unwrap(PM);
// A small backwards compatibility hack. populateLTOPassManager used to take
// an RunInliner option.
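With PassManagerBuilder now taking the legacy pass manager types and wrapping LibraryInfo in a TargetLibraryInfoWrapperPass, client code populates its pass managers roughly as below. This is a hedged sketch, not canonical usage: it assumes the builder fields OptLevel, SizeLevel and LibraryInfo keep their existing names, that LibraryInfo is now a TargetLibraryInfoImpl (as the wrapper-pass construction above implies), and it omits ownership handling.

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

using namespace llvm;

// Populate function- and module-level legacy pass managers for a module.
static void runOptimizations(Module &M) {
  PassManagerBuilder PMB;
  PMB.OptLevel = 2;
  PMB.SizeLevel = 0;
  // LibraryInfo is a TargetLibraryInfoImpl; the builder wraps it in a
  // TargetLibraryInfoWrapperPass when populating the managers.
  PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(M.getTargetTriple()));

  legacy::FunctionPassManager FPM(&M);
  legacy::PassManager MPM;
  PMB.populateFunctionPassManager(FPM);
  PMB.populateModulePassManager(MPM);

  FPM.doInitialization();
  for (Function &F : M)
    FPM.run(F);
  FPM.doFinalization();
  MPM.run(M);
}

Note also that populateLTOPassManager no longer takes a TargetMachine, so callers that need the data layout or target-specific analyses add those passes themselves before populating.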
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index b2c4a09..1943b93 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -18,8 +18,10 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -175,7 +177,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
bool MadeChange = false;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- if (II->doesNotThrow()) {
+ if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
@@ -200,7 +202,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
BB->getInstList().pop_back();
// If the unwind block is now dead, nuke it.
- if (pred_begin(UnwindBlock) == pred_end(UnwindBlock))
+ if (pred_empty(UnwindBlock))
DeleteBasicBlock(UnwindBlock); // Delete the new BB.
++NumRemoved;
@@ -234,7 +236,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
/// updating the callgraph to reflect any now-obsolete edges due to calls that
/// exist in the BB.
void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
- assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!");
+ assert(pred_empty(BB) && "BB is not dead!");
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
CallGraphNode *CGN = CG[BB->getParent()];
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 3412b9e..816978e 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -301,8 +301,8 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// For each compile unit, find the live set of global variables/functions and
// replace the current list of potentially dead global variables/functions
// with the live list.
- SmallVector<Value *, 64> LiveGlobalVariables;
- SmallVector<Value *, 64> LiveSubprograms;
+ SmallVector<Metadata *, 64> LiveGlobalVariables;
+ SmallVector<Metadata *, 64> LiveSubprograms;
DenseSet<const MDNode *> VisitedSet;
for (DICompileUnit DIC : F.compile_units()) {