Diffstat (limited to 'lib/Transforms/IPO')
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp  |  80
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp      |  25
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp          |  26
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp          | 106
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp            |  40
-rw-r--r--  lib/Transforms/IPO/LowerBitSets.cpp       | 276
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp     |  54
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp |  48
8 files changed, 379 insertions, 276 deletions
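The thread running through all eight files is the removal of the optional DataLayoutPass: each pass now takes the layout as a const DataLayout reference fetched from the Module (or from a Function's parent Module), so the "DL may be null" checks disappear. A minimal sketch of the pattern follows; the helper name is invented for illustration, only the DataLayout accessors are real.

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Hypothetical helper illustrating the access pattern this series adopts.
    static uint64_t exampleAllocSize(const Function &F, Type *Ty) {
      // Before: DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
      //         const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
      // After: the layout always exists and is owned by the Module.
      const DataLayout &DL = F.getParent()->getDataLayout();
      return DL.getTypeAllocSize(Ty);
    }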
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 7e48ce3..46480bd 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -69,16 +69,15 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override; static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) { + : CallGraphSCCPass(ID), maxElements(maxElements) { initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; - const DataLayout *DL; private: - bool isDenselyPacked(Type *type); + bool isDenselyPacked(Type *type, const DataLayout &DL); bool canPaddingBeAccessed(Argument *Arg); CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; @@ -109,9 +108,6 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. @@ -128,7 +124,7 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { } /// \brief Checks if a type could have padding bytes. -bool ArgPromotion::isDenselyPacked(Type *type) { +bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) { // There is no size information, so be conservative. if (!type->isSized()) @@ -136,7 +132,7 @@ bool ArgPromotion::isDenselyPacked(Type *type) { // If the alloc size is not equal to the storage size, then there are padding // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. - if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type)) + if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)) return false; if (!isa<CompositeType>(type)) @@ -144,19 +140,20 @@ bool ArgPromotion::isDenselyPacked(Type *type) { // For homogenous sequential types, check for padding within members. if (SequentialType *seqTy = dyn_cast<SequentialType>(type)) - return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType()); + return isa<PointerType>(seqTy) || + isDenselyPacked(seqTy->getElementType(), DL); // Check for padding within and between elements of a struct. StructType *StructTy = cast<StructType>(type); - const StructLayout *Layout = DL->getStructLayout(StructTy); + const StructLayout *Layout = DL.getStructLayout(StructTy); uint64_t StartPos = 0; for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { Type *ElTy = StructTy->getElementType(i); - if (!isDenselyPacked(ElTy)) + if (!isDenselyPacked(ElTy, DL)) return false; if (StartPos != Layout->getElementOffsetInBits(i)) return false; - StartPos += DL->getTypeAllocSizeInBits(ElTy); + StartPos += DL.getTypeAllocSizeInBits(ElTy); } return true; @@ -236,6 +233,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // IR, while in the callee the classification is determined dynamically based // on the number of registers consumed so far. if (F->isVarArg()) return nullptr; + const DataLayout &DL = F->getParent()->getDataLayout(); // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. 
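A note on the isDenselyPacked() change above: with the layout guaranteed, the padding test it builds on reduces to comparing a type's bit size with its alloc size. The helper below is illustrative only, not part of the patch.

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // A type has padding bytes when its storage is larger than its value,
    // e.g. x86_fp80 on x86-64: size 80 bits, alloc size 128 bits.
    static bool hasPaddingBytes(Type *Ty, const DataLayout &DL) {
      return !Ty->isSized() ||
             DL.getTypeSizeInBits(Ty) != DL.getTypeAllocSizeInBits(Ty);
    }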
@@ -250,8 +248,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // packed or if we can prove the padding bytes are never accessed. This does // not apply to inalloca. bool isSafeToPromote = - PtrArg->hasByValAttr() && - (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg)); + PtrArg->hasByValAttr() && + (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { @@ -310,9 +308,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { /// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCallersPassInValidPointerForArgument(Argument *Arg, - const DataLayout *DL) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { Function *Callee = Arg->getParent(); + const DataLayout &DL = Callee->getParent()->getDataLayout(); unsigned ArgNo = Arg->getArgNo(); @@ -430,7 +428,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL)) + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -586,7 +584,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, FunctionType *FTy = F->getFunctionType(); std::vector<Type*> Params; - typedef std::set<IndicesVector> ScalarizeTable; + typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we @@ -623,8 +621,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Simple byval argument? Just add all the struct element types. 
Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Params.push_back(STy->getElementType(i)); + Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(I)) { // Unchanged argument @@ -647,7 +644,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ScalarizeTable &ArgIndices = ScalarizedElements[I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); - assert(isa<LoadInst>(UI) || isa<GetElementPtrInst>(UI)); + Type *SrcTy; + if (LoadInst *L = dyn_cast<LoadInst>(UI)) + SrcTy = L->getType(); + else + SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); IndicesVector Indices; Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single @@ -659,7 +660,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); - ArgIndices.insert(Indices); + ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(UI)) OrigLoad = L; @@ -673,11 +674,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI)); + Params.push_back( + GetElementPtrInst::getIndexedType(I->getType(), SI->second)); assert(Params.back()); } - if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) + if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; @@ -768,9 +770,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create(*AI, Idxs, - (*AI)->getName()+"."+utostr(i), - Call); + Value *Idx = GetElementPtrInst::Create( + STy, *AI, Idxs, (*AI)->getName() + "." + utostr(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } @@ -783,12 +784,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; - LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)]; - if (!SI->empty()) { - Ops.reserve(SI->size()); + LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)]; + if (!SI->second.empty()) { + Ops.reserve(SI->second.size()); Type *ElTy = V->getType(); - for (IndicesVector::const_iterator II = SI->begin(), - IE = SI->end(); II != IE; ++II) { + for (IndicesVector::const_iterator II = SI->second.begin(), + IE = SI->second.end(); + II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = (ElTy->isStructTy() ? @@ -799,7 +801,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. 
- V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call); + V = GetElementPtrInst::Create(SI->first, V, Ops, + V->getName() + ".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } @@ -903,10 +906,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = - GetElementPtrInst::Create(TheAlloca, Idxs, - TheAlloca->getName()+"."+Twine(i), - InsertPt); + Value *Idx = GetElementPtrInst::Create( + AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), + InsertPt); I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } @@ -939,7 +941,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { - assert(ArgIndices.begin()->empty() && + assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); LI->replaceAllUsesWith(I2); @@ -961,7 +963,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); - *It != Operands; ++It, ++TheArg) { + It->second != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 0b6ade9..8ce7646 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -52,7 +52,6 @@ namespace { // alignment to a concrete value. unsigned getAlignment(GlobalVariable *GV) const; - const DataLayout *DL; }; } @@ -89,32 +88,22 @@ static bool IsBetterCanonical(const GlobalVariable &A, return A.hasUnnamedAddr(); } -bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const { - return DL || GV->getAlignment() != 0; -} - unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { unsigned Align = GV->getAlignment(); if (Align) return Align; - if (DL) - return DL->getPreferredAlignment(GV); - return 0; + return GV->getParent()->getDataLayout().getPreferredAlignment(GV); } bool ConstantMerge::runOnModule(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); - - // Map unique <constants, has-unknown-alignment> pairs to globals. We don't - // want to merge globals of unknown alignment with those of explicit - // alignment. If we have DataLayout, we always know the alignment. - DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap; + + // Map unique constants to globals. + DenseMap<Constant *, GlobalVariable *> CMap; // Replacements - This vector contains a list of replacements to perform. SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; @@ -156,8 +145,7 @@ bool ConstantMerge::runOnModule(Module &M) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. - PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); - GlobalVariable *&Slot = CMap[Pair]; + GlobalVariable *&Slot = CMap[Init]; // If this is the first constant we find or if the old one is local, // replace with the current one. 
If the current is externally visible @@ -188,8 +176,7 @@ bool ConstantMerge::runOnModule(Module &M) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. - PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); - GlobalVariable *Slot = CMap[Pair]; + GlobalVariable *Slot = CMap[Init]; if (!Slot || Slot == GV) continue; diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 0c844fe..ba04c80 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Pass.h" +#include <unordered_map> using namespace llvm; #define DEBUG_TYPE "globaldce" @@ -47,6 +48,7 @@ namespace { private: SmallPtrSet<GlobalValue*, 32> AliveGlobals; SmallPtrSet<Constant *, 8> SeenConstants; + std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; /// GlobalIsNeeded - mark the specific global value as needed, and /// recursively mark anything that it uses as also needed. @@ -78,6 +80,17 @@ bool GlobalDCE::runOnModule(Module &M) { // Remove empty functions from the global ctors list. Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + // Collect the set of members for each comdat. + for (Function &F : M) + if (Comdat *C = F.getComdat()) + ComdatMembers.insert(std::make_pair(C, &F)); + for (GlobalVariable &GV : M.globals()) + if (Comdat *C = GV.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GV)); + for (GlobalAlias &GA : M.aliases()) + if (Comdat *C = GA.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GA)); + // Loop over the module, adding globals which are obviously necessary. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); @@ -177,6 +190,7 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); SeenConstants.clear(); + ComdatMembers.clear(); return Changed; } @@ -188,17 +202,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { if (!AliveGlobals.insert(G).second) return; - Module *M = G->getParent(); if (Comdat *C = G->getComdat()) { - for (Function &F : *M) - if (F.getComdat() == C) - GlobalIsNeeded(&F); - for (GlobalVariable &GV : M->globals()) - if (GV.getComdat() == C) - GlobalIsNeeded(&GV); - for (GlobalAlias &GA : M->aliases()) - if (GA.getComdat() == C) - GlobalIsNeeded(&GA); + for (auto &&CM : make_range(ComdatMembers.equal_range(C))) + GlobalIsNeeded(CM.second); } if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 45e04f1..20b41fb 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -38,7 +39,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -86,7 +86,6 @@ namespace { const GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); - const DataLayout *DL; TargetLibraryInfo *TLI; 
SmallSet<const Comdat *, 8> NotDiscardableComdats; }; @@ -269,7 +268,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, /// quick scan over the use list to clean up the easy and obvious cruft. This /// returns true if it made a change. static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { bool Changed = false; // Note that we need to use a weak value handle for the worklist items. When @@ -318,8 +317,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // and will invalidate our notion of what Init is. Constant *SubInit = nullptr; if (!isa<ConstantExpr>(GEP->getOperand(0))) { - ConstantExpr *CE = - dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI)); + ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( + ConstantFoldInstruction(GEP, DL, TLI)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -580,8 +579,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { Idxs.push_back(NullInt); for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) Idxs.push_back(GEPI->getOperand(i)); - NewPtr = GetElementPtrInst::Create(NewPtr, Idxs, - GEPI->getName()+"."+Twine(Val),GEPI); + NewPtr = GetElementPtrInst::Create( + NewPtr->getType()->getPointerElementType(), NewPtr, Idxs, + GEPI->getName() + "." + Twine(Val), GEPI); } } GEP->replaceAllUsesWith(NewPtr); @@ -739,7 +739,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// if the loaded value is dynamically null, then we know that they cannot be /// reachable with a null optimize away the load. static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { bool Changed = false; @@ -802,7 +802,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the /// instructions that are foldable. -static void ConstantPropUsersOf(Value *V, const DataLayout *DL, +static void ConstantPropUsersOf(Value *V, const DataLayout &DL, TargetLibraryInfo *TLI) { for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) if (Instruction *I = dyn_cast<Instruction>(*UI++)) @@ -822,12 +822,10 @@ static void ConstantPropUsersOf(Value *V, const DataLayout *DL, /// the specified malloc. Because it is always the result of the specified /// malloc, there is no reason to actually DO the malloc. Instead, turn the /// malloc into a global, and any loads of GV as uses of the new global. -static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, - CallInst *CI, - Type *AllocTy, - ConstantInt *NElements, - const DataLayout *DL, - TargetLibraryInfo *TLI) { +static GlobalVariable * +OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, + ConstantInt *NElements, const DataLayout &DL, + TargetLibraryInfo *TLI) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); Type *GlobalType; @@ -1167,7 +1165,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, InsertedScalarizedValues, PHIsToRewrite), LI->getName()+".f"+Twine(FieldNo), LI); - } else if (PHINode *PN = dyn_cast<PHINode>(V)) { + } else { + PHINode *PN = cast<PHINode>(V); // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. 
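The GEP rewrites in this file (and in ArgumentPromotion.cpp above) follow the other API migration visible in the diff: GetElementPtrInst::Create now takes the source element type explicitly instead of deriving it from the pointer operand. A standalone sketch, with an invented function name:

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Build a pointer to field FieldNo of *Base, spelling out the struct type.
    static Value *emitFieldAddr(StructType *STy, Value *Base, unsigned FieldNo,
                                Instruction *InsertPt) {
      LLVMContext &Ctx = InsertPt->getContext();
      Value *Idxs[] = {ConstantInt::get(Type::getInt32Ty(Ctx), 0),
                       ConstantInt::get(Type::getInt32Ty(Ctx), FieldNo)};
      return GetElementPtrInst::Create(STy, Base, Idxs,
                                       Base->getName() + "." + Twine(FieldNo),
                                       InsertPt);
    }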
@@ -1181,8 +1180,6 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, PN->getName()+".f"+Twine(FieldNo), PN); Result = NewPN; PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); - } else { - llvm_unreachable("Unknown usable value"); } return FieldVals[FieldNo] = Result; @@ -1224,7 +1221,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, GEPIdx.push_back(GEPI->getOperand(1)); GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx, + Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx, GEPI->getName(), GEPI); GEPI->replaceAllUsesWith(NGEPI); GEPI->eraseFromParent(); @@ -1271,7 +1268,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value *NElems, const DataLayout *DL, + Value *NElems, const DataLayout &DL, const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); Type *MAT = getMallocAllocatedType(CI, TLI); @@ -1301,10 +1298,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, GV->getThreadLocalMode()); FieldGlobals.push_back(NGV); - unsigned TypeSize = DL->getTypeAllocSize(FieldTy); + unsigned TypeSize = DL.getTypeAllocSize(FieldTy); if (StructType *ST = dyn_cast<StructType>(FieldTy)) - TypeSize = DL->getStructLayout(ST)->getSizeInBytes(); - Type *IntPtrTy = DL->getIntPtrType(CI->getType()); + TypeSize = DL.getStructLayout(ST)->getSizeInBytes(); + Type *IntPtrTy = DL.getIntPtrType(CI->getType()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), NElems, nullptr, @@ -1459,16 +1456,12 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a /// pointer global variable with a single value stored it that is a malloc or /// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, - CallInst *CI, +static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, Type *AllocTy, AtomicOrdering Ordering, Module::global_iterator &GVI, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { - if (!DL) - return false; - // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1504,7 +1497,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // Restrict this transformation to only working on small allocations // (2048 bytes currently), as we don't want to introduce a 16M global or // something. - if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) { + if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); return true; } @@ -1534,8 +1527,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. 
malloc [100 x struct],1 -> malloc struct, 100 if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { - Type *IntPtrTy = DL->getIntPtrType(CI->getType()); - unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes(); + Type *IntPtrTy = DL.getIntPtrType(CI->getType()); + unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, @@ -1563,7 +1556,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, AtomicOrdering Ordering, Module::global_iterator &GVI, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1733,6 +1726,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI, const GlobalStatus &GS) { + auto &DL = GV->getParent()->getDataLayout(); // If this is a first class global and has only one accessing function // and this function is main (which we know is not recursive), we replace // the global with a local alloca in this function. @@ -1804,12 +1798,10 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ++NumMarked; return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) { - const DataLayout &DL = DLP->getDataLayout(); - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } + const DataLayout &DL = GV->getParent()->getDataLayout(); + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; } } else if (GS.StoredType == GlobalStatus::StoredOnce) { // If the initial value for the global was an undef value, and if only @@ -1954,6 +1946,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { // Simplify the initializer. if (GV->hasInitializer()) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { + auto &DL = M.getDataLayout(); Constant *New = ConstantFoldConstantExpression(CE, DL, TLI); if (New && New != CE) GV->setInitializer(New); @@ -1971,9 +1964,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { static inline bool isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant*> &SimpleConstants, - const DataLayout *DL); - + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL); /// isSimpleEnoughValueToCommit - Return true if the specified constant can be /// handled by the code generator. We don't want to generate something like: @@ -1983,9 +1975,10 @@ isSimpleEnoughValueToCommit(Constant *C, /// This function should be called if C was not found (but just got inserted) /// in SimpleConstants to avoid having to rescan the same constants all the /// time. -static bool isSimpleEnoughValueToCommitHelper(Constant *C, - SmallPtrSetImpl<Constant*> &SimpleConstants, - const DataLayout *DL) { +static bool +isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { // Simple global addresses are supported, do not allow dllimport or // thread-local globals. 
if (auto *GV = dyn_cast<GlobalValue>(C)) @@ -2019,8 +2012,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, case Instruction::PtrToInt: // int <=> ptr is fine if the int type is the same size as the // pointer type. - if (!DL || DL->getTypeSizeInBits(CE->getType()) != - DL->getTypeSizeInBits(CE->getOperand(0)->getType())) + if (DL.getTypeSizeInBits(CE->getType()) != + DL.getTypeSizeInBits(CE->getOperand(0)->getType())) return false; return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); @@ -2042,8 +2035,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, static inline bool isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant*> &SimpleConstants, - const DataLayout *DL) { + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { // If we already checked this constant, we win. if (!SimpleConstants.insert(C).second) return true; @@ -2174,8 +2167,8 @@ namespace { /// Once an evaluation call fails, the evaluation object should not be reused. class Evaluator { public: - Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI) - : DL(DL), TLI(TLI) { + Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) + : DL(DL), TLI(TLI) { ValueStack.emplace_back(); } @@ -2249,7 +2242,7 @@ private: /// simple enough to live in a static initializer of a global. SmallPtrSet<Constant*, 8> SimpleConstants; - const DataLayout *DL; + const DataLayout &DL; const TargetLibraryInfo *TLI; }; @@ -2498,9 +2491,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Type *ElemTy = cast<PointerType>(GV->getType())->getElementType(); - if (DL && !Size->isAllOnesValue() && + if (!Size->isAllOnesValue() && Size->getValue().getLimitedValue() >= - DL->getTypeStoreSize(ElemTy)) { + DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV << "\n"); @@ -2689,7 +2682,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL, +static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Call the function. Evaluator Eval(DL, TLI); @@ -3040,8 +3033,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? 
&DLP->getDataLayout() : nullptr; + auto &DL = M.getDataLayout(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); bool LocalChange = true; diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 305ad7a..3aa4ee5 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" @@ -29,7 +30,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -72,8 +72,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) InlineLimit : Threshold), InsertLifetime(InsertLifetime) {} -/// getAnalysisUsage - For this class, we declare that we require and preserve -/// the call graph. If the derived class implements this method, it should +/// For this class, we declare that we require and preserve the call graph. +/// If the derived class implements this method, it should /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); @@ -111,18 +111,17 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { Caller->addFnAttr(Attribute::StackProtect); } -/// InlineCallIfPossible - If it is possible to inline the specified call site, +/// If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. /// /// This function also does some basic book-keeping to update the IR. The /// InlinedArrayAllocas map keeps track of any allocas that are already -/// available from other functions inlined into the caller. If we are able to +/// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, - int InlineHistory, bool InsertLifetime, - const DataLayout *DL) { + int InlineHistory, bool InsertLifetime) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -198,11 +197,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, unsigned Align1 = AI->getAlignment(), Align2 = AvailableAlloca->getAlignment(); - // If we don't have data layout information, and only one alloca is using - // the target default, then we can't safely merge them because we can't - // pick the greater alignment. - if (!DL && (!Align1 || !Align2) && Align1 != Align2) - continue; // The available alloca has to be in the right function, not in some other // function in this SCC. @@ -223,8 +217,8 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, if (Align1 != Align2) { if (!Align1 || !Align2) { - assert(DL && "DataLayout required to compare default alignments"); - unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType()); + const DataLayout &DL = Caller->getParent()->getDataLayout(); + unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType()); Align1 = Align1 ? Align1 : TypeAlign; Align2 = Align2 ? 
Align2 : TypeAlign; @@ -300,8 +294,7 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) { emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); } -/// shouldInline - Return true if the inliner should attempt to inline -/// at the given CallSite. +/// Return true if the inliner should attempt to inline at the given CallSite. bool Inliner::shouldInline(CallSite CS) { InlineCost IC = getInlineCost(CS); @@ -415,7 +408,7 @@ bool Inliner::shouldInline(CallSite CS) { return true; } -/// InlineHistoryIncludes - Return true if the specified inline history ID +/// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { @@ -432,8 +425,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); @@ -495,7 +486,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, DL, AA, ACT); + InlineFunctionInfo InlineInfo(&CG, AA, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -553,7 +544,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // Attempt to inline the function. if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime, DL)) { + InlineHistoryID, InsertLifetime)) { emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, Twine(Callee->getName() + " will not be inlined into " + @@ -625,14 +616,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { return Changed; } -// doFinalization - Remove now-dead linkonce functions at the end of -// processing to avoid breaking the SCC traversal. +/// Remove now-dead linkonce functions at the end of +/// processing to avoid breaking the SCC traversal. bool Inliner::doFinalization(CallGraph &CG) { return removeDeadFunctions(CG); } -/// removeDeadFunctions - Remove dead functions that are not included in -/// DNR (Do Not Remove) list. +/// Remove dead functions that are not included in DNR (Do Not Remove) list. 
bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { SmallVector<CallGraphNode*, 16> FunctionsToRemove; diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp index 0a22a80..fe00d92 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerBitSets.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalVariable.h" @@ -31,10 +32,17 @@ using namespace llvm; #define DEBUG_TYPE "lowerbitsets" -STATISTIC(NumBitSetsCreated, "Number of bitsets created"); +STATISTIC(ByteArraySizeBits, "Byte array size in bits"); +STATISTIC(ByteArraySizeBytes, "Byte array size in bytes"); +STATISTIC(NumByteArraysCreated, "Number of byte arrays created"); STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered"); STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets"); +static cl::opt<bool> AvoidReuse( + "lowerbitsets-avoid-reuse", + cl::desc("Try to avoid reuse of byte array addresses using aliases"), + cl::Hidden, cl::init(true)); + bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { if (Offset < ByteOffset) return false; @@ -46,11 +54,11 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { if (BitOffset >= BitSize) return false; - return (Bits[BitOffset / 8] >> (BitOffset % 8)) & 1; + return Bits.count(BitOffset); } bool BitSetInfo::containsValue( - const DataLayout *DL, + const DataLayout &DL, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V, uint64_t COffset) const { if (auto GV = dyn_cast<GlobalVariable>(V)) { @@ -61,8 +69,8 @@ bool BitSetInfo::containsValue( } if (auto GEP = dyn_cast<GEPOperator>(V)) { - APInt APOffset(DL->getPointerSizeInBits(0), 0); - bool Result = GEP->accumulateConstantOffset(*DL, APOffset); + APInt APOffset(DL.getPointerSizeInBits(0), 0); + bool Result = GEP->accumulateConstantOffset(DL, APOffset); if (!Result) return false; COffset += APOffset.getZExtValue(); @@ -101,18 +109,15 @@ BitSetInfo BitSetBuilder::build() { BSI.ByteOffset = Min; BSI.AlignLog2 = 0; - // FIXME: Can probably do something smarter if all offsets are 0. if (Mask != 0) BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined); // Build the compressed bitset while normalizing the offsets against the // computed alignment. BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1; - uint64_t ByteSize = (BSI.BitSize + 7) / 8; - BSI.Bits.resize(ByteSize); for (uint64_t Offset : Offsets) { Offset >>= BSI.AlignLog2; - BSI.Bits[Offset / 8] |= 1 << (Offset % 8); + BSI.Bits.insert(Offset); } return BSI; @@ -147,15 +152,47 @@ void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) { FragmentMap[ObjIndex] = FragmentIndex; } +void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits, + uint64_t BitSize, uint64_t &AllocByteOffset, + uint8_t &AllocMask) { + // Find the smallest current allocation. + unsigned Bit = 0; + for (unsigned I = 1; I != BitsPerByte; ++I) + if (BitAllocs[I] < BitAllocs[Bit]) + Bit = I; + + AllocByteOffset = BitAllocs[Bit]; + + // Add our size to it. + unsigned ReqSize = AllocByteOffset + BitSize; + BitAllocs[Bit] = ReqSize; + if (Bytes.size() < ReqSize) + Bytes.resize(ReqSize); + + // Set our bits. 
+ AllocMask = 1 << Bit; + for (uint64_t B : Bits) + Bytes[AllocByteOffset + B] |= AllocMask; +} + namespace { +struct ByteArrayInfo { + std::set<uint64_t> Bits; + uint64_t BitSize; + GlobalVariable *ByteArray; + Constant *Mask; +}; + struct LowerBitSets : public ModulePass { static char ID; LowerBitSets() : ModulePass(ID) { initializeLowerBitSetsPass(*PassRegistry::getPassRegistry()); } - const DataLayout *DL; + Module *M; + + bool LinkerSubsectionsViaSymbols; IntegerType *Int1Ty; IntegerType *Int8Ty; IntegerType *Int32Ty; @@ -169,20 +206,23 @@ struct LowerBitSets : public ModulePass { // Mapping from bitset mdstrings to the call sites that test them. DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites; + std::vector<ByteArrayInfo> ByteArrayInfos; + BitSetInfo buildBitSet(MDString *BitSet, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); - Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI, - GlobalVariable *BitSetGlobal, Value *BitOffset); + ByteArrayInfo *createByteArray(BitSetInfo &BSI); + void allocateByteArrays(); + Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI, + Value *BitOffset); Value * - lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI, - GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal, + lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, + GlobalVariable *CombinedGlobal, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); - void buildBitSetsFromGlobals(Module &M, - const std::vector<MDString *> &BitSets, + void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets, const std::vector<GlobalVariable *> &Globals); - bool buildBitSets(Module &M); - bool eraseBitSetMetadata(Module &M); + bool buildBitSets(); + bool eraseBitSetMetadata(); bool doInitialization(Module &M) override; bool runOnModule(Module &M) override; @@ -198,19 +238,21 @@ char LowerBitSets::ID = 0; ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; } -bool LowerBitSets::doInitialization(Module &M) { - DL = M.getDataLayout(); - if (!DL) - report_fatal_error("Data layout required"); +bool LowerBitSets::doInitialization(Module &Mod) { + M = &Mod; + const DataLayout &DL = Mod.getDataLayout(); - Int1Ty = Type::getInt1Ty(M.getContext()); - Int8Ty = Type::getInt8Ty(M.getContext()); - Int32Ty = Type::getInt32Ty(M.getContext()); + Triple TargetTriple(M->getTargetTriple()); + LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); + + Int1Ty = Type::getInt1Ty(M->getContext()); + Int8Ty = Type::getInt8Ty(M->getContext()); + Int32Ty = Type::getInt32Ty(M->getContext()); Int32PtrTy = PointerType::getUnqual(Int32Ty); - Int64Ty = Type::getInt64Ty(M.getContext()); - IntPtrTy = DL->getIntPtrType(M.getContext(), 0); + Int64Ty = Type::getInt64Ty(M->getContext()); + IntPtrTy = DL.getIntPtrType(M->getContext(), 0); - BitSetNM = M.getNamedMetadata("llvm.bitsets"); + BitSetNM = M->getNamedMetadata("llvm.bitsets"); BitSetTestCallSites.clear(); @@ -259,52 +301,128 @@ static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits, return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0)); } +ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) { + // Create globals to stand in for byte arrays and masks. These never actually + // get initialized, we RAUW and erase them later in allocateByteArrays() once + // we know the offset and mask to use. 
+ auto ByteArrayGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + auto MaskGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + + ByteArrayInfos.emplace_back(); + ByteArrayInfo *BAI = &ByteArrayInfos.back(); + + BAI->Bits = BSI.Bits; + BAI->BitSize = BSI.BitSize; + BAI->ByteArray = ByteArrayGlobal; + BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty); + return BAI; +} + +void LowerBitSets::allocateByteArrays() { + std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(), + [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) { + return BAI1.BitSize > BAI2.BitSize; + }); + + std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size()); + + ByteArrayBuilder BAB; + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + uint8_t Mask; + BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask); + + BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask)); + cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent(); + } + + Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes); + auto ByteArray = + new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true, + GlobalValue::PrivateLinkage, ByteArrayConst); + + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs); + + // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures + // that the pc-relative displacement is folded into the lea instead of the + // test instruction getting another displacement. + if (LinkerSubsectionsViaSymbols) { + BAI->ByteArray->replaceAllUsesWith(GEP); + } else { + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M); + BAI->ByteArray->replaceAllUsesWith(Alias); + } + BAI->ByteArray->eraseFromParent(); + } + + ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] + + BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] + + BAB.BitAllocs[6] + BAB.BitAllocs[7]; + ByteArraySizeBytes = BAB.Bytes.size(); +} + /// Build a test that bit BitOffset is set in BSI, where /// BitSetGlobal is a global containing the bits in BSI. -Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI, - GlobalVariable *BitSetGlobal, - Value *BitOffset) { - if (BSI.Bits.size() <= 8) { +Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, + ByteArrayInfo *&BAI, Value *BitOffset) { + if (BSI.BitSize <= 64) { // If the bit set is sufficiently small, we can avoid a load by bit testing // a constant. IntegerType *BitsTy; - if (BSI.Bits.size() <= 4) + if (BSI.BitSize <= 32) BitsTy = Int32Ty; else BitsTy = Int64Ty; uint64_t Bits = 0; - for (auto I = BSI.Bits.rbegin(), E = BSI.Bits.rend(); I != E; ++I) { - Bits <<= 8; - Bits |= *I; - } + for (auto Bit : BSI.Bits) + Bits |= uint64_t(1) << Bit; Constant *BitsConst = ConstantInt::get(BitsTy, Bits); return createMaskedBitTest(B, BitsConst, BitOffset); } else { - // TODO: We might want to use the memory variant of the bt instruction - // with the previously computed bit offset at -Os. This instruction does - // exactly what we want but has been benchmarked as being slower than open - // coding the load+bt. 
- Value *BitSetGlobalOffset = - B.CreateLShr(BitOffset, ConstantInt::get(IntPtrTy, 5)); - Value *BitSetEntryAddr = B.CreateGEP( - ConstantExpr::getBitCast(BitSetGlobal, Int32PtrTy), BitSetGlobalOffset); - Value *BitSetEntry = B.CreateLoad(BitSetEntryAddr); - - return createMaskedBitTest(B, BitSetEntry, BitOffset); + if (!BAI) { + ++NumByteArraysCreated; + BAI = createByteArray(BSI); + } + + Constant *ByteArray = BAI->ByteArray; + if (!LinkerSubsectionsViaSymbols && AvoidReuse) { + // Each use of the byte array uses a different alias. This makes the + // backend less likely to reuse previously computed byte array addresses, + // improving the security of the CFI mechanism based on this pass. + ByteArray = GlobalAlias::create( + BAI->ByteArray->getType()->getElementType(), 0, + GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); + } + + Value *ByteAddr = B.CreateGEP(ByteArray, BitOffset); + Value *Byte = B.CreateLoad(ByteAddr); + + Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask); + return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0)); } } /// Lower a llvm.bitset.test call to its implementation. Returns the value to /// replace the call with. Value *LowerBitSets::lowerBitSetCall( - CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal, + CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, GlobalVariable *CombinedGlobal, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) { Value *Ptr = CI->getArgOperand(0); + const DataLayout &DL = M->getDataLayout(); if (BSI.containsValue(DL, GlobalLayout, Ptr)) - return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext()); + return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext()); Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy); Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( @@ -336,8 +454,8 @@ Value *LowerBitSets::lowerBitSetCall( Value *OffsetSHR = B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2)); Value *OffsetSHL = B.CreateShl( - PtrOffset, ConstantInt::get(IntPtrTy, DL->getPointerSizeInBits(0) - - BSI.AlignLog2)); + PtrOffset, + ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2)); BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); } @@ -353,7 +471,7 @@ Value *LowerBitSets::lowerBitSetCall( // Now that we know that the offset is in range and aligned, load the // appropriate bit from the bitset. - Value *Bit = createBitSetTest(ThenB, BSI, BitSetGlobal, BitOffset); + Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset); // The value we want is 0 if we came directly from the initial block // (having failed the range or alignment checks), or the loaded bit if @@ -368,14 +486,14 @@ Value *LowerBitSets::lowerBitSetCall( /// Given a disjoint set of bitsets and globals, layout the globals, build the /// bit sets and lower the llvm.bitset.test calls. void LowerBitSets::buildBitSetsFromGlobals( - Module &M, const std::vector<MDString *> &BitSets, const std::vector<GlobalVariable *> &Globals) { // Build a new global with the combined contents of the referenced globals. std::vector<Constant *> GlobalInits; + const DataLayout &DL = M->getDataLayout(); for (GlobalVariable *G : Globals) { GlobalInits.push_back(G->getInitializer()); - uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType()); + uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType()); // Compute the amount of padding required to align the next element to the // next power of 2. 
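The hunk above stops at the comment about padding; one plausible way to express the rule it describes (the helper below is a sketch, not code from the patch) is to round each element's size up to the next power of two:

    #include "llvm/Support/MathExtras.h"

    // Bytes of padding after an element of InitSize bytes so that the next
    // element starts on the next power-of-two boundary.
    static uint64_t paddingToNextPowerOf2(uint64_t InitSize) {
      return llvm::NextPowerOf2(InitSize - 1) - InitSize;
    }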
@@ -391,13 +509,13 @@ void LowerBitSets::buildBitSetsFromGlobals( } if (!GlobalInits.empty()) GlobalInits.pop_back(); - Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); + Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits); auto CombinedGlobal = - new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, + new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); const StructLayout *CombinedGlobalLayout = - DL->getStructLayout(cast<StructType>(NewInit->getType())); + DL.getStructLayout(cast<StructType>(NewInit->getType())); // Compute the offsets of the original globals within the new global. DenseMap<GlobalVariable *, uint64_t> GlobalLayout; @@ -410,18 +528,12 @@ void LowerBitSets::buildBitSetsFromGlobals( // Build the bitset. BitSetInfo BSI = buildBitSet(BS, GlobalLayout); - // Create a global in which to store it. - ++NumBitSetsCreated; - Constant *BitsConst = ConstantDataArray::get(M.getContext(), BSI.Bits); - auto BitSetGlobal = new GlobalVariable( - M, BitsConst->getType(), /*isConstant=*/true, - GlobalValue::PrivateLinkage, BitsConst, BS->getString() + ".bits"); + ByteArrayInfo *BAI = 0; // Lower each call to llvm.bitset.test for this bitset. for (CallInst *CI : BitSetTestCallSites[BS]) { ++NumBitSetCallsLowered; - Value *Lowered = - lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout); + Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout); CI->replaceAllUsesWith(Lowered); CI->eraseFromParent(); } @@ -436,20 +548,24 @@ void LowerBitSets::buildBitSetsFromGlobals( ConstantInt::get(Int32Ty, I * 2)}; Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs); - GlobalAlias *GAlias = GlobalAlias::create( - Globals[I]->getType()->getElementType(), - Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(), - "", CombinedGlobalElemPtr, &M); - GAlias->takeName(Globals[I]); - Globals[I]->replaceAllUsesWith(GAlias); + if (LinkerSubsectionsViaSymbols) { + Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + GlobalAlias *GAlias = GlobalAlias::create( + Globals[I]->getType()->getElementType(), + Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(), + "", CombinedGlobalElemPtr, M); + GAlias->takeName(Globals[I]); + Globals[I]->replaceAllUsesWith(GAlias); + } Globals[I]->eraseFromParent(); } } /// Lower all bit sets in this module. -bool LowerBitSets::buildBitSets(Module &M) { +bool LowerBitSets::buildBitSets() { Function *BitSetTestFunc = - M.getFunction(Intrinsic::getName(Intrinsic::bitset_test)); + M->getFunction(Intrinsic::getName(Intrinsic::bitset_test)); if (!BitSetTestFunc) return false; @@ -591,22 +707,24 @@ bool LowerBitSets::buildBitSets(Module &M) { }); // Build the bitsets from this disjoint set. 
- buildBitSetsFromGlobals(M, BitSets, OrderedGlobals); + buildBitSetsFromGlobals(BitSets, OrderedGlobals); } + allocateByteArrays(); + return true; } -bool LowerBitSets::eraseBitSetMetadata(Module &M) { +bool LowerBitSets::eraseBitSetMetadata() { if (!BitSetNM) return false; - M.eraseNamedMetadata(BitSetNM); + M->eraseNamedMetadata(BitSetNM); return true; } bool LowerBitSets::runOnModule(Module &M) { - bool Changed = buildBitSets(M); - Changed |= eraseBitSetMetadata(M); + bool Changed = buildBitSets(); + Changed |= eraseBitSetMetadata(); return Changed; } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index b91ebf2..596674d 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -127,9 +127,8 @@ namespace { /// side of claiming that two functions are different). class FunctionComparator { public: - FunctionComparator(const DataLayout *DL, const Function *F1, - const Function *F2) - : FnL(F1), FnR(F2), DL(DL) {} + FunctionComparator(const Function *F1, const Function *F2) + : FnL(F1), FnR(F2) {} /// Test whether the two functions have equivalent behaviour. int compare(); @@ -292,8 +291,7 @@ private: /// Parts to be compared for each comparison stage, /// most significant stage first: /// 1. Address space. As numbers. - /// 2. Constant offset, (if "DataLayout *DL" field is not NULL, - /// using GEPOperator::accumulateConstantOffset method). + /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method). /// 3. Pointer operand type (using cmpType method). /// 4. Number of operands. /// 5. Compare operands, using cmpValues method. @@ -354,8 +352,6 @@ private: // The two functions undergoing comparison. const Function *FnL, *FnR; - const DataLayout *DL; - /// Assign serial numbers to values from left function, and values from /// right function. /// Explanation: @@ -394,14 +390,13 @@ private: class FunctionNode { AssertingVH<Function> F; - const DataLayout *DL; public: - FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {} + FunctionNode(Function *F) : F(F) {} Function *getFunc() const { return F; } void release() { F = 0; } bool operator<(const FunctionNode &RHS) const { - return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1; + return (FunctionComparator(F, RHS.getFunc()).compare()) == -1; } }; } @@ -620,10 +615,11 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { PointerType *PTyL = dyn_cast<PointerType>(TyL); PointerType *PTyR = dyn_cast<PointerType>(TyR); - if (DL) { - if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL); - if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR); - } + const DataLayout &DL = FnL->getParent()->getDataLayout(); + if (PTyL && PTyL->getAddressSpace() == 0) + TyL = DL.getIntPtrType(TyL); + if (PTyR && PTyR->getAddressSpace() == 0) + TyR = DL.getIntPtrType(TyR); if (TyL == TyR) return 0; @@ -855,13 +851,12 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, // When we have target data, we can reduce the GEP down to the value in bytes // added to the address. 
- if (DL) { - unsigned BitWidth = DL->getPointerSizeInBits(ASL); - APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); - if (GEPL->accumulateConstantOffset(*DL, OffsetL) && - GEPR->accumulateConstantOffset(*DL, OffsetR)) - return cmpAPInts(OffsetL, OffsetR); - } + const DataLayout &DL = FnL->getParent()->getDataLayout(); + unsigned BitWidth = DL.getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(DL, OffsetL) && + GEPR->accumulateConstantOffset(DL, OffsetR)) + return cmpAPInts(OffsetL, OffsetR); if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), (uint64_t)GEPR->getPointerOperand()->getType())) @@ -1122,9 +1117,6 @@ private: /// to modify it. FnTreeType FnTree; - /// DataLayout for more accurate GEP comparisons. May be NULL. - const DataLayout *DL; - /// Whether or not the target supports global aliases. bool HasGlobalAliases; }; @@ -1152,8 +1144,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { Function *F1 = cast<Function>(*I); Function *F2 = cast<Function>(*J); - int Res1 = FunctionComparator(DL, F1, F2).compare(); - int Res2 = FunctionComparator(DL, F2, F1).compare(); + int Res1 = FunctionComparator(F1, F2).compare(); + int Res2 = FunctionComparator(F2, F1).compare(); // If F1 <= F2, then F2 >= F1, otherwise report failure. if (Res1 != -Res2) { @@ -1174,8 +1166,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { continue; Function *F3 = cast<Function>(*K); - int Res3 = FunctionComparator(DL, F1, F3).compare(); - int Res4 = FunctionComparator(DL, F2, F3).compare(); + int Res3 = FunctionComparator(F1, F3).compare(); + int Res4 = FunctionComparator(F2, F3).compare(); bool Transitive = true; @@ -1212,8 +1204,6 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) @@ -1420,7 +1410,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { // that was already inserted. bool MergeFunctions::insert(Function *NewFunction) { std::pair<FnTreeType::iterator, bool> Result = - FnTree.insert(FunctionNode(NewFunction, DL)); + FnTree.insert(FunctionNode(NewFunction)); if (Result.second) { DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); @@ -1457,7 +1447,7 @@ bool MergeFunctions::insert(Function *NewFunction) { void MergeFunctions::remove(Function *F) { // We need to make sure we remove F, not a function "equal" to F per the // function equality comparator. 
- FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL)); + FnTreeType::iterator found = FnTree.find(FunctionNode(F)); size_t Erased = 0; if (found != FnTree.end() && found->getFunc() == F) { Erased = 1; diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 9a75050..d28d563 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -77,6 +77,10 @@ static cl::opt<bool> EnableMLSM("mlsm", cl::init(true), cl::Hidden, cl::desc("Enable motion of merged load and store")); +static cl::opt<bool> EnableLoopInterchange( + "enable-loopinterchange", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopInterchange Pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -93,7 +97,6 @@ PassManagerBuilder::PassManagerBuilder() { DisableGVNLoadPRE = false; VerifyInput = false; VerifyOutput = false; - StripDebug = false; MergeFunctions = false; } @@ -239,6 +242,8 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops + if (EnableLoopInterchange) + MPM.add(createLoopInterchangePass()); // Interchange loops if (!DisableUnrollLoops) MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops @@ -305,8 +310,7 @@ void PassManagerBuilder::populateModulePassManager( // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. - if (ExtraVectorizerPasses) - MPM.add(createLoopRotatePass()); + MPM.add(createLoopRotatePass()); MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); // FIXME: Because of #pragma vectorize enable, the passes below are always @@ -358,9 +362,20 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createCFGSimplificationPass()); MPM.add(createInstructionCombiningPass()); - if (!DisableUnrollLoops) + if (!DisableUnrollLoops) { MPM.add(createLoopUnrollPass()); // Unroll small loops + // This is a barrier pass to avoid combine LICM pass and loop unroll pass + // within same loop pass manager. + MPM.add(createInstructionSimplifierPass()); + + // Runtime unrolling will introduce runtime check in loop prologue. If the + // unrolled loop is a inner loop, then the prologue will be inside the + // outer loop. LICM pass can help to promote the runtime check out if the + // checked value is loop invariant. + MPM.add(createLICMPass()); + } + // After vectorization and unrolling, assume intrinsics may tell us more // about pointer alignments. MPM.add(createAlignmentFromAssumptionsPass()); @@ -454,6 +469,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // More loops are countable; try to optimize them. PM.add(createIndVarSimplifyPass()); PM.add(createLoopDeletionPass()); + if (EnableLoopInterchange) + PM.add(createLoopInterchangePass()); + PM.add(createLoopVectorizePass(true, LoopVectorize)); // More scalar chains could be vectorized due to more alias information @@ -473,10 +491,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); +} - // Lower bitset metadata to bitsets. 
- PM.add(createLowerBitSetsPass()); - +void PassManagerBuilder::addLateLTOOptimizationPasses( + legacy::PassManagerBase &PM) { // Delete basic blocks, which optimization passes may have killed. PM.add(createCFGSimplificationPass()); @@ -496,19 +514,19 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (VerifyInput) PM.add(createVerifierPass()); - if (StripDebug) - PM.add(createStripSymbolsPass(true)); + if (OptLevel > 1) + addLTOOptimizationPasses(PM); - if (VerifyInput) - PM.add(createDebugInfoVerifierPass()); + // Lower bit sets to globals. This pass supports Clang's control flow + // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI + // is enabled. The pass does nothing if CFI is disabled. + PM.add(createLowerBitSetsPass()); if (OptLevel != 0) - addLTOOptimizationPasses(PM); + addLateLTOOptimizationPasses(PM); - if (VerifyOutput) { + if (VerifyOutput) PM.add(createVerifierPass()); - PM.add(createDebugInfoVerifierPass()); - } } inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { |
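For context on the PassManagerBuilder change: LowerBitSets now runs from populateLTOPassManager(), between the main and late LTO optimization passes, so an LTO driver picks it up without scheduling it by hand. A minimal usage sketch; the driver function name is invented.

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    static void buildLTOPipeline(legacy::PassManager &PM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 2;         // addLTOOptimizationPasses() runs for OptLevel > 1
      PMB.VerifyInput = true;
      PMB.VerifyOutput = true;
      PMB.populateLTOPassManager(PM); // now also schedules createLowerBitSetsPass()
    }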