Diffstat (limited to 'lib/Transforms')
105 files changed, 6352 insertions, 4297 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 7e48ce3..46480bd 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -69,16 +69,15 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override; static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) { + : CallGraphSCCPass(ID), maxElements(maxElements) { initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; - const DataLayout *DL; private: - bool isDenselyPacked(Type *type); + bool isDenselyPacked(Type *type, const DataLayout &DL); bool canPaddingBeAccessed(Argument *Arg); CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; @@ -109,9 +108,6 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. @@ -128,7 +124,7 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { } /// \brief Checks if a type could have padding bytes. -bool ArgPromotion::isDenselyPacked(Type *type) { +bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) { // There is no size information, so be conservative. if (!type->isSized()) @@ -136,7 +132,7 @@ bool ArgPromotion::isDenselyPacked(Type *type) { // If the alloc size is not equal to the storage size, then there are padding // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. - if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type)) + if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)) return false; if (!isa<CompositeType>(type)) @@ -144,19 +140,20 @@ bool ArgPromotion::isDenselyPacked(Type *type) { // For homogenous sequential types, check for padding within members. if (SequentialType *seqTy = dyn_cast<SequentialType>(type)) - return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType()); + return isa<PointerType>(seqTy) || + isDenselyPacked(seqTy->getElementType(), DL); // Check for padding within and between elements of a struct. StructType *StructTy = cast<StructType>(type); - const StructLayout *Layout = DL->getStructLayout(StructTy); + const StructLayout *Layout = DL.getStructLayout(StructTy); uint64_t StartPos = 0; for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { Type *ElTy = StructTy->getElementType(i); - if (!isDenselyPacked(ElTy)) + if (!isDenselyPacked(ElTy, DL)) return false; if (StartPos != Layout->getElementOffsetInBits(i)) return false; - StartPos += DL->getTypeAllocSizeInBits(ElTy); + StartPos += DL.getTypeAllocSizeInBits(ElTy); } return true; @@ -236,6 +233,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // IR, while in the callee the classification is determined dynamically based // on the number of registers consumed so far. if (F->isVarArg()) return nullptr; + const DataLayout &DL = F->getParent()->getDataLayout(); // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. 
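The hunk above drops the pass's nullable `const DataLayout *DL` member and the `DataLayoutPass` lookup; the pass now fetches a guaranteed `const DataLayout &` from the parent `Module` at the point of use and threads it through helpers such as `isDenselyPacked`. A minimal standalone sketch of that pattern, not part of the patch; `typeHasNoPadding` and `pointeeIsDenselyPacked` are hypothetical names used only for illustration:

```cpp
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// DataLayout is taken by reference, so there is no "layout missing" case to
// handle, unlike the old nullable pointer obtained from DataLayoutPass.
static bool typeHasNoPadding(Type *Ty, const DataLayout &DL) {
  if (!Ty->isSized())
    return false; // No size information; be conservative.
  // Padding exists when the alloc size exceeds the stored size, e.g.
  // x86_fp80: 80 bits stored, 128 bits allocated.
  return DL.getTypeSizeInBits(Ty) == DL.getTypeAllocSizeInBits(Ty);
}

static bool pointeeIsDenselyPacked(const Argument &Arg) {
  // New pattern: reach the DataLayout through Function -> Module instead of
  // querying getAnalysisIfAvailable<DataLayoutPass>().
  const DataLayout &DL = Arg.getParent()->getParent()->getDataLayout();
  auto *PtrTy = dyn_cast<PointerType>(Arg.getType());
  return PtrTy && typeHasNoPadding(PtrTy->getElementType(), DL);
}
```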
@@ -250,8 +248,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // packed or if we can prove the padding bytes are never accessed. This does // not apply to inalloca. bool isSafeToPromote = - PtrArg->hasByValAttr() && - (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg)); + PtrArg->hasByValAttr() && + (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { @@ -310,9 +308,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { /// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCallersPassInValidPointerForArgument(Argument *Arg, - const DataLayout *DL) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { Function *Callee = Arg->getParent(); + const DataLayout &DL = Callee->getParent()->getDataLayout(); unsigned ArgNo = Arg->getArgNo(); @@ -430,7 +428,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL)) + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -586,7 +584,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, FunctionType *FTy = F->getFunctionType(); std::vector<Type*> Params; - typedef std::set<IndicesVector> ScalarizeTable; + typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we @@ -623,8 +621,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Simple byval argument? Just add all the struct element types. 
Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) - Params.push_back(STy->getElementType(i)); + Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(I)) { // Unchanged argument @@ -647,7 +644,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ScalarizeTable &ArgIndices = ScalarizedElements[I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); - assert(isa<LoadInst>(UI) || isa<GetElementPtrInst>(UI)); + Type *SrcTy; + if (LoadInst *L = dyn_cast<LoadInst>(UI)) + SrcTy = L->getType(); + else + SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); IndicesVector Indices; Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single @@ -659,7 +660,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); - ArgIndices.insert(Indices); + ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(UI)) OrigLoad = L; @@ -673,11 +674,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI)); + Params.push_back( + GetElementPtrInst::getIndexedType(I->getType(), SI->second)); assert(Params.back()); } - if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) + if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; @@ -768,9 +770,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create(*AI, Idxs, - (*AI)->getName()+"."+utostr(i), - Call); + Value *Idx = GetElementPtrInst::Create( + STy, *AI, Idxs, (*AI)->getName() + "." + utostr(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } @@ -783,12 +784,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; - LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)]; - if (!SI->empty()) { - Ops.reserve(SI->size()); + LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)]; + if (!SI->second.empty()) { + Ops.reserve(SI->second.size()); Type *ElTy = V->getType(); - for (IndicesVector::const_iterator II = SI->begin(), - IE = SI->end(); II != IE; ++II) { + for (IndicesVector::const_iterator II = SI->second.begin(), + IE = SI->second.end(); + II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = (ElTy->isStructTy() ? @@ -799,7 +801,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. 
- V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call); + V = GetElementPtrInst::Create(SI->first, V, Ops, + V->getName() + ".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } @@ -903,10 +906,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = - GetElementPtrInst::Create(TheAlloca, Idxs, - TheAlloca->getName()+"."+Twine(i), - InsertPt); + Value *Idx = GetElementPtrInst::Create( + AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), + InsertPt); I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } @@ -939,7 +941,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { - assert(ArgIndices.begin()->empty() && + assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); LI->replaceAllUsesWith(I2); @@ -961,7 +963,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); - *It != Operands; ++It, ++TheArg) { + It->second != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 0b6ade9..8ce7646 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -52,7 +52,6 @@ namespace { // alignment to a concrete value. unsigned getAlignment(GlobalVariable *GV) const; - const DataLayout *DL; }; } @@ -89,32 +88,22 @@ static bool IsBetterCanonical(const GlobalVariable &A, return A.hasUnnamedAddr(); } -bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const { - return DL || GV->getAlignment() != 0; -} - unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { unsigned Align = GV->getAlignment(); if (Align) return Align; - if (DL) - return DL->getPreferredAlignment(GV); - return 0; + return GV->getParent()->getDataLayout().getPreferredAlignment(GV); } bool ConstantMerge::runOnModule(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); - - // Map unique <constants, has-unknown-alignment> pairs to globals. We don't - // want to merge globals of unknown alignment with those of explicit - // alignment. If we have DataLayout, we always know the alignment. - DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap; + + // Map unique constants to globals. + DenseMap<Constant *, GlobalVariable *> CMap; // Replacements - This vector contains a list of replacements to perform. SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; @@ -156,8 +145,7 @@ bool ConstantMerge::runOnModule(Module &M) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. - PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); - GlobalVariable *&Slot = CMap[Pair]; + GlobalVariable *&Slot = CMap[Init]; // If this is the first constant we find or if the old one is local, // replace with the current one. 
If the current is externally visible @@ -188,8 +176,7 @@ bool ConstantMerge::runOnModule(Module &M) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. - PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV)); - GlobalVariable *Slot = CMap[Pair]; + GlobalVariable *Slot = CMap[Init]; if (!Slot || Slot == GV) continue; diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 0c844fe..ba04c80 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Pass.h" +#include <unordered_map> using namespace llvm; #define DEBUG_TYPE "globaldce" @@ -47,6 +48,7 @@ namespace { private: SmallPtrSet<GlobalValue*, 32> AliveGlobals; SmallPtrSet<Constant *, 8> SeenConstants; + std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers; /// GlobalIsNeeded - mark the specific global value as needed, and /// recursively mark anything that it uses as also needed. @@ -78,6 +80,17 @@ bool GlobalDCE::runOnModule(Module &M) { // Remove empty functions from the global ctors list. Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + // Collect the set of members for each comdat. + for (Function &F : M) + if (Comdat *C = F.getComdat()) + ComdatMembers.insert(std::make_pair(C, &F)); + for (GlobalVariable &GV : M.globals()) + if (Comdat *C = GV.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GV)); + for (GlobalAlias &GA : M.aliases()) + if (Comdat *C = GA.getComdat()) + ComdatMembers.insert(std::make_pair(C, &GA)); + // Loop over the module, adding globals which are obviously necessary. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); @@ -177,6 +190,7 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); SeenConstants.clear(); + ComdatMembers.clear(); return Changed; } @@ -188,17 +202,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { if (!AliveGlobals.insert(G).second) return; - Module *M = G->getParent(); if (Comdat *C = G->getComdat()) { - for (Function &F : *M) - if (F.getComdat() == C) - GlobalIsNeeded(&F); - for (GlobalVariable &GV : M->globals()) - if (GV.getComdat() == C) - GlobalIsNeeded(&GV); - for (GlobalAlias &GA : M->aliases()) - if (GA.getComdat() == C) - GlobalIsNeeded(&GA); + for (auto &&CM : make_range(ComdatMembers.equal_range(C))) + GlobalIsNeeded(CM.second); } if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 45e04f1..20b41fb 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -38,7 +39,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -86,7 +86,6 @@ namespace { const GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); - const DataLayout *DL; TargetLibraryInfo *TLI; 
SmallSet<const Comdat *, 8> NotDiscardableComdats; }; @@ -269,7 +268,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, /// quick scan over the use list to clean up the easy and obvious cruft. This /// returns true if it made a change. static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { bool Changed = false; // Note that we need to use a weak value handle for the worklist items. When @@ -318,8 +317,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // and will invalidate our notion of what Init is. Constant *SubInit = nullptr; if (!isa<ConstantExpr>(GEP->getOperand(0))) { - ConstantExpr *CE = - dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI)); + ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( + ConstantFoldInstruction(GEP, DL, TLI)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); @@ -580,8 +579,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { Idxs.push_back(NullInt); for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) Idxs.push_back(GEPI->getOperand(i)); - NewPtr = GetElementPtrInst::Create(NewPtr, Idxs, - GEPI->getName()+"."+Twine(Val),GEPI); + NewPtr = GetElementPtrInst::Create( + NewPtr->getType()->getPointerElementType(), NewPtr, Idxs, + GEPI->getName() + "." + Twine(Val), GEPI); } } GEP->replaceAllUsesWith(NewPtr); @@ -739,7 +739,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// if the loaded value is dynamically null, then we know that they cannot be /// reachable with a null optimize away the load. static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { bool Changed = false; @@ -802,7 +802,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the /// instructions that are foldable. -static void ConstantPropUsersOf(Value *V, const DataLayout *DL, +static void ConstantPropUsersOf(Value *V, const DataLayout &DL, TargetLibraryInfo *TLI) { for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) if (Instruction *I = dyn_cast<Instruction>(*UI++)) @@ -822,12 +822,10 @@ static void ConstantPropUsersOf(Value *V, const DataLayout *DL, /// the specified malloc. Because it is always the result of the specified /// malloc, there is no reason to actually DO the malloc. Instead, turn the /// malloc into a global, and any loads of GV as uses of the new global. -static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, - CallInst *CI, - Type *AllocTy, - ConstantInt *NElements, - const DataLayout *DL, - TargetLibraryInfo *TLI) { +static GlobalVariable * +OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, + ConstantInt *NElements, const DataLayout &DL, + TargetLibraryInfo *TLI) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); Type *GlobalType; @@ -1167,7 +1165,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, InsertedScalarizedValues, PHIsToRewrite), LI->getName()+".f"+Twine(FieldNo), LI); - } else if (PHINode *PN = dyn_cast<PHINode>(V)) { + } else { + PHINode *PN = cast<PHINode>(V); // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. 
@@ -1181,8 +1180,6 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, PN->getName()+".f"+Twine(FieldNo), PN); Result = NewPN; PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); - } else { - llvm_unreachable("Unknown usable value"); } return FieldVals[FieldNo] = Result; @@ -1224,7 +1221,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, GEPIdx.push_back(GEPI->getOperand(1)); GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); - Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx, + Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx, GEPI->getName(), GEPI); GEPI->replaceAllUsesWith(NGEPI); GEPI->eraseFromParent(); @@ -1271,7 +1268,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value *NElems, const DataLayout *DL, + Value *NElems, const DataLayout &DL, const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); Type *MAT = getMallocAllocatedType(CI, TLI); @@ -1301,10 +1298,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, GV->getThreadLocalMode()); FieldGlobals.push_back(NGV); - unsigned TypeSize = DL->getTypeAllocSize(FieldTy); + unsigned TypeSize = DL.getTypeAllocSize(FieldTy); if (StructType *ST = dyn_cast<StructType>(FieldTy)) - TypeSize = DL->getStructLayout(ST)->getSizeInBytes(); - Type *IntPtrTy = DL->getIntPtrType(CI->getType()); + TypeSize = DL.getStructLayout(ST)->getSizeInBytes(); + Type *IntPtrTy = DL.getIntPtrType(CI->getType()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), NElems, nullptr, @@ -1459,16 +1456,12 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a /// pointer global variable with a single value stored it that is a malloc or /// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, - CallInst *CI, +static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, Type *AllocTy, AtomicOrdering Ordering, Module::global_iterator &GVI, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { - if (!DL) - return false; - // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1504,7 +1497,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // Restrict this transformation to only working on small allocations // (2048 bytes currently), as we don't want to introduce a 16M global or // something. - if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) { + if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); return true; } @@ -1534,8 +1527,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. 
malloc [100 x struct],1 -> malloc struct, 100 if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { - Type *IntPtrTy = DL->getIntPtrType(CI->getType()); - unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes(); + Type *IntPtrTy = DL.getIntPtrType(CI->getType()); + unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, @@ -1563,7 +1556,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, AtomicOrdering Ordering, Module::global_iterator &GVI, - const DataLayout *DL, + const DataLayout &DL, TargetLibraryInfo *TLI) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1733,6 +1726,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI, const GlobalStatus &GS) { + auto &DL = GV->getParent()->getDataLayout(); // If this is a first class global and has only one accessing function // and this function is main (which we know is not recursive), we replace // the global with a local alloca in this function. @@ -1804,12 +1798,10 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ++NumMarked; return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) { - const DataLayout &DL = DLP->getDataLayout(); - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } + const DataLayout &DL = GV->getParent()->getDataLayout(); + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; } } else if (GS.StoredType == GlobalStatus::StoredOnce) { // If the initial value for the global was an undef value, and if only @@ -1954,6 +1946,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { // Simplify the initializer. if (GV->hasInitializer()) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { + auto &DL = M.getDataLayout(); Constant *New = ConstantFoldConstantExpression(CE, DL, TLI); if (New && New != CE) GV->setInitializer(New); @@ -1971,9 +1964,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { static inline bool isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant*> &SimpleConstants, - const DataLayout *DL); - + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL); /// isSimpleEnoughValueToCommit - Return true if the specified constant can be /// handled by the code generator. We don't want to generate something like: @@ -1983,9 +1975,10 @@ isSimpleEnoughValueToCommit(Constant *C, /// This function should be called if C was not found (but just got inserted) /// in SimpleConstants to avoid having to rescan the same constants all the /// time. -static bool isSimpleEnoughValueToCommitHelper(Constant *C, - SmallPtrSetImpl<Constant*> &SimpleConstants, - const DataLayout *DL) { +static bool +isSimpleEnoughValueToCommitHelper(Constant *C, + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { // Simple global addresses are supported, do not allow dllimport or // thread-local globals. 
if (auto *GV = dyn_cast<GlobalValue>(C)) @@ -2019,8 +2012,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, case Instruction::PtrToInt: // int <=> ptr is fine if the int type is the same size as the // pointer type. - if (!DL || DL->getTypeSizeInBits(CE->getType()) != - DL->getTypeSizeInBits(CE->getOperand(0)->getType())) + if (DL.getTypeSizeInBits(CE->getType()) != + DL.getTypeSizeInBits(CE->getOperand(0)->getType())) return false; return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL); @@ -2042,8 +2035,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C, static inline bool isSimpleEnoughValueToCommit(Constant *C, - SmallPtrSetImpl<Constant*> &SimpleConstants, - const DataLayout *DL) { + SmallPtrSetImpl<Constant *> &SimpleConstants, + const DataLayout &DL) { // If we already checked this constant, we win. if (!SimpleConstants.insert(C).second) return true; @@ -2174,8 +2167,8 @@ namespace { /// Once an evaluation call fails, the evaluation object should not be reused. class Evaluator { public: - Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI) - : DL(DL), TLI(TLI) { + Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) + : DL(DL), TLI(TLI) { ValueStack.emplace_back(); } @@ -2249,7 +2242,7 @@ private: /// simple enough to live in a static initializer of a global. SmallPtrSet<Constant*, 8> SimpleConstants; - const DataLayout *DL; + const DataLayout &DL; const TargetLibraryInfo *TLI; }; @@ -2498,9 +2491,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Type *ElemTy = cast<PointerType>(GV->getType())->getElementType(); - if (DL && !Size->isAllOnesValue() && + if (!Size->isAllOnesValue() && Size->getValue().getLimitedValue() >= - DL->getTypeStoreSize(ElemTy)) { + DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV << "\n"); @@ -2689,7 +2682,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL, +static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Call the function. Evaluator Eval(DL, TLI); @@ -3040,8 +3033,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? 
&DLP->getDataLayout() : nullptr; + auto &DL = M.getDataLayout(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); bool LocalChange = true; diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 305ad7a..3aa4ee5 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" @@ -29,7 +30,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -72,8 +72,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) InlineLimit : Threshold), InsertLifetime(InsertLifetime) {} -/// getAnalysisUsage - For this class, we declare that we require and preserve -/// the call graph. If the derived class implements this method, it should +/// For this class, we declare that we require and preserve the call graph. +/// If the derived class implements this method, it should /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); @@ -111,18 +111,17 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { Caller->addFnAttr(Attribute::StackProtect); } -/// InlineCallIfPossible - If it is possible to inline the specified call site, +/// If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. /// /// This function also does some basic book-keeping to update the IR. The /// InlinedArrayAllocas map keeps track of any allocas that are already -/// available from other functions inlined into the caller. If we are able to +/// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, - int InlineHistory, bool InsertLifetime, - const DataLayout *DL) { + int InlineHistory, bool InsertLifetime) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -198,11 +197,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, unsigned Align1 = AI->getAlignment(), Align2 = AvailableAlloca->getAlignment(); - // If we don't have data layout information, and only one alloca is using - // the target default, then we can't safely merge them because we can't - // pick the greater alignment. - if (!DL && (!Align1 || !Align2) && Align1 != Align2) - continue; // The available alloca has to be in the right function, not in some other // function in this SCC. @@ -223,8 +217,8 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, if (Align1 != Align2) { if (!Align1 || !Align2) { - assert(DL && "DataLayout required to compare default alignments"); - unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType()); + const DataLayout &DL = Caller->getParent()->getDataLayout(); + unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType()); Align1 = Align1 ? Align1 : TypeAlign; Align2 = Align2 ? 
Align2 : TypeAlign; @@ -300,8 +294,7 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) { emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); } -/// shouldInline - Return true if the inliner should attempt to inline -/// at the given CallSite. +/// Return true if the inliner should attempt to inline at the given CallSite. bool Inliner::shouldInline(CallSite CS) { InlineCost IC = getInlineCost(CS); @@ -415,7 +408,7 @@ bool Inliner::shouldInline(CallSite CS) { return true; } -/// InlineHistoryIncludes - Return true if the specified inline history ID +/// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { @@ -432,8 +425,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); @@ -495,7 +486,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, DL, AA, ACT); + InlineFunctionInfo InlineInfo(&CG, AA, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -553,7 +544,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // Attempt to inline the function. if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime, DL)) { + InlineHistoryID, InsertLifetime)) { emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, Twine(Callee->getName() + " will not be inlined into " + @@ -625,14 +616,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { return Changed; } -// doFinalization - Remove now-dead linkonce functions at the end of -// processing to avoid breaking the SCC traversal. +/// Remove now-dead linkonce functions at the end of +/// processing to avoid breaking the SCC traversal. bool Inliner::doFinalization(CallGraph &CG) { return removeDeadFunctions(CG); } -/// removeDeadFunctions - Remove dead functions that are not included in -/// DNR (Do Not Remove) list. +/// Remove dead functions that are not included in DNR (Do Not Remove) list. 
bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { SmallVector<CallGraphNode*, 16> FunctionsToRemove; diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp index 0a22a80..fe00d92 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerBitSets.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalVariable.h" @@ -31,10 +32,17 @@ using namespace llvm; #define DEBUG_TYPE "lowerbitsets" -STATISTIC(NumBitSetsCreated, "Number of bitsets created"); +STATISTIC(ByteArraySizeBits, "Byte array size in bits"); +STATISTIC(ByteArraySizeBytes, "Byte array size in bytes"); +STATISTIC(NumByteArraysCreated, "Number of byte arrays created"); STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered"); STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets"); +static cl::opt<bool> AvoidReuse( + "lowerbitsets-avoid-reuse", + cl::desc("Try to avoid reuse of byte array addresses using aliases"), + cl::Hidden, cl::init(true)); + bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { if (Offset < ByteOffset) return false; @@ -46,11 +54,11 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { if (BitOffset >= BitSize) return false; - return (Bits[BitOffset / 8] >> (BitOffset % 8)) & 1; + return Bits.count(BitOffset); } bool BitSetInfo::containsValue( - const DataLayout *DL, + const DataLayout &DL, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V, uint64_t COffset) const { if (auto GV = dyn_cast<GlobalVariable>(V)) { @@ -61,8 +69,8 @@ bool BitSetInfo::containsValue( } if (auto GEP = dyn_cast<GEPOperator>(V)) { - APInt APOffset(DL->getPointerSizeInBits(0), 0); - bool Result = GEP->accumulateConstantOffset(*DL, APOffset); + APInt APOffset(DL.getPointerSizeInBits(0), 0); + bool Result = GEP->accumulateConstantOffset(DL, APOffset); if (!Result) return false; COffset += APOffset.getZExtValue(); @@ -101,18 +109,15 @@ BitSetInfo BitSetBuilder::build() { BSI.ByteOffset = Min; BSI.AlignLog2 = 0; - // FIXME: Can probably do something smarter if all offsets are 0. if (Mask != 0) BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined); // Build the compressed bitset while normalizing the offsets against the // computed alignment. BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1; - uint64_t ByteSize = (BSI.BitSize + 7) / 8; - BSI.Bits.resize(ByteSize); for (uint64_t Offset : Offsets) { Offset >>= BSI.AlignLog2; - BSI.Bits[Offset / 8] |= 1 << (Offset % 8); + BSI.Bits.insert(Offset); } return BSI; @@ -147,15 +152,47 @@ void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) { FragmentMap[ObjIndex] = FragmentIndex; } +void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits, + uint64_t BitSize, uint64_t &AllocByteOffset, + uint8_t &AllocMask) { + // Find the smallest current allocation. + unsigned Bit = 0; + for (unsigned I = 1; I != BitsPerByte; ++I) + if (BitAllocs[I] < BitAllocs[Bit]) + Bit = I; + + AllocByteOffset = BitAllocs[Bit]; + + // Add our size to it. + unsigned ReqSize = AllocByteOffset + BitSize; + BitAllocs[Bit] = ReqSize; + if (Bytes.size() < ReqSize) + Bytes.resize(ReqSize); + + // Set our bits. 
+ AllocMask = 1 << Bit; + for (uint64_t B : Bits) + Bytes[AllocByteOffset + B] |= AllocMask; +} + namespace { +struct ByteArrayInfo { + std::set<uint64_t> Bits; + uint64_t BitSize; + GlobalVariable *ByteArray; + Constant *Mask; +}; + struct LowerBitSets : public ModulePass { static char ID; LowerBitSets() : ModulePass(ID) { initializeLowerBitSetsPass(*PassRegistry::getPassRegistry()); } - const DataLayout *DL; + Module *M; + + bool LinkerSubsectionsViaSymbols; IntegerType *Int1Ty; IntegerType *Int8Ty; IntegerType *Int32Ty; @@ -169,20 +206,23 @@ struct LowerBitSets : public ModulePass { // Mapping from bitset mdstrings to the call sites that test them. DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites; + std::vector<ByteArrayInfo> ByteArrayInfos; + BitSetInfo buildBitSet(MDString *BitSet, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); - Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI, - GlobalVariable *BitSetGlobal, Value *BitOffset); + ByteArrayInfo *createByteArray(BitSetInfo &BSI); + void allocateByteArrays(); + Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI, + Value *BitOffset); Value * - lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI, - GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal, + lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, + GlobalVariable *CombinedGlobal, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); - void buildBitSetsFromGlobals(Module &M, - const std::vector<MDString *> &BitSets, + void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets, const std::vector<GlobalVariable *> &Globals); - bool buildBitSets(Module &M); - bool eraseBitSetMetadata(Module &M); + bool buildBitSets(); + bool eraseBitSetMetadata(); bool doInitialization(Module &M) override; bool runOnModule(Module &M) override; @@ -198,19 +238,21 @@ char LowerBitSets::ID = 0; ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; } -bool LowerBitSets::doInitialization(Module &M) { - DL = M.getDataLayout(); - if (!DL) - report_fatal_error("Data layout required"); +bool LowerBitSets::doInitialization(Module &Mod) { + M = &Mod; + const DataLayout &DL = Mod.getDataLayout(); - Int1Ty = Type::getInt1Ty(M.getContext()); - Int8Ty = Type::getInt8Ty(M.getContext()); - Int32Ty = Type::getInt32Ty(M.getContext()); + Triple TargetTriple(M->getTargetTriple()); + LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); + + Int1Ty = Type::getInt1Ty(M->getContext()); + Int8Ty = Type::getInt8Ty(M->getContext()); + Int32Ty = Type::getInt32Ty(M->getContext()); Int32PtrTy = PointerType::getUnqual(Int32Ty); - Int64Ty = Type::getInt64Ty(M.getContext()); - IntPtrTy = DL->getIntPtrType(M.getContext(), 0); + Int64Ty = Type::getInt64Ty(M->getContext()); + IntPtrTy = DL.getIntPtrType(M->getContext(), 0); - BitSetNM = M.getNamedMetadata("llvm.bitsets"); + BitSetNM = M->getNamedMetadata("llvm.bitsets"); BitSetTestCallSites.clear(); @@ -259,52 +301,128 @@ static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits, return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0)); } +ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) { + // Create globals to stand in for byte arrays and masks. These never actually + // get initialized, we RAUW and erase them later in allocateByteArrays() once + // we know the offset and mask to use. 
+ auto ByteArrayGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + auto MaskGlobal = new GlobalVariable( + *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); + + ByteArrayInfos.emplace_back(); + ByteArrayInfo *BAI = &ByteArrayInfos.back(); + + BAI->Bits = BSI.Bits; + BAI->BitSize = BSI.BitSize; + BAI->ByteArray = ByteArrayGlobal; + BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty); + return BAI; +} + +void LowerBitSets::allocateByteArrays() { + std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(), + [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) { + return BAI1.BitSize > BAI2.BitSize; + }); + + std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size()); + + ByteArrayBuilder BAB; + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + uint8_t Mask; + BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask); + + BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask)); + cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent(); + } + + Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes); + auto ByteArray = + new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true, + GlobalValue::PrivateLinkage, ByteArrayConst); + + for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { + ByteArrayInfo *BAI = &ByteArrayInfos[I]; + + Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs); + + // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures + // that the pc-relative displacement is folded into the lea instead of the + // test instruction getting another displacement. + if (LinkerSubsectionsViaSymbols) { + BAI->ByteArray->replaceAllUsesWith(GEP); + } else { + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M); + BAI->ByteArray->replaceAllUsesWith(Alias); + } + BAI->ByteArray->eraseFromParent(); + } + + ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] + + BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] + + BAB.BitAllocs[6] + BAB.BitAllocs[7]; + ByteArraySizeBytes = BAB.Bytes.size(); +} + /// Build a test that bit BitOffset is set in BSI, where /// BitSetGlobal is a global containing the bits in BSI. -Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI, - GlobalVariable *BitSetGlobal, - Value *BitOffset) { - if (BSI.Bits.size() <= 8) { +Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, + ByteArrayInfo *&BAI, Value *BitOffset) { + if (BSI.BitSize <= 64) { // If the bit set is sufficiently small, we can avoid a load by bit testing // a constant. IntegerType *BitsTy; - if (BSI.Bits.size() <= 4) + if (BSI.BitSize <= 32) BitsTy = Int32Ty; else BitsTy = Int64Ty; uint64_t Bits = 0; - for (auto I = BSI.Bits.rbegin(), E = BSI.Bits.rend(); I != E; ++I) { - Bits <<= 8; - Bits |= *I; - } + for (auto Bit : BSI.Bits) + Bits |= uint64_t(1) << Bit; Constant *BitsConst = ConstantInt::get(BitsTy, Bits); return createMaskedBitTest(B, BitsConst, BitOffset); } else { - // TODO: We might want to use the memory variant of the bt instruction - // with the previously computed bit offset at -Os. This instruction does - // exactly what we want but has been benchmarked as being slower than open - // coding the load+bt. 
- Value *BitSetGlobalOffset = - B.CreateLShr(BitOffset, ConstantInt::get(IntPtrTy, 5)); - Value *BitSetEntryAddr = B.CreateGEP( - ConstantExpr::getBitCast(BitSetGlobal, Int32PtrTy), BitSetGlobalOffset); - Value *BitSetEntry = B.CreateLoad(BitSetEntryAddr); - - return createMaskedBitTest(B, BitSetEntry, BitOffset); + if (!BAI) { + ++NumByteArraysCreated; + BAI = createByteArray(BSI); + } + + Constant *ByteArray = BAI->ByteArray; + if (!LinkerSubsectionsViaSymbols && AvoidReuse) { + // Each use of the byte array uses a different alias. This makes the + // backend less likely to reuse previously computed byte array addresses, + // improving the security of the CFI mechanism based on this pass. + ByteArray = GlobalAlias::create( + BAI->ByteArray->getType()->getElementType(), 0, + GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); + } + + Value *ByteAddr = B.CreateGEP(ByteArray, BitOffset); + Value *Byte = B.CreateLoad(ByteAddr); + + Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask); + return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0)); } } /// Lower a llvm.bitset.test call to its implementation. Returns the value to /// replace the call with. Value *LowerBitSets::lowerBitSetCall( - CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal, + CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, GlobalVariable *CombinedGlobal, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) { Value *Ptr = CI->getArgOperand(0); + const DataLayout &DL = M->getDataLayout(); if (BSI.containsValue(DL, GlobalLayout, Ptr)) - return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext()); + return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext()); Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy); Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( @@ -336,8 +454,8 @@ Value *LowerBitSets::lowerBitSetCall( Value *OffsetSHR = B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2)); Value *OffsetSHL = B.CreateShl( - PtrOffset, ConstantInt::get(IntPtrTy, DL->getPointerSizeInBits(0) - - BSI.AlignLog2)); + PtrOffset, + ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2)); BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); } @@ -353,7 +471,7 @@ Value *LowerBitSets::lowerBitSetCall( // Now that we know that the offset is in range and aligned, load the // appropriate bit from the bitset. - Value *Bit = createBitSetTest(ThenB, BSI, BitSetGlobal, BitOffset); + Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset); // The value we want is 0 if we came directly from the initial block // (having failed the range or alignment checks), or the loaded bit if @@ -368,14 +486,14 @@ Value *LowerBitSets::lowerBitSetCall( /// Given a disjoint set of bitsets and globals, layout the globals, build the /// bit sets and lower the llvm.bitset.test calls. void LowerBitSets::buildBitSetsFromGlobals( - Module &M, const std::vector<MDString *> &BitSets, const std::vector<GlobalVariable *> &Globals) { // Build a new global with the combined contents of the referenced globals. std::vector<Constant *> GlobalInits; + const DataLayout &DL = M->getDataLayout(); for (GlobalVariable *G : Globals) { GlobalInits.push_back(G->getInitializer()); - uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType()); + uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType()); // Compute the amount of padding required to align the next element to the // next power of 2. 
@@ -391,13 +509,13 @@ void LowerBitSets::buildBitSetsFromGlobals( } if (!GlobalInits.empty()) GlobalInits.pop_back(); - Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); + Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits); auto CombinedGlobal = - new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, + new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); const StructLayout *CombinedGlobalLayout = - DL->getStructLayout(cast<StructType>(NewInit->getType())); + DL.getStructLayout(cast<StructType>(NewInit->getType())); // Compute the offsets of the original globals within the new global. DenseMap<GlobalVariable *, uint64_t> GlobalLayout; @@ -410,18 +528,12 @@ void LowerBitSets::buildBitSetsFromGlobals( // Build the bitset. BitSetInfo BSI = buildBitSet(BS, GlobalLayout); - // Create a global in which to store it. - ++NumBitSetsCreated; - Constant *BitsConst = ConstantDataArray::get(M.getContext(), BSI.Bits); - auto BitSetGlobal = new GlobalVariable( - M, BitsConst->getType(), /*isConstant=*/true, - GlobalValue::PrivateLinkage, BitsConst, BS->getString() + ".bits"); + ByteArrayInfo *BAI = 0; // Lower each call to llvm.bitset.test for this bitset. for (CallInst *CI : BitSetTestCallSites[BS]) { ++NumBitSetCallsLowered; - Value *Lowered = - lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout); + Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout); CI->replaceAllUsesWith(Lowered); CI->eraseFromParent(); } @@ -436,20 +548,24 @@ void LowerBitSets::buildBitSetsFromGlobals( ConstantInt::get(Int32Ty, I * 2)}; Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs); - GlobalAlias *GAlias = GlobalAlias::create( - Globals[I]->getType()->getElementType(), - Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(), - "", CombinedGlobalElemPtr, &M); - GAlias->takeName(Globals[I]); - Globals[I]->replaceAllUsesWith(GAlias); + if (LinkerSubsectionsViaSymbols) { + Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + GlobalAlias *GAlias = GlobalAlias::create( + Globals[I]->getType()->getElementType(), + Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(), + "", CombinedGlobalElemPtr, M); + GAlias->takeName(Globals[I]); + Globals[I]->replaceAllUsesWith(GAlias); + } Globals[I]->eraseFromParent(); } } /// Lower all bit sets in this module. -bool LowerBitSets::buildBitSets(Module &M) { +bool LowerBitSets::buildBitSets() { Function *BitSetTestFunc = - M.getFunction(Intrinsic::getName(Intrinsic::bitset_test)); + M->getFunction(Intrinsic::getName(Intrinsic::bitset_test)); if (!BitSetTestFunc) return false; @@ -591,22 +707,24 @@ bool LowerBitSets::buildBitSets(Module &M) { }); // Build the bitsets from this disjoint set. 
- buildBitSetsFromGlobals(M, BitSets, OrderedGlobals); + buildBitSetsFromGlobals(BitSets, OrderedGlobals); } + allocateByteArrays(); + return true; } -bool LowerBitSets::eraseBitSetMetadata(Module &M) { +bool LowerBitSets::eraseBitSetMetadata() { if (!BitSetNM) return false; - M.eraseNamedMetadata(BitSetNM); + M->eraseNamedMetadata(BitSetNM); return true; } bool LowerBitSets::runOnModule(Module &M) { - bool Changed = buildBitSets(M); - Changed |= eraseBitSetMetadata(M); + bool Changed = buildBitSets(); + Changed |= eraseBitSetMetadata(); return Changed; } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index b91ebf2..596674d 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -127,9 +127,8 @@ namespace { /// side of claiming that two functions are different). class FunctionComparator { public: - FunctionComparator(const DataLayout *DL, const Function *F1, - const Function *F2) - : FnL(F1), FnR(F2), DL(DL) {} + FunctionComparator(const Function *F1, const Function *F2) + : FnL(F1), FnR(F2) {} /// Test whether the two functions have equivalent behaviour. int compare(); @@ -292,8 +291,7 @@ private: /// Parts to be compared for each comparison stage, /// most significant stage first: /// 1. Address space. As numbers. - /// 2. Constant offset, (if "DataLayout *DL" field is not NULL, - /// using GEPOperator::accumulateConstantOffset method). + /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method). /// 3. Pointer operand type (using cmpType method). /// 4. Number of operands. /// 5. Compare operands, using cmpValues method. @@ -354,8 +352,6 @@ private: // The two functions undergoing comparison. const Function *FnL, *FnR; - const DataLayout *DL; - /// Assign serial numbers to values from left function, and values from /// right function. /// Explanation: @@ -394,14 +390,13 @@ private: class FunctionNode { AssertingVH<Function> F; - const DataLayout *DL; public: - FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {} + FunctionNode(Function *F) : F(F) {} Function *getFunc() const { return F; } void release() { F = 0; } bool operator<(const FunctionNode &RHS) const { - return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1; + return (FunctionComparator(F, RHS.getFunc()).compare()) == -1; } }; } @@ -620,10 +615,11 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { PointerType *PTyL = dyn_cast<PointerType>(TyL); PointerType *PTyR = dyn_cast<PointerType>(TyR); - if (DL) { - if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL); - if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR); - } + const DataLayout &DL = FnL->getParent()->getDataLayout(); + if (PTyL && PTyL->getAddressSpace() == 0) + TyL = DL.getIntPtrType(TyL); + if (PTyR && PTyR->getAddressSpace() == 0) + TyR = DL.getIntPtrType(TyR); if (TyL == TyR) return 0; @@ -855,13 +851,12 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, // When we have target data, we can reduce the GEP down to the value in bytes // added to the address. 
- if (DL) { - unsigned BitWidth = DL->getPointerSizeInBits(ASL); - APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); - if (GEPL->accumulateConstantOffset(*DL, OffsetL) && - GEPR->accumulateConstantOffset(*DL, OffsetR)) - return cmpAPInts(OffsetL, OffsetR); - } + const DataLayout &DL = FnL->getParent()->getDataLayout(); + unsigned BitWidth = DL.getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(DL, OffsetL) && + GEPR->accumulateConstantOffset(DL, OffsetR)) + return cmpAPInts(OffsetL, OffsetR); if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), (uint64_t)GEPR->getPointerOperand()->getType())) @@ -1122,9 +1117,6 @@ private: /// to modify it. FnTreeType FnTree; - /// DataLayout for more accurate GEP comparisons. May be NULL. - const DataLayout *DL; - /// Whether or not the target supports global aliases. bool HasGlobalAliases; }; @@ -1152,8 +1144,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { Function *F1 = cast<Function>(*I); Function *F2 = cast<Function>(*J); - int Res1 = FunctionComparator(DL, F1, F2).compare(); - int Res2 = FunctionComparator(DL, F2, F1).compare(); + int Res1 = FunctionComparator(F1, F2).compare(); + int Res2 = FunctionComparator(F2, F1).compare(); // If F1 <= F2, then F2 >= F1, otherwise report failure. if (Res1 != -Res2) { @@ -1174,8 +1166,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { continue; Function *F3 = cast<Function>(*K); - int Res3 = FunctionComparator(DL, F1, F3).compare(); - int Res4 = FunctionComparator(DL, F2, F3).compare(); + int Res3 = FunctionComparator(F1, F3).compare(); + int Res4 = FunctionComparator(F2, F3).compare(); bool Transitive = true; @@ -1212,8 +1204,6 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) @@ -1420,7 +1410,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { // that was already inserted. bool MergeFunctions::insert(Function *NewFunction) { std::pair<FnTreeType::iterator, bool> Result = - FnTree.insert(FunctionNode(NewFunction, DL)); + FnTree.insert(FunctionNode(NewFunction)); if (Result.second) { DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); @@ -1457,7 +1447,7 @@ bool MergeFunctions::insert(Function *NewFunction) { void MergeFunctions::remove(Function *F) { // We need to make sure we remove F, not a function "equal" to F per the // function equality comparator. 
- FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL)); + FnTreeType::iterator found = FnTree.find(FunctionNode(F)); size_t Erased = 0; if (found != FnTree.end() && found->getFunc() == F) { Erased = 1; diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 9a75050..d28d563 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -77,6 +77,10 @@ static cl::opt<bool> EnableMLSM("mlsm", cl::init(true), cl::Hidden, cl::desc("Enable motion of merged load and store")); +static cl::opt<bool> EnableLoopInterchange( + "enable-loopinterchange", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopInterchange Pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -93,7 +97,6 @@ PassManagerBuilder::PassManagerBuilder() { DisableGVNLoadPRE = false; VerifyInput = false; VerifyOutput = false; - StripDebug = false; MergeFunctions = false; } @@ -239,6 +242,8 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops + if (EnableLoopInterchange) + MPM.add(createLoopInterchangePass()); // Interchange loops if (!DisableUnrollLoops) MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops @@ -305,8 +310,7 @@ void PassManagerBuilder::populateModulePassManager( // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. - if (ExtraVectorizerPasses) - MPM.add(createLoopRotatePass()); + MPM.add(createLoopRotatePass()); MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); // FIXME: Because of #pragma vectorize enable, the passes below are always @@ -358,9 +362,20 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createCFGSimplificationPass()); MPM.add(createInstructionCombiningPass()); - if (!DisableUnrollLoops) + if (!DisableUnrollLoops) { MPM.add(createLoopUnrollPass()); // Unroll small loops + // This is a barrier pass to avoid combine LICM pass and loop unroll pass + // within same loop pass manager. + MPM.add(createInstructionSimplifierPass()); + + // Runtime unrolling will introduce runtime check in loop prologue. If the + // unrolled loop is a inner loop, then the prologue will be inside the + // outer loop. LICM pass can help to promote the runtime check out if the + // checked value is loop invariant. + MPM.add(createLICMPass()); + } + // After vectorization and unrolling, assume intrinsics may tell us more // about pointer alignments. MPM.add(createAlignmentFromAssumptionsPass()); @@ -454,6 +469,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // More loops are countable; try to optimize them. PM.add(createIndVarSimplifyPass()); PM.add(createLoopDeletionPass()); + if (EnableLoopInterchange) + PM.add(createLoopInterchangePass()); + PM.add(createLoopVectorizePass(true, LoopVectorize)); // More scalar chains could be vectorized due to more alias information @@ -473,10 +491,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); +} - // Lower bitset metadata to bitsets. 
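Editor's note: for context on the new, off-by-default -enable-loopinterchange option registered in the PassManagerBuilder hunk above, loop interchange reorders a loop nest so the innermost loop walks memory contiguously. A source-level sketch of the kind of nest it targets (illustrative only; whether the pass transforms a given nest depends on its own legality and profitability checks):

#include <cassert>
#include <vector>

// Column-major traversal of a row-major matrix: the inner loop strides by N
// doubles per iteration, touching a new cache line almost every time.
static void scaleColumnsFirst(std::vector<double> &M, int N, double K) {
  for (int j = 0; j < N; ++j)
    for (int i = 0; i < N; ++i)
      M[i * N + j] *= K;
}

// The interchanged nest the pass aims to produce: the inner loop now walks
// consecutive memory, which is the cache-friendly order.
static void scaleRowsFirst(std::vector<double> &M, int N, double K) {
  for (int i = 0; i < N; ++i)
    for (int j = 0; j < N; ++j)
      M[i * N + j] *= K;
}

int main() {
  const int N = 64;
  std::vector<double> A(N * N, 1.0), B(N * N, 1.0);
  scaleColumnsFirst(A, N, 2.0);
  scaleRowsFirst(B, N, 2.0);
  assert(A == B); // same result, different traversal order
  return 0;
}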
- PM.add(createLowerBitSetsPass()); - +void PassManagerBuilder::addLateLTOOptimizationPasses( + legacy::PassManagerBase &PM) { // Delete basic blocks, which optimization passes may have killed. PM.add(createCFGSimplificationPass()); @@ -496,19 +514,19 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (VerifyInput) PM.add(createVerifierPass()); - if (StripDebug) - PM.add(createStripSymbolsPass(true)); + if (OptLevel > 1) + addLTOOptimizationPasses(PM); - if (VerifyInput) - PM.add(createDebugInfoVerifierPass()); + // Lower bit sets to globals. This pass supports Clang's control flow + // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI + // is enabled. The pass does nothing if CFI is disabled. + PM.add(createLowerBitSetsPass()); if (OptLevel != 0) - addLTOOptimizationPasses(PM); + addLateLTOOptimizationPasses(PM); - if (VerifyOutput) { + if (VerifyOutput) PM.add(createVerifierPass()); - PM.add(createDebugInfoVerifierPass()); - } } inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 752f79d..c608f84 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -891,7 +891,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero, /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS, - Instruction *CxtI) { + Instruction &CxtI) { // There are different heuristics we can use for this. Here are some simple // ones. @@ -909,18 +909,18 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS, // // Since the carry into the most significant position is always equal to // the carry out of the addition, there is no signed overflow. - if (ComputeNumSignBits(LHS, 0, CxtI) > 1 && - ComputeNumSignBits(RHS, 0, CxtI) > 1) + if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 && + ComputeNumSignBits(RHS, 0, &CxtI) > 1) return true; unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI); APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI); // Addition of two 2's compliment numbers having opposite signs will never // overflow. @@ -943,21 +943,21 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS, /// overflow to change the sign bit or have a carry out. /// TODO: Handle this for Vectors. bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS, - Instruction *CxtI) { + Instruction &CxtI) { // If LHS and RHS each have at least two sign bits, the subtraction // cannot overflow. 
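Editor's note: the sign-bit argument quoted above (each operand having more than one sign bit implies the signed add cannot wrap) can be checked exhaustively at a small width. A standalone sketch over i8, independent of the LLVM helpers:

#include <cassert>
#include <cstdint>

// Number of leading copies of the sign bit in an 8-bit value, counting the
// sign bit itself -- the property ComputeNumSignBits reports.
static int numSignBits(int8_t V) {
  uint8_t U = static_cast<uint8_t>(V);
  uint8_t Sign = U >> 7;
  int N = 0;
  for (int Bit = 7; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N;
}

int main() {
  // If both operands have more than one sign bit, they lie in [-64, 63], so
  // their sum lies in [-128, 126] and an i8 add can never wrap.
  for (int L = -128; L <= 127; ++L)
    for (int R = -128; R <= 127; ++R)
      if (numSignBits(static_cast<int8_t>(L)) > 1 &&
          numSignBits(static_cast<int8_t>(R)) > 1) {
        int Wide = L + R;                           // mathematically exact
        int8_t Narrow = static_cast<int8_t>(L + R); // value fits, no wrap
        assert(Wide == Narrow && "an nsw add would be justified here");
      }
  return 0;
}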
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 && - ComputeNumSignBits(RHS, 0, CxtI) > 1) + if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 && + ComputeNumSignBits(RHS, 0, &CxtI) > 1) return true; unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI); APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI); // Subtraction of two 2's compliment numbers having identical signs will // never overflow. @@ -972,12 +972,14 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS, /// \brief Return true if we can prove that: /// (sub LHS, RHS) === (sub nuw LHS, RHS) bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, - Instruction *CxtI) { + Instruction &CxtI) { // If the LHS is negative and the RHS is non-negative, no unsigned wrap. bool LHSKnownNonNegative, LHSKnownNegative; bool RHSKnownNonNegative, RHSKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, CxtI); - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, CxtI); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, + &CxtI); + ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, + &CxtI); if (LHSKnownNegative && RHSKnownNonNegative) return true; @@ -1046,15 +1048,15 @@ static Value *checkForNegativeOperand(BinaryOperator &I, } Instruction *InstCombiner::visitAdd(BinaryOperator &I) { - bool Changed = SimplifyAssociativeOrCommutative(I); - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (Value *V = SimplifyVectorOp(I)) - return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, TLI, DT, AC)) - return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), DL, TLI, DT, AC)) + return ReplaceInstUsesWith(I, V); // (A*B)+(A*C) -> A*(B+C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) @@ -1243,7 +1245,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && ConstantExpr::getSExt(CI, I.getType()) == RHSC && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) { + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); @@ -1256,10 +1258,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // Only do this if x/y have the same type, if at last one of them has a // single use (so we don't increase the number of sexts), and if the // integer add will not overflow. - if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + if (LHSConv->getOperand(0)->getType() == + RHSConv->getOperand(0)->getType() && (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && WillNotOverflowSignedAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), &I)) { + RHSConv->getOperand(0), I)) { // Insert the new integer add. 
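Editor's note: the sext-folding case in visitAdd above is only legal because widening commutes with a non-overflowing narrow add, which is why it is guarded by WillNotOverflowSignedAdd. A small standalone illustration with i8/i32 stand-ins:

#include <cassert>
#include <cstdint>

int main() {
  // (sext A) + (sext B) may be rewritten as sext(A + B) only when the narrow
  // add cannot overflow; with an in-range pair both forms agree.
  int8_t A = 40, B = 23; // 40 + 23 = 63 still fits in i8
  int32_t WideFirst = int32_t(A) + int32_t(B);
  int32_t AddThenWiden = int32_t(int8_t(A + B));
  assert(WideFirst == AddThenWiden);

  // With an overflowing pair the narrow result (200 reduced into i8 range,
  // -56 on the usual two's complement targets) can no longer equal the wide
  // sum, so the fold would change the program -- hence the overflow guard.
  int8_t C = 100, D = 100; // 200 does not fit in i8
  assert(int32_t(int8_t(C + D)) != int32_t(C) + int32_t(D));
  return 0;
}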
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); @@ -1307,7 +1310,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // TODO(jingyue): Consider WillNotOverflowSignedAdd and // WillNotOverflowUnsignedAdd to reduce the number of invocations of // computeKnownBits. - if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) { + if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) { Changed = true; I.setHasNoSignedWrap(true); } @@ -1371,7 +1374,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && ConstantExpr::getSIToFP(CI, I.getType()) == CFP && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) { + WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new integer add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); @@ -1384,10 +1387,11 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // Only do this if x/y have the same type, if at last one of them has a // single use (so we don't increase the number of int->fp conversions), // and if the integer add will not overflow. - if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& + if (LHSConv->getOperand(0)->getType() == + RHSConv->getOperand(0)->getType() && (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && WillNotOverflowSignedAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), &I)) { + RHSConv->getOperand(0), I)) { // Insert the new integer add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0),"addconv"); @@ -1436,8 +1440,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { /// Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty) { - assert(DL && "Must have target data info for this"); - // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. bool Swapped = false; @@ -1662,26 +1664,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". 
- if (DL) { - Value *LHSOp, *RHSOp; - if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && - match(Op1, m_PtrToInt(m_Value(RHSOp)))) - if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) - return ReplaceInstUsesWith(I, Res); - - // trunc(p)-trunc(q) -> trunc(p-q) - if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && - match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) - if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) - return ReplaceInstUsesWith(I, Res); - } + Value *LHSOp, *RHSOp; + if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && + match(Op1, m_PtrToInt(m_Value(RHSOp)))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); + + // trunc(p)-trunc(q) -> trunc(p-q) + if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && + match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) + if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) + return ReplaceInstUsesWith(I, Res); bool Changed = false; - if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, &I)) { + if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) { Changed = true; I.setHasNoSignedWrap(true); } - if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, &I)) { + if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) { Changed = true; I.setHasNoUnsignedWrap(true); } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 863eeaf..ee21c81 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -979,9 +979,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // Make a constant range that's the intersection of the two icmp ranges. // If the intersection is empty, we know that the result is false. 
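Editor's note: the pointer-difference fold exercised above ("&A[10] - &A[0] should compile to 10") is the IR form of ordinary C++ pointer subtraction. A standalone check of the element-count and byte-count relationship it relies on:

#include <cassert>
#include <cstddef>

int main() {
  int A[16] = {};
  // Source-level view of the fold: the element-count difference is a constant.
  std::ptrdiff_t Elements = &A[10] - &A[0];
  assert(Elements == 10);

  // What the sub of two ptrtoints sees before the fold is the byte distance,
  // i.e. the element count scaled by the element size.
  std::ptrdiff_t Bytes = reinterpret_cast<const char *>(&A[10]) -
                         reinterpret_cast<const char *>(&A[0]);
  assert(Bytes == Elements * static_cast<std::ptrdiff_t>(sizeof(int)));
  return 0;
}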
ConstantRange LHSRange = - ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue()); + ConstantRange::makeAllowedICmpRegion(LHSCC, LHSCst->getValue()); ConstantRange RHSRange = - ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue()); + ConstantRange::makeAllowedICmpRegion(RHSCC, RHSCst->getValue()); if (LHSRange.intersectWith(RHSRange).isEmptySet()) return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); @@ -1709,15 +1709,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Value *Mask = nullptr; Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AC, CxtI, DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AC, CxtI, DT)) { + isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI, + DT) && + isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI, + DT)) { Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, AC, CxtI, - DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, AC, CxtI, - DT)) { + isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC, + CxtI, DT) && + isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC, + CxtI, DT)) { Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 05e7162..21243c2 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" @@ -61,8 +60,8 @@ static Type *reduceToSingleValueType(Type *T) { } Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT); - unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT); + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -108,7 +107,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (StrippedDest != MI->getArgOperand(0)) { Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); - if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) { + if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) { // The SrcETy might be something like {{{double}}} or [1 x double]. Rip // down through these levels if so. 
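Editor's note: the and-of-icmps fold above intersects the two allowed regions and folds the whole expression to false when the intersection is empty. A deliberately simplified, non-wrapping stand-in for ConstantRange that shows the idea (the real class also handles wrapped ranges and every predicate):

#include <cassert>
#include <cstdint>

// Minimal non-wrapping stand-in for a constant range [Lo, Hi).
struct SimpleRange {
  int64_t Lo, Hi; // empty when Lo >= Hi
  bool isEmptySet() const { return Lo >= Hi; }
  SimpleRange intersectWith(const SimpleRange &O) const {
    return {Lo > O.Lo ? Lo : O.Lo, Hi < O.Hi ? Hi : O.Hi};
  }
};

int main() {
  // (x ult 5) && (x ugt 10): allowed regions [0, 5) and [11, 2^32) have an
  // empty intersection, so the whole 'and' folds to false.
  SimpleRange LHS{0, 5};
  SimpleRange RHS{11, int64_t(1) << 32};
  assert(LHS.intersectWith(RHS).isEmptySet());

  // (x ult 8) && (x ugt 3) leaves [4, 8); no fold to a constant here.
  SimpleRange A{0, 8}, B{4, int64_t(1) << 32};
  assert(!A.intersectWith(B).isEmptySet());
  return 0;
}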
SrcETy = reduceToSingleValueType(SrcETy); @@ -156,7 +155,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT); + unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT); if (MI->getAlignment() < Alignment) { MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Alignment, false)); @@ -198,6 +197,71 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return nullptr; } +/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit +/// source vectors, unless a zero bit is set. If a zero bit is set, +/// then ignore that half of the mask and clear that half of the vector. +static Value *SimplifyX86vperm2(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder) { + if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + VectorType *VecTy = cast<VectorType>(II.getType()); + ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); + + // The immediate permute control byte looks like this: + // [1:0] - select 128 bits from sources for low half of destination + // [2] - ignore + // [3] - zero low half of destination + // [5:4] - select 128 bits from sources for high half of destination + // [6] - ignore + // [7] - zero high half of destination + + uint8_t Imm = CInt->getZExtValue(); + + bool LowHalfZero = Imm & 0x08; + bool HighHalfZero = Imm & 0x80; + + // If both zero mask bits are set, this was just a weird way to + // generate a zero vector. + if (LowHalfZero && HighHalfZero) + return ZeroVector; + + // If 0 or 1 zero mask bits are set, this is a simple shuffle. + unsigned NumElts = VecTy->getNumElements(); + unsigned HalfSize = NumElts / 2; + SmallVector<int, 8> ShuffleMask(NumElts); + + // The high bit of the selection field chooses the 1st or 2nd operand. + bool LowInputSelect = Imm & 0x02; + bool HighInputSelect = Imm & 0x20; + + // The low bit of the selection field chooses the low or high half + // of the selected operand. + bool LowHalfSelect = Imm & 0x01; + bool HighHalfSelect = Imm & 0x10; + + // Determine which operand(s) are actually in use for this instruction. + Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); + Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); + + // If needed, replace operands based on zero mask. + V0 = LowHalfZero ? ZeroVector : V0; + V1 = HighHalfZero ? ZeroVector : V1; + + // Permute low half of result. + unsigned StartIndex = LowHalfSelect ? HalfSize : 0; + for (unsigned i = 0; i < HalfSize; ++i) + ShuffleMask[i] = StartIndex + i; + + // Permute high half of result. + StartIndex = HighHalfSelect ? HalfSize : 0; + StartIndex += NumElts; + for (unsigned i = 0; i < HalfSize; ++i) + ShuffleMask[i + HalfSize] = StartIndex + i; + + return Builder.CreateShuffleVector(V0, V1, ShuffleMask); + } + return nullptr; +} + /// visitCallInst - CallInst simplification. This mostly only handles folding /// of intrinsic instructions. For normal calls, it allows visitCallSite to do /// the heavy lifting. @@ -386,7 +450,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // can prove that it will never overflow. 
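Editor's note: to make the control-byte decoding in SimplifyX86vperm2 above concrete, here is a standalone emulation of the <4 x double> form that follows the same bit layout; the immediates are just worked examples:

#include <array>
#include <cassert>
#include <cstdint>

// Emulate the <4 x double> form of vperm2f128 for a given immediate, using
// the control-byte layout documented in the combine.
static std::array<double, 4> vperm2f128(const std::array<double, 4> &Src0,
                                        const std::array<double, 4> &Src1,
                                        uint8_t Imm) {
  std::array<double, 4> Res{};
  for (int Half = 0; Half < 2; ++Half) {
    unsigned Field = (Imm >> (4 * Half)) & 0xF;            // [3:0] low, [7:4] high
    bool ZeroHalf = Field & 0x8;                           // zero-out bit
    const std::array<double, 4> &Src = (Field & 0x2) ? Src1 : Src0;
    unsigned From = (Field & 0x1) ? 2 : 0;                 // low or high 128 bits
    for (int i = 0; i < 2; ++i)
      Res[2 * Half + i] = ZeroHalf ? 0.0 : Src[From + i];
  }
  return Res;
}

int main() {
  std::array<double, 4> A{0, 1, 2, 3}, B{4, 5, 6, 7};
  // Imm 0x31 selects the high half of A and the high half of B; the combine
  // would emit a shufflevector with mask <2, 3, 6, 7>.
  assert((vperm2f128(A, B, 0x31) == std::array<double, 4>{2, 3, 6, 7}));
  // Both zero bits set: the result is simply a zero vector, no shuffle needed.
  assert((vperm2f128(A, B, 0x88) == std::array<double, 4>{0, 0, 0, 0}));
  return 0;
}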
if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedAdd(LHS, RHS, II)) { + if (WillNotOverflowSignedAdd(LHS, RHS, *II)) { return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false); } } @@ -407,11 +471,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) { - if (WillNotOverflowSignedSub(LHS, RHS, II)) { + if (WillNotOverflowSignedSub(LHS, RHS, *II)) { return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false); } } else { - if (WillNotOverflowUnsignedSub(LHS, RHS, II)) { + if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) { return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false); } } @@ -452,7 +516,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedMul(LHS, RHS, II)) { + if (WillNotOverflowSignedMul(LHS, RHS, *II)) { return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false); } } @@ -544,7 +608,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: // Turn PPC lvx -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -561,7 +625,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); @@ -578,7 +642,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -587,7 +651,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_qpx_qvlfd: // Turn PPC QPX qvlfd -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >= 32) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -596,7 +660,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_qpx_qvstfs: // Turn PPC QPX qvstfs -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); @@ -606,7 +670,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_qpx_qvstfd: // Turn PPC QPX qvstfd -> store if the pointer is known aligned. 
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >= 32) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); @@ -618,7 +682,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >= + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); @@ -735,9 +799,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { unsigned LowHalfElts = VWidth / 2; APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts)); APInt UndefElts(VWidth, 0); - if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), - InputDemandedElts, - UndefElts)) { + if (Value *TmpV = SimplifyDemandedVectorElts( + II->getArgOperand(0), InputDemandedElts, UndefElts)) { II->setArgOperand(0, TmpV); return II; } @@ -906,6 +969,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return ReplaceInstUsesWith(CI, Shuffle); } + case Intrinsic::x86_avx_vperm2f128_pd_256: + case Intrinsic::x86_avx_vperm2f128_ps_256: + case Intrinsic::x86_avx_vperm2f128_si_256: + case Intrinsic::x86_avx2_vperm2i128: + if (Value *V = SimplifyX86vperm2(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. // Note that ppc_altivec_vperm has a big-endian bias, so when creating @@ -945,12 +1016,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { unsigned Idx = cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. - if (DL && DL->isLittleEndian()) + if (DL.isLittleEndian()) Idx = 31 - Idx; if (!ExtractedElts[Idx]) { - Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0; - Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1; + Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0; + Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1; ExtractedElts[Idx] = Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, Builder->getInt32(Idx&15)); @@ -979,7 +1050,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::arm_neon_vst2lane: case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: { - unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT); + unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT); unsigned AlignArg = II->getNumArgOperands() - 1; ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg)); if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { @@ -1118,7 +1189,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { RHS->getType()->isPointerTy() && cast<Constant>(RHS)->isNullValue()) { LoadInst* LI = cast<LoadInst>(LHS); - if (isValidAssumeForContext(II, LI, DL, DT)) { + if (isValidAssumeForContext(II, LI, DT)) { MDNode *MD = MDNode::get(II->getContext(), None); LI->setMetadata(LLVMContext::MD_nonnull, MD); return EraseInstFromFunction(*II); @@ -1192,8 +1263,8 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { /// isSafeToEliminateVarargsCast - If this cast does not affect the value /// passed through the varargs area, we can eliminate the use of the cast. 
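Editor's note: several of the hunks above (the altivec/QPX loads and stores and the x86 storeu case) gate the fold on a proven pointer alignment. A runtime model of that gate, purely for illustration -- the compiler establishes the same fact statically from alloca/global alignment and assumptions, and the 16-byte bound here is just the storeu example:

#include <cassert>
#include <cstdint>

// Largest power-of-two alignment (capped at 16) that a concrete address
// satisfies; a runtime stand-in for what getKnownAlignment proves statically.
static unsigned knownAlignment(const void *P) {
  uintptr_t Addr = reinterpret_cast<uintptr_t>(P);
  unsigned Align = 1;
  while (Align < 16 && (Addr & Align) == 0)
    Align *= 2;
  return Align;
}

int main() {
  alignas(16) unsigned char Buffer[64] = {};
  // Aligned start: eligible to become an ordinary (aligned) vector store.
  assert(knownAlignment(Buffer) == 16);
  // Offset by one byte: the fold must not fire; the unaligned intrinsic stays.
  assert(knownAlignment(Buffer + 1) == 1);
  return 0;
}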
static bool isSafeToEliminateVarargsCast(const CallSite CS, - const CastInst * const CI, - const DataLayout * const DL, + const DataLayout &DL, + const CastInst *const CI, const int ix) { if (!CI->isLosslessCast()) return false; @@ -1217,7 +1288,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) return false; - if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy)) + if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy)) return false; return true; } @@ -1226,7 +1297,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, // Currently we're only working with the checking functions, memcpy_chk, // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, // strcat_chk and strncat_chk. -Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) { +Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) { if (!CI->getCalledFunction()) return nullptr; auto InstCombineRAUW = [this](Instruction *From, Value *With) { @@ -1391,7 +1462,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(), E = CS.arg_end(); I != E; ++I, ++ix) { CastInst *CI = dyn_cast<CastInst>(*I); - if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) { + if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) { *I = CI->getOperand(0); Changed = true; } @@ -1408,7 +1479,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // this. None of these calls are seen as possibly dead so go ahead and // delete the instruction now. if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { - Instruction *I = tryOptimizeCall(CI, DL); + Instruction *I = tryOptimizeCall(CI); // If we changed something return the result, etc. Otherwise let // the fallthrough check. if (I) return EraseInstFromFunction(*I); @@ -1487,7 +1558,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // // into: // call void @takes_i32_inalloca(i32* null) - if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca)) + // + // Similarly, avoid folding away bitcasts of byval calls. + if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) || + Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal)) return false; CallSite::arg_iterator AI = CS.arg_begin(); @@ -1512,12 +1586,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1, Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); - if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL) + if (!ParamPTy || !ParamPTy->getElementType()->isSized()) return false; Type *CurElTy = ActTy->getPointerElementType(); - if (DL->getTypeAllocSize(CurElTy) != - DL->getTypeAllocSize(ParamPTy->getElementType())) + if (DL.getTypeAllocSize(CurElTy) != + DL.getTypeAllocSize(ParamPTy->getElementType())) return false; } } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 3e2b719..fe544c2 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -80,9 +80,6 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, /// try to eliminate the cast by moving the type information into the alloc. 
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { - // This requires DataLayout to get the alloca alignment and size information. - if (!DL) return nullptr; - PointerType *PTy = cast<PointerType>(CI.getType()); BuilderTy AllocaBuilder(*Builder); @@ -93,8 +90,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, Type *CastElTy = PTy->getElementType(); if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr; - unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy); - unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy); + unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy); + unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy); if (CastElTyAlign < AllocElTyAlign) return nullptr; // If the allocation has multiple uses, only promote it if we are strictly @@ -102,14 +99,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // same, we open the door to infinite loops of various kinds. if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr; - uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy); - uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy); + uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy); + uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy); if (CastElTySize == 0 || AllocElTySize == 0) return nullptr; // If the allocation has multiple uses, only promote it if we're not // shrinking the amount of memory being allocated. - uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy); - uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy); + uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy); if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr; // See if we can satisfy the modulus by pulling a scale out of the array @@ -215,7 +212,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, PHINode *OPN = cast<PHINode>(I); PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { - Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); + Value *V = + EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); NPN->addIncoming(V, OPN->getIncomingBlock(i)); } Res = NPN; @@ -234,25 +232,22 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, /// This function is a wrapper around CastInst::isEliminableCastPair. It /// simply extracts arguments and returns what that function returns. static Instruction::CastOps -isEliminableCastPair( - const CastInst *CI, ///< The first cast instruction - unsigned opcode, ///< The opcode of the second cast instruction - Type *DstTy, ///< The target type for the second cast instruction - const DataLayout *DL ///< The target data for pointer size -) { - +isEliminableCastPair(const CastInst *CI, ///< First cast instruction + unsigned opcode, ///< Opcode for the second cast + Type *DstTy, ///< Target type for the second cast + const DataLayout &DL) { Type *SrcTy = CI->getOperand(0)->getType(); // A from above Type *MidTy = CI->getType(); // B from above // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opcode); - Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(SrcTy) : nullptr; - Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ? 
- DL->getIntPtrType(MidTy) : nullptr; - Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(DstTy) : nullptr; + Type *SrcIntPtrTy = + SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr; + Type *MidIntPtrTy = + MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr; + Type *DstIntPtrTy = + DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr; unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, SrcIntPtrTy, MidIntPtrTy, DstIntPtrTy); @@ -298,7 +293,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { // eliminate it now. if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast if (Instruction::CastOps opc = - isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) { + isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. return CastInst::Create(opc, CSrc->getOperand(0), CI.getType()); @@ -314,8 +309,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (isa<PHINode>(Src)) { // We don't do this if this would create a PHI node with an illegal type if // it is currently legal. - if (!Src->getType()->isIntegerTy() || - !CI.getType()->isIntegerTy() || + if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() || ShouldChangeType(CI.getType(), Src->getType())) if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; @@ -1419,18 +1413,15 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. - - if (DL) { - unsigned AS = CI.getAddressSpace(); - if (CI.getOperand(0)->getType()->getScalarSizeInBits() != - DL->getPointerSizeInBits(AS)) { - Type *Ty = DL->getIntPtrType(CI.getContext(), AS); - if (CI.getType()->isVectorTy()) // Handle vectors of pointers. - Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - - Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); - return new IntToPtrInst(P, CI.getType()); - } + unsigned AS = CI.getAddressSpace(); + if (CI.getOperand(0)->getType()->getScalarSizeInBits() != + DL.getPointerSizeInBits(AS)) { + Type *Ty = DL.getIntPtrType(CI.getContext(), AS); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + return new IntToPtrInst(P, CI.getType()); } if (Instruction *I = commonCastTransforms(CI)) @@ -1460,32 +1451,33 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { return &CI; } - if (!DL) - return commonCastTransforms(CI); - // If the GEP has a single use, and the base pointer is a bitcast, and the // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other // non-type-safe code. 
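Editor's note: the A->B->C cast-pair elimination exercised above collapses a chain into a single cast only when that is value-preserving. The identities behind it can be checked directly with fixed-width integers:

#include <cassert>
#include <cstdint>

int main() {
  // zext i8 -> i16 followed by zext i16 -> i32 is a classic eliminable pair:
  // a single zext i8 -> i32 produces the same value.
  uint8_t X = 0xAB;
  assert(uint32_t(uint16_t(X)) == uint32_t(X));

  // trunc i32 -> i16 followed by zext i16 -> i32 is not a single cast: it
  // deliberately clears the high bits, so the pair cannot simply be dropped
  // (it behaves like an 'and' with 0xFFFF).
  uint32_t Y = 0x12345678u;
  assert(uint32_t(uint16_t(Y)) == (Y & 0xFFFFu));
  return 0;
}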
unsigned AS = GEP->getPointerAddressSpace(); - unsigned OffsetBits = DL->getPointerSizeInBits(AS); + unsigned OffsetBits = DL.getPointerSizeInBits(AS); APInt Offset(OffsetBits, 0); BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0)); - if (GEP->hasOneUse() && - BCI && - GEP->accumulateConstantOffset(*DL, Offset)) { + if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) { + // FIXME: This is insufficiently tested - just a no-crash test + // (test/Transforms/InstCombine/2007-05-14-Crash.ll) + // // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = BCI->getOperand(0); SmallVector<Value*, 8> NewIndices; - if (FindElementAtOffset(OrigBase->getType(), - Offset.getSExtValue(), + if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(), NewIndices)) { + // FIXME: This codepath is completely untested - could be unreachable + // for all I know. // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. - Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, NewIndices) : - Builder->CreateGEP(OrigBase, NewIndices); + Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() + ? Builder->CreateInBoundsGEP(OrigBase, NewIndices) + : Builder->CreateGEP( + OrigBase->getType()->getPointerElementType(), + OrigBase, NewIndices); NGEP->takeName(GEP); if (isa<BitCastInst>(CI)) @@ -1504,16 +1496,13 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. - if (!DL) - return commonPointerCastTransforms(CI); - Type *Ty = CI.getType(); unsigned AS = CI.getPointerAddressSpace(); - if (Ty->getScalarSizeInBits() == DL->getPointerSizeInBits(AS)) + if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS)) return commonPointerCastTransforms(CI); - Type *PtrTy = DL->getIntPtrType(CI.getContext(), AS); + Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS); if (Ty->isVectorTy()) // Handle vectors of pointers. PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); @@ -1597,8 +1586,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { /// This returns false if the pattern can't be matched or true if it can, /// filling in Elements with the elements found here. static bool CollectInsertionElements(Value *V, unsigned Shift, - SmallVectorImpl<Value*> &Elements, - Type *VecEltTy, InstCombiner &IC) { + SmallVectorImpl<Value *> &Elements, + Type *VecEltTy, bool isBigEndian) { assert(isMultipleOfTypeSize(Shift, VecEltTy) && "Shift should be a multiple of the element type size"); @@ -1614,7 +1603,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, return true; unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy); - if (IC.getDataLayout()->isBigEndian()) + if (isBigEndian) ElementIndex = Elements.size() - ElementIndex - 1; // Fail if multiple elements are inserted into this slot. @@ -1634,7 +1623,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, // it to the right type so it gets properly inserted. if (NumElts == 1) return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), - Shift, Elements, VecEltTy, IC); + Shift, Elements, VecEltTy, isBigEndian); // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. 
@@ -1649,7 +1638,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), ShiftI)); Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); - if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC)) + if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, + isBigEndian)) return false; } return true; @@ -1662,28 +1652,28 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: - return CollectInsertionElements(I->getOperand(0), Shift, - Elements, VecEltTy, IC); + return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), Shift, - Elements, VecEltTy, IC); + return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian); case Instruction::Or: - return CollectInsertionElements(I->getOperand(0), Shift, - Elements, VecEltTy, IC) && - CollectInsertionElements(I->getOperand(1), Shift, - Elements, VecEltTy, IC); + return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian) && + CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy, + isBigEndian); case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); if (!CI) return false; Shift += CI->getZExtValue(); if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), Shift, - Elements, VecEltTy, IC); + return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + isBigEndian); } } @@ -1706,15 +1696,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, /// Into two insertelements that do "buildvector{%inc, %inc5}". static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { - // We need to know the target byte order to perform this optimization. - if (!IC.getDataLayout()) return nullptr; - VectorType *DestVecTy = cast<VectorType>(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); if (!CollectInsertionElements(IntInput, 0, Elements, - DestVecTy->getElementType(), IC)) + DestVecTy->getElementType(), + IC.getDataLayout().isBigEndian())) return nullptr; // If we succeeded, we know that all of the element are specified by Elements @@ -1734,10 +1722,8 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double /// bitcast. The various long double bitcasts can't get in here. -static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ - // We need to know the target byte order to perform this optimization. 
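Editor's note: the insertion-element collection above recognises an integer assembled from element-sized pieces (shl/or/zext chains) and rebuilds the vector directly; the isBigEndian flag it now takes decides only which piece becomes element 0. A standalone sketch of the underlying packing and of the index mirroring, with the concrete values being arbitrary examples:

#include <cassert>
#include <cstdint>

int main() {
  // An i64 built as (zext Lo) | (zext Hi << 32) is the pattern the collector
  // recognises. Bitcast to <2 x i32>, element k is bits [32k+31 : 32k] on a
  // little-endian target; a big-endian target mirrors the element index.
  uint32_t Lo = 0x11111111u, Hi = 0x22222222u;
  uint64_t Packed = (uint64_t(Hi) << 32) | uint64_t(Lo);

  auto element = [&](unsigned Index, bool IsBigEndian) -> uint32_t {
    unsigned Slot = IsBigEndian ? (1 - Index) : Index; // mirror on big-endian
    return uint32_t(Packed >> (32 * Slot));
  };

  assert(element(0, /*IsBigEndian=*/false) == Lo);
  assert(element(1, /*IsBigEndian=*/false) == Hi);
  assert(element(0, /*IsBigEndian=*/true) == Hi);
  assert(element(1, /*IsBigEndian=*/true) == Lo);
  return 0;
}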
- if (!IC.getDataLayout()) return nullptr; - +static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC, + const DataLayout &DL) { Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); @@ -1760,7 +1746,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } unsigned Elt = 0; - if (IC.getDataLayout()->isBigEndian()) + if (DL.isBigEndian()) Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } @@ -1784,7 +1770,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } unsigned Elt = ShAmt->getZExtValue() / DestWidth; - if (IC.getDataLayout()->isBigEndian()) + if (DL.isBigEndian()) Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } @@ -1839,7 +1825,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // Try to optimize int -> float bitcasts. if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) - if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) + if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL)) return I; if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index f48d89b..803b50a 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -229,10 +229,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, Instruction *InstCombiner:: FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { - // We need TD information to know the pointer size unless this is inbounds. - if (!GEP->isInBounds() && !DL) - return nullptr; - Constant *Init = GV->getInitializer(); if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) return nullptr; @@ -303,7 +299,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // the array, this will fully represent all the comparison results. uint64_t MagicBitvector = 0; - // Scan the array and see if one of our patterns matches. Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { @@ -398,7 +393,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // index down like the GEP would do implicitly. We don't have to do this for // an inbounds GEP because the index can't be out of range. 
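Editor's note: the MagicBitvector mentioned above packs one comparison result per array element so that "cmp (load (gep @Array, i)), C" becomes a shift and mask of a constant. A standalone model of that encoding (the array contents and predicate are arbitrary examples):

#include <cassert>
#include <cstdint>

int main() {
  // For a comparison like "load (gep @A, i) > 50" over a small constant
  // array, precompute one result bit per element; the load plus compare then
  // reduces to a shift and mask of this constant.
  const int32_t A[8] = {10, 60, 3, 99, 50, 51, 0, 70};
  uint64_t MagicBitvector = 0;
  for (unsigned i = 0; i != 8; ++i)
    if (A[i] > 50)
      MagicBitvector |= uint64_t(1) << i;

  for (unsigned Idx = 0; Idx != 8; ++Idx) {
    bool Direct = A[Idx] > 50;
    bool ViaBitvector = (MagicBitvector >> Idx) & 1;
    assert(Direct == ViaBitvector);
  }
  return 0;
}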
if (!GEP->isInBounds()) { - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) Idx = Builder->CreateTrunc(Idx, IntPtrTy); @@ -487,10 +482,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // - Default to i32 if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) Ty = Idx->getType(); - else if (DL) - Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); - else if (ArrayElementCount <= 32) - Ty = Type::getInt32Ty(Init->getContext()); + else + Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); if (Ty) { Value *V = Builder->CreateIntCast(Idx, Ty, false); @@ -514,8 +507,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, /// /// If we can't emit an optimized form for this expression, this returns null. /// -static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { - const DataLayout &DL = *IC.getDataLayout(); +static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, + const DataLayout &DL) { gep_type_iterator GTI = gep_type_begin(GEP); // Check to see if this gep only has a single variable index. If so, and if @@ -628,12 +621,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, RHS = RHS->stripPointerCasts(); Value *PtrBase = GEPLHS->getOperand(0); - if (DL && PtrBase == RHS && GEPLHS->isInBounds()) { + if (PtrBase == RHS && GEPLHS->isInBounds()) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). // This transformation (ignoring the base and scales) is valid because we // know pointers can't overflow since the gep is inbounds. See if we can // output an optimized form. - Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); + Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL); // If not, synthesize the offset the hard way. if (!Offset) @@ -661,11 +654,11 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // If we're comparing GEPs with two base pointers that only differ in type // and both GEPs have only constant indices or just one use, then fold // the compare with the adjusted indices. - if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() && + if (GEPLHS->isInBounds() && GEPRHS->isInBounds() && (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && PtrBase->stripPointerCasts() == - GEPRHS->getOperand(0)->stripPointerCasts()) { + GEPRHS->getOperand(0)->stripPointerCasts()) { Value *LOffset = EmitGEPOffset(GEPLHS); Value *ROffset = EmitGEPOffset(GEPRHS); @@ -733,9 +726,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // Only lower this if the icmp is the only user of the GEP or if we expect // the result to fold to a constant! - if (DL && - GEPsInBounds && - (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && + if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) Value *L = EmitGEPOffset(GEPLHS); @@ -1928,8 +1919,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. 
- if (DL && LHSCI->getOpcode() == Instruction::PtrToInt && - DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { + if (LHSCI->getOpcode() == Instruction::PtrToInt && + DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { Value *RHSOp = nullptr; if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) { Value *RHSCIOp = RHSC->getOperand(0); @@ -2660,8 +2651,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { unsigned BitWidth = 0; if (Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); - else if (DL) // Pointers require DL info to get their size. - BitWidth = DL->getTypeSizeInBits(Ty->getScalarType()); + else // Get pointer size. + BitWidth = DL.getTypeSizeInBits(Ty->getScalarType()); bool isSignBit = false; @@ -2774,8 +2765,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Op0KnownZero, Op0KnownOne, 0)) return &I; if (SimplifyDemandedBits(I.getOperandUse(1), - APInt::getAllOnesValue(BitWidth), - Op1KnownZero, Op1KnownOne, 0)) + APInt::getAllOnesValue(BitWidth), Op1KnownZero, + Op1KnownOne, 0)) return &I; // Given the known and unknown bits, compute a range that the LHS could be @@ -3094,9 +3085,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 - if (RHSC->isNullValue() && DL && - DL->getIntPtrType(RHSC->getType()) == - LHSI->getOperand(0)->getType()) + if (RHSC->isNullValue() && + DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); break; @@ -3428,7 +3418,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // if A is a power of 2. if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && - isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality()) + isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality()) return new ICmpInst(I.getInversePredicate(), Builder->CreateAnd(A, B), Op1); @@ -3563,6 +3553,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } + // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0 + if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) && + match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) { + unsigned TypeBits = Cst1->getBitWidth(); + unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); + if (ShAmt < TypeBits && ShAmt != 0) { + Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt); + Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal), + I.getName() + ".mask"); + return new ICmpInst(I.getPredicate(), And, + Constant::getNullValue(Cst1->getType())); + } + } + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to // "icmp (and X, mask), cst" uint64_t ShAmt = 0; diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 2fd5318..fb2321d 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -158,10 +158,10 @@ private: AssumptionCache *AC; TargetLibraryInfo *TLI; DominatorTree *DT; + const DataLayout &DL; // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. 
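Editor's note: the newly added (A << C) == (B << C) fold above relies on the identity that two left-shifted values are equal exactly when the operands agree on their low (BitWidth - C) bits. A brute-force check of that identity for i8 operands:

#include <cassert>
#include <cstdint>

int main() {
  // Verify (A << C) == (B << C)  <=>  ((A ^ B) & (~0 >> C)) == 0 for every
  // i8 pair and every in-range, non-zero shift amount.
  for (unsigned C = 1; C < 8; ++C) {
    uint8_t Mask = static_cast<uint8_t>(0xFFu >> C); // low (8 - C) bits set
    for (unsigned A = 0; A < 256; ++A)
      for (unsigned B = 0; B < 256; ++B) {
        bool ShiftedEqual =
            static_cast<uint8_t>(A << C) == static_cast<uint8_t>(B << C);
        bool MaskedEqual = ((A ^ B) & Mask) == 0;
        assert(ShiftedEqual == MaskedEqual);
      }
  }
  return 0;
}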
- const DataLayout *DL; LoopInfo *LI; bool MadeIRChange; @@ -169,7 +169,7 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI, - DominatorTree *DT, const DataLayout *DL, LoopInfo *LI) + DominatorTree *DT, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {} @@ -180,7 +180,7 @@ public: AssumptionCache *getAssumptionCache() const { return AC; } - const DataLayout *getDataLayout() const { return DL; } + const DataLayout &getDataLayout() const { return DL; } DominatorTree *getDominatorTree() const { return DT; } @@ -330,17 +330,17 @@ private: Type *Ty); Instruction *visitCallSite(CallSite CS); - Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL); + Instruction *tryOptimizeCall(CallInst *CI); bool transformConstExprCastCall(CallSite CS); Instruction *transformCallThroughTrampoline(CallSite CS, IntrinsicInst *Tramp); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); - bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI); - bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI); - bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI); - bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI); + bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI); + bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI); + bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI); + bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI); Value *EmitGEPOffset(User *GEP); Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask); @@ -372,6 +372,10 @@ public: /// I to the worklist, replace all uses of I with the new value, then return /// I, so that the inst combiner will know that I was modified. Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { + // If there are no uses to replace, then we return nullptr to indicate that + // no changes were made to the program. + if (I.use_empty()) return nullptr; + Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. // If we are replacing the instruction with itself, this must be in a @@ -423,7 +427,7 @@ public: } void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - unsigned Depth = 0, Instruction *CxtI = nullptr) const { + unsigned Depth, Instruction *CxtI) const { return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI, DT); } @@ -468,7 +472,7 @@ private: /// bits. Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth, - Instruction *CxtI = nullptr); + Instruction *CxtI); bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth = 0); /// Helper routine of SimplifyDemandedUseBits. 
It tries to simplify demanded diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index b9eb986..6b0f268 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -164,62 +164,75 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI, return nullptr; } -Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { - // Ensure that the alloca array size argument has type intptr_t, so that - // any casting is exposed early. - if (DL) { - Type *IntPtrTy = DL->getIntPtrType(AI.getType()); - if (AI.getArraySize()->getType() != IntPtrTy) { - Value *V = Builder->CreateIntCast(AI.getArraySize(), - IntPtrTy, false); - AI.setOperand(0, V); - return &AI; - } +static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { + // Check for array size of 1 (scalar allocation). + if (!AI.isArrayAllocation()) { + // i32 1 is the canonical array size for scalar allocations. + if (AI.getArraySize()->getType()->isIntegerTy(32)) + return nullptr; + + // Canonicalize it. + Value *V = IC.Builder->getInt32(1); + AI.setOperand(0, V); + return &AI; } // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 - if (AI.isArrayAllocation()) { // Check C != 1 - if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - Type *NewTy = - ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName()); - New->setAlignment(AI.getAlignment()); - - // Scan to the end of the allocation instructions, to skip over a block of - // allocas if possible...also skip interleaved debug info - // - BasicBlock::iterator It = New; - while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; - - // Now that I is pointing to the first non-allocation-inst in the block, - // insert our getelementptr instruction... - // - Type *IdxTy = DL - ? DL->getIntPtrType(AI.getType()) - : Type::getInt64Ty(AI.getContext()); - Value *NullIdx = Constant::getNullValue(IdxTy); - Value *Idx[2] = { NullIdx, NullIdx }; - Instruction *GEP = + if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); + AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName()); + New->setAlignment(AI.getAlignment()); + + // Scan to the end of the allocation instructions, to skip over a block of + // allocas if possible...also skip interleaved debug info + // + BasicBlock::iterator It = New; + while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) + ++It; + + // Now that I is pointing to the first non-allocation-inst in the block, + // insert our getelementptr instruction... + // + Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); + Value *NullIdx = Constant::getNullValue(IdxTy); + Value *Idx[2] = {NullIdx, NullIdx}; + Instruction *GEP = GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); - InsertNewInstBefore(GEP, *It); + IC.InsertNewInstBefore(GEP, *It); - // Now make everything use the getelementptr instead of the original - // allocation. - return ReplaceInstUsesWith(AI, GEP); - } else if (isa<UndefValue>(AI.getArraySize())) { - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - } + // Now make everything use the getelementptr instead of the original + // allocation. 
+ return IC.ReplaceInstUsesWith(AI, GEP); } - if (DL && AI.getAllocatedType()->isSized()) { + if (isa<UndefValue>(AI.getArraySize())) + return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); + + // Ensure that the alloca array size argument has type intptr_t, so that + // any casting is exposed early. + Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType()); + if (AI.getArraySize()->getType() != IntPtrTy) { + Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); + AI.setOperand(0, V); + return &AI; + } + + return nullptr; +} + +Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { + if (auto *I = simplifyAllocaArraySize(*this, AI)) + return I; + + if (AI.getAllocatedType()->isSized()) { // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) - AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType())); + AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType())); // Move all alloca's of zero byte objects to the entry block and merge them // together. Note that we only do this for alloca's, because malloc should // allocate and return a unique pointer, even for a zero byte allocation. - if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) { + if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) { // For a zero sized alloca there is no point in doing an array allocation. // This is helpful if the array size is a complicated expression not used // elsewhere. @@ -237,7 +250,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // dominance as the array size was forced to a constant earlier already. AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst); if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || - DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { + DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { AI.moveBefore(FirstInst); return &AI; } @@ -246,7 +259,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // assign it the preferred alignment. if (EntryAI->getAlignment() == 0) EntryAI->setAlignment( - DL->getPrefTypeAlignment(EntryAI->getAllocatedType())); + DL.getPrefTypeAlignment(EntryAI->getAllocatedType())); // Replace this zero-sized alloca with the one at the start of the entry // block after ensuring that the address will be aligned enough for both // types. @@ -270,7 +283,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { SmallVector<Instruction *, 4> ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { unsigned SourceAlign = getOrEnforceKnownAlignment( - Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT); + Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT); if (AI.getAlignment() <= SourceAlign) { DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); @@ -439,22 +452,22 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { return nullptr; Type *Ty = LI.getType(); + const DataLayout &DL = IC.getDataLayout(); // Try to canonicalize loads which are only ever stored to operate over // integers instead of any other type. We only do this when the loaded type // is sized and has a size exactly the same as its store size and the store // size is a legal integer type. 
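The canonicalization described just above works because a load whose only users are stores merely moves a bit pattern, so an integer load of the same store size carries exactly the same information. A minimal standalone sketch of that equivalence in plain C++ (no LLVM APIs; the 32-bit float type is chosen only for illustration):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  static_assert(sizeof(float) == sizeof(std::uint32_t),
                "sketch assumes a 32-bit float");
  float Src = 3.14f;
  float DstTyped, DstBits;

  // Typed copy: load float, store float.
  DstTyped = Src;

  // Bit-pattern copy: load the 32-bit storage, store the 32-bit storage.
  std::uint32_t Bits;
  std::memcpy(&Bits, &Src, sizeof Bits);
  std::memcpy(&DstBits, &Bits, sizeof Bits);

  // Both paths leave the same bytes behind, which is why a load that is only
  // ever stored can be rewritten to operate on an integer of the store size.
  assert(std::memcmp(&DstTyped, &DstBits, sizeof(float)) == 0);
  return 0;
}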
- const DataLayout *DL = IC.getDataLayout(); - if (!Ty->isIntegerTy() && Ty->isSized() && DL && - DL->isLegalInteger(DL->getTypeStoreSizeInBits(Ty)) && - DL->getTypeStoreSizeInBits(Ty) == DL->getTypeSizeInBits(Ty)) { + if (!Ty->isIntegerTy() && Ty->isSized() && + DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) && + DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) { if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) { auto *SI = dyn_cast<StoreInst>(U); return SI && SI->getPointerOperand() != &LI; })) { LoadInst *NewLoad = combineLoadToNewType( IC, LI, - Type::getIntNTy(LI.getContext(), DL->getTypeStoreSizeInBits(Ty))); + Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty))); // Replace all the stores with stores of the newly loaded value. for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { auto *SI = cast<StoreInst>(*UI++); @@ -489,7 +502,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { // // FIXME: This should probably live in ValueTracking (or similar). static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, - const DataLayout *DL) { + const DataLayout &DL) { SmallPtrSet<Value *, 4> Visited; SmallVector<Value *, 4> Worklist(1, V); @@ -529,7 +542,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, if (!CS) return false; - uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType()); + uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType()); // Make sure that, even if the multiplication below would wrap as an // uint64_t, we still do the right thing. if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize)) @@ -541,7 +554,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, if (!GV->hasDefinitiveInitializer() || !GV->isConstant()) return false; - uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType()); + uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType()); if (InitSize > MaxSize) return false; continue; @@ -570,8 +583,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize, // offsets those indices implied. static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, Instruction *MemI, unsigned &Idx) { - const DataLayout *DL = IC.getDataLayout(); - if (GEPI->getNumOperands() < 2 || !DL) + if (GEPI->getNumOperands() < 2) return false; // Find the first non-zero index of a GEP. If all indices are zero, return @@ -603,7 +615,8 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops); if (!AllocTy || !AllocTy->isSized()) return false; - uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy); + const DataLayout &DL = IC.getDataLayout(); + uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy); // If there are more indices after the one we might replace with a zero, make // sure they're all non-negative. If any of them are negative, the overall @@ -665,18 +678,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { return Res; // Attempt to improve the alignment. - if (DL) { - unsigned KnownAlign = getOrEnforceKnownAlignment( - Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT); - unsigned LoadAlign = LI.getAlignment(); - unsigned EffectiveLoadAlign = LoadAlign != 0 ? 
LoadAlign : - DL->getABITypeAlignment(LI.getType()); - - if (KnownAlign > EffectiveLoadAlign) - LI.setAlignment(KnownAlign); - else if (LoadAlign == 0) - LI.setAlignment(EffectiveLoadAlign); - } + unsigned KnownAlign = getOrEnforceKnownAlignment( + Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT); + unsigned LoadAlign = LI.getAlignment(); + unsigned EffectiveLoadAlign = + LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType()); + + if (KnownAlign > EffectiveLoadAlign) + LI.setAlignment(KnownAlign); + else if (LoadAlign == 0) + LI.setAlignment(EffectiveLoadAlign); // Replace GEP indices if possible. if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) { @@ -738,8 +749,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (SelectInst *SI = dyn_cast<SelectInst>(Op)) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). unsigned Align = LI.getAlignment(); - if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) && - isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) { + if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) && + isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) { LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), SI->getOperand(1)->getName()+".val"); LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), @@ -807,6 +818,30 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) { return false; } +static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { + // FIXME: We could probably with some care handle both volatile and atomic + // stores here but it isn't clear that this is important. + if (!SI.isSimple()) + return false; + + Value *V = SI.getValueOperand(); + Type *T = V->getType(); + + if (!T->isAggregateType()) + return false; + + if (StructType *ST = dyn_cast<StructType>(T)) { + // If the struct only have one element, we unpack. + if (ST->getNumElements() == 1) { + V = IC.Builder->CreateExtractValue(V, 0); + combineStoreToNewValue(IC, SI, V); + return true; + } + } + + return false; +} + /// equivalentAddressValues - Test if A and B will obviously have the same /// value. This includes recognizing that %t0 and %t1 will have the same /// value in code like this: @@ -845,18 +880,20 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { return EraseInstFromFunction(SI); // Attempt to improve the alignment. - if (DL) { - unsigned KnownAlign = getOrEnforceKnownAlignment( - Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT); - unsigned StoreAlign = SI.getAlignment(); - unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : - DL->getABITypeAlignment(Val->getType()); - - if (KnownAlign > EffectiveStoreAlign) - SI.setAlignment(KnownAlign); - else if (StoreAlign == 0) - SI.setAlignment(EffectiveStoreAlign); - } + unsigned KnownAlign = getOrEnforceKnownAlignment( + Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT); + unsigned StoreAlign = SI.getAlignment(); + unsigned EffectiveStoreAlign = + StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType()); + + if (KnownAlign > EffectiveStoreAlign) + SI.setAlignment(KnownAlign); + else if (StoreAlign == 0) + SI.setAlignment(EffectiveStoreAlign); + + // Try to canonicalize the stored type. + if (unpackStoreToAggregate(*this, SI)) + return EraseInstFromFunction(SI); // Replace GEP indices if possible. 
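The alignment updates above follow one simple rule: treat an unspecified (zero) alignment as the ABI alignment of the accessed type, and only write a new alignment back when analysis proved something strictly better. A small sketch of just that selection logic, with plain integers standing in for the query results (chooseAlignment is an illustrative helper name, not an LLVM API):

#include <cassert>

// Specified - alignment currently on the instruction (0 = unspecified)
// ABIAlign  - ABI alignment of the accessed type
// Known     - alignment that analysis proved for the pointer
unsigned chooseAlignment(unsigned Specified, unsigned ABIAlign, unsigned Known) {
  unsigned Effective = Specified != 0 ? Specified : ABIAlign;
  if (Known > Effective)
    return Known;      // analysis proved a stronger alignment
  if (Specified == 0)
    return Effective;  // make the implicit ABI alignment explicit
  return Specified;    // otherwise keep what was already there
}

int main() {
  assert(chooseAlignment(0, 4, 16) == 16); // unspecified, 16 proven
  assert(chooseAlignment(0, 4, 1) == 4);   // unspecified, nothing proven
  assert(chooseAlignment(8, 4, 4) == 8);   // explicit 8 is kept
  return 0;
}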
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) { diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index c48e3c9..35513f1 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -26,7 +26,7 @@ using namespace PatternMatch; /// where it is known to be non-zero. If this allows us to simplify the /// computation, do so and return the new operand, otherwise return null. static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, - Instruction *CxtI) { + Instruction &CxtI) { // If V has multiple uses, then we would have to do more analysis to determine // if this is safe. For example, the use could be in dynamically unreached // code. @@ -47,8 +47,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, // inexact. Similarly for <<. if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) if (I->isLogicalShift() && - isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0, - IC.getAssumptionCache(), CxtI, + isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0, + IC.getAssumptionCache(), &CxtI, IC.getDominatorTree())) { // We know that this is an exact/nuw shift and that the input is a // non-zero context as well. @@ -126,7 +126,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) { /// \brief Return true if we can prove that: /// (mul LHS, RHS) === (mul nsw LHS, RHS) bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS, - Instruction *CxtI) { + Instruction &CxtI) { // Multiplying n * m significant bits yields a result of n + m significant // bits. If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. @@ -137,8 +137,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS, // Note that underestimating the number of sign bits gives a more // conservative answer. - unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) + - ComputeNumSignBits(RHS, 0, CxtI); + unsigned SignBits = + ComputeNumSignBits(LHS, 0, &CxtI) + ComputeNumSignBits(RHS, 0, &CxtI); // First handle the easy case: if we have enough sign bits there's // definitely no overflow. @@ -157,8 +157,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS, // For simplicity we just check if at least one side is not negative. 
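The sign-bit argument behind WillNotOverflowSignedMul can be checked in isolation: a value with s sign bits occupies at most BitWidth - s + 1 significant bits, so when the operands together carry more than BitWidth + 1 sign bits the product is guaranteed to fit. A standalone C++ sketch for 32-bit integers (numSignBits and mulCannotOverflowSigned are local illustrative helpers, not ComputeNumSignBits or the LLVM routine itself):

#include <cassert>
#include <cstdint>

// Number of leading bits equal to the sign bit, including the sign bit.
unsigned numSignBits(std::int32_t V) {
  std::uint32_t U = static_cast<std::uint32_t>(V);
  if (V < 0)
    U = ~U;                       // count leading zeros in either case
  unsigned N = 0;
  for (std::uint32_t Bit = 0x80000000u; Bit != 0 && (U & Bit) == 0; Bit >>= 1)
    ++N;
  return N;
}

// The product has at most (32 - SB(L) + 1) + (32 - SB(R) + 1) significant
// bits, so SB(L) + SB(R) > 33 means it cannot overflow a signed 32-bit result.
bool mulCannotOverflowSigned(std::int32_t L, std::int32_t R) {
  return numSignBits(L) + numSignBits(R) > 32 + 1;
}

int main() {
  assert(mulCannotOverflowSigned(100, 200));          // 25 + 24 sign bits
  assert(!mulCannotOverflowSigned(1 << 20, 1 << 20)); // 2^40 does not fit in i32
  std::int64_t Wide = std::int64_t(100) * 200;
  assert(Wide == std::int32_t(Wide));                 // and indeed no overflow
  return 0;
}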
bool LHSNonNegative, LHSNegative; bool RHSNonNegative, RHSNegative; - ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, CxtI); - ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, CxtI); + ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI); + ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI); if (LHSNonNegative || RHSNonNegative) return true; } @@ -375,7 +375,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } - if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, &I)) { + if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) { Changed = true; I.setHasNoSignedWrap(true); } @@ -422,7 +422,7 @@ static bool isFiniteNonZeroFp(Constant *C) { if (C->getType()->isVectorTy()) { for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E; ++I) { - ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I)); + ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I)); if (!CFP || !CFP->getValueAPF().isFiniteNonZero()) return false; } @@ -437,7 +437,7 @@ static bool isNormalFp(Constant *C) { if (C->getType()->isVectorTy()) { for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E; ++I) { - ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I)); + ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I)); if (!CFP || !CFP->getValueAPF().isNormal()) return false; } @@ -780,7 +780,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // The RHS is known non-zero. - if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) { + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) { I.setOperand(1, V); return &I; } @@ -1155,7 +1155,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { return BO; } - if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have @@ -1338,7 +1338,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // The RHS is known non-zero. 
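The power-of-two rewrites in the division and remainder transforms rest on two unsigned identities: dividing by 1 << Y is a logical right shift, and a remainder modulo a power of two is a mask with Y - 1 (the sdiv form additionally needs the sign facts established above). A short standalone check in plain C++ (isPowerOfTwo is a runtime test used only for illustration; isKnownToBeAPowerOfTwo proves the property from known bits instead):

#include <cassert>
#include <cstdint>

bool isPowerOfTwo(std::uint32_t Y) { return Y != 0 && (Y & (Y - 1)) == 0; }

int main() {
  const std::uint32_t Vals[] = {0u, 1u, 7u, 100u, 0xDEADBEEFu};
  for (std::uint32_t X : Vals)
    for (unsigned Shift = 0; Shift < 32; ++Shift) {
      std::uint32_t Y = 1u << Shift;
      assert(isPowerOfTwo(Y));
      assert(X / Y == (X >> Shift));  // X udiv (1 << S)  ->  X u>> S
      assert(X % Y == (X & (Y - 1))); // X urem Y         ->  X and Y-1
    }
  return 0;
}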
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) { + if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) { I.setOperand(1, V); return &I; } @@ -1385,7 +1385,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { I.getType()); // X urem Y -> X and Y-1, where Y is a power of 2, - if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) { + if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) { Constant *N1 = Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 0e73db8..ca2caed 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/IR/DataLayout.h" using namespace llvm; #define DEBUG_TYPE "instcombine" @@ -231,7 +230,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { Value *Base = FixedOperands[0]; GetElementPtrInst *NewGEP = - GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); + GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base, + makeArrayRef(FixedOperands).slice(1)); if (AllInBounds) NewGEP->setIsInBounds(); NewGEP->setDebugLoc(FirstInst->getDebugLoc()); return NewGEP; @@ -891,8 +891,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // it is only used by trunc or trunc(lshr) operations. If so, we split the // PHI into the various pieces being extracted. This sort of thing is // introduced when SROA promotes an aggregate to a single large integer type. - if (PN.getType()->isIntegerTy() && DL && - !DL->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) + if (PN.getType()->isIntegerTy() && + !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index dd0e65f..b28611f 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -312,9 +312,9 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is /// replaced with RepOp. static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const DataLayout *TD, const TargetLibraryInfo *TLI, - DominatorTree *DT, AssumptionCache *AC) { + const DataLayout &DL, DominatorTree *DT, + AssumptionCache *AC) { // Trivial replacement. if (V == Op) return RepOp; @@ -326,18 +326,18 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // If this is a binary operator, try to simplify it with the replaced op. if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { if (B->getOperand(0) == Op) - return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI); + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), DL, TLI); if (B->getOperand(1) == Op) - return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI); + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, DL, TLI); } // Same for CmpInsts. 
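SimplifyWithOpReplaced is easiest to see at the source level: inside the arm selected by an equality test, the compared value may be replaced by the other side of the comparison, and if the arm then simplifies to the opposite arm the whole select is redundant. A hypothetical C++ function showing the effect (the function name is illustrative only):

#include <cassert>

// In the true arm X is known to equal 0, so X + Y simplifies to Y, which is
// exactly the false arm; the select therefore always yields Y.
int selectThenSimplify(int X, int Y) {
  return X == 0 ? X + Y : Y;
}

int main() {
  for (int X = -3; X <= 3; ++X)
    for (int Y = -3; Y <= 3; ++Y)
      assert(selectThenSimplify(X, Y) == Y);
  return 0;
}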
if (CmpInst *C = dyn_cast<CmpInst>(I)) { if (C->getOperand(0) == Op) - return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD, + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), DL, TLI, DT, AC); if (C->getOperand(1) == Op) - return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD, + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, DL, TLI, DT, AC); } @@ -361,14 +361,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (ConstOps.size() == I->getNumOperands()) { if (CmpInst *C = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0], - ConstOps[1], TD, TLI); + ConstOps[1], DL, TLI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) if (!LI->isVolatile()) - return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); + return ConstantFoldLoadFromConstPtr(ConstOps[0], DL); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - ConstOps, TD, TLI); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps, + DL, TLI); } } @@ -635,25 +635,25 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. if (Pred == ICmpInst::ICMP_EQ) { - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) == + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) == + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == TrueVal) return ReplaceInstUsesWith(SI, FalseVal); - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) == + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) == + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == FalseVal) return ReplaceInstUsesWith(SI, FalseVal); } else if (Pred == ICmpInst::ICMP_NE) { - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) == + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) == + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == FalseVal) return ReplaceInstUsesWith(SI, TrueVal); - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) == + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) == + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) == TrueVal) return ReplaceInstUsesWith(SI, TrueVal); } @@ -927,7 +927,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return BinaryOperator::CreateAnd(NotCond, FalseVal); } if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { - if (C->getZExtValue() == false) { + if (!C->getZExtValue()) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); } @@ -1203,37 +1203,41 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return NV; if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { - // select(C, select(C, a, b), c) -> select(C, a, c) - if (TrueSI->getCondition() == CondVal) { - if (SI.getTrueValue() == TrueSI->getTrueValue()) - return nullptr; - SI.setOperand(1, TrueSI->getTrueValue()); - return &SI; - } - // select(C0, select(C1, 
a, b), b) -> select(C0&C1, a, b) - // We choose this as normal form to enable folding on the And and shortening - // paths for the values (this helps GetUnderlyingObjects() for example). - if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) { - Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition()); - SI.setOperand(0, And); - SI.setOperand(1, TrueSI->getTrueValue()); - return &SI; + if (TrueSI->getCondition()->getType() == CondVal->getType()) { + // select(C, select(C, a, b), c) -> select(C, a, c) + if (TrueSI->getCondition() == CondVal) { + if (SI.getTrueValue() == TrueSI->getTrueValue()) + return nullptr; + SI.setOperand(1, TrueSI->getTrueValue()); + return &SI; + } + // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b) + // We choose this as normal form to enable folding on the And and shortening + // paths for the values (this helps GetUnderlyingObjects() for example). + if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) { + Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition()); + SI.setOperand(0, And); + SI.setOperand(1, TrueSI->getTrueValue()); + return &SI; + } } } if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { - // select(C, a, select(C, b, c)) -> select(C, a, c) - if (FalseSI->getCondition() == CondVal) { - if (SI.getFalseValue() == FalseSI->getFalseValue()) - return nullptr; - SI.setOperand(2, FalseSI->getFalseValue()); - return &SI; - } - // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b) - if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) { - Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition()); - SI.setOperand(0, Or); - SI.setOperand(2, FalseSI->getFalseValue()); - return &SI; + if (FalseSI->getCondition()->getType() == CondVal->getType()) { + // select(C, a, select(C, b, c)) -> select(C, a, c) + if (FalseSI->getCondition() == CondVal) { + if (SI.getFalseValue() == FalseSI->getFalseValue()) + return nullptr; + SI.setOperand(2, FalseSI->getFalseValue()); + return &SI; + } + // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b) + if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) { + Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition()); + SI.setOperand(0, Or); + SI.setOperand(2, FalseSI->getFalseValue()); + return &SI; + } } } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index b4976e0..a414ec6 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -187,7 +187,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, /// GetShiftedValue - When CanEvaluateShifted returned true for an expression, /// this value inserts the new computation that produces the shifted value. static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, - InstCombiner &IC) { + InstCombiner &IC, const DataLayout &DL) { // We can always evaluate constants shifted. if (Constant *C = dyn_cast<Constant>(V)) { if (isLeftShift) @@ -196,8 +196,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, V = IC.Builder->CreateLShr(C, NumBits); // If we got a constantexpr back, try to simplify it with TD info. 
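GetShiftedValue can push a shift into the operands of bitwise operators because logical shifts distribute over and, or and xor, so the operands are rewritten in shifted form and the original shift disappears. A quick standalone check of those identities for unsigned 32-bit values (plain C++, no LLVM APIs):

#include <cassert>
#include <cstdint>

int main() {
  std::uint32_t A = 0x12345678u, B = 0x0F0F0F0Fu;
  for (unsigned N = 0; N < 32; ++N) {
    assert(((A & B) << N) == ((A << N) & (B << N)));
    assert(((A | B) << N) == ((A << N) | (B << N)));
    assert(((A ^ B) << N) == ((A << N) ^ (B << N)));
    // The same holds for logical right shifts.
    assert(((A & B) >> N) == ((A >> N) & (B >> N)));
    assert(((A | B) >> N) == ((A >> N) | (B >> N)));
    assert(((A ^ B) >> N) == ((A >> N) ^ (B >> N)));
  }
  return 0;
}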
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - V = ConstantFoldConstantExpression(CE, IC.getDataLayout(), - IC.getTargetLibraryInfo()); + V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo()); return V; } @@ -210,8 +209,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::Or: case Instruction::Xor: // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. - I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC)); - I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); + I->setOperand( + 0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL)); + I->setOperand( + 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL)); return I; case Instruction::Shl: { @@ -297,8 +298,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, } case Instruction::Select: - I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); - I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC)); + I->setOperand( + 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL)); + I->setOperand( + 2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL)); return I; case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never @@ -306,8 +309,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // instructions with a single use. PHINode *PN = cast<PHINode>(I); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), - NumBits, isLeftShift, IC)); + PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits, + isLeftShift, IC, DL)); return PN; } } @@ -337,8 +340,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); - return ReplaceInstUsesWith(I, - GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this)); + return ReplaceInstUsesWith( + I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL)); } // See if we can simplify any instructions used by the instruction whose sole diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index c5603aa..cd391d0 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" @@ -70,8 +69,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); - Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, - KnownZero, KnownOne, 0, &Inst); + Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne, + 0, &Inst); if (!V) return false; if (V == &Inst) return true; ReplaceInstUsesWith(Inst, V); @@ -84,9 +83,9 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth) { - Value *NewVal 
= SimplifyDemandedUseBits(U.get(), DemandedMask, - KnownZero, KnownOne, Depth, - dyn_cast<Instruction>(U.getUser())); + Value *NewVal = + SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth, + dyn_cast<Instruction>(U.getUser())); if (!NewVal) return false; U = NewVal; return true; @@ -122,15 +121,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); Type *VTy = V->getType(); - assert((DL || !VTy->isPointerTy()) && - "SimplifyDemandedBits needs to know bit widths!"); - assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && - (!VTy->isIntOrIntVectorTy() || - VTy->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "Value *V, DemandedMask, KnownZero and KnownOne " - "must have same BitWidth"); + assert( + (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "Value *V, DemandedMask, KnownZero and KnownOne " + "must have same BitWidth"); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { // We know all of the bits for a constant! KnownOne = CI->getValue() & DemandedMask; @@ -174,9 +170,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // this instruction has a simpler value in that context. if (I->getOpcode() == Instruction::And) { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If all of the demanded bits are known 1 on one side, return the other. @@ -198,9 +194,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // only bits from X or Y are demanded. // If either the LHS or the RHS are One, the result is One. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If all of the demanded bits are known zero on one side, return the @@ -225,9 +221,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // We can simplify (X^Y) -> X or Y in the user's context if we know that // only bits from X or Y are demanded. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If all of the demanded bits are known zero on one side, return the @@ -256,10 +252,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. 
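The And and Or cases track knowledge as a pair of masks, KnownZero and KnownOne, combined with simple rules: for and, a known zero on either side forces a zero and a one is only known when both sides are known one; for or, the roles swap. A small sketch of that bookkeeping over 8-bit masks (KnownBits, knownAnd and knownOr are stand-ins for the APInt pair and the LLVM logic, not its API):

#include <cassert>
#include <cstdint>

struct KnownBits {          // stand-in for the KnownZero/KnownOne APInt pair
  std::uint8_t Zero, One;   // bit set => that bit is known 0 / known 1
};

KnownBits knownAnd(KnownBits L, KnownBits R) {
  return {std::uint8_t(L.Zero | R.Zero),  // a known 0 on either side wins
          std::uint8_t(L.One & R.One)};   // known 1 only if both sides are 1
}

KnownBits knownOr(KnownBits L, KnownBits R) {
  return {std::uint8_t(L.Zero & R.Zero),
          std::uint8_t(L.One | R.One)};
}

int main() {
  KnownBits Unknown{0x00, 0x00};           // nothing known about x
  KnownBits LowMask{0xF0, 0x0F};           // the constant 0x0F

  KnownBits A = knownAnd(Unknown, LowMask);
  assert(A.Zero == 0xF0 && A.One == 0x00); // x & 0x0F: high nibble known 0

  KnownBits O = knownOr(Unknown, KnownBits{0x0F, 0xF0});
  assert(O.One == 0xF0 && O.Zero == 0x00); // x | 0xF0: high nibble known 1

  // "All demanded bits known 1 on one side": if only the low nibble is
  // demanded, x & 0x0F can simply be replaced by x.
  std::uint8_t Demanded = 0x0F;
  for (unsigned X = 0; X < 256; ++X)
    assert(((X & 0x0F) & Demanded) == (X & Demanded));
  return 0;
}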
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, + RHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -294,10 +290,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::Or: // If either the LHS or the RHS are One, the result is One. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, + RHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -336,10 +332,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownOne = RHSKnownOne | LHSKnownOne; break; case Instruction::Xor: { - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, + RHSKnownOne, Depth + 1) || + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero, + LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -423,10 +419,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; } case Instruction::Select: - if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, - LHSKnownZero, LHSKnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero, + RHSKnownOne, Depth + 1) || + SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero, + LHSKnownOne, Depth + 1)) return I; assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); @@ -445,8 +441,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, DemandedMask = DemandedMask.zext(truncBf); KnownZero = KnownZero.zext(truncBf); KnownOne = KnownOne.zext(truncBf); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, + KnownOne, Depth + 1)) return I; DemandedMask = DemandedMask.trunc(BitWidth); KnownZero = KnownZero.trunc(BitWidth); @@ -471,8 +467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Don't touch a vector-to-scalar bitcast. 
return nullptr; - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, + KnownOne, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; @@ -483,8 +479,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, DemandedMask = DemandedMask.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, + KnownOne, Depth + 1)) return I; DemandedMask = DemandedMask.zext(BitWidth); KnownZero = KnownZero.zext(BitWidth); @@ -510,8 +506,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero, + KnownOne, Depth + 1)) return I; InputDemandedBits = InputDemandedBits.zext(BitWidth); KnownZero = KnownZero.zext(BitWidth); @@ -552,7 +548,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Find information about known zero/one bits in the input. if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; // If the RHS of the add has bits set that can't affect the input, reduce @@ -602,9 +598,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // significant bit and all those below it. 
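For add and sub, only lower operand bits can influence a given result bit, because carries propagate upward and never back down; that is why the demanded mask is right-filled from the most significant demanded bit. A concrete check of that locality in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  // If only the low 8 bits of a sum or difference are demanded, only the low
  // 8 bits of the operands matter.
  const std::uint32_t Vals[] = {0u, 1u, 0xFFu, 0x1234FFu, 0xDEADBEEFu};
  for (std::uint32_t A : Vals)
    for (std::uint32_t B : Vals) {
      assert(((A + B) & 0xFF) == (((A & 0xFF) + (B & 0xFF)) & 0xFF));
      assert(((A - B) & 0xFF) == (((A & 0xFF) - (B & 0xFF)) & 0xFF));
    }
  return 0;
}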
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || + LHSKnownZero, LHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; } } @@ -619,9 +615,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, uint32_t NLZ = DemandedMask.countLeadingZeros(); APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1) || + LHSKnownZero, LHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps, - LHSKnownZero, LHSKnownOne, Depth+1)) + LHSKnownZero, LHSKnownOne, Depth + 1)) return I; } @@ -662,8 +658,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, else if (IOp->hasNoUnsignedWrap()) DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, + KnownOne, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); KnownZero <<= ShiftAmt; @@ -686,8 +682,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (cast<LShrOperator>(I)->isExact()) DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, + KnownOne, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); @@ -731,8 +727,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (cast<AShrOperator>(I)->isExact()) DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - KnownZero, KnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, + KnownOne, Depth + 1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. @@ -772,8 +768,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, - LHSKnownZero, LHSKnownOne, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero, + LHSKnownOne, Depth + 1)) return I; // The low bits of LHS are unchanged by the srem. @@ -798,7 +794,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // remainder is zero. if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1, + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. 
if (LHSKnownZero.isNegative()) @@ -808,10 +804,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, case Instruction::URem: { APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, - KnownZero2, KnownOne2, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), AllOnes, - KnownZero2, KnownOne2, Depth+1)) + if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2, + KnownOne2, Depth + 1) || + SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2, + KnownOne2, Depth + 1)) return I; unsigned Leaders = KnownZero2.countLeadingOnes(); @@ -1051,7 +1047,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // Note that we can't propagate undef elt info, because we don't know // which elt is getting updated. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } break; } @@ -1069,7 +1065,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt DemandedElts2 = DemandedElts; DemandedElts2.clearBit(IdxNo); TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2, - UndefElts, Depth+1); + UndefElts, Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } // The inserted element is defined. @@ -1097,12 +1093,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt UndefElts4(LHSVWidth, 0); TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, - UndefElts4, Depth+1); + UndefElts4, Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } APInt UndefElts3(LHSVWidth, 0); TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, - UndefElts3, Depth+1); + UndefElts3, Depth + 1); if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } bool NewUndefElts = false; @@ -1152,12 +1148,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } } - TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, - UndefElts, Depth+1); + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts, + Depth + 1); if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } // Output elements are undefined if both are undefined. @@ -1204,7 +1200,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // div/rem demand all inputs, because they don't want divide by zero. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; @@ -1238,11 +1234,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Instruction::Sub: case Instruction::Mul: // div/rem demand all inputs, because they don't want divide by zero. 
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts, Depth+1); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts, + Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } // Output elements are undefined if both are undefined. Consider things @@ -1251,8 +1247,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; case Instruction::FPTrunc: case Instruction::FPExt: - TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, - UndefElts, Depth+1); + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts, + Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } break; @@ -1273,10 +1269,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::x86_sse2_min_sd: case Intrinsic::x86_sse2_max_sd: TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, - UndefElts, Depth+1); + UndefElts, Depth + 1); if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts, - UndefElts2, Depth+1); + UndefElts2, Depth + 1); if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; } // If only the low elt is demanded and this is a scalarizable intrinsic, diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e07efb5..b6beb65 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -202,8 +202,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { APInt UndefElts(VectorWidth, 0); APInt DemandedMask(VectorWidth, 0); DemandedMask.setBit(IndexVal); - if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), - DemandedMask, UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, + UndefElts)) { EI.setOperand(0, V); return &EI; } @@ -733,7 +733,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) { case Instruction::GetElementPtr: { Value *Ptr = NewOps[0]; ArrayRef<Value*> Idx = NewOps.slice(1); - GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I); GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); return GEP; } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 88fcd53..90551e4 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -57,6 +57,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -75,7 +76,7 @@ STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); Value *InstCombiner::EmitGEPOffset(User *GEP) { - return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP); + return llvm::EmitGEPOffset(Builder, DL, GEP); } /// ShouldChangeType - Return true if it is desirable to convert a computation @@ -84,13 +85,10 @@ Value 
*InstCombiner::EmitGEPOffset(User *GEP) { bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { assert(From->isIntegerTy() && To->isIntegerTy()); - // If we don't have DL, we don't know if the source/dest are legal. - if (!DL) return false; - unsigned FromWidth = From->getPrimitiveSizeInBits(); unsigned ToWidth = To->getPrimitiveSizeInBits(); - bool FromLegal = DL->isLegalInteger(FromWidth); - bool ToLegal = DL->isLegalInteger(ToWidth); + bool FromLegal = DL.isLegalInteger(FromWidth); + bool ToLegal = DL.isLegalInteger(ToWidth); // If this is a legal integer from type, and the result would be an illegal // type, don't do the transformation. @@ -445,7 +443,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). static Value *tryFactorization(InstCombiner::BuilderTy *Builder, - const DataLayout *DL, BinaryOperator &I, + const DataLayout &DL, BinaryOperator &I, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D) { @@ -872,12 +870,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// will land us at the specified offset. If so, fill them into NewIndices and /// return the resultant element type, otherwise return null. Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, - SmallVectorImpl<Value*> &NewIndices) { + SmallVectorImpl<Value *> &NewIndices) { assert(PtrTy->isPtrOrPtrVectorTy()); - if (!DL) - return nullptr; - Type *Ty = PtrTy->getPointerElementType(); if (!Ty->isSized()) return nullptr; @@ -885,9 +880,9 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - Type *IntPtrTy = DL->getIntPtrType(PtrTy); + Type *IntPtrTy = DL.getIntPtrType(PtrTy); int64_t FirstIdx = 0; - if (int64_t TySize = DL->getTypeAllocSize(Ty)) { + if (int64_t TySize = DL.getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; Offset -= FirstIdx*TySize; @@ -905,11 +900,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, // Index into the types. If we fail, set OrigBase to null. while (Offset) { // Indexing into tail padding between struct/array elements. - if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty)) + if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty)) return nullptr; if (StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = DL->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); assert(Offset < (int64_t)SL->getSizeInBytes() && "Offset must stay within the indexed type"); @@ -920,7 +915,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { - uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType()); + uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); Offset %= EltSize; @@ -1214,7 +1209,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { // It may not be safe to reorder shuffles and things like div, urem, etc. // because we may trap when executing those ops on unknown vector elements. // See PR20059. 
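FindElementAtOffset works like a reverse offsetof: divide the byte offset by the outer element size for the first index, then descend through struct and array members until no offset remains. A standalone sketch for one hypothetical layout (the struct Inner and offsetToIndices are purely illustrative and assume the straightforward field offsets that offsetof reports):

#include <cassert>
#include <cstddef>
#include <cstdint>

struct Inner { std::int32_t A; std::int32_t B[3]; };

// Map a byte offset within Inner to a (field index, array element) pair,
// using the same divide-and-descend walk as the DataLayout-based code.
void offsetToIndices(std::size_t Offset, unsigned &Field, unsigned &Elt) {
  if (Offset < offsetof(Inner, B)) {
    Field = 0;  // lands in A; no inner index needed
    Elt = 0;
    return;
  }
  Field = 1;    // lands in B; divide the remaining offset by the element size
  Elt = static_cast<unsigned>((Offset - offsetof(Inner, B)) /
                              sizeof(std::int32_t));
}

int main() {
  unsigned Field, Elt;
  offsetToIndices(offsetof(Inner, B) + 2 * sizeof(std::int32_t), Field, Elt);
  assert(Field == 1 && Elt == 2);  // i.e. &inner.B[2]

  Inner X{};
  assert(reinterpret_cast<char *>(&X.B[2]) ==
         reinterpret_cast<char *>(&X) + offsetof(Inner, B) +
             2 * sizeof(std::int32_t));
  return 0;
}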
- if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr; + if (!isSafeToSpeculativelyExecute(&Inst)) + return nullptr; unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); @@ -1300,37 +1296,37 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Eliminate unneeded casts for indices, and replace indices which displace // by multiples of a zero size type with zero. - if (DL) { - bool MadeChange = false; - Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType()); - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); - I != E; ++I, ++GTI) { - // Skip indices into struct types. - SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); - if (!SeqTy) continue; - - // If the element type has zero size then any index over it is equivalent - // to an index of zero, so replace it with zero if it is not zero already. - if (SeqTy->getElementType()->isSized() && - DL->getTypeAllocSize(SeqTy->getElementType()) == 0) - if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) { - *I = Constant::getNullValue(IntPtrTy); - MadeChange = true; - } + bool MadeChange = false; + Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType()); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; + ++I, ++GTI) { + // Skip indices into struct types. + SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI); + if (!SeqTy) + continue; - Type *IndexTy = (*I)->getType(); - if (IndexTy != IntPtrTy) { - // If we are using a wider index than needed for this platform, shrink - // it to what we need. If narrower, sign-extend it to what we need. - // This explicit cast can make subsequent optimizations more obvious. - *I = Builder->CreateIntCast(*I, IntPtrTy, true); + // If the element type has zero size then any index over it is equivalent + // to an index of zero, so replace it with zero if it is not zero already. + if (SeqTy->getElementType()->isSized() && + DL.getTypeAllocSize(SeqTy->getElementType()) == 0) + if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) { + *I = Constant::getNullValue(IntPtrTy); MadeChange = true; } + + Type *IndexTy = (*I)->getType(); + if (IndexTy != IntPtrTy) { + // If we are using a wider index than needed for this platform, shrink + // it to what we need. If narrower, sign-extend it to what we need. + // This explicit cast can make subsequent optimizations more obvious. + *I = Builder->CreateIntCast(*I, IntPtrTy, true); + MadeChange = true; } - if (MadeChange) return &GEP; } + if (MadeChange) + return &GEP; // Check to see if the inputs to the PHI node are getelementptr instructions. if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) { @@ -1338,6 +1334,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!Op1) return nullptr; + // Don't fold a GEP into itself through a PHI node. This can only happen + // through the back-edge of a loop. Folding a GEP into itself means that + // the value of the previous iteration needs to be stored in the meantime, + // thus requiring an additional register variable to be live, but not + // actually achieving anything (the GEP still needs to be executed once per + // loop iteration). 
+ if (Op1 == &GEP) + return nullptr; + signed DI = -1; for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { @@ -1345,6 +1350,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) return nullptr; + // As for Op1 above, don't try to fold a GEP into itself. + if (Op2 == &GEP) + return nullptr; + // Keep track of the type as we walk the GEP. Type *CurTy = Op1->getOperand(0)->getType()->getScalarType(); @@ -1481,19 +1490,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } if (!Indices.empty()) - return (GEP.isInBounds() && Src->isInBounds()) ? - GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices, - GEP.getName()) : - GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); + return GEP.isInBounds() && Src->isInBounds() + ? GetElementPtrInst::CreateInBounds( + Src->getSourceElementType(), Src->getOperand(0), Indices, + GEP.getName()) + : GetElementPtrInst::Create(Src->getSourceElementType(), + Src->getOperand(0), Indices, + GEP.getName()); } - if (DL && GEP.getNumIndices() == 1) { + if (GEP.getNumIndices() == 1) { unsigned AS = GEP.getPointerAddressSpace(); if (GEP.getOperand(1)->getType()->getScalarSizeInBits() == - DL->getPointerSizeInBits(AS)) { + DL.getPointerSizeInBits(AS)) { Type *PtrTy = GEP.getPointerOperandType(); Type *Ty = PtrTy->getPointerElementType(); - uint64_t TyAllocSize = DL->getTypeAllocSize(Ty); + uint64_t TyAllocSize = DL.getTypeAllocSize(Ty); bool Matched = false; uint64_t C; @@ -1562,8 +1574,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end()); - GetElementPtrInst *Res = - GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); + GetElementPtrInst *Res = GetElementPtrInst::Create( + StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) return Res; @@ -1599,9 +1611,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %0 = GEP [10 x i8] addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); - Value *NewGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + Value *NewGEP = + GEP.isInBounds() + ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, + GEP.getName()) + : Builder->CreateGEP(StrippedPtrTy->getElementType(), + StrippedPtr, Idx, GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } @@ -1612,14 +1627,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast Type *SrcElTy = StrippedPtrTy->getElementType(); Type *ResElTy = PtrOp->getType()->getPointerElementType(); - if (DL && SrcElTy->isArrayTy() && - DL->getTypeAllocSize(SrcElTy->getArrayElementType()) == - DL->getTypeAllocSize(ResElTy)) { - Type *IdxType = DL->getIntPtrType(GEP.getType()); + if (SrcElTy->isArrayTy() && + DL.getTypeAllocSize(SrcElTy->getArrayElementType()) == + DL.getTypeAllocSize(ResElTy)) { + Type *IdxType = DL.getIntPtrType(GEP.getType()); Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; - Value *NewGEP = GEP.isInBounds() ? 
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + Value *NewGEP = + GEP.isInBounds() + ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) + : Builder->CreateGEP(StrippedPtrTy->getElementType(), + StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1630,11 +1647,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %V = mul i64 %N, 4 // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast - if (DL && ResElTy->isSized() && SrcElTy->isSized()) { + if (ResElTy->isSized() && SrcElTy->isSized()) { // Check that changing the type amounts to dividing the index by a scale // factor. - uint64_t ResSize = DL->getTypeAllocSize(ResElTy); - uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy); + uint64_t ResSize = DL.getTypeAllocSize(ResElTy); + uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy); if (ResSize && SrcSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1642,7 +1659,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) && + assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1650,9 +1667,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". - Value *NewGEP = GEP.isInBounds() && NSW ? - Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName()); + Value *NewGEP = + GEP.isInBounds() && NSW + ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, + GEP.getName()) + : Builder->CreateGEP(StrippedPtrTy->getElementType(), + StrippedPtr, NewIdx, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1665,13 +1685,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (DL && ResElTy->isSized() && SrcElTy->isSized() && - SrcElTy->isArrayTy()) { + if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) { // Check that changing to the array element type amounts to dividing the // index by a scale factor. - uint64_t ResSize = DL->getTypeAllocSize(ResElTy); - uint64_t ArrayEltSize - = DL->getTypeAllocSize(SrcElTy->getArrayElementType()); + uint64_t ResSize = DL.getTypeAllocSize(ResElTy); + uint64_t ArrayEltSize = + DL.getTypeAllocSize(SrcElTy->getArrayElementType()); if (ResSize && ArrayEltSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1679,7 +1698,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. 
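Several hunks in this region switch GEP construction over to the overloads that spell out the source element type instead of deriving it from the pointer operand's pointee type. A minimal sketch of the new call shape, using only overloads that appear in this patch (the wrapper name is ad hoc):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Rebuild a GEP while naming its pointee type explicitly, as the updated
    // code does via Src->getSourceElementType().
    static GetElementPtrInst *rebuildGEP(GetElementPtrInst *Src,
                                         ArrayRef<Value *> Indices,
                                         const Twine &Name) {
      return GetElementPtrInst::Create(Src->getSourceElementType(),
                                       Src->getPointerOperand(), Indices,
                                       Name);
    }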
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) && + assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1688,13 +1707,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". Value *Off[2] = { - Constant::getNullValue(DL->getIntPtrType(GEP.getType())), - NewIdx - }; + Constant::getNullValue(DL.getIntPtrType(GEP.getType())), + NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW ? Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : - Builder->CreateGEP(StrippedPtr, Off, GEP.getName()); + Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1704,9 +1722,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } - if (!DL) - return nullptr; - // addrspacecast between types is canonicalized as a bitcast, then an // addrspacecast. To take advantage of the below bitcast + struct GEP, look // through the addrspacecast. @@ -1727,10 +1742,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { Value *Operand = BCI->getOperand(0); PointerType *OpType = cast<PointerType>(Operand->getType()); - unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType()); + unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType()); APInt Offset(OffsetBits, 0); if (!isa<BitCastInst>(Operand) && - GEP.accumulateConstantOffset(*DL, Offset)) { + GEP.accumulateConstantOffset(DL, Offset)) { // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. @@ -1761,7 +1776,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(Operand, NewIndices) : - Builder->CreateGEP(Operand, NewIndices); + Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -2012,6 +2027,15 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { return &BI; } + // If the condition is irrelevant, remove the use so that other + // transforms on the condition become more effective. + if (BI.isConditional() && + BI.getSuccessor(0) == BI.getSuccessor(1) && + !isa<UndefValue>(BI.getCondition())) { + BI.setCondition(UndefValue::get(BI.getCondition()->getType())); + return &BI; + } + // Canonicalize fcmp_one -> fcmp_oeq FCmpInst::Predicate FPred; Value *Y; if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), @@ -2051,7 +2075,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { Value *Cond = SI.getCondition(); unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Cond, KnownZero, KnownOne); + computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI); unsigned LeadingKnownZeros = KnownZero.countLeadingOnes(); unsigned LeadingKnownOnes = KnownOne.countLeadingOnes(); @@ -2070,8 +2094,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { // x86 generates redundant zero-extenstion instructions if the operand is // truncated to i8 or i16. 
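For the switch-condition narrowing around the `TruncCond` logic that follows: the test `NewWidth > 0 && BitWidth > NewWidth && NewWidth >= DL.getLargestLegalIntTypeSize()` only pays off when known leading bits let the condition shrink without dropping below the target's largest legal integer size. A standalone sketch of that decision, assuming NewWidth is the width left after stripping the known-constant leading bits (that computation sits in unchanged context not shown in this hunk, so treat it as an assumption):

    #include <algorithm>
    #include <cstdint>

    // Decide whether a switch condition of BitWidth bits is worth truncating.
    // Leading bits that are known all-zero or all-one carry no information.
    uint32_t narrowedSwitchWidth(uint32_t BitWidth, uint32_t LeadingKnownZeros,
                                 uint32_t LeadingKnownOnes,
                                 uint32_t LargestLegalIntTypeSize) {
      uint32_t NewWidth =
          BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes);
      if (NewWidth > 0 && NewWidth < BitWidth &&
          NewWidth >= LargestLegalIntTypeSize)
        return NewWidth; // truncate the condition and all case values
      return BitWidth;   // not profitable (or nothing known); keep as is
    }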
bool TruncCond = false; - if (DL && BitWidth > NewWidth && - NewWidth >= DL->getLargestLegalIntTypeSize()) { + if (NewWidth > 0 && BitWidth > NewWidth && + NewWidth >= DL.getLargestLegalIntTypeSize()) { TruncCond = true; IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); Builder->SetInsertPoint(&SI); @@ -2632,7 +2656,7 @@ bool InstCombiner::run() { } // Instruction isn't dead, see if we can constant propagate it. - if (!I->use_empty() && isa<Constant>(I->getOperand(0))) + if (!I->use_empty() && isa<Constant>(I->getOperand(0))) { if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) { DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); @@ -2643,6 +2667,7 @@ bool InstCombiner::run() { MadeIRChange = true; continue; } + } // See if we can trivially sink this instruction to a successor basic block. if (I->hasOneUse()) { @@ -2756,10 +2781,9 @@ bool InstCombiner::run() { /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. /// -static bool AddReachableCodeToWorklist(BasicBlock *BB, - SmallPtrSetImpl<BasicBlock*> &Visited, +static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, + SmallPtrSetImpl<BasicBlock *> &Visited, InstCombineWorklist &ICWorklist, - const DataLayout *DL, const TargetLibraryInfo *TLI) { bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; @@ -2797,23 +2821,22 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, continue; } - if (DL) { - // See if we can constant fold its operands. - for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); - i != e; ++i) { - ConstantExpr *CE = dyn_cast<ConstantExpr>(i); - if (CE == nullptr) continue; + // See if we can constant fold its operands. + for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e; + ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(i); + if (CE == nullptr) + continue; - Constant*& FoldRes = FoldedConstants[CE]; - if (!FoldRes) - FoldRes = ConstantFoldConstantExpression(CE, DL, TLI); - if (!FoldRes) - FoldRes = CE; + Constant *&FoldRes = FoldedConstants[CE]; + if (!FoldRes) + FoldRes = ConstantFoldConstantExpression(CE, DL, TLI); + if (!FoldRes) + FoldRes = CE; - if (FoldRes != CE) { - *i = FoldRes; - MadeIRChange = true; - } + if (FoldRes != CE) { + *i = FoldRes; + MadeIRChange = true; } } @@ -2867,7 +2890,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, /// /// This also does basic constant propagation and other forward fixing to make /// the combiner itself run much faster. -static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL, +static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, TargetLibraryInfo *TLI, InstCombineWorklist &ICWorklist) { bool MadeIRChange = false; @@ -2877,7 +2900,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL, // track of which blocks we visit. SmallPtrSet<BasicBlock *, 64> Visited; MadeIRChange |= - AddReachableCodeToWorklist(F.begin(), Visited, ICWorklist, DL, TLI); + AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. 
This prevents @@ -2910,12 +2933,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL, return MadeIRChange; } -static bool combineInstructionsOverFunction( - Function &F, InstCombineWorklist &Worklist, AssumptionCache &AC, - TargetLibraryInfo &TLI, DominatorTree &DT, const DataLayout *DL = nullptr, - LoopInfo *LI = nullptr) { +static bool +combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, + AssumptionCache &AC, TargetLibraryInfo &TLI, + DominatorTree &DT, LoopInfo *LI = nullptr) { // Minimizing size? bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize); + auto &DL = F.getParent()->getDataLayout(); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. @@ -2950,15 +2974,13 @@ static bool combineInstructionsOverFunction( PreservedAnalyses InstCombinePass::run(Function &F, AnalysisManager<Function> *AM) { - auto *DL = F.getParent()->getDataLayout(); - auto &AC = AM->getResult<AssumptionAnalysis>(F); auto &DT = AM->getResult<DominatorTreeAnalysis>(F); auto &TLI = AM->getResult<TargetLibraryAnalysis>(F); auto *LI = AM->getCachedResult<LoopAnalysis>(F); - if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, DL, LI)) + if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3007,12 +3029,10 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); // Optional analyses. - auto *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - auto *DL = DLP ? &DLP->getDataLayout() : nullptr; auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, DL, LI); + return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI); } char InstructionCombiningPass::ID = 0; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 882aab0..978c857 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -24,6 +24,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -43,12 +46,14 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> #include <string> #include <system_error> @@ -70,17 +75,15 @@ static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; static const uint64_t kWindowsShadowOffset32 = 3ULL << 28; -static const size_t kMinStackMallocSize = 1 << 6; // 64B +static const size_t kMinStackMallocSize = 1 << 6; // 64B static const size_t kMaxStackMallocSize = 1 << 16; // 64K static const uintptr_t 
kCurrentStackFrameMagic = 0x41B58AB3; static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; static const char *const kAsanModuleCtorName = "asan.module_ctor"; static const char *const kAsanModuleDtorName = "asan.module_dtor"; -static const uint64_t kAsanCtorAndDtorPriority = 1; +static const uint64_t kAsanCtorAndDtorPriority = 1; static const char *const kAsanReportErrorTemplate = "__asan_report_"; -static const char *const kAsanReportLoadN = "__asan_report_load_n"; -static const char *const kAsanReportStoreN = "__asan_report_store_n"; static const char *const kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; @@ -90,7 +93,7 @@ static const char *const kAsanInitName = "__asan_init_v5"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; static const char *const kAsanPtrSub = "__sanitizer_ptr_sub"; static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; -static const int kMaxAsanStackMallocSizeClass = 10; +static const int kMaxAsanStackMallocSizeClass = 10; static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_"; static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_"; static const char *const kAsanGenPrefix = "__asan_gen_"; @@ -103,10 +106,6 @@ static const char *const kAsanUnpoisonStackMemoryName = static const char *const kAsanOptionDetectUAR = "__asan_option_detect_stack_use_after_return"; -#ifndef NDEBUG -static const int kAsanStackAfterReturnMagic = 0xf5; -#endif - // Accesses sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; @@ -120,84 +119,110 @@ static const unsigned kAsanAllocaPartialVal2 = 0x000000cbU; // This flag may need to be replaced with -f[no-]asan-reads. static cl::opt<bool> ClInstrumentReads("asan-instrument-reads", - cl::desc("instrument read instructions"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes", - cl::desc("instrument write instructions"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics", - cl::desc("instrument atomic instructions (rmw, cmpxchg)"), - cl::Hidden, cl::init(true)); -static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path", - cl::desc("use instrumentation with slow path for all accesses"), - cl::Hidden, cl::init(false)); + cl::desc("instrument read instructions"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInstrumentWrites( + "asan-instrument-writes", cl::desc("instrument write instructions"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInstrumentAtomics( + "asan-instrument-atomics", + cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, + cl::init(true)); +static cl::opt<bool> ClAlwaysSlowPath( + "asan-always-slow-path", + cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, + cl::init(false)); // This flag limits the number of instructions to be instrumented // in any given BB. Normally, this should be set to unlimited (INT_MAX), // but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary // set it to 10000. 
-static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb", - cl::init(10000), - cl::desc("maximal number of instructions to instrument in any given BB"), - cl::Hidden); +static cl::opt<int> ClMaxInsnsToInstrumentPerBB( + "asan-max-ins-per-bb", cl::init(10000), + cl::desc("maximal number of instructions to instrument in any given BB"), + cl::Hidden); // This flag may need to be replaced with -f[no]asan-stack. -static cl::opt<bool> ClStack("asan-stack", - cl::desc("Handle stack memory"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"), + cl::Hidden, cl::init(true)); static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", - cl::desc("Check return-after-free"), cl::Hidden, cl::init(true)); + cl::desc("Check return-after-free"), + cl::Hidden, cl::init(true)); // This flag may need to be replaced with -f[no]asan-globals. static cl::opt<bool> ClGlobals("asan-globals", - cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); + cl::desc("Handle global objects"), cl::Hidden, + cl::init(true)); static cl::opt<bool> ClInitializers("asan-initialization-order", - cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair", - cl::desc("Instrument <, <=, >, >=, - with pointer operands"), - cl::Hidden, cl::init(false)); -static cl::opt<unsigned> ClRealignStack("asan-realign-stack", - cl::desc("Realign stack to the value of this flag (power of two)"), - cl::Hidden, cl::init(32)); + cl::desc("Handle C++ initializer order"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInvalidPointerPairs( + "asan-detect-invalid-pointer-pair", + cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden, + cl::init(false)); +static cl::opt<unsigned> ClRealignStack( + "asan-realign-stack", + cl::desc("Realign stack to the value of this flag (power of two)"), + cl::Hidden, cl::init(32)); static cl::opt<int> ClInstrumentationWithCallsThreshold( "asan-instrumentation-with-call-threshold", - cl::desc("If the function being instrumented contains more than " - "this number of memory accesses, use callbacks instead of " - "inline checks (-1 means never use callbacks)."), - cl::Hidden, cl::init(7000)); + cl::desc( + "If the function being instrumented contains more than " + "this number of memory accesses, use callbacks instead of " + "inline checks (-1 means never use callbacks)."), + cl::Hidden, cl::init(7000)); static cl::opt<std::string> ClMemoryAccessCallbackPrefix( - "asan-memory-access-callback-prefix", - cl::desc("Prefix for memory access callbacks"), cl::Hidden, - cl::init("__asan_")); + "asan-memory-access-callback-prefix", + cl::desc("Prefix for memory access callbacks"), cl::Hidden, + cl::init("__asan_")); static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas", - cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(false)); + cl::desc("instrument dynamic allocas"), + cl::Hidden, cl::init(false)); +static cl::opt<bool> ClSkipPromotableAllocas( + "asan-skip-promotable-allocas", + cl::desc("Do not instrument promotable allocas"), cl::Hidden, + cl::init(true)); // These flags allow to change the shadow mapping. // The shadow mapping looks like // Shadow = (Mem >> scale) + (1 << offset_log) static cl::opt<int> ClMappingScale("asan-mapping-scale", - cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0)); + cl::desc("scale of asan shadow mapping"), + cl::Hidden, cl::init(0)); // Optimization flags. 
Not user visible, used mostly for testing // and benchmarking the tool. -static cl::opt<bool> ClOpt("asan-opt", - cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp", - cl::desc("Instrument the same temp just once"), cl::Hidden, - cl::init(true)); +static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClOptSameTemp( + "asan-opt-same-temp", cl::desc("Instrument the same temp just once"), + cl::Hidden, cl::init(true)); static cl::opt<bool> ClOptGlobals("asan-opt-globals", - cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true)); + cl::desc("Don't instrument scalar globals"), + cl::Hidden, cl::init(true)); +static cl::opt<bool> ClOptStack( + "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"), + cl::Hidden, cl::init(false)); -static cl::opt<bool> ClCheckLifetime("asan-check-lifetime", - cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), - cl::Hidden, cl::init(false)); +static cl::opt<bool> ClCheckLifetime( + "asan-check-lifetime", + cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), cl::Hidden, + cl::init(false)); static cl::opt<bool> ClDynamicAllocaStack( "asan-stack-dynamic-alloca", cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden, cl::init(true)); +static cl::opt<uint32_t> ClForceExperiment( + "asan-force-experiment", + cl::desc("Force optimization experiment (for testing)"), cl::Hidden, + cl::init(0)); + // Debug flags. static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"), cl::Hidden, cl::init(0)); -static cl::opt<std::string> ClDebugFunc("asan-debug-func", - cl::Hidden, cl::desc("Debug func")); +static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden, + cl::desc("Debug func")); static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"), cl::Hidden, cl::init(-1)); static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), @@ -207,10 +232,10 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); STATISTIC(NumInstrumentedDynamicAllocas, "Number of instrumented dynamic allocas"); -STATISTIC(NumOptimizedAccessesToGlobalArray, - "Number of optimized accesses to global arrays"); STATISTIC(NumOptimizedAccessesToGlobalVar, "Number of optimized accesses to global vars"); +STATISTIC(NumOptimizedAccessesToStackVar, + "Number of optimized accesses to stack vars"); namespace { /// Frontend-provided metadata for source location. @@ -238,9 +263,7 @@ struct LocationMetadata { class GlobalsMetadata { public: struct Entry { - Entry() - : SourceLoc(), Name(), IsDynInit(false), - IsBlacklisted(false) {} + Entry() : SourceLoc(), Name(), IsDynInit(false), IsBlacklisted(false) {} LocationMetadata SourceLoc; StringRef Name; bool IsDynInit; @@ -249,19 +272,17 @@ class GlobalsMetadata { GlobalsMetadata() : inited_(false) {} - void init(Module& M) { + void init(Module &M) { assert(!inited_); inited_ = true; NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals"); - if (!Globals) - return; + if (!Globals) return; for (auto MDN : Globals->operands()) { // Metadata node contains the global and the fields of "Entry". 
assert(MDN->getNumOperands() == 5); auto *GV = mdconst::extract_or_null<GlobalVariable>(MDN->getOperand(0)); // The optimizer may optimize away a global entirely. - if (!GV) - continue; + if (!GV) continue; // We can already have an entry for GV if it was merged with another // global. Entry &E = Entries[GV]; @@ -286,7 +307,7 @@ class GlobalsMetadata { private: bool inited_; - DenseMap<GlobalVariable*, Entry> Entries; + DenseMap<GlobalVariable *, Entry> Entries; }; /// This struct defines the shadow mapping using the rule: @@ -371,17 +392,36 @@ struct AddressSanitizer : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } - void instrumentMop(Instruction *I, bool UseCalls); + uint64_t getAllocaSizeInBytes(AllocaInst *AI) const { + Type *Ty = AI->getAllocatedType(); + uint64_t SizeInBytes = + AI->getModule()->getDataLayout().getTypeAllocSize(Ty); + return SizeInBytes; + } + /// Check if we want (and can) handle this alloca. + bool isInterestingAlloca(AllocaInst &AI) const; + /// If it is an interesting memory access, return the PointerOperand + /// and set IsWrite/Alignment. Otherwise return nullptr. + Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, + uint64_t *TypeSize, + unsigned *Alignment) const; + void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, + bool UseCalls, const DataLayout &DL); void instrumentPointerComparisonOrSubtraction(Instruction *I); void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, - Value *SizeArgument, bool UseCalls); + Value *SizeArgument, bool UseCalls, uint32_t Exp); + void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr, + uint32_t TypeSize, bool IsWrite, + Value *SizeArgument, bool UseCalls, + uint32_t Exp); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, - Value *SizeArgument); + Value *SizeArgument, uint32_t Exp); void instrumentMemIntrinsic(MemIntrinsic *MI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F) override; @@ -396,9 +436,10 @@ struct AddressSanitizer : public FunctionPass { bool LooksLikeCodeInBug11395(Instruction *I); bool GlobalIsLinkerInitialized(GlobalVariable *G); + bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr, + uint64_t TypeSize) const; LLVMContext *C; - const DataLayout *DL; Triple TargetTriple; int LongSize; Type *IntptrTy; @@ -408,12 +449,12 @@ struct AddressSanitizer : public FunctionPass { Function *AsanInitFunction; Function *AsanHandleNoReturnFunc; Function *AsanPtrCmpFunction, *AsanPtrSubFunction; - // This array is indexed by AccessIsWrite and log2(AccessSize). - Function *AsanErrorCallback[2][kNumberOfAccessSizes]; - Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes]; - // This array is indexed by AccessIsWrite. - Function *AsanErrorCallbackSized[2], - *AsanMemoryAccessCallbackSized[2]; + // This array is indexed by AccessIsWrite, Experiment and log2(AccessSize). + Function *AsanErrorCallback[2][2][kNumberOfAccessSizes]; + Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes]; + // This array is indexed by AccessIsWrite and Experiment. 
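On the widened callback tables declared above (`[IsWrite][Experiment][log2(AccessSize)]`): a small standalone sketch of the indexing, assuming the existing TypeSizeToSizeIndex helper maps accesses of 8/16/32/64/128 bits to indices 0..4 (its body is unchanged and not shown here, so this reimplementation is only illustrative):

    #include <cstdint>

    // Map an access size in bits to the table index used by the callbacks:
    // 8 -> 0, 16 -> 1, 32 -> 2, 64 -> 3, 128 -> 4.
    static unsigned accessSizeIndex(uint32_t TypeSizeInBits) {
      unsigned Idx = 0;
      for (uint32_t Bytes = TypeSizeInBits / 8; Bytes > 1; Bytes >>= 1)
        ++Idx;
      return Idx;
    }

    // Example: a 4-byte store under an active experiment would select
    // AsanErrorCallback[/*IsWrite=*/1][/*Exp=*/1][accessSizeIndex(32)].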
+ Function *AsanErrorCallbackSized[2][2]; + Function *AsanMemoryAccessCallbackSized[2][2]; Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; GlobalsMetadata GlobalsMD; @@ -426,9 +467,7 @@ class AddressSanitizerModule : public ModulePass { AddressSanitizerModule() : ModulePass(ID) {} bool runOnModule(Module &M) override; static char ID; // Pass identification, replacement for typeid - const char *getPassName() const override { - return "AddressSanitizerModule"; - } + const char *getPassName() const override { return "AddressSanitizerModule"; } private: void initializeCallbacks(Module &M); @@ -444,7 +483,6 @@ class AddressSanitizerModule : public ModulePass { GlobalsMetadata GlobalsMD; Type *IntptrTy; LLVMContext *C; - const DataLayout *DL; Triple TargetTriple; ShadowMapping Mapping; Function *AsanPoisonGlobals; @@ -471,12 +509,12 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { Type *IntptrPtrTy; ShadowMapping Mapping; - SmallVector<AllocaInst*, 16> AllocaVec; - SmallVector<Instruction*, 8> RetVec; + SmallVector<AllocaInst *, 16> AllocaVec; + SmallVector<Instruction *, 8> RetVec; unsigned StackAlignment; Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1], - *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; + *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc; // Stores a place and arguments of poisoning/unpoisoning call for alloca. @@ -497,33 +535,38 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { Value *LeftRzAddr; Value *RightRzAddr; bool Poison; - explicit DynamicAllocaCall(AllocaInst *AI, - Value *LeftRzAddr = nullptr, - Value *RightRzAddr = nullptr) - : AI(AI), LeftRzAddr(LeftRzAddr), RightRzAddr(RightRzAddr), Poison(true) - {} + explicit DynamicAllocaCall(AllocaInst *AI, Value *LeftRzAddr = nullptr, + Value *RightRzAddr = nullptr) + : AI(AI), + LeftRzAddr(LeftRzAddr), + RightRzAddr(RightRzAddr), + Poison(true) {} }; SmallVector<DynamicAllocaCall, 1> DynamicAllocaVec; // Maps Value to an AllocaInst from which the Value is originated. - typedef DenseMap<Value*, AllocaInst*> AllocaForValueMapTy; + typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy; AllocaForValueMapTy AllocaForValue; bool HasNonEmptyInlineAsm; std::unique_ptr<CallInst> EmptyInlineAsm; FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) - : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false), - C(ASan.C), IntptrTy(ASan.IntptrTy), - IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), - StackAlignment(1 << Mapping.Scale), HasNonEmptyInlineAsm(false), + : F(F), + ASan(ASan), + DIB(*F.getParent(), /*AllowUnresolved*/ false), + C(ASan.C), + IntptrTy(ASan.IntptrTy), + IntptrPtrTy(PointerType::get(IntptrTy, 0)), + Mapping(ASan.Mapping), + StackAlignment(1 << Mapping.Scale), + HasNonEmptyInlineAsm(false), EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {} bool runOnFunction() { if (!ClStack) return false; // Collect alloca, ret, lifetime instructions etc. - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) - visit(*BB); + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); if (AllocaVec.empty() && DynamicAllocaVec.empty()) return false; @@ -544,33 +587,31 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { // ----------------------- Visitors. /// \brief Collect all Ret instructions. 
- void visitReturnInst(ReturnInst &RI) { - RetVec.push_back(&RI); - } + void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); } // Unpoison dynamic allocas redzones. void unpoisonDynamicAlloca(DynamicAllocaCall &AllocaCall) { - if (!AllocaCall.Poison) - return; + if (!AllocaCall.Poison) return; for (auto Ret : RetVec) { IRBuilder<> IRBRet(Ret); PointerType *Int32PtrTy = PointerType::getUnqual(IRBRet.getInt32Ty()); Value *Zero = Constant::getNullValue(IRBRet.getInt32Ty()); Value *PartialRzAddr = IRBRet.CreateSub(AllocaCall.RightRzAddr, ConstantInt::get(IntptrTy, 4)); - IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr, - Int32PtrTy)); - IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(PartialRzAddr, - Int32PtrTy)); - IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr, - Int32PtrTy)); + IRBRet.CreateStore( + Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr, Int32PtrTy)); + IRBRet.CreateStore(Zero, + IRBRet.CreateIntToPtr(PartialRzAddr, Int32PtrTy)); + IRBRet.CreateStore( + Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr, Int32PtrTy)); } } // Right shift for BigEndian and left shift for LittleEndian. Value *shiftAllocaMagic(Value *Val, IRBuilder<> &IRB, Value *Shift) { - return ASan.DL->isLittleEndian() ? IRB.CreateShl(Val, Shift) - : IRB.CreateLShr(Val, Shift); + auto &DL = F.getParent()->getDataLayout(); + return DL.isLittleEndian() ? IRB.CreateShl(Val, Shift) + : IRB.CreateLShr(Val, Shift); } // Compute PartialRzMagic for dynamic alloca call. Since we don't know the @@ -599,7 +640,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { /// \brief Collect Alloca instructions we want (and can) handle. void visitAllocaInst(AllocaInst &AI) { - if (!isInterestingAlloca(AI)) return; + if (!ASan.isInterestingAlloca(AI)) return; StackAlignment = std::max(StackAlignment, AI.getAlignment()); if (isDynamicAlloca(AI)) @@ -613,8 +654,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { void visitIntrinsicInst(IntrinsicInst &II) { if (!ClCheckLifetime) return; Intrinsic::ID ID = II.getIntrinsicID(); - if (ID != Intrinsic::lifetime_start && - ID != Intrinsic::lifetime_end) + if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end) return; // Found lifetime intrinsic, add ASan instrumentation if necessary. ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0)); @@ -644,8 +684,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { bool doesDominateAllExits(const Instruction *I) const { for (auto Ret : RetVec) { - if (!ASan.getDominatorTree().dominates(I, Ret)) - return false; + if (!ASan.getDominatorTree().dominates(I, Ret)) return false; } return true; } @@ -653,19 +692,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { bool isDynamicAlloca(AllocaInst &AI) const { return AI.isArrayAllocation() || !AI.isStaticAlloca(); } - - // Check if we want (and can) handle this alloca. - bool isInterestingAlloca(AllocaInst &AI) const { - return (AI.getAllocatedType()->isSized() && - // alloca() may be called with 0 size, ignore it. - getAllocaSizeInBytes(&AI) > 0); - } - - uint64_t getAllocaSizeInBytes(AllocaInst *AI) const { - Type *Ty = AI->getAllocatedType(); - uint64_t SizeInBytes = ASan.DL->getTypeAllocSize(Ty); - return SizeInBytes; - } /// Finds alloca where the value comes from. 
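The shiftAllocaMagic change above shows the pattern this patch applies throughout: instead of an optional DataLayoutPass result that might be null, the DataLayout is always reachable from the enclosing Module. A minimal sketch of that access pattern, using only accessors that appear elsewhere in this diff (the helper names are ad hoc):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Both routes end at the same, now always-present, DataLayout.
    static uint64_t storeSizeInBits(const Instruction &I, Type *Ty) {
      const DataLayout &DL = I.getModule()->getDataLayout();
      return DL.getTypeStoreSizeInBits(Ty);
    }

    static bool targetIsLittleEndian(const Function &F) {
      return F.getParent()->getDataLayout().isLittleEndian();
    }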
AllocaInst *findAllocaForValue(Value *V); void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB, @@ -683,21 +709,25 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { } // namespace char AddressSanitizer::ID = 0; -INITIALIZE_PASS_BEGIN(AddressSanitizer, "asan", - "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", - false, false) +INITIALIZE_PASS_BEGIN( + AddressSanitizer, "asan", + "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, + false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(AddressSanitizer, "asan", - "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", - false, false) +INITIALIZE_PASS_END( + AddressSanitizer, "asan", + "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, + false) FunctionPass *llvm::createAddressSanitizerFunctionPass() { return new AddressSanitizer(); } char AddressSanitizerModule::ID = 0; -INITIALIZE_PASS(AddressSanitizerModule, "asan-module", +INITIALIZE_PASS( + AddressSanitizerModule, "asan-module", "AddressSanitizer: detects use-after-free and out-of-bounds bugs." - "ModulePass", false, false) + "ModulePass", + false, false) ModulePass *llvm::createAddressSanitizerModulePass() { return new AddressSanitizerModule(); } @@ -709,16 +739,15 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { } // \brief Create a constant for Str so that we can pass it to the run-time lib. -static GlobalVariable *createPrivateGlobalForString( - Module &M, StringRef Str, bool AllowMerging) { +static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str, + bool AllowMerging) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); // We use private linkage for module-local strings. If they can be merged // with another one, we set the unnamed_addr attribute. GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix); - if (AllowMerging) - GV->setUnnamedAddr(true); + if (AllowMerging) GV->setUnnamedAddr(true); GV->setAlignment(1); // Strings may not be merged w/o setting align 1. return GV; } @@ -747,8 +776,7 @@ static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); - if (Mapping.Offset == 0) - return Shadow; + if (Mapping.Offset == 0) return Shadow; // (Shadow >> scale) | offset if (Mapping.OrShadowOffset) return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset)); @@ -775,38 +803,61 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { MI->eraseFromParent(); } -// If I is an interesting memory access, return the PointerOperand -// and set IsWrite/Alignment. Otherwise return nullptr. -static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, - unsigned *Alignment) { +/// Check if we want (and can) handle this alloca. +bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { + return (AI.getAllocatedType()->isSized() && + // alloca() may be called with 0 size, ignore it. + getAllocaSizeInBytes(&AI) > 0 && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. + (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); +} + +/// If I is an interesting memory access, return the PointerOperand +/// and set IsWrite/Alignment. Otherwise return nullptr. 
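For the memToShadow helper earlier in this hunk (and the mapping comment `Shadow = (Mem >> scale) + (1 << offset_log)`), here is the same computation as a plain-integer sketch, outside of IR:

    #include <cstdint>

    // Shadow address for a given application address: shift right by the
    // mapping scale, then add (or, when the mapping permits it, OR in) the
    // shadow offset.
    uint64_t shadowFor(uint64_t Mem, unsigned Scale, uint64_t Offset,
                       bool OrShadowOffset) {
      uint64_t Shadow = Mem >> Scale;
      if (Offset == 0)
        return Shadow;
      return OrShadowOffset ? (Shadow | Offset) : (Shadow + Offset);
    }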
+Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, + bool *IsWrite, + uint64_t *TypeSize, + unsigned *Alignment) const { // Skip memory accesses inserted by another instrumentation. - if (I->getMetadata("nosanitize")) - return nullptr; + if (I->getMetadata("nosanitize")) return nullptr; + + Value *PtrOperand = nullptr; + const DataLayout &DL = I->getModule()->getDataLayout(); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { if (!ClInstrumentReads) return nullptr; *IsWrite = false; + *TypeSize = DL.getTypeStoreSizeInBits(LI->getType()); *Alignment = LI->getAlignment(); - return LI->getPointerOperand(); - } - if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + PtrOperand = LI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { if (!ClInstrumentWrites) return nullptr; *IsWrite = true; + *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType()); *Alignment = SI->getAlignment(); - return SI->getPointerOperand(); - } - if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { + PtrOperand = SI->getPointerOperand(); + } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; + *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType()); *Alignment = 0; - return RMW->getPointerOperand(); - } - if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { + PtrOperand = RMW->getPointerOperand(); + } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; + *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType()); *Alignment = 0; - return XCHG->getPointerOperand(); + PtrOperand = XCHG->getPointerOperand(); } - return nullptr; + + // Treat memory accesses to promotable allocas as non-interesting since they + // will not cause memory violations. This greatly speeds up the instrumented + // executable at -O0. + if (ClSkipPromotableAllocas) + if (auto AI = dyn_cast_or_null<AllocaInst>(PtrOperand)) + return isInterestingAlloca(*AI) ? AI : nullptr; + + return PtrOperand; } static bool isPointerOperand(Value *V) { @@ -818,17 +869,15 @@ static bool isPointerOperand(Value *V) { // the frontend. static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) { if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) { - if (!Cmp->isRelational()) - return false; + if (!Cmp->isRelational()) return false; } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - if (BO->getOpcode() != Instruction::Sub) - return false; + if (BO->getOpcode() != Instruction::Sub) return false; } else { return false; } if (!isPointerOperand(I->getOperand(0)) || !isPointerOperand(I->getOperand(1))) - return false; + return false; return true; } @@ -839,8 +888,8 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit; } -void -AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) { +void AddressSanitizer::instrumentPointerComparisonOrSubtraction( + Instruction *I) { IRBuilder<> IRB(I); Function *F = isa<ICmpInst>(I) ? 
AsanPtrCmpFunction : AsanPtrSubFunction; Value *Param[2] = {I->getOperand(0), I->getOperand(1)}; @@ -851,38 +900,47 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) { IRB.CreateCall2(F, Param[0], Param[1]); } -void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { +void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, + Instruction *I, bool UseCalls, + const DataLayout &DL) { bool IsWrite = false; unsigned Alignment = 0; - Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment); + uint64_t TypeSize = 0; + Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment); assert(Addr); + + // Optimization experiments. + // The experiments can be used to evaluate potential optimizations that remove + // instrumentation (assess false negatives). Instead of completely removing + // some instrumentation, you set Exp to a non-zero value (mask of optimization + // experiments that want to remove instrumentation of this instruction). + // If Exp is non-zero, this pass will emit special calls into runtime + // (e.g. __asan_report_exp_load1 instead of __asan_report_load1). These calls + // make runtime terminate the program in a special way (with a different + // exit status). Then you run the new compiler on a buggy corpus, collect + // the special terminations (ideally, you don't see them at all -- no false + // negatives) and make the decision on the optimization. + uint32_t Exp = ClForceExperiment; + if (ClOpt && ClOptGlobals) { - if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { - // If initialization order checking is disabled, a simple access to a - // dynamically initialized global is always valid. - if (!ClInitializers || GlobalIsLinkerInitialized(G)) { - NumOptimizedAccessesToGlobalVar++; - return; - } - } - ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr); - if (CE && CE->isGEPWithNoNotionalOverIndexing()) { - if (GlobalVariable *G = dyn_cast<GlobalVariable>(CE->getOperand(0))) { - if (CE->getOperand(1)->isNullValue() && GlobalIsLinkerInitialized(G)) { - NumOptimizedAccessesToGlobalArray++; - return; - } - } + // If initialization order checking is disabled, a simple access to a + // dynamically initialized global is always valid. + GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL)); + if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) && + isSafeAccess(ObjSizeVis, Addr, TypeSize)) { + NumOptimizedAccessesToGlobalVar++; + return; } } - Type *OrigPtrTy = Addr->getType(); - Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); - - assert(OrigTy->isSized()); - uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy); - - assert((TypeSize % 8) == 0); + if (ClOpt && ClOptStack) { + // A direct inbounds access to a stack variable is always valid. + if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) && + isSafeAccess(ObjSizeVis, Addr, TypeSize)) { + NumOptimizedAccessesToStackVar++; + return; + } + } if (IsWrite) NumInstrumentedWrites++; @@ -895,23 +953,10 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || TypeSize == 128) && (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8)) - return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls); - // Instrument unusual size or unusual alignment. - // We can not do it with a single check, so we do 1-byte check for the first - // and the last bytes. 
We call __asan_report_*_n(addr, real_size) to be able - // to report the actual access size. - IRBuilder<> IRB(I); - Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8); - Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); - if (UseCalls) { - IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite], AddrLong, Size); - } else { - Value *LastByte = IRB.CreateIntToPtr( - IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), - OrigPtrTy); - instrumentAddress(I, I, Addr, 8, IsWrite, Size, false); - instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false); - } + return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls, + Exp); + instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr, + UseCalls, Exp); } // Validate the result of Module::getOrInsertFunction called for an interface @@ -921,17 +966,34 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast); FuncOrBitcast->dump(); - report_fatal_error("trying to redefine an AddressSanitizer " - "interface function"); + report_fatal_error( + "trying to redefine an AddressSanitizer " + "interface function"); } -Instruction *AddressSanitizer::generateCrashCode( - Instruction *InsertBefore, Value *Addr, - bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) { +Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, + Value *Addr, bool IsWrite, + size_t AccessSizeIndex, + Value *SizeArgument, + uint32_t Exp) { IRBuilder<> IRB(InsertBefore); - CallInst *Call = SizeArgument - ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument) - : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr); + Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp); + CallInst *Call = nullptr; + if (SizeArgument) { + if (Exp == 0) + Call = IRB.CreateCall2(AsanErrorCallbackSized[IsWrite][0], Addr, + SizeArgument); + else + Call = IRB.CreateCall3(AsanErrorCallbackSized[IsWrite][1], Addr, + SizeArgument, ExpVal); + } else { + if (Exp == 0) + Call = + IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr); + else + Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][1][AccessSizeIndex], + Addr, ExpVal); + } // We don't do Call->setDoesNotReturn() because the BB already has // UnreachableInst at the end. 
@@ -941,19 +1003,19 @@ Instruction *AddressSanitizer::generateCrashCode( } Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, - Value *ShadowValue, - uint32_t TypeSize) { + Value *ShadowValue, + uint32_t TypeSize) { size_t Granularity = 1 << Mapping.Scale; // Addr & (Granularity - 1) - Value *LastAccessedByte = IRB.CreateAnd( - AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); + Value *LastAccessedByte = + IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); // (Addr & (Granularity - 1)) + size - 1 if (TypeSize / 8 > 1) LastAccessedByte = IRB.CreateAdd( LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); // (uint8_t) ((Addr & (Granularity-1)) + size - 1) - LastAccessedByte = IRB.CreateIntCast( - LastAccessedByte, ShadowValue->getType(), false); + LastAccessedByte = + IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false); // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); } @@ -961,24 +1023,29 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, - Value *SizeArgument, bool UseCalls) { + Value *SizeArgument, bool UseCalls, + uint32_t Exp) { IRBuilder<> IRB(InsertBefore); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); if (UseCalls) { - IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][AccessSizeIndex], - AddrLong); + if (Exp == 0) + IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex], + AddrLong); + else + IRB.CreateCall2(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex], + AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)); return; } - Type *ShadowTy = IntegerType::get( - *C, std::max(8U, TypeSize >> Mapping.Scale)); + Type *ShadowTy = + IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale)); Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); Value *ShadowPtr = memToShadow(AddrLong, IRB); Value *CmpVal = Constant::getNullValue(ShadowTy); - Value *ShadowValue = IRB.CreateLoad( - IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); + Value *ShadowValue = + IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); size_t Granularity = 1 << Mapping.Scale; @@ -987,9 +1054,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { // We use branch weights for the slow path check, to indicate that the slow // path is rarely taken. This seems to be the case for SPEC benchmarks. 
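On the slow-path comparison built by createSlowPathCmp above: with an 8-byte shadow granule, a nonzero shadow byte K (1..7) means only the first K bytes of the granule are addressable, so the access is bad exactly when the index of its last byte within the granule reaches K. A standalone sketch of that predicate (the meaning of K is background knowledge about the ASan runtime's shadow encoding, not something this patch states):

    #include <cstdint>

    // True when the slow path should report: last accessed byte within the
    // granule, compared signed against the shadow byte, mirroring the IR
    // built by createSlowPathCmp.
    bool slowPathReports(uint64_t Addr, uint32_t TypeSizeInBits,
                         unsigned Granularity, int8_t ShadowValue) {
      uint64_t LastAccessedByte = Addr & (Granularity - 1);
      if (TypeSizeInBits / 8 > 1)
        LastAccessedByte += TypeSizeInBits / 8 - 1;
      return (int8_t)LastAccessedByte >= ShadowValue;
    }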
- TerminatorInst *CheckTerm = - SplitBlockAndInsertIfThen(Cmp, InsertBefore, false, - MDBuilder(*C).createBranchWeights(1, 100000)); + TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen( + Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000)); assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); @@ -1003,11 +1069,37 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true); } - Instruction *Crash = generateCrashCode( - CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument); + Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, + AccessSizeIndex, SizeArgument, Exp); Crash->setDebugLoc(OrigIns->getDebugLoc()); } +// Instrument unusual size or unusual alignment. +// We can not do it with a single check, so we do 1-byte check for the first +// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able +// to report the actual access size. +void AddressSanitizer::instrumentUnusualSizeOrAlignment( + Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite, + Value *SizeArgument, bool UseCalls, uint32_t Exp) { + IRBuilder<> IRB(I); + Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8); + Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + if (UseCalls) { + if (Exp == 0) + IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite][0], AddrLong, + Size); + else + IRB.CreateCall3(AsanMemoryAccessCallbackSized[IsWrite][1], AddrLong, Size, + ConstantInt::get(IRB.getInt32Ty(), Exp)); + } else { + Value *LastByte = IRB.CreateIntToPtr( + IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), + Addr->getType()); + instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp); + instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp); + } +} + void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName) { // Set up the arguments to our poison/unpoison functions. @@ -1029,12 +1121,11 @@ void AddressSanitizerModule::createInitializerPoisonCalls( ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); for (Use &OP : CA->operands()) { - if (isa<ConstantAggregateZero>(OP)) - continue; + if (isa<ConstantAggregateZero>(OP)) continue; ConstantStruct *CS = cast<ConstantStruct>(OP); // Must have a function or null ptr. - if (Function* F = dyn_cast<Function>(CS->getOperand(1))) { + if (Function *F = dyn_cast<Function>(CS->getOperand(1))) { if (F->getName() == kAsanModuleCtorName) continue; ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); // Don't instrument CTORs that will run before asan.module_ctor. @@ -1059,13 +1150,11 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { G->getLinkage() != GlobalVariable::PrivateLinkage && G->getLinkage() != GlobalVariable::InternalLinkage) return false; - if (G->hasComdat()) - return false; + if (G->hasComdat()) return false; // Two problems with thread-locals: // - The address of the main thread's copy can't be computed at link-time. // - Need to poison all copies, not just the main thread's one. - if (G->isThreadLocal()) - return false; + if (G->isThreadLocal()) return false; // For now, just ignore this Global if the alignment is large. 
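The new instrumentUnusualSizeOrAlignment above implements the strategy its comment describes: an access whose size is not 1/2/4/8/16 bytes (or whose alignment is too small) gets two 1-byte checks, one at each end of the accessed range, plus the real size passed to the sized report callback. A trivial sketch of the two addresses being checked (helper name is ad hoc):

    #include <cstdint>
    #include <utility>

    // First and last byte of an access of TypeSizeInBits bits at Addr; each
    // gets its own 1-byte shadow check.
    std::pair<uint64_t, uint64_t> bytesToCheck(uint64_t Addr,
                                               uint32_t TypeSizeInBits) {
      uint64_t SizeInBytes = TypeSizeInBits / 8;
      return std::make_pair(Addr, Addr + SizeInBytes - 1);
    }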
if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false; @@ -1076,10 +1165,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { StringRef ParsedSegment, ParsedSection; unsigned TAA = 0, StubSize = 0; bool TAAParsed; - std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(Section, ParsedSegment, - ParsedSection, TAA, TAAParsed, - StubSize); + std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier( + Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { report_fatal_error("Invalid section specifier '" + ParsedSection + "': " + ErrorCode + "."); @@ -1140,12 +1227,11 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); // Declare functions that register/unregister globals. AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanRegisterGlobalsName, IRB.getVoidTy(), - IntptrTy, IntptrTy, nullptr)); + kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); - AsanUnregisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanUnregisterGlobalsName, - IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); + AsanUnregisterGlobals = checkInterfaceFunction( + M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, nullptr)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); } @@ -1158,8 +1244,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { SmallVector<GlobalVariable *, 16> GlobalsToChange; for (auto &G : M.globals()) { - if (ShouldInstrumentGlobal(&G)) - GlobalsToChange.push_back(&G); + if (ShouldInstrumentGlobal(&G)) GlobalsToChange.push_back(&G); } size_t n = GlobalsToChange.size(); @@ -1184,8 +1269,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { // We shouldn't merge same module names, as this string serves as unique // module ID in runtime. GlobalVariable *ModuleName = createPrivateGlobalForString( - M, M.getModuleIdentifier(), /*AllowMerging*/false); + M, M.getModuleIdentifier(), /*AllowMerging*/ false); + auto &DL = M.getDataLayout(); for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; @@ -1199,32 +1285,30 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { PointerType *PtrTy = cast<PointerType>(G->getType()); Type *Ty = PtrTy->getElementType(); - uint64_t SizeInBytes = DL->getTypeAllocSize(Ty); + uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); uint64_t MinRZ = MinRedzoneSizeForGlobal(); // MinRZ <= RZ <= kMaxGlobalRedzone // and trying to make RZ to be ~ 1/4 of SizeInBytes. 
- uint64_t RZ = std::max(MinRZ, - std::min(kMaxGlobalRedzone, - (SizeInBytes / MinRZ / 4) * MinRZ)); + uint64_t RZ = std::max( + MinRZ, std::min(kMaxGlobalRedzone, (SizeInBytes / MinRZ / 4) * MinRZ)); uint64_t RightRedzoneSize = RZ; // Round up to MinRZ - if (SizeInBytes % MinRZ) - RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); + if (SizeInBytes % MinRZ) RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, nullptr); - Constant *NewInitializer = ConstantStruct::get( - NewTy, G->getInitializer(), - Constant::getNullValue(RightRedZoneTy), nullptr); + Constant *NewInitializer = + ConstantStruct::get(NewTy, G->getInitializer(), + Constant::getNullValue(RightRedZoneTy), nullptr); // Create a new global variable with enough space for a redzone. GlobalValue::LinkageTypes Linkage = G->getLinkage(); if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage) Linkage = GlobalValue::InternalLinkage; - GlobalVariable *NewGlobal = new GlobalVariable( - M, NewTy, G->isConstant(), Linkage, - NewInitializer, "", G, G->getThreadLocalMode()); + GlobalVariable *NewGlobal = + new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer, + "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); NewGlobal->setAlignment(MinRZ); @@ -1253,8 +1337,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, nullptr); - if (ClInitializers && MD.IsDynInit) - HasDynamicallyInitializedGlobals = true; + if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true; DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); } @@ -1273,9 +1356,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { // We also need to unregister globals at the end, e.g. when a shared library // gets closed. 
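The redzone sizing above targets roughly a quarter of the global's size, clamped to [MinRZ, kMaxGlobalRedzone] and padded so that global plus redzone stays MinRZ-aligned. The same arithmetic as a standalone sketch:

    #include <algorithm>
    #include <cstdint>

    uint64_t rightRedzoneSize(uint64_t SizeInBytes, uint64_t MinRZ,
                              uint64_t MaxRZ) {
      // ~SizeInBytes/4, rounded down to a multiple of MinRZ, then clamped.
      uint64_t RZ =
          std::max(MinRZ, std::min(MaxRZ, (SizeInBytes / MinRZ / 4) * MinRZ));
      uint64_t RightRedzoneSize = RZ;
      // Pad so that (SizeInBytes + RightRedzoneSize) % MinRZ == 0.
      if (SizeInBytes % MinRZ)
        RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
      return RightRedzoneSize;
    }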
- Function *AsanDtorFunction = Function::Create( - FunctionType::get(Type::getVoidTy(*C), false), - GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); + Function *AsanDtorFunction = + Function::Create(FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB)); IRB_Dtor.CreateCall2(AsanUnregisterGlobals, @@ -1288,12 +1371,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { } bool AddressSanitizerModule::runOnModule(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - return false; - DL = &DLP->getDataLayout(); C = &(M.getContext()); - int LongSize = DL->getPointerSizeInBits(); + int LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); TargetTriple = Triple(M.getTargetTriple()); Mapping = getShadowMapping(TargetTriple, LongSize); @@ -1305,8 +1384,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { assert(CtorFunc); IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - if (ClGlobals) - Changed |= InstrumentGlobals(IRB, M); + if (ClGlobals) Changed |= InstrumentGlobals(IRB, M); return Changed; } @@ -1314,33 +1392,34 @@ bool AddressSanitizerModule::runOnModule(Module &M) { void AddressSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Create __asan_report* callbacks. - for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { - for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; - AccessSizeIndex++) { - // IsWrite and TypeSize are encoded in the function name. - std::string Suffix = - (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); - AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = - checkInterfaceFunction( - M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix, - IRB.getVoidTy(), IntptrTy, nullptr)); - AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] = - checkInterfaceFunction( - M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix, - IRB.getVoidTy(), IntptrTy, nullptr)); + // IsWrite, TypeSize and Exp are encoded in the function name. + for (int Exp = 0; Exp < 2; Exp++) { + for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { + const std::string TypeStr = AccessIsWrite ? "store" : "load"; + const std::string ExpStr = Exp ? "exp_" : ""; + const Type *ExpType = Exp ? 
Type::getInt32Ty(*C) : nullptr; + AsanErrorCallbackSized[AccessIsWrite][Exp] = + checkInterfaceFunction(M.getOrInsertFunction( + kAsanReportErrorTemplate + ExpStr + TypeStr + "_n", + IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); + AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = + checkInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N", + IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; + AccessSizeIndex++) { + const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex); + AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = + checkInterfaceFunction(M.getOrInsertFunction( + kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(), + IntptrTy, ExpType, nullptr)); + AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = + checkInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(), + IntptrTy, ExpType, nullptr)); + } } } - AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction( - kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction( - kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - - AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction( - M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN", - IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction( - M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN", - IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(), @@ -1368,21 +1447,17 @@ void AddressSanitizer::initializeCallbacks(Module &M) { // virtual bool AddressSanitizer::doInitialization(Module &M) { // Initialize the private fields. No one has accessed them before. - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - report_fatal_error("data layout missing"); - DL = &DLP->getDataLayout(); GlobalsMD.init(M); C = &(M.getContext()); - LongSize = DL->getPointerSizeInBits(); + LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); TargetTriple = Triple(M.getTargetTriple()); - AsanCtorFunction = Function::Create( - FunctionType::get(Type::getVoidTy(*C), false), - GlobalValue::InternalLinkage, kAsanModuleCtorName, &M); + AsanCtorFunction = + Function::Create(FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleCtorName, &M); BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); // call __asan_init in the module ctor. IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB)); @@ -1424,22 +1499,21 @@ bool AddressSanitizer::runOnFunction(Function &F) { // If needed, insert __asan_init before checking for SanitizeAddress attr. maybeInsertAsanInitAtFunctionEntry(F); - if (!F.hasFnAttribute(Attribute::SanitizeAddress)) - return false; + if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return false; - if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) - return false; + if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; // We want to instrument every address only once per basic block (unless there // are calls between uses). 
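The nested Exp/IsWrite/size loops above derive every runtime callback name from one naming scheme. A short sketch that only prints the names being requested, assuming kAsanReportErrorTemplate expands to "__asan_report_", ClMemoryAccessCallbackPrefix to its default "__asan_", and kNumberOfAccessSizes to 5; none of these definitions appear in the hunks shown:

#include <cstdio>
#include <string>

int main() {
  const std::string ReportTemplate = "__asan_report_"; // assumed
  const std::string AccessPrefix = "__asan_";          // assumed default
  const int kNumberOfAccessSizes = 5;                  // assumed: 1..16 bytes
  for (int Exp = 0; Exp < 2; Exp++)
    for (int IsWrite = 0; IsWrite <= 1; IsWrite++) {
      const std::string TypeStr = IsWrite ? "store" : "load";
      const std::string ExpStr = Exp ? "exp_" : "";
      // Variable-size ("N"/"_n") variants carry an explicit length argument.
      std::printf("%s\n", (ReportTemplate + ExpStr + TypeStr + "_n").c_str());
      std::printf("%s\n", (AccessPrefix + ExpStr + TypeStr + "N").c_str());
      for (int i = 0; i < kNumberOfAccessSizes; i++) {
        const std::string Suffix = TypeStr + std::to_string(1 << i);
        std::printf("%s\n", (ReportTemplate + ExpStr + Suffix).c_str());
        std::printf("%s\n", (AccessPrefix + ExpStr + Suffix).c_str());
      }
    }
}

The "exp_" flavors take one extra argument, which is why ExpType is an i32 only when Exp is set and a null type otherwise.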
- SmallSet<Value*, 16> TempsToInstrument; - SmallVector<Instruction*, 16> ToInstrument; - SmallVector<Instruction*, 8> NoReturnCalls; - SmallVector<BasicBlock*, 16> AllBlocks; - SmallVector<Instruction*, 16> PointerComparisonsOrSubtracts; + SmallSet<Value *, 16> TempsToInstrument; + SmallVector<Instruction *, 16> ToInstrument; + SmallVector<Instruction *, 8> NoReturnCalls; + SmallVector<BasicBlock *, 16> AllBlocks; + SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts; int NumAllocas = 0; bool IsWrite; unsigned Alignment; + uint64_t TypeSize; // Fill the set of memory operations to instrument. for (auto &BB : F) { @@ -1448,8 +1522,8 @@ bool AddressSanitizer::runOnFunction(Function &F) { int NumInsnsPerBB = 0; for (auto &Inst : BB) { if (LooksLikeCodeInBug11395(&Inst)) return false; - if (Value *Addr = - isInterestingMemoryAccess(&Inst, &IsWrite, &Alignment)) { + if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize, + &Alignment)) { if (ClOpt && ClOptSameTemp) { if (!TempsToInstrument.insert(Addr).second) continue; // We've seen this temp in the current BB. @@ -1461,21 +1535,18 @@ bool AddressSanitizer::runOnFunction(Function &F) { } else if (isa<MemIntrinsic>(Inst)) { // ok, take it. } else { - if (isa<AllocaInst>(Inst)) - NumAllocas++; + if (isa<AllocaInst>(Inst)) NumAllocas++; CallSite CS(&Inst); if (CS) { // A call inside BB. TempsToInstrument.clear(); - if (CS.doesNotReturn()) - NoReturnCalls.push_back(CS.getInstruction()); + if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction()); } continue; } ToInstrument.push_back(&Inst); NumInsnsPerBB++; - if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) - break; + if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; } } @@ -1484,13 +1555,20 @@ bool AddressSanitizer::runOnFunction(Function &F) { ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold) UseCalls = true; + const TargetLibraryInfo *TLI = + &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + const DataLayout &DL = F.getParent()->getDataLayout(); + ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), + /*RoundToAlign=*/true); + // Instrument. int NumInstrumented = 0; for (auto Inst : ToInstrument) { if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { - if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment)) - instrumentMop(Inst, UseCalls); + if (isInterestingMemoryAccess(Inst, &IsWrite, &TypeSize, &Alignment)) + instrumentMop(ObjSizeVis, Inst, UseCalls, + F.getParent()->getDataLayout()); else instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); } @@ -1549,10 +1627,9 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { IntptrTy, IntptrTy, nullptr)); } -void -FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes, - IRBuilder<> &IRB, Value *ShadowBase, - bool DoPoison) { +void FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes, + IRBuilder<> &IRB, Value *ShadowBase, + bool DoPoison) { size_t n = ShadowBytes.size(); size_t i = 0; // We need to (un)poison n bytes of stack shadow. 
Poison as many as we can @@ -1563,7 +1640,7 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes, for (; i + LargeStoreSizeInBytes - 1 < n; i += LargeStoreSizeInBytes) { uint64_t Val = 0; for (size_t j = 0; j < LargeStoreSizeInBytes; j++) { - if (ASan.DL->isLittleEndian()) + if (F.getParent()->getDataLayout().isLittleEndian()) Val |= (uint64_t)ShadowBytes[i + j] << (8 * j); else Val = (Val << 8) | ShadowBytes[i + j]; @@ -1582,9 +1659,8 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes, static int StackMallocSizeClass(uint64_t LocalStackSize) { assert(LocalStackSize <= kMaxStackMallocSize); uint64_t MaxSize = kMinStackMallocSize; - for (int i = 0; ; i++, MaxSize *= 2) - if (LocalStackSize <= MaxSize) - return i; + for (int i = 0;; i++, MaxSize *= 2) + if (LocalStackSize <= MaxSize) return i; llvm_unreachable("impossible LocalStackSize"); } @@ -1596,18 +1672,21 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) { void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined( IRBuilder<> &IRB, Value *ShadowBase, int Size) { assert(!(Size % 8)); - assert(kAsanStackAfterReturnMagic == 0xf5); + + // kAsanStackAfterReturnMagic is 0xf5. + const uint64_t kAsanStackAfterReturnMagic64 = 0xf5f5f5f5f5f5f5f5ULL; + for (int i = 0; i < Size; i += 8) { Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)); - IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0xf5f5f5f5f5f5f5f5ULL), - IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo())); + IRB.CreateStore( + ConstantInt::get(IRB.getInt64Ty(), kAsanStackAfterReturnMagic64), + IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo())); } } static DebugLoc getFunctionEntryDebugLocation(Function &F) { for (const auto &Inst : F.getEntryBlock()) - if (!isa<AllocaInst>(Inst)) - return Inst.getDebugLoc(); + if (!isa<AllocaInst>(Inst)) return Inst.getDebugLoc(); return DebugLoc(); } @@ -1664,9 +1743,9 @@ void FunctionStackPoisoner::poisonStack() { SmallVector<ASanStackVariableDescription, 16> SVD; SVD.reserve(AllocaVec.size()); for (AllocaInst *AI : AllocaVec) { - ASanStackVariableDescription D = { AI->getName().data(), - getAllocaSizeInBytes(AI), - AI->getAlignment(), AI, 0}; + ASanStackVariableDescription D = {AI->getName().data(), + ASan.getAllocaSizeInBytes(AI), + AI->getAlignment(), AI, 0}; SVD.push_back(D); } // Minimal header size (left redzone) is 4 pointers, @@ -1757,19 +1836,19 @@ void FunctionStackPoisoner::poisonStack() { BasePlus0); // Write the frame description constant to redzone[1]. Value *BasePlus1 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)), - IntptrPtrTy); + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, ASan.LongSize / 8)), + IntptrPtrTy); GlobalVariable *StackDescriptionGlobal = createPrivateGlobalForString(*F.getParent(), L.DescriptionString, - /*AllowMerging*/true); - Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, - IntptrTy); + /*AllowMerging*/ true); + Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); // Write the PC to redzone[2]. Value *BasePlus2 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, - 2 * ASan.LongSize/8)), - IntptrPtrTy); + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), + IntptrPtrTy); IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); // Poison the stack redzones at the entry. 
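For reference, StackMallocSizeClass above just counts how many doublings of kMinStackMallocSize are needed to cover the frame. A standalone sketch, assuming the two constants are 1 << 6 and 1 << 16; their definitions are outside the hunks shown:

#include <cassert>
#include <cstdint>
#include <cstdio>

static const uint64_t kMinStackMallocSize = 1 << 6;  // assumed
static const uint64_t kMaxStackMallocSize = 1 << 16; // assumed

static int StackMallocSizeClass(uint64_t LocalStackSize) {
  assert(LocalStackSize <= kMaxStackMallocSize);
  uint64_t MaxSize = kMinStackMallocSize;
  for (int i = 0;; i++, MaxSize *= 2)
    if (LocalStackSize <= MaxSize) return i;
}

int main() {
  for (uint64_t Size : {48ULL, 64ULL, 65ULL, 4096ULL, 65536ULL})
    std::printf("LocalStackSize=%llu -> class %d\n",
                (unsigned long long)Size, StackMallocSizeClass(Size));
}

Frames of 48 and 64 bytes share class 0, 65 bytes moves to class 1, and so on up to class 10 for the assumed 64 KiB maximum.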
@@ -1830,8 +1909,7 @@ void FunctionStackPoisoner::poisonStack() { } // We are done. Remove the old unused alloca instructions. - for (auto AI : AllocaVec) - AI->eraseFromParent(); + for (auto AI : AllocaVec) AI->eraseFromParent(); } void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, @@ -1839,9 +1917,9 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, // For now just insert the call to ASan runtime. Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); Value *SizeArg = ConstantInt::get(IntptrTy, Size); - IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc - : AsanUnpoisonStackMemoryFunc, - AddrArg, SizeArg); + IRB.CreateCall2( + DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc, + AddrArg, SizeArg); } // Handling llvm.lifetime intrinsics for a given %alloca: @@ -1856,12 +1934,11 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) // We're intested only in allocas we can handle. - return isInterestingAlloca(*AI) ? AI : nullptr; + return ASan.isInterestingAlloca(*AI) ? AI : nullptr; // See if we've already calculated (or started to calculate) alloca for a // given value. AllocaForValueMapTy::iterator I = AllocaForValue.find(V); - if (I != AllocaForValue.end()) - return I->second; + if (I != AllocaForValue.end()) return I->second; // Store 0 while we're calculating alloca for value V to avoid // infinite recursion if the value references itself. AllocaForValue[V] = nullptr; @@ -1880,8 +1957,7 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { Res = IncValueAI; } } - if (Res) - AllocaForValue[V] = Res; + if (Res) AllocaForValue[V] = Res; return Res; } @@ -1912,14 +1988,14 @@ Value *FunctionStackPoisoner::computePartialRzMagic(Value *PartialSize, Value *Shift = IRB.CreateAnd(PartialSize, IRB.getInt32(~7)); unsigned Val1Int = kAsanAllocaPartialVal1; unsigned Val2Int = kAsanAllocaPartialVal2; - if (!ASan.DL->isLittleEndian()) { + if (!F.getParent()->getDataLayout().isLittleEndian()) { Val1Int = sys::getSwappedBytes(Val1Int); Val2Int = sys::getSwappedBytes(Val2Int); } Value *Val1 = shiftAllocaMagic(IRB.getInt32(Val1Int), IRB, Shift); Value *PartialBits = IRB.CreateAnd(PartialSize, IRB.getInt32(7)); // For BigEndian get 0x000000YZ -> 0xYZ000000. - if (ASan.DL->isBigEndian()) + if (F.getParent()->getDataLayout().isBigEndian()) PartialBits = IRB.CreateShl(PartialBits, IRB.getInt32(24)); Value *Val2 = IRB.getInt32(Val2Int); Value *Cond = @@ -1953,7 +2029,8 @@ void FunctionStackPoisoner::handleDynamicAllocaCall( // redzones, and OldSize is number of allocated blocks with // ElementSize size, get allocated memory size in bytes by // OldSize * ElementSize. - unsigned ElementSize = ASan.DL->getTypeAllocSize(AI->getAllocatedType()); + unsigned ElementSize = + F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType()); Value *OldSize = IRB.CreateMul(AI->getArraySize(), ConstantInt::get(IntptrTy, ElementSize)); @@ -2021,3 +2098,20 @@ void FunctionStackPoisoner::handleDynamicAllocaCall( AI->eraseFromParent(); NumInstrumentedDynamicAllocas++; } + +// isSafeAccess returns true if Addr is always inbounds with respect to its +// base object. For example, it is a field access or an array access with +// constant inbounds index. 
+bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, + Value *Addr, uint64_t TypeSize) const { + SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr); + if (!ObjSizeVis.bothKnown(SizeOffset)) return false; + uint64_t Size = SizeOffset.first.getZExtValue(); + int64_t Offset = SizeOffset.second.getSExtValue(); + // Three checks are required to ensure safety: + // . Offset >= 0 (since the offset is given from the base ptr) + // . Size >= Offset (unsigned) + // . Size - Offset >= NeededSize (unsigned) + return Offset >= 0 && Size >= uint64_t(Offset) && + Size - uint64_t(Offset) >= TypeSize / 8; +} diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 2b5f39c..8113834 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetFolder.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -24,7 +25,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; #define DEBUG_TYPE "bounds-checking" @@ -49,12 +49,10 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } private: - const DataLayout *DL; const TargetLibraryInfo *TLI; ObjectSizeOffsetEvaluator *ObjSizeEval; BuilderTy *Builder; @@ -63,7 +61,7 @@ namespace { BasicBlock *getTrapBB(); void emitBranchToTrap(Value *Cmp = nullptr); - bool instrument(Value *Ptr, Value *Val); + bool instrument(Value *Ptr, Value *Val, const DataLayout &DL); }; } @@ -125,8 +123,9 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) { /// result from the load or the value being stored. It is used to determine the /// size of memory block that is touched. /// Returns true if any change was made to the IR, false otherwise. 
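The three comparisons in isSafeAccess above boil down to one question: does the interval [Offset, Offset + NeededSize) stay inside [0, Size)? The same pattern reappears in the BoundsChecking::instrument hunk that follows. A minimal sketch of the predicate, detached from ObjectSizeOffsetVisitor, where NeededSize corresponds to TypeSize / 8 bytes:

#include <cstdint>
#include <cstdio>

// True if an access of NeededSize bytes at byte Offset from the base of an
// object of Size bytes is guaranteed to stay in bounds.
static bool isSafeAccess(uint64_t Size, int64_t Offset, uint64_t NeededSize) {
  return Offset >= 0 && Size >= uint64_t(Offset) &&
         Size - uint64_t(Offset) >= NeededSize;
}

int main() {
  std::printf("%d\n", isSafeAccess(16, 8, 8));  // 1: exactly fits
  std::printf("%d\n", isSafeAccess(16, 12, 8)); // 0: runs off the end
  std::printf("%d\n", isSafeAccess(16, -4, 4)); // 0: negative offset
}

Doing the subtraction only after checking Size >= Offset keeps the unsigned arithmetic from wrapping, which is why the checks are ordered this way.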
-bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { - uint64_t NeededSize = DL->getTypeStoreSize(InstVal->getType()); +bool BoundsChecking::instrument(Value *Ptr, Value *InstVal, + const DataLayout &DL) { + uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType()); DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) << " bytes\n"); @@ -141,7 +140,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { Value *Offset = SizeOffset.second; ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); - Type *IntTy = DL->getIntPtrType(Ptr->getType()); + Type *IntTy = DL.getIntPtrType(Ptr->getType()); Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); // three checks are required to ensure safety: @@ -165,7 +164,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { } bool BoundsChecking::runOnFunction(Function &F) { - DL = &getAnalysis<DataLayoutPass>().getDataLayout(); + const DataLayout &DL = F.getParent()->getDataLayout(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TrapBB = nullptr; @@ -192,13 +191,16 @@ bool BoundsChecking::runOnFunction(Function &F) { Builder->SetInsertPoint(Inst); if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - MadeChange |= instrument(LI->getPointerOperand(), LI); + MadeChange |= instrument(LI->getPointerOperand(), LI, DL); } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand()); + MadeChange |= + instrument(SI->getPointerOperand(), SI->getValueOperand(), DL); } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) { - MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand()); + MadeChange |= + instrument(AI->getPointerOperand(), AI->getCompareOperand(), DL); } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) { - MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand()); + MadeChange |= + instrument(AI->getPointerOperand(), AI->getValOperand(), DL); } else { llvm_unreachable("unknown Instruction type"); } diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 6adf0d2..b3925ee 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -217,7 +217,6 @@ class DataFlowSanitizer : public ModulePass { WK_Custom }; - const DataLayout *DL; Module *Mod; LLVMContext *Ctx; IntegerType *ShadowTy; @@ -422,16 +421,13 @@ bool DataFlowSanitizer::doInitialization(Module &M) { bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || TargetTriple.getArch() == llvm::Triple::mips64el; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - report_fatal_error("data layout missing"); - DL = &DLP->getDataLayout(); + const DataLayout &DL = M.getDataLayout(); Mod = &M; Ctx = &M.getContext(); ShadowTy = IntegerType::get(*Ctx, ShadowWidth); ShadowPtrTy = PointerType::getUnqual(ShadowTy); - IntptrTy = DL->getIntPtrType(*Ctx); + IntptrTy = DL.getIntPtrType(*Ctx); ZeroShadow = ConstantInt::getSigned(ShadowTy, 0); ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8); if (IsX86_64) @@ -593,9 +589,6 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, } bool DataFlowSanitizer::runOnModule(Module &M) { - if (!DL) - return false; - if (ABIList.isIn(M, "skip")) return false; @@ -1056,7 +1049,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, uint64_t ShadowAlign = Align 
* DFS.ShadowWidth / 8; SmallVector<Value *, 2> Objs; - GetUnderlyingObjects(Addr, Objs, DFS.DL); + GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout()); bool AllConstants = true; for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end(); i != e; ++i) { @@ -1157,7 +1150,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, } void DFSanVisitor::visitLoadInst(LoadInst &LI) { - uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType()); + auto &DL = LI.getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); if (Size == 0) { DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow); return; @@ -1167,7 +1161,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { if (ClPreserveAlignment) { Align = LI.getAlignment(); if (Align == 0) - Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType()); + Align = DL.getABITypeAlignment(LI.getType()); } else { Align = 1; } @@ -1235,8 +1229,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, } void DFSanVisitor::visitStoreInst(StoreInst &SI) { - uint64_t Size = - DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType()); + auto &DL = SI.getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType()); if (Size == 0) return; @@ -1244,7 +1238,7 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) { if (ClPreserveAlignment) { Align = SI.getAlignment(); if (Align == 0) - Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType()); + Align = DL.getABITypeAlignment(SI.getValueOperand()->getType()); } else { Align = 1; } diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index cb965fb..a793e69 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -47,6 +47,8 @@ using namespace llvm; static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, cl::ValueRequired); +static cl::opt<bool> DefaultExitBlockBeforeBody("gcov-exit-block-before-body", + cl::init(false), cl::Hidden); GCOVOptions GCOVOptions::getDefault() { GCOVOptions Options; @@ -55,6 +57,7 @@ GCOVOptions GCOVOptions::getDefault() { Options.UseCfgChecksum = false; Options.NoRedZone = false; Options.FunctionNamesInData = true; + Options.ExitBlockBeforeBody = DefaultExitBlockBeforeBody; if (DefaultGCOVVersion.size() != 4) { llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + @@ -70,20 +73,10 @@ namespace { class GCOVProfiler : public ModulePass { public: static char ID; - GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) { - init(); - } - GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){ + GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} + GCOVProfiler(const GCOVOptions &Opts) : ModulePass(ID), Options(Opts) { assert((Options.EmitNotes || Options.EmitData) && "GCOVProfiler asked to do nothing?"); - init(); - } - const char *getPassName() const override { - return "GCOV Profiler"; - } - - private: - void init() { ReversedVersion[0] = Options.Version[3]; ReversedVersion[1] = Options.Version[2]; ReversedVersion[2] = Options.Version[1]; @@ -91,6 +84,11 @@ namespace { ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } + const char *getPassName() const override { + return "GCOV Profiler"; + } + + private: bool runOnModule(Module &M) override; // Create the .gcno files for the Module based on 
DebugInfo. @@ -312,7 +310,7 @@ namespace { class GCOVFunction : public GCOVRecord { public: GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, - bool UseCfgChecksum) + bool UseCfgChecksum, bool ExitBlockBeforeBody) : SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0), ReturnBlock(1, os) { this->os = os; @@ -322,11 +320,13 @@ namespace { uint32_t i = 0; for (auto &BB : *F) { - // Skip index 1 (0, 2, 3, 4, ...) because that's assigned to the - // ReturnBlock. - bool first = i == 0; - Blocks.insert(std::make_pair(&BB, GCOVBlock(i++ + !first, os))); + // Skip index 1 if it's assigned to the ReturnBlock. + if (i == 1 && ExitBlockBeforeBody) + ++i; + Blocks.insert(std::make_pair(&BB, GCOVBlock(i++, os))); } + if (!ExitBlockBeforeBody) + ReturnBlock.Number = i; std::string FunctionNameAndLine; raw_string_ostream FNLOS(FunctionNameAndLine); @@ -469,7 +469,7 @@ static bool functionHasLines(Function *F) { if (Loc.isUnknown()) continue; // Artificial lines such as calls to the global constructors. - if (Loc.getLine() == 0) continue; + if (Loc.getLine() == 0) continue; return true; } @@ -513,7 +513,8 @@ void GCOVProfiler::emitProfileNotes() { EntryBlock.splitBasicBlock(It); Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++, - Options.UseCfgChecksum)); + Options.UseCfgChecksum, + Options.ExitBlockBeforeBody)); GCOVFunction &Func = *Funcs.back(); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 4152679..c2aa1e2 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -274,7 +274,6 @@ class MemorySanitizer : public FunctionPass { MemorySanitizer(int TrackOrigins = 0) : FunctionPass(ID), TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)), - DL(nullptr), WarningFn(nullptr) {} const char *getPassName() const override { return "MemorySanitizer"; } bool runOnFunction(Function &F) override; @@ -287,7 +286,6 @@ class MemorySanitizer : public FunctionPass { /// \brief Track origins (allocation points) of uninitialized values. int TrackOrigins; - const DataLayout *DL; LLVMContext *C; Type *IntptrTy; Type *OriginTy; @@ -449,10 +447,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) { /// /// inserts a call to __msan_init to the module's constructor list. bool MemorySanitizer::doInitialization(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - report_fatal_error("data layout missing"); - DL = &DLP->getDataLayout(); + auto &DL = M.getDataLayout(); Triple TargetTriple(M.getTargetTriple()); switch (TargetTriple.getOS()) { @@ -604,7 +599,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) { - unsigned IntptrSize = MS.DL->getTypeStoreSize(MS.IntptrTy); + const DataLayout &DL = F.getParent()->getDataLayout(); + unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); if (IntptrSize == kOriginSize) return Origin; assert(IntptrSize == kOriginSize * 2); Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false); @@ -614,8 +610,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Fill memory range with the given origin value. 
void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr, unsigned Size, unsigned Alignment) { - unsigned IntptrAlignment = MS.DL->getABITypeAlignment(MS.IntptrTy); - unsigned IntptrSize = MS.DL->getTypeStoreSize(MS.IntptrTy); + const DataLayout &DL = F.getParent()->getDataLayout(); + unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy); + unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy); assert(IntptrAlignment >= kMinOriginAlignment); assert(IntptrSize >= kOriginSize); @@ -643,8 +640,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin, unsigned Alignment, bool AsCall) { + const DataLayout &DL = F.getParent()->getDataLayout(); unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); - unsigned StoreSize = MS.DL->getTypeStoreSize(Shadow->getType()); + unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); if (isa<StructType>(Shadow->getType())) { paintOrigin(IRB, updateOrigin(Origin, IRB), getOriginPtr(Addr, IRB, Alignment), StoreSize, @@ -661,7 +659,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } unsigned TypeSizeInBits = - MS.DL->getTypeSizeInBits(ConvertedShadow->getType()); + DL.getTypeSizeInBits(ConvertedShadow->getType()); unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); if (AsCall && SizeIndex < kNumberOfAccessSizes) { Value *Fn = MS.MaybeStoreOriginFn[SizeIndex]; @@ -731,8 +729,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return; } - unsigned TypeSizeInBits = - MS.DL->getTypeSizeInBits(ConvertedShadow->getType()); + const DataLayout &DL = OrigIns->getModule()->getDataLayout(); + + unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType()); unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); if (AsCall && SizeIndex < kNumberOfAccessSizes) { Value *Fn = MS.MaybeWarningFn[SizeIndex]; @@ -772,7 +771,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Add MemorySanitizer instrumentation to a function. bool runOnFunction() { MS.initializeCallbacks(*F.getParent()); - if (!MS.DL) return false; // In the presence of unreachable blocks, we may see Phi nodes with // incoming nodes from such blocks. Since InstVisitor skips unreachable @@ -828,8 +826,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // This may return weird-sized types like i1. 
if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy)) return IT; + const DataLayout &DL = F.getParent()->getDataLayout(); if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) { - uint32_t EltSize = MS.DL->getTypeSizeInBits(VT->getElementType()); + uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType()); return VectorType::get(IntegerType::get(*MS.C, EltSize), VT->getNumElements()); } @@ -845,7 +844,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); return Res; } - uint32_t TypeSize = MS.DL->getTypeSizeInBits(OrigTy); + uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy); return IntegerType::get(*MS.C, TypeSize); } @@ -1038,14 +1037,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Function *F = A->getParent(); IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI()); unsigned ArgOffset = 0; + const DataLayout &DL = F->getParent()->getDataLayout(); for (auto &FArg : F->args()) { if (!FArg.getType()->isSized()) { DEBUG(dbgs() << "Arg is not sized\n"); continue; } - unsigned Size = FArg.hasByValAttr() - ? MS.DL->getTypeAllocSize(FArg.getType()->getPointerElementType()) - : MS.DL->getTypeAllocSize(FArg.getType()); + unsigned Size = + FArg.hasByValAttr() + ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType()) + : DL.getTypeAllocSize(FArg.getType()); if (A == &FArg) { bool Overflow = ArgOffset + Size > kParamTLSSize; Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); @@ -1056,7 +1057,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { unsigned ArgAlign = FArg.getParamAlignment(); if (ArgAlign == 0) { Type *EltType = A->getType()->getPointerElementType(); - ArgAlign = MS.DL->getABITypeAlignment(EltType); + ArgAlign = DL.getABITypeAlignment(EltType); } if (Overflow) { // ParamTLS overflow. 
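The ArgOffset bookkeeping above packs each argument's shadow into a fixed-size parameter TLS buffer; an argument whose shadow would run past the end is treated as having clean shadow instead. A rough standalone sketch of that packing, assuming kParamTLSSize == 800 and 8-byte slot alignment (kShadowTLSAlignment); neither constant, nor the exact way offsets advance, is shown in these hunks:

#include <cstdio>

int main() {
  const unsigned kShadowTLSAlignment = 8; // assumed
  const unsigned kParamTLSSize = 800;     // assumed
  const unsigned Sizes[] = {4, 1, 16, 256, 600}; // getTypeAllocSize per arg
  unsigned ArgOffset = 0;
  for (unsigned Size : Sizes) {
    bool Overflow = ArgOffset + Size > kParamTLSSize;
    std::printf("arg size=%u shadow offset=%u%s\n", Size, ArgOffset,
                Overflow ? " (overflow -> clean shadow)" : "");
    // Advance to the next aligned slot (assumed behaviour).
    ArgOffset += (Size + kShadowTLSAlignment - 1) & ~(kShadowTLSAlignment - 1);
  }
}

With these sizes the final 600-byte argument would land at offset 288 and run past the assumed 800-byte buffer, so its shadow falls back to clean.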
@@ -2427,10 +2428,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { DEBUG(dbgs() << " Arg#" << i << ": " << *A << " Shadow: " << *ArgShadow << "\n"); bool ArgIsInitialized = false; + const DataLayout &DL = F.getParent()->getDataLayout(); if (CS.paramHasAttr(i + 1, Attribute::ByVal)) { assert(A->getType()->isPointerTy() && "ByVal argument is not a pointer!"); - Size = MS.DL->getTypeAllocSize(A->getType()->getPointerElementType()); + Size = DL.getTypeAllocSize(A->getType()->getPointerElementType()); if (ArgOffset + Size > kParamTLSSize) break; unsigned ParamAlignment = CS.getParamAlignment(i + 1); unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); @@ -2438,7 +2440,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), Size, Alignment); } else { - Size = MS.DL->getTypeAllocSize(A->getType()); + Size = DL.getTypeAllocSize(A->getType()); if (ArgOffset + Size > kParamTLSSize) break; Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase, kShadowTLSAlignment); @@ -2531,7 +2533,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setShadow(&I, getCleanShadow(&I)); setOrigin(&I, getCleanOrigin()); IRBuilder<> IRB(I.getNextNode()); - uint64_t Size = MS.DL->getTypeAllocSize(I.getAllocatedType()); + const DataLayout &DL = F.getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(I.getAllocatedType()); if (PoisonStack && ClPoisonStackWithCall) { IRB.CreateCall2(MS.MsanPoisonStackFn, IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), @@ -2723,6 +2726,7 @@ struct VarArgAMD64Helper : public VarArgHelper { unsigned GpOffset = 0; unsigned FpOffset = AMD64GpEndOffset; unsigned OverflowOffset = AMD64FpEndOffset; + const DataLayout &DL = F.getParent()->getDataLayout(); for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end(); ArgIt != End; ++ArgIt) { Value *A = *ArgIt; @@ -2732,7 +2736,7 @@ struct VarArgAMD64Helper : public VarArgHelper { // ByVal arguments always go to the overflow area. 
assert(A->getType()->isPointerTy()); Type *RealTy = A->getType()->getPointerElementType(); - uint64_t ArgSize = MS.DL->getTypeAllocSize(RealTy); + uint64_t ArgSize = DL.getTypeAllocSize(RealTy); Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset); OverflowOffset += RoundUpToAlignment(ArgSize, 8); IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB), @@ -2754,7 +2758,7 @@ struct VarArgAMD64Helper : public VarArgHelper { FpOffset += 16; break; case AK_Memory: - uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType()); + uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset); OverflowOffset += RoundUpToAlignment(ArgSize, 8); } @@ -2862,11 +2866,12 @@ struct VarArgMIPS64Helper : public VarArgHelper { void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { unsigned VAArgOffset = 0; + const DataLayout &DL = F.getParent()->getDataLayout(); for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end(); ArgIt != End; ++ArgIt) { Value *A = *ArgIt; Value *Base; - uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType()); + uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); #if defined(__MIPSEB__) || defined(MIPSEB) // Adjusting the shadow for argument with size < 8 to match the placement // of bits in big endian system diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 8c56e87..289675e 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -59,6 +59,7 @@ static const char *const kSanCovWithCheckName = "__sanitizer_cov_with_check"; static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16"; static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter"; static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block"; +static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp"; static const char *const kSanCovModuleCtorName = "sancov.module_ctor"; static const uint64_t kSanCtorAndDtorPriority = 2; @@ -72,7 +73,7 @@ static cl::opt<unsigned> ClCoverageBlockThreshold( "sanitizer-coverage-block-threshold", cl::desc("Use a callback with a guard check inside it if there are" " more than this number of blocks."), - cl::Hidden, cl::init(1000)); + cl::Hidden, cl::init(500)); static cl::opt<bool> ClExperimentalTracing("sanitizer-coverage-experimental-tracing", @@ -80,6 +81,22 @@ static cl::opt<bool> "callbacks at every basic block"), cl::Hidden, cl::init(false)); +static cl::opt<bool> + ClExperimentalCMPTracing("sanitizer-coverage-experimental-trace-compares", + cl::desc("Experimental tracing of CMP and similar " + "instructions"), + cl::Hidden, cl::init(false)); + +// Experimental 8-bit counters used as an additional search heuristic during +// coverage-guided fuzzing. +// The counters are not thread-friendly: +// - contention on these counters may cause significant slowdown; +// - the counter updates are racy and the results may be inaccurate. +// They are also inaccurate due to 8-bit integer overflow. 
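The counters described in the comment above (and enabled by the flag declared just below it) are plain per-block bytes: the injected code loads the byte, adds one, and stores it back with no atomics, so concurrent updates can be lost and the value wraps at 255. A hypothetical C-level model of what the inserted IR computes for one block:

#include <cstdint>
#include <cstdio>

// One byte per instrumented basic block; the real array is emitted by the
// pass as __sancov_gen_cov_counter and handed to the runtime at module init.
static uint8_t Counters[3];

// Rough equivalent of the injected load/add/store for the block at BlockIndex.
// Deliberately non-atomic, hence racy and prone to 8-bit overflow.
static void coverageCounterHit(unsigned BlockIndex) {
  Counters[BlockIndex] = uint8_t(Counters[BlockIndex] + 1);
}

int main() {
  for (int i = 0; i < 300; i++) coverageCounterHit(1);
  std::printf("block 1 counter = %u (300 hits wrapped to this)\n",
              (unsigned)Counters[1]);
}

The runtime side is outside this diff; the counters only feed a fuzzer's search heuristic, so losing some increments is an accepted trade-off.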
+static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters", + cl::desc("Experimental 8-bit counters"), + cl::Hidden, cl::init(false)); + namespace { class SanitizerCoverageModule : public ModulePass { @@ -94,26 +111,29 @@ class SanitizerCoverageModule : public ModulePass { return "SanitizerCoverageModule"; } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); - } - private: void InjectCoverageForIndirectCalls(Function &F, ArrayRef<Instruction *> IndirCalls); - bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks, - ArrayRef<Instruction *> IndirCalls); + void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets); + bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks); + void SetNoSanitizeMetada(Instruction *I); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls); + unsigned NumberOfInstrumentedBlocks() { + return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses(); + } Function *SanCovFunction; Function *SanCovWithCheckFunction; Function *SanCovIndirCallFunction; Function *SanCovModuleInit; Function *SanCovTraceEnter, *SanCovTraceBB; + Function *SanCovTraceCmpFunction; InlineAsm *EmptyAsm; - Type *IntptrTy; + Type *IntptrTy, *Int64Ty; LLVMContext *C; + const DataLayout *DL; GlobalVariable *GuardArray; + GlobalVariable *EightBitCounterArray; int CoverageLevel; }; @@ -133,12 +153,13 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { bool SanitizerCoverageModule::runOnModule(Module &M) { if (!CoverageLevel) return false; C = &(M.getContext()); - DataLayoutPass *DLP = &getAnalysis<DataLayoutPass>(); - IntptrTy = Type::getIntNTy(*C, DLP->getDataLayout().getPointerSizeInBits()); + DL = &M.getDataLayout(); + IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int64Ty = IRB.getInt64Ty(); Function *CtorFunc = Function::Create(FunctionType::get(VoidTy, false), @@ -152,9 +173,12 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr)); SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction( kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); - SanCovModuleInit = checkInterfaceFunction( - M.getOrInsertFunction(kSanCovModuleInitName, Type::getVoidTy(*C), - Int32PtrTy, IntptrTy, Int8PtrTy, nullptr)); + SanCovTraceCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( + kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + + SanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction( + kSanCovModuleInitName, Type::getVoidTy(*C), Int32PtrTy, IntptrTy, + Int8PtrTy, Int8PtrTy, nullptr)); SanCovModuleInit->setLinkage(Function::ExternalLinkage); // We insert an empty inline asm after cov callbacks to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -171,26 +195,49 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { // At this point we create a dummy array of guards because we don't // know how many elements we will need. 
Type *Int32Ty = IRB.getInt32Ty(); + Type *Int8Ty = IRB.getInt8Ty(); + GuardArray = new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, nullptr, "__sancov_gen_cov_tmp"); + if (ClUse8bitCounters) + EightBitCounterArray = + new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage, + nullptr, "__sancov_gen_cov_tmp"); for (auto &F : M) runOnFunction(F); + auto N = NumberOfInstrumentedBlocks(); + // Now we know how many elements we need. Create an array of guards // with one extra element at the beginning for the size. - Type *Int32ArrayNTy = - ArrayType::get(Int32Ty, SanCovFunction->getNumUses() + 1); + Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1); GlobalVariable *RealGuardArray = new GlobalVariable( M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage, Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov"); + // Replace the dummy array with the real one. GuardArray->replaceAllUsesWith( IRB.CreatePointerCast(RealGuardArray, Int32PtrTy)); GuardArray->eraseFromParent(); + GlobalVariable *RealEightBitCounterArray; + if (ClUse8bitCounters) { + // Make sure the array is 16-aligned. + static const int kCounterAlignment = 16; + Type *Int8ArrayNTy = + ArrayType::get(Int8Ty, RoundUpToAlignment(N, kCounterAlignment)); + RealEightBitCounterArray = new GlobalVariable( + M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage, + Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter"); + RealEightBitCounterArray->setAlignment(kCounterAlignment); + EightBitCounterArray->replaceAllUsesWith( + IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)); + EightBitCounterArray->eraseFromParent(); + } + // Create variable for module (compilation unit) name Constant *ModNameStrConst = ConstantDataArray::getString(M.getContext(), M.getName(), true); @@ -200,10 +247,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { // Call __sanitizer_cov_module_init IRB.SetInsertPoint(CtorFunc->getEntryBlock().getTerminator()); - IRB.CreateCall3(SanCovModuleInit, - IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), - ConstantInt::get(IntptrTy, SanCovFunction->getNumUses()), - IRB.CreatePointerCast(ModuleName, Int8PtrTy)); + IRB.CreateCall4( + SanCovModuleInit, IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), + ConstantInt::get(IntptrTy, N), + ClUse8bitCounters + ? 
IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy) + : Constant::getNullValue(Int8PtrTy), + IRB.CreatePointerCast(ModuleName, Int8PtrTy)); return true; } @@ -215,23 +265,28 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { SplitAllCriticalEdges(F); SmallVector<Instruction*, 8> IndirCalls; SmallVector<BasicBlock*, 16> AllBlocks; + SmallVector<Instruction*, 8> CmpTraceTargets; for (auto &BB : F) { AllBlocks.push_back(&BB); - if (CoverageLevel >= 4) - for (auto &Inst : BB) { + for (auto &Inst : BB) { + if (CoverageLevel >= 4) { CallSite CS(&Inst); if (CS && !CS.getCalledFunction()) IndirCalls.push_back(&Inst); } + if (ClExperimentalCMPTracing) + if (isa<ICmpInst>(&Inst)) + CmpTraceTargets.push_back(&Inst); + } } - InjectCoverage(F, AllBlocks, IndirCalls); + InjectCoverage(F, AllBlocks); + InjectCoverageForIndirectCalls(F, IndirCalls); + InjectTraceForCmp(F, CmpTraceTargets); return true; } -bool -SanitizerCoverageModule::InjectCoverage(Function &F, - ArrayRef<BasicBlock *> AllBlocks, - ArrayRef<Instruction *> IndirCalls) { +bool SanitizerCoverageModule::InjectCoverage(Function &F, + ArrayRef<BasicBlock *> AllBlocks) { if (!CoverageLevel) return false; if (CoverageLevel == 1) { @@ -241,7 +296,6 @@ SanitizerCoverageModule::InjectCoverage(Function &F, InjectCoverageAtBlock(F, *BB, ClCoverageBlockThreshold < AllBlocks.size()); } - InjectCoverageForIndirectCalls(F, IndirCalls); return true; } @@ -273,6 +327,32 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( } } +void SanitizerCoverageModule::InjectTraceForCmp( + Function &F, ArrayRef<Instruction *> CmpTraceTargets) { + if (!ClExperimentalCMPTracing) return; + for (auto I : CmpTraceTargets) { + if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) { + IRBuilder<> IRB(ICMP); + Value *A0 = ICMP->getOperand(0); + Value *A1 = ICMP->getOperand(1); + if (!A0->getType()->isIntegerTy()) continue; + uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType()); + // __sanitizer_cov_indir_call((type_size << 32) | predicate, A0, A1); + IRB.CreateCall3( + SanCovTraceCmpFunction, + ConstantInt::get(Int64Ty, (TypeSize << 32) | ICMP->getPredicate()), + IRB.CreateIntCast(A0, Int64Ty, true), + IRB.CreateIntCast(A1, Int64Ty, true)); + } + } +} + +void SanitizerCoverageModule::SetNoSanitizeMetada(Instruction *I) { + I->setMetadata( + I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"), + MDNode::get(*C, None)); +} + void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls) { BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end(); @@ -286,14 +366,15 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } bool IsEntryBB = &BB == &F.getEntryBlock(); - DebugLoc EntryLoc = - IsEntryBB ? IP->getDebugLoc().getFnDebugLoc(*C) : IP->getDebugLoc(); + DebugLoc EntryLoc = IsEntryBB && !IP->getDebugLoc().isUnknown() + ? 
IP->getDebugLoc().getFnDebugLoc(*C) + : IP->getDebugLoc(); IRBuilder<> IRB(IP); IRB.SetCurrentDebugLocation(EntryLoc); SmallVector<Value *, 1> Indices; Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), - ConstantInt::get(IntptrTy, (1 + SanCovFunction->getNumUses()) * 4)); + ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); if (UseCalls) { @@ -302,8 +383,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, LoadInst *Load = IRB.CreateLoad(GuardP); Load->setAtomic(Monotonic); Load->setAlignment(4); - Load->setMetadata(F.getParent()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); + SetNoSanitizeMetada(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); @@ -314,6 +394,19 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, IRB.CreateCall(EmptyAsm); // Avoids callback merge. } + if(ClUse8bitCounters) { + IRB.SetInsertPoint(IP); + Value *P = IRB.CreateAdd( + IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), + ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); + P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); + LoadInst *LI = IRB.CreateLoad(P); + Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); + StoreInst *SI = IRB.CreateStore(Inc, P); + SetNoSanitizeMetada(LI); + SetNoSanitizeMetada(SI); + } + if (ClExperimentalTracing) { // Experimental support for tracing. // Insert a callback with the same guard variable as used for coverage. diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index e4a4911..c3ba722 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -19,14 +19,14 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -76,7 +76,7 @@ namespace { /// ThreadSanitizer: instrument the code in module to find races. 
struct ThreadSanitizer : public FunctionPass { - ThreadSanitizer() : FunctionPass(ID), DL(nullptr) {} + ThreadSanitizer() : FunctionPass(ID) {} const char *getPassName() const override; bool runOnFunction(Function &F) override; bool doInitialization(Module &M) override; @@ -84,15 +84,15 @@ struct ThreadSanitizer : public FunctionPass { private: void initializeCallbacks(Module &M); - bool instrumentLoadOrStore(Instruction *I); - bool instrumentAtomic(Instruction *I); + bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL); + bool instrumentAtomic(Instruction *I, const DataLayout &DL); bool instrumentMemIntrinsic(Instruction *I); - void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, - SmallVectorImpl<Instruction*> &All); + void chooseInstructionsToInstrument(SmallVectorImpl<Instruction *> &Local, + SmallVectorImpl<Instruction *> &All, + const DataLayout &DL); bool addrPointsToConstantData(Value *Addr); - int getMemoryAccessFuncIndex(Value *Addr); + int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL); - const DataLayout *DL; Type *IntptrTy; IntegerType *OrdTy; // Callbacks to run-time library are computed in doInitialization. @@ -230,10 +230,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { } bool ThreadSanitizer::doInitialization(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - report_fatal_error("data layout missing"); - DL = &DLP->getDataLayout(); + const DataLayout &DL = M.getDataLayout(); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); @@ -285,8 +282,8 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { // 'Local' is a vector of insns within the same BB (no calls between). // 'All' is a vector of insns that will be instrumented. void ThreadSanitizer::chooseInstructionsToInstrument( - SmallVectorImpl<Instruction*> &Local, - SmallVectorImpl<Instruction*> &All) { + SmallVectorImpl<Instruction *> &Local, SmallVectorImpl<Instruction *> &All, + const DataLayout &DL) { SmallSet<Value*, 8> WriteTargets; // Iterate from the end. for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(), @@ -310,7 +307,7 @@ void ThreadSanitizer::chooseInstructionsToInstrument( Value *Addr = isa<StoreInst>(*I) ? cast<StoreInst>(I)->getPointerOperand() : cast<LoadInst>(I)->getPointerOperand(); - if (isa<AllocaInst>(GetUnderlyingObject(Addr, nullptr)) && + if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) && !PointerMayBeCaptured(Addr, true, true)) { // The variable is addressable but not captured, so it cannot be // referenced from a different thread and participate in a data race @@ -338,7 +335,6 @@ static bool isAtomic(Instruction *I) { } bool ThreadSanitizer::runOnFunction(Function &F) { - if (!DL) return false; initializeCallbacks(*F.getParent()); SmallVector<Instruction*, 8> RetVec; SmallVector<Instruction*, 8> AllLoadsAndStores; @@ -348,6 +344,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) { bool Res = false; bool HasCalls = false; bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread); + const DataLayout &DL = F.getParent()->getDataLayout(); // Traverse all instructions, collect loads/stores/returns, check for calls. 
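chooseInstructionsToInstrument above walks each call-free run of accesses backwards so that a read can be dropped when the same address is written later in that run, since the write will be instrumented anyway. A simplified standalone model of just that filtering step, ignoring the constant-data and captured-alloca checks the real function also performs:

#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct Access { bool IsWrite; std::string Addr; };

int main() {
  // Program order within one call-free region: read x, write x, read y.
  std::vector<Access> Local = {{false, "x"}, {true, "x"}, {false, "y"}};
  std::set<std::string> WriteTargets;
  std::vector<Access> All; // what actually gets instrumented
  // Iterate from the end, as the pass does.
  for (auto It = Local.rbegin(); It != Local.rend(); ++It) {
    if (It->IsWrite)
      WriteTargets.insert(It->Addr);
    else if (WriteTargets.count(It->Addr))
      continue; // read before a write of the same address: skip it
    All.push_back(*It);
  }
  for (const Access &A : All)
    std::printf("instrument %s of %s\n", A.IsWrite ? "write" : "read",
                A.Addr.c_str());
}

Here the read of x is elided because x is written later in the same region, while the read of y is kept.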
for (auto &BB : F) { @@ -362,10 +359,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) { if (isa<MemIntrinsic>(Inst)) MemIntrinCalls.push_back(&Inst); HasCalls = true; - chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, + DL); } } - chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL); } // We have collected all loads and stores. @@ -375,14 +373,14 @@ bool ThreadSanitizer::runOnFunction(Function &F) { // Instrument memory accesses only if we want to report bugs in the function. if (ClInstrumentMemoryAccesses && SanitizeFunction) for (auto Inst : AllLoadsAndStores) { - Res |= instrumentLoadOrStore(Inst); + Res |= instrumentLoadOrStore(Inst, DL); } // Instrument atomic memory accesses in any case (they can be used to // implement synchronization). if (ClInstrumentAtomics) for (auto Inst : AtomicAccesses) { - Res |= instrumentAtomic(Inst); + Res |= instrumentAtomic(Inst, DL); } if (ClInstrumentMemIntrinsics && SanitizeFunction) @@ -406,13 +404,14 @@ bool ThreadSanitizer::runOnFunction(Function &F) { return Res; } -bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { +bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I, + const DataLayout &DL) { IRBuilder<> IRB(I); bool IsWrite = isa<StoreInst>(*I); Value *Addr = IsWrite ? cast<StoreInst>(I)->getPointerOperand() : cast<LoadInst>(I)->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; if (IsWrite && isVtableAccess(I)) { @@ -443,7 +442,7 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { ? cast<StoreInst>(I)->getAlignment() : cast<LoadInst>(I)->getAlignment(); Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType(); - const uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy); + const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); Value *OnAccessFunc = nullptr; if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0) OnAccessFunc = IsWrite ? 
TsanWrite[Idx] : TsanRead[Idx]; @@ -504,11 +503,11 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { // The following page contains more background information: // http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/ -bool ThreadSanitizer::instrumentAtomic(Instruction *I) { +bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { IRBuilder<> IRB(I); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { Value *Addr = LI->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; const size_t ByteSize = 1 << Idx; @@ -522,7 +521,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { Value *Addr = SI->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; const size_t ByteSize = 1 << Idx; @@ -536,7 +535,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { ReplaceInstWithInst(I, C); } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) { Value *Addr = RMWI->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; @@ -553,7 +552,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { ReplaceInstWithInst(I, C); } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) { Value *Addr = CASI->getPointerOperand(); - int Idx = getMemoryAccessFuncIndex(Addr); + int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; const size_t ByteSize = 1 << Idx; @@ -583,11 +582,12 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { return true; } -int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) { +int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr, + const DataLayout &DL) { Type *OrigPtrTy = Addr->getType(); Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); assert(OrigTy->isSized()); - uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy); + uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { NumAccessesWithBadSize++; diff --git a/lib/Transforms/ObjCARC/ARCInstKind.cpp b/lib/Transforms/ObjCARC/ARCInstKind.cpp index f1e9dce..72df9ab 100644 --- a/lib/Transforms/ObjCARC/ARCInstKind.cpp +++ b/lib/Transforms/ObjCARC/ARCInstKind.cpp @@ -168,6 +168,60 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { return ARCInstKind::CallOrUser; } +// A whitelist of intrinsics that we know do not use objc pointers or decrement +// ref counts. +static bool isInertIntrinsic(unsigned ID) { + // TODO: Make this into a covered switch. 
+ switch (ID) { + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + case Intrinsic::stacksave: + case Intrinsic::stackrestore: + case Intrinsic::vastart: + case Intrinsic::vacopy: + case Intrinsic::vaend: + case Intrinsic::objectsize: + case Intrinsic::prefetch: + case Intrinsic::stackprotector: + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + case Intrinsic::eh_typeid_for: + case Intrinsic::eh_dwarf_cfa: + case Intrinsic::eh_sjlj_lsda: + case Intrinsic::eh_sjlj_functioncontext: + case Intrinsic::init_trampoline: + case Intrinsic::adjust_trampoline: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + // Don't let dbg info affect our results. + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + // Short cut: Some intrinsics obviously don't use ObjC pointers. + return true; + default: + return false; + } +} + +// A whitelist of intrinsics that we know only use their pointer arguments and +// do not decrement ref counts. +static bool isUseOnlyIntrinsic(unsigned ID) { + // We are conservative and even though intrinsics are unlikely to touch + // reference counts, we white list them for safety. + // + // TODO: Expand this into a covered switch. There is a lot more here. + switch (ID) { + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: + return true; + default: + return false; + } +} + /// \brief Determine what kind of construct V is. ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) { if (const Instruction *I = dyn_cast<Instruction>(V)) { @@ -180,49 +234,23 @@ ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) { switch (I->getOpcode()) { case Instruction::Call: { const CallInst *CI = cast<CallInst>(I); - // Check for calls to special functions. + // See if we have a function that we know something about. if (const Function *F = CI->getCalledFunction()) { ARCInstKind Class = GetFunctionClass(F); if (Class != ARCInstKind::CallOrUser) return Class; - - // None of the intrinsic functions do objc_release. For intrinsics, the - // only question is whether or not they may be users. - switch (F->getIntrinsicID()) { - case Intrinsic::returnaddress: - case Intrinsic::frameaddress: - case Intrinsic::stacksave: - case Intrinsic::stackrestore: - case Intrinsic::vastart: - case Intrinsic::vacopy: - case Intrinsic::vaend: - case Intrinsic::objectsize: - case Intrinsic::prefetch: - case Intrinsic::stackprotector: - case Intrinsic::eh_return_i32: - case Intrinsic::eh_return_i64: - case Intrinsic::eh_typeid_for: - case Intrinsic::eh_dwarf_cfa: - case Intrinsic::eh_sjlj_lsda: - case Intrinsic::eh_sjlj_functioncontext: - case Intrinsic::init_trampoline: - case Intrinsic::adjust_trampoline: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - // Don't let dbg info affect our results. - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - // Short cut: Some intrinsics obviously don't use ObjC pointers. + unsigned ID = F->getIntrinsicID(); + if (isInertIntrinsic(ID)) return ARCInstKind::None; - default: - break; - } + if (isUseOnlyIntrinsic(ID)) + return ARCInstKind::User; } + + // Otherwise, be conservative. return GetCallSiteClass(CI); } case Instruction::Invoke: + // Otherwise, be conservative.
return GetCallSiteClass(cast<InvokeInst>(I)); case Instruction::BitCast: case Instruction::GetElementPtr: diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index e286dbc..87de33b 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -27,22 +27,22 @@ namespace llvm { namespace objcarc { +enum class ARCRuntimeEntryPointKind { + AutoreleaseRV, + Release, + Retain, + RetainBlock, + Autorelease, + StoreStrong, + RetainRV, + RetainAutorelease, + RetainAutoreleaseRV, +}; + /// Declarations for ObjC runtime functions and constants. These are initialized /// lazily to avoid cluttering up the Module with unused declarations. class ARCRuntimeEntryPoints { public: - enum EntryPointType { - EPT_AutoreleaseRV, - EPT_Release, - EPT_Retain, - EPT_RetainBlock, - EPT_Autorelease, - EPT_StoreStrong, - EPT_RetainRV, - EPT_RetainAutorelease, - EPT_RetainAutoreleaseRV - }; - ARCRuntimeEntryPoints() : TheModule(nullptr), AutoreleaseRV(nullptr), Release(nullptr), @@ -56,7 +56,7 @@ public: ~ARCRuntimeEntryPoints() { } - void Initialize(Module *M) { + void init(Module *M) { TheModule = M; AutoreleaseRV = nullptr; Release = nullptr; @@ -69,30 +69,30 @@ public: RetainAutoreleaseRV = nullptr; } - Constant *get(const EntryPointType entry) { + Constant *get(ARCRuntimeEntryPointKind kind) { assert(TheModule != nullptr && "Not initialized."); - switch (entry) { - case EPT_AutoreleaseRV: + switch (kind) { + case ARCRuntimeEntryPointKind::AutoreleaseRV: return getI8XRetI8XEntryPoint(AutoreleaseRV, "objc_autoreleaseReturnValue", true); - case EPT_Release: + case ARCRuntimeEntryPointKind::Release: return getVoidRetI8XEntryPoint(Release, "objc_release"); - case EPT_Retain: + case ARCRuntimeEntryPointKind::Retain: return getI8XRetI8XEntryPoint(Retain, "objc_retain", true); - case EPT_RetainBlock: + case ARCRuntimeEntryPointKind::RetainBlock: return getI8XRetI8XEntryPoint(RetainBlock, "objc_retainBlock", false); - case EPT_Autorelease: + case ARCRuntimeEntryPointKind::Autorelease: return getI8XRetI8XEntryPoint(Autorelease, "objc_autorelease", true); - case EPT_StoreStrong: + case ARCRuntimeEntryPointKind::StoreStrong: return getI8XRetI8XXI8XEntryPoint(StoreStrong, "objc_storeStrong"); - case EPT_RetainRV: + case ARCRuntimeEntryPointKind::RetainRV: return getI8XRetI8XEntryPoint(RetainRV, "objc_retainAutoreleasedReturnValue", true); - case EPT_RetainAutorelease: + case ARCRuntimeEntryPointKind::RetainAutorelease: return getI8XRetI8XEntryPoint(RetainAutorelease, "objc_retainAutorelease", true); - case EPT_RetainAutoreleaseRV: + case ARCRuntimeEntryPointKind::RetainAutoreleaseRV: return getI8XRetI8XEntryPoint(RetainAutoreleaseRV, "objc_retainAutoreleaseReturnValue", true); } diff --git a/lib/Transforms/ObjCARC/Android.mk b/lib/Transforms/ObjCARC/Android.mk index 97c5a9d..e120fbe 100644 --- a/lib/Transforms/ObjCARC/Android.mk +++ b/lib/Transforms/ObjCARC/Android.mk @@ -9,6 +9,7 @@ transforms_objcarc_SRC_FILES := \ ObjCARC.cpp \ ObjCARCExpand.cpp \ ObjCARCOpts.cpp \ + PtrState.cpp \ ProvenanceAnalysis.cpp \ ProvenanceAnalysisEvaluator.cpp diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h new file mode 100644 index 0000000..d6439b6 --- /dev/null +++ b/lib/Transforms/ObjCARC/BlotMapVector.h @@ -0,0 +1,108 @@ +//===- BlotMapVector.h - A MapVector with the blot operation -*- C++ -*----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the 
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include <vector> +#include <algorithm> + +namespace llvm { +/// \brief An associative container with fast insertion-order (deterministic) +/// iteration over its elements. Plus the special blot operation. +template <class KeyT, class ValueT> class BlotMapVector { + /// Map keys to indices in Vector. + typedef DenseMap<KeyT, size_t> MapTy; + MapTy Map; + + typedef std::vector<std::pair<KeyT, ValueT>> VectorTy; + /// Keys and values. + VectorTy Vector; + +public: + typedef typename VectorTy::iterator iterator; + typedef typename VectorTy::const_iterator const_iterator; + iterator begin() { return Vector.begin(); } + iterator end() { return Vector.end(); } + const_iterator begin() const { return Vector.begin(); } + const_iterator end() const { return Vector.end(); } + +#ifdef XDEBUG + ~BlotMapVector() { + assert(Vector.size() >= Map.size()); // May differ due to blotting. + for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); I != E; + ++I) { + assert(I->second < Vector.size()); + assert(Vector[I->second].first == I->first); + } + for (typename VectorTy::const_iterator I = Vector.begin(), E = Vector.end(); + I != E; ++I) + assert(!I->first || (Map.count(I->first) && + Map[I->first] == size_t(I - Vector.begin()))); + } +#endif + + ValueT &operator[](const KeyT &Arg) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(Arg, size_t(0))); + if (Pair.second) { + size_t Num = Vector.size(); + Pair.first->second = Num; + Vector.push_back(std::make_pair(Arg, ValueT())); + return Vector[Num].second; + } + return Vector[Pair.first->second].second; + } + + std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &InsertPair) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(InsertPair.first, size_t(0))); + if (Pair.second) { + size_t Num = Vector.size(); + Pair.first->second = Num; + Vector.push_back(InsertPair); + return std::make_pair(Vector.begin() + Num, true); + } + return std::make_pair(Vector.begin() + Pair.first->second, false); + } + + iterator find(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) + return Vector.end(); + return Vector.begin() + It->second; + } + + const_iterator find(const KeyT &Key) const { + typename MapTy::const_iterator It = Map.find(Key); + if (It == Map.end()) + return Vector.end(); + return Vector.begin() + It->second; + } + + /// This is similar to erase, but instead of removing the element from the + /// vector, it just zeros out the key in the vector. This leaves iterators + /// intact, but clients must be prepared for zeroed-out keys when iterating. 
+ void blot(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) + return; + Vector[It->second].first = KeyT(); + Map.erase(It); + } + + void clear() { + Map.clear(); + Vector.clear(); + } + + bool empty() const { + assert(Map.empty() == Vector.empty()); + return Map.empty(); + } +}; +} // diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt index 2adea88..fbcae29 100644 --- a/lib/Transforms/ObjCARC/CMakeLists.txt +++ b/lib/Transforms/ObjCARC/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMObjCARCOpts DependencyAnalysis.cpp ProvenanceAnalysis.cpp ProvenanceAnalysisEvaluator.cpp + PtrState.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 4985d0e..b197c97 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -53,10 +53,12 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, if (AliasAnalysis::onlyReadsMemory(MRB)) return false; if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + const DataLayout &DL = Inst->getModule()->getDataLayout(); for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) { const Value *Op = *I; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && + PA.related(Ptr, Op, DL)) return true; } return false; @@ -87,6 +89,8 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, if (Class == ARCInstKind::Call) return false; + const DataLayout &DL = Inst->getModule()->getDataLayout(); + // Consider various instructions which may have pointer arguments which are // not "uses". if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) { @@ -100,24 +104,26 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && + PA.related(Ptr, Op, DL)) return true; } return false; } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // Special-case stores, because we don't care about the stored value, just // the store address. - const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); + const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand(), DL); // If we can't tell what the underlying object was, assume there is a // dependence. - return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr); + return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && + PA.related(Op, Ptr, DL); } // Check each operand for a match. 
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op, DL)) return true; } return false; diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index df29f05..7595e2d 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -24,6 +24,7 @@ #define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ValueTracking.h" @@ -72,9 +73,10 @@ static inline bool ModuleHasARC(const Module &M) { /// \brief This is a wrapper around getUnderlyingObject which also knows how to /// look through objc_retain and objc_autorelease calls, which we know to return /// their argument verbatim. -static inline const Value *GetUnderlyingObjCPtr(const Value *V) { +static inline const Value *GetUnderlyingObjCPtr(const Value *V, + const DataLayout &DL) { for (;;) { - V = GetUnderlyingObject(V); + V = GetUnderlyingObject(V, DL); if (!IsForwarding(GetBasicARCInstKind(V))) break; V = cast<CallInst>(V)->getArgOperand(0); @@ -257,6 +259,55 @@ static inline bool IsObjCIdentifiedObject(const Value *V) { return false; } +enum class ARCMDKindID { + ImpreciseRelease, + CopyOnEscape, + NoObjCARCExceptions, +}; + +/// A cache of MDKinds used by various ARC optimizations. +class ARCMDKindCache { + Module *M; + + /// The Metadata Kind for clang.imprecise_release metadata. + llvm::Optional<unsigned> ImpreciseReleaseMDKind; + + /// The Metadata Kind for clang.arc.copy_on_escape metadata. + llvm::Optional<unsigned> CopyOnEscapeMDKind; + + /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. 
+ llvm::Optional<unsigned> NoObjCARCExceptionsMDKind; + +public: + void init(Module *Mod) { + M = Mod; + ImpreciseReleaseMDKind = NoneType::None; + CopyOnEscapeMDKind = NoneType::None; + NoObjCARCExceptionsMDKind = NoneType::None; + } + + unsigned get(ARCMDKindID ID) { + switch (ID) { + case ARCMDKindID::ImpreciseRelease: + if (!ImpreciseReleaseMDKind) + ImpreciseReleaseMDKind = + M->getContext().getMDKindID("clang.imprecise_release"); + return *ImpreciseReleaseMDKind; + case ARCMDKindID::CopyOnEscape: + if (!CopyOnEscapeMDKind) + CopyOnEscapeMDKind = + M->getContext().getMDKindID("clang.arc.copy_on_escape"); + return *CopyOnEscapeMDKind; + case ARCMDKindID::NoObjCARCExceptions: + if (!NoObjCARCExceptionsMDKind) + NoObjCARCExceptionsMDKind = + M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); + return *NoObjCARCExceptionsMDKind; + } + llvm_unreachable("Covered switch isn't covered?!"); + } +}; + } // end namespace objcarc } // end namespace llvm diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp index be291a0..b1515e3 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp @@ -46,6 +46,11 @@ ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { return new ObjCARCAliasAnalysis(); } +bool ObjCARCAliasAnalysis::doInitialization(Module &M) { + InitializeAliasAnalysis(this, &M.getDataLayout()); + return true; +} + void ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -69,8 +74,8 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA); - const Value *UB = GetUnderlyingObjCPtr(SB); + const Value *UA = GetUnderlyingObjCPtr(SA, *DL); + const Value *UB = GetUnderlyingObjCPtr(SB, *DL); if (UA != SA || UB != SB) { Result = AliasAnalysis::alias(Location(UA), Location(UB)); // We can't use MustAlias or PartialAlias results here because @@ -99,7 +104,7 @@ ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *U = GetUnderlyingObjCPtr(S); + const Value *U = GetUnderlyingObjCPtr(S, *DL); if (U != S) return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h index 3fcea4e..3c5a021 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h @@ -44,9 +44,7 @@ namespace objcarc { } private: - void initializePass() override { - InitializeAliasAnalysis(this); - } + bool doInitialization(Module &M) override; /// This method is used when a pass implements an analysis interface through /// multiple inheritance. 
If needed, it should override this to adjust the diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 6473d3a..2a3139f 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::objcarc; @@ -134,7 +135,7 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) { // We do not have to worry about tail calls/does not throw since // retain/retainRV have the same properties. - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_RetainRV); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV); cast<CallInst>(Retain)->setCalledFunction(Decl); DEBUG(dbgs() << "New: " << *Retain << "\n"); @@ -181,8 +182,8 @@ bool ObjCARCContract::contractAutorelease( " Retain: " << *Retain << "\n"); Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV - ? ARCRuntimeEntryPoints::EPT_RetainAutoreleaseRV - : ARCRuntimeEntryPoints::EPT_RetainAutorelease); + ? ARCRuntimeEntryPointKind::RetainAutoreleaseRV + : ARCRuntimeEntryPointKind::RetainAutorelease); Retain->setCalledFunction(Decl); DEBUG(dbgs() << " New RetainAutorelease: " << *Retain << "\n"); @@ -380,7 +381,7 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(Instruction *Release, Args[0] = new BitCastInst(Args[0], I8XX, "", Store); if (Args[1]->getType() != I8X) Args[1] = new BitCastInst(Args[1], I8X, "", Store); - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_StoreStrong); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong); CallInst *StoreStrong = CallInst::Create(Decl, Args, "", Store); StoreStrong->setDoesNotThrow(); StoreStrong->setDebugLoc(Store->getDebugLoc()); @@ -647,7 +648,7 @@ bool ObjCARCContract::doInitialization(Module &M) { if (!Run) return false; - EP.Initialize(&M); + EP.init(&M); // Initialize RetainRVMarker. RetainRVMarker = nullptr; diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index f55b77f..4d75658 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -26,9 +26,11 @@ #include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" +#include "BlotMapVector.h" #include "DependencyAnalysis.h" #include "ObjCARCAliasAnalysis.h" #include "ProvenanceAnalysis.h" +#include "PtrState.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -45,102 +47,6 @@ using namespace llvm::objcarc; #define DEBUG_TYPE "objc-arc-opts" -/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific. -/// @{ - -namespace { - /// \brief An associative container with fast insertion-order (deterministic) - /// iteration over its elements. Plus the special blot operation. - template<class KeyT, class ValueT> - class MapVector { - /// Map keys to indices in Vector. - typedef DenseMap<KeyT, size_t> MapTy; - MapTy Map; - - typedef std::vector<std::pair<KeyT, ValueT> > VectorTy; - /// Keys and values. 
- VectorTy Vector; - - public: - typedef typename VectorTy::iterator iterator; - typedef typename VectorTy::const_iterator const_iterator; - iterator begin() { return Vector.begin(); } - iterator end() { return Vector.end(); } - const_iterator begin() const { return Vector.begin(); } - const_iterator end() const { return Vector.end(); } - -#ifdef XDEBUG - ~MapVector() { - assert(Vector.size() >= Map.size()); // May differ due to blotting. - for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); - I != E; ++I) { - assert(I->second < Vector.size()); - assert(Vector[I->second].first == I->first); - } - for (typename VectorTy::const_iterator I = Vector.begin(), - E = Vector.end(); I != E; ++I) - assert(!I->first || - (Map.count(I->first) && - Map[I->first] == size_t(I - Vector.begin()))); - } -#endif - - ValueT &operator[](const KeyT &Arg) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(Arg, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(std::make_pair(Arg, ValueT())); - return Vector[Num].second; - } - return Vector[Pair.first->second].second; - } - - std::pair<iterator, bool> - insert(const std::pair<KeyT, ValueT> &InsertPair) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(InsertPair.first, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(InsertPair); - return std::make_pair(Vector.begin() + Num, true); - } - return std::make_pair(Vector.begin() + Pair.first->second, false); - } - - iterator find(const KeyT &Key) { - typename MapTy::iterator It = Map.find(Key); - if (It == Map.end()) return Vector.end(); - return Vector.begin() + It->second; - } - - const_iterator find(const KeyT &Key) const { - typename MapTy::const_iterator It = Map.find(Key); - if (It == Map.end()) return Vector.end(); - return Vector.begin() + It->second; - } - - /// This is similar to erase, but instead of removing the element from the - /// vector, it just zeros out the key in the vector. This leaves iterators - /// intact, but clients must be prepared for zeroed-out keys when iterating. - void blot(const KeyT &Key) { - typename MapTy::iterator It = Map.find(Key); - if (It == Map.end()) return; - Vector[It->second].first = KeyT(); - Map.erase(It); - } - - void clear() { - Map.clear(); - Vector.clear(); - } - }; -} - -/// @} -/// /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. /// @{ @@ -177,13 +83,14 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { /// This is a wrapper around getUnderlyingObjCPtr along the lines of /// GetUnderlyingObjects except that it returns early when it sees the first /// alloca. -static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) { +static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V, + const DataLayout &DL) { SmallPtrSet<const Value *, 4> Visited; SmallVector<const Value *, 4> Worklist; Worklist.push_back(V); do { const Value *P = Worklist.pop_back_val(); - P = GetUnderlyingObjCPtr(P); + P = GetUnderlyingObjCPtr(P, DL); if (isa<AllocaInst>(P)) return true; @@ -270,293 +177,6 @@ STATISTIC(NumReleasesAfterOpt, #endif namespace { - /// \enum Sequence - /// - /// \brief A sequence of states that a pointer may go through in which an - /// objc_retain and objc_release are actually needed. - enum Sequence { - S_None, - S_Retain, ///< objc_retain(x). 
- S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement. - S_Use, ///< any use of x. - S_Stop, ///< like S_Release, but code motion is stopped. - S_Release, ///< objc_release(x). - S_MovableRelease ///< objc_release(x), !clang.imprecise_release. - }; - - raw_ostream &operator<<(raw_ostream &OS, const Sequence S) - LLVM_ATTRIBUTE_UNUSED; - raw_ostream &operator<<(raw_ostream &OS, const Sequence S) { - switch (S) { - case S_None: - return OS << "S_None"; - case S_Retain: - return OS << "S_Retain"; - case S_CanRelease: - return OS << "S_CanRelease"; - case S_Use: - return OS << "S_Use"; - case S_Release: - return OS << "S_Release"; - case S_MovableRelease: - return OS << "S_MovableRelease"; - case S_Stop: - return OS << "S_Stop"; - } - llvm_unreachable("Unknown sequence type."); - } -} - -static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { - // The easy cases. - if (A == B) - return A; - if (A == S_None || B == S_None) - return S_None; - - if (A > B) std::swap(A, B); - if (TopDown) { - // Choose the side which is further along in the sequence. - if ((A == S_Retain || A == S_CanRelease) && - (B == S_CanRelease || B == S_Use)) - return B; - } else { - // Choose the side which is further along in the sequence. - if ((A == S_Use || A == S_CanRelease) && - (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease)) - return A; - // If both sides are releases, choose the more conservative one. - if (A == S_Stop && (B == S_Release || B == S_MovableRelease)) - return A; - if (A == S_Release && B == S_MovableRelease) - return A; - } - - return S_None; -} - -namespace { - /// \brief Unidirectional information about either a - /// retain-decrement-use-release sequence or release-use-decrement-retain - /// reverse sequence. - struct RRInfo { - /// After an objc_retain, the reference count of the referenced - /// object is known to be positive. Similarly, before an objc_release, the - /// reference count of the referenced object is known to be positive. If - /// there are retain-release pairs in code regions where the retain count - /// is known to be positive, they can be eliminated, regardless of any side - /// effects between them. - /// - /// Also, a retain+release pair nested within another retain+release - /// pair all on the known same pointer value can be eliminated, regardless - /// of any intervening side effects. - /// - /// KnownSafe is true when either of these conditions is satisfied. - bool KnownSafe; - - /// True of the objc_release calls are all marked with the "tail" keyword. - bool IsTailCallRelease; - - /// If the Calls are objc_release calls and they all have a - /// clang.imprecise_release tag, this is the metadata tag. - MDNode *ReleaseMetadata; - - /// For a top-down sequence, the set of objc_retains or - /// objc_retainBlocks. For bottom-up, the set of objc_releases. - SmallPtrSet<Instruction *, 2> Calls; - - /// The set of optimal insert positions for moving calls in the opposite - /// sequence. - SmallPtrSet<Instruction *, 2> ReverseInsertPts; - - /// If this is true, we cannot perform code motion but can still remove - /// retain/release pairs. - bool CFGHazardAfflicted; - - RRInfo() : - KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr), - CFGHazardAfflicted(false) {} - - void clear(); - - /// Conservatively merge the two RRInfo. Returns true if a partial merge has - /// occurred, false otherwise. 
- bool Merge(const RRInfo &Other); - - }; -} - -void RRInfo::clear() { - KnownSafe = false; - IsTailCallRelease = false; - ReleaseMetadata = nullptr; - Calls.clear(); - ReverseInsertPts.clear(); - CFGHazardAfflicted = false; -} - -bool RRInfo::Merge(const RRInfo &Other) { - // Conservatively merge the ReleaseMetadata information. - if (ReleaseMetadata != Other.ReleaseMetadata) - ReleaseMetadata = nullptr; - - // Conservatively merge the boolean state. - KnownSafe &= Other.KnownSafe; - IsTailCallRelease &= Other.IsTailCallRelease; - CFGHazardAfflicted |= Other.CFGHazardAfflicted; - - // Merge the call sets. - Calls.insert(Other.Calls.begin(), Other.Calls.end()); - - // Merge the insert point sets. If there are any differences, - // that makes this a partial merge. - bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size(); - for (Instruction *Inst : Other.ReverseInsertPts) - Partial |= ReverseInsertPts.insert(Inst).second; - return Partial; -} - -namespace { - /// \brief This class summarizes several per-pointer runtime properties which - /// are propogated through the flow graph. - class PtrState { - /// True if the reference count is known to be incremented. - bool KnownPositiveRefCount; - - /// True if we've seen an opportunity for partial RR elimination, such as - /// pushing calls into a CFG triangle or into one side of a CFG diamond. - bool Partial; - - /// The current position in the sequence. - unsigned char Seq : 8; - - /// Unidirectional information about the current sequence. - RRInfo RRI; - - public: - PtrState() : KnownPositiveRefCount(false), Partial(false), - Seq(S_None) {} - - - bool IsKnownSafe() const { - return RRI.KnownSafe; - } - - void SetKnownSafe(const bool NewValue) { - RRI.KnownSafe = NewValue; - } - - bool IsTailCallRelease() const { - return RRI.IsTailCallRelease; - } - - void SetTailCallRelease(const bool NewValue) { - RRI.IsTailCallRelease = NewValue; - } - - bool IsTrackingImpreciseReleases() const { - return RRI.ReleaseMetadata != nullptr; - } - - const MDNode *GetReleaseMetadata() const { - return RRI.ReleaseMetadata; - } - - void SetReleaseMetadata(MDNode *NewValue) { - RRI.ReleaseMetadata = NewValue; - } - - bool IsCFGHazardAfflicted() const { - return RRI.CFGHazardAfflicted; - } - - void SetCFGHazardAfflicted(const bool NewValue) { - RRI.CFGHazardAfflicted = NewValue; - } - - void SetKnownPositiveRefCount() { - DEBUG(dbgs() << "Setting Known Positive.\n"); - KnownPositiveRefCount = true; - } - - void ClearKnownPositiveRefCount() { - DEBUG(dbgs() << "Clearing Known Positive.\n"); - KnownPositiveRefCount = false; - } - - bool HasKnownPositiveRefCount() const { - return KnownPositiveRefCount; - } - - void SetSeq(Sequence NewSeq) { - DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n"); - Seq = NewSeq; - } - - Sequence GetSeq() const { - return static_cast<Sequence>(Seq); - } - - void ClearSequenceProgress() { - ResetSequenceProgress(S_None); - } - - void ResetSequenceProgress(Sequence NewSeq) { - DEBUG(dbgs() << "Resetting sequence progress.\n"); - SetSeq(NewSeq); - Partial = false; - RRI.clear(); - } - - void Merge(const PtrState &Other, bool TopDown); - - void InsertCall(Instruction *I) { - RRI.Calls.insert(I); - } - - void InsertReverseInsertPt(Instruction *I) { - RRI.ReverseInsertPts.insert(I); - } - - void ClearReverseInsertPts() { - RRI.ReverseInsertPts.clear(); - } - - bool HasReverseInsertPts() const { - return !RRI.ReverseInsertPts.empty(); - } - - const RRInfo &GetRRInfo() const { - return RRI; - } - }; -} - -void 
-PtrState::Merge(const PtrState &Other, bool TopDown) { - Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown); - KnownPositiveRefCount &= Other.KnownPositiveRefCount; - - // If we're not in a sequence (anymore), drop all associated state. - if (Seq == S_None) { - Partial = false; - RRI.clear(); - } else if (Partial || Other.Partial) { - // If we're doing a merge on a path that's previously seen a partial - // merge, conservatively drop the sequence, to avoid doing partial - // RR elimination. If the branch predicates for the two merge differ, - // mixing them is unsafe. - ClearSequenceProgress(); - } else { - // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this - // point, we know that currently we are not partial. Stash whether or not - // the merge operation caused us to undergo a partial merging of reverse - // insertion points. - Partial = RRI.Merge(Other.RRI); - } -} - -namespace { /// \brief Per-BasicBlock state. class BBState { /// The number of unique control paths from the entry which can reach this @@ -566,20 +186,18 @@ namespace { /// The number of unique control paths to exits from this block. unsigned BottomUpPathCount; - /// A type for PerPtrTopDown and PerPtrBottomUp. - typedef MapVector<const Value *, PtrState> MapTy; - /// The top-down traversal uses this to record information known about a /// pointer at the bottom of each block. - MapTy PerPtrTopDown; + BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown; /// The bottom-up traversal uses this to record information known about a /// pointer at the top of each block. - MapTy PerPtrBottomUp; + BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp; /// Effective predecessors of the current block ignoring ignorable edges and /// ignored backedges. SmallVector<BasicBlock *, 2> Preds; + /// Effective successors of the current block ignoring ignorable edges and /// ignored backedges. 
SmallVector<BasicBlock *, 2> Succs; @@ -589,26 +207,38 @@ namespace { BBState() : TopDownPathCount(0), BottomUpPathCount(0) { } - typedef MapTy::iterator ptr_iterator; - typedef MapTy::const_iterator ptr_const_iterator; + typedef decltype(PerPtrTopDown)::iterator top_down_ptr_iterator; + typedef decltype(PerPtrTopDown)::const_iterator const_top_down_ptr_iterator; - ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } - ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } - ptr_const_iterator top_down_ptr_begin() const { + top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } + top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } + const_top_down_ptr_iterator top_down_ptr_begin() const { return PerPtrTopDown.begin(); } - ptr_const_iterator top_down_ptr_end() const { + const_top_down_ptr_iterator top_down_ptr_end() const { return PerPtrTopDown.end(); } + bool hasTopDownPtrs() const { + return !PerPtrTopDown.empty(); + } + + typedef decltype(PerPtrBottomUp)::iterator bottom_up_ptr_iterator; + typedef decltype( + PerPtrBottomUp)::const_iterator const_bottom_up_ptr_iterator; - ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } - ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } - ptr_const_iterator bottom_up_ptr_begin() const { + bottom_up_ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } - ptr_const_iterator bottom_up_ptr_end() const { + bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } + const_bottom_up_ptr_iterator bottom_up_ptr_begin() const { + return PerPtrBottomUp.begin(); + } + const_bottom_up_ptr_iterator bottom_up_ptr_end() const { return PerPtrBottomUp.end(); } + bool hasBottomUpPtrs() const { + return !PerPtrBottomUp.empty(); + } /// Mark this block as being an entry block, which has one path from the /// entry by definition. @@ -621,20 +251,20 @@ namespace { /// Attempt to find the PtrState object describing the top down state for /// pointer Arg. Return a new initialized PtrState describing the top down /// state for Arg if we do not find one. - PtrState &getPtrTopDownState(const Value *Arg) { + TopDownPtrState &getPtrTopDownState(const Value *Arg) { return PerPtrTopDown[Arg]; } /// Attempt to find the PtrState object describing the bottom up state for /// pointer Arg. Return a new initialized PtrState describing the bottom up /// state for Arg if we do not find one. - PtrState &getPtrBottomUpState(const Value *Arg) { + BottomUpPtrState &getPtrBottomUpState(const Value *Arg) { return PerPtrBottomUp[Arg]; } /// Attempt to find the PtrState object describing the bottom up state for /// pointer Arg. - ptr_iterator findPtrBottomUpState(const Value *Arg) { + bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) { return PerPtrBottomUp.find(Arg); } @@ -685,6 +315,11 @@ namespace { const unsigned BBState::OverflowOccurredValue = 0xffffffff; } +namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, + BBState &BBState) LLVM_ATTRIBUTE_UNUSED; +} + void BBState::InitFromPred(const BBState &Other) { PerPtrTopDown = Other.PerPtrTopDown; TopDownPathCount = Other.TopDownPathCount; @@ -724,19 +359,18 @@ void BBState::MergePred(const BBState &Other) { // For each entry in the other set, if our set has an entry with the same key, // merge the entries. Otherwise, copy the entry and merge it with an empty // entry. 
- for (ptr_const_iterator MI = Other.top_down_ptr_begin(), - ME = Other.top_down_ptr_end(); MI != ME; ++MI) { - std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, + for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end(); + MI != ME; ++MI) { + auto Pair = PerPtrTopDown.insert(*MI); + Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second, /*TopDown=*/true); } // For each entry in our set, if the other set doesn't have an entry with the // same key, force it to merge with an empty entry. - for (ptr_iterator MI = top_down_ptr_begin(), - ME = top_down_ptr_end(); MI != ME; ++MI) + for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI) if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end()) - MI->second.Merge(PtrState(), /*TopDown=*/true); + MI->second.Merge(TopDownPtrState(), /*TopDown=*/true); } /// The bottom-up traversal uses this to merge information about successors to @@ -768,304 +402,80 @@ void BBState::MergeSucc(const BBState &Other) { // For each entry in the other set, if our set has an entry with the // same key, merge the entries. Otherwise, copy the entry and merge // it with an empty entry. - for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(), - ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) { - std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, + for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end(); + MI != ME; ++MI) { + auto Pair = PerPtrBottomUp.insert(*MI); + Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second, /*TopDown=*/false); } // For each entry in our set, if the other set doesn't have an entry // with the same key, force it to merge with an empty entry. - for (ptr_iterator MI = bottom_up_ptr_begin(), - ME = bottom_up_ptr_end(); MI != ME; ++MI) + for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME; + ++MI) if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end()) - MI->second.Merge(PtrState(), /*TopDown=*/false); + MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false); } -// Only enable ARC Annotations if we are building a debug version of -// libObjCARCOpts. -#ifndef NDEBUG -#define ARC_ANNOTATIONS -#endif - -// Define some macros along the lines of DEBUG and some helper functions to make -// it cleaner to create annotations in the source code and to no-op when not -// building in debug mode. -#ifdef ARC_ANNOTATIONS - -#include "llvm/Support/CommandLine.h" - -/// Enable/disable ARC sequence annotations. -static cl::opt<bool> -EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false), - cl::desc("Enable emission of arc data flow analysis " - "annotations")); -static cl::opt<bool> -DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false), - cl::desc("Disable check for cfg hazards when " - "annotating")); -static cl::opt<std::string> -ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier", - cl::init(""), - cl::desc("filter out all data flow annotations " - "but those that apply to the given " - "target llvm identifier.")); - -/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an -/// instruction so that we can track backwards when post processing via the llvm -/// arc annotation processor tool. 
If the function is an -static MDString *AppendMDNodeToSourcePtr(unsigned NodeId, - Value *Ptr) { - MDString *Hash = nullptr; - - // If pointer is a result of an instruction and it does not have a source - // MDNode it, attach a new MDNode onto it. If pointer is a result of - // an instruction and does have a source MDNode attached to it, return a - // reference to said Node. Otherwise just return 0. - if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) { - MDNode *Node; - if (!(Node = Inst->getMetadata(NodeId))) { - // We do not have any node. Generate and attatch the hash MDString to the - // instruction. - - // We just use an MDString to ensure that this metadata gets written out - // of line at the module level and to provide a very simple format - // encoding the information herein. Both of these makes it simpler to - // parse the annotations by a simple external program. - std::string Str; - raw_string_ostream os(Str); - os << "(" << Inst->getParent()->getParent()->getName() << ",%" - << Inst->getName() << ")"; - - Hash = MDString::get(Inst->getContext(), os.str()); - Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash)); - } else { - // We have a node. Grab its hash and return it. - assert(Node->getNumOperands() == 1 && - "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand."); - Hash = cast<MDString>(Node->getOperand(0)); +raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) { + // Dump the pointers we are tracking. + OS << " TopDown State:\n"; + if (!BBInfo.hasTopDownPtrs()) { + DEBUG(llvm::dbgs() << " NONE!\n"); + } else { + for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end(); + I != E; ++I) { + const PtrState &P = I->second; + OS << " Ptr: " << *I->first + << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") + << "\n ImpreciseRelease: " + << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" + << " HasCFGHazards: " + << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" + << " KnownPositive: " + << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" + << " Seq: " + << P.GetSeq() << "\n"; } - } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) { - std::string str; - raw_string_ostream os(str); - os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName() - << ")"; - Hash = MDString::get(Arg->getContext(), os.str()); - } - - return Hash; -} - -static std::string SequenceToString(Sequence A) { - std::string str; - raw_string_ostream os(str); - os << A; - return os.str(); -} - -/// Helper function to change a Sequence into a String object using our overload -/// for raw_ostream so we only have printing code in one location. -static MDString *SequenceToMDString(LLVMContext &Context, - Sequence A) { - return MDString::get(Context, SequenceToString(A)); -} - -/// A simple function to generate a MDNode which describes the change in state -/// for Value *Ptr caused by Instruction *Inst. -static void AppendMDNodeToInstForPtr(unsigned NodeId, - Instruction *Inst, - Value *Ptr, - MDString *PtrSourceMDNodeID, - Sequence OldSeq, - Sequence NewSeq) { - MDNode *Node = nullptr; - Metadata *tmp[3] = {PtrSourceMDNodeID, - SequenceToMDString(Inst->getContext(), OldSeq), - SequenceToMDString(Inst->getContext(), NewSeq)}; - Node = MDNode::get(Inst->getContext(), tmp); - - Inst->setMetadata(NodeId, Node); -} - -/// Add to the beginning of the basic block llvm.ptr.annotations which show the -/// state of a pointer at the entrance to a basic block. 
-static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, - Value *Ptr, Sequence Seq) { - // If we have a target identifier, make sure that we match it before - // continuing. - if(!ARCAnnotationTargetIdentifier.empty() && - !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) - return; - - Module *M = BB->getParent()->getParent(); - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - Type *Params[] = {I8XX, I8XX}; - FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params, - /*isVarArg=*/false); - Constant *Callee = M->getOrInsertFunction(Name, FTy); - - IRBuilder<> Builder(BB, BB->getFirstInsertionPt()); - - Value *PtrName; - StringRef Tmp = Ptr->getName(); - if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) { - Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, - Tmp + "_STR"); - PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, - cast<Constant>(ActualPtrName), Tmp); - } - - Value *S; - std::string SeqStr = SequenceToString(Seq); - if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) { - Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, - SeqStr + "_STR"); - S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, - cast<Constant>(ActualPtrName), SeqStr); - } - - Builder.CreateCall2(Callee, PtrName, S); -} - -/// Add to the end of the basic block llvm.ptr.annotations which show the state -/// of the pointer at the bottom of the basic block. -static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, - Value *Ptr, Sequence Seq) { - // If we have a target identifier, make sure that we match it before emitting - // an annotation. - if(!ARCAnnotationTargetIdentifier.empty() && - !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) - return; - - Module *M = BB->getParent()->getParent(); - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - Type *Params[] = {I8XX, I8XX}; - FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params, - /*isVarArg=*/false); - Constant *Callee = M->getOrInsertFunction(Name, FTy); - - IRBuilder<> Builder(BB, std::prev(BB->end())); - - Value *PtrName; - StringRef Tmp = Ptr->getName(); - if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) { - Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, - Tmp + "_STR"); - PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, - cast<Constant>(ActualPtrName), Tmp); } - Value *S; - std::string SeqStr = SequenceToString(Seq); - if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) { - Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, - SeqStr + "_STR"); - S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, - cast<Constant>(ActualPtrName), SeqStr); + OS << " BottomUp State:\n"; + if (!BBInfo.hasBottomUpPtrs()) { + DEBUG(llvm::dbgs() << " NONE!\n"); + } else { + for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end(); + I != E; ++I) { + const PtrState &P = I->second; + OS << " Ptr: " << *I->first + << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") + << "\n ImpreciseRelease: " + << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" + << " HasCFGHazards: " + << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" + << " KnownPositive: " + << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" + << " Seq: " + << P.GetSeq() << "\n"; + } } 
- Builder.CreateCall2(Callee, PtrName, S); -} -/// Adds a source annotation to pointer and a state change annotation to Inst -/// referencing the source annotation and the old/new state of pointer. -static void GenerateARCAnnotation(unsigned InstMDId, - unsigned PtrMDId, - Instruction *Inst, - Value *Ptr, - Sequence OldSeq, - Sequence NewSeq) { - if (EnableARCAnnotations) { - // If we have a target identifier, make sure that we match it before - // emitting an annotation. - if(!ARCAnnotationTargetIdentifier.empty() && - !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) - return; - - // First generate the source annotation on our pointer. This will return an - // MDString* if Ptr actually comes from an instruction implying we can put - // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL), - // then we know that our pointer is from an Argument so we put a reference - // to the argument number. - // - // The point of this is to make it easy for the - // llvm-arc-annotation-processor tool to cross reference where the source - // pointer is in the LLVM IR since the LLVM IR parser does not submit such - // information via debug info for backends to use (since why would anyone - // need such a thing from LLVM IR besides in non-standard cases - // [i.e. this]). - MDString *SourcePtrMDNode = - AppendMDNodeToSourcePtr(PtrMDId, Ptr); - AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq, - NewSeq); - } + return OS; } -// The actual interface for accessing the above functionality is defined via -// some simple macros which are defined below. We do this so that the user does -// not need to pass in what metadata id is needed resulting in cleaner code and -// additionally since it provides an easy way to conditionally no-op all -// annotation support in a non-debug build. - -/// Use this macro to annotate a sequence state change when processing -/// instructions bottom up, -#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \ - GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \ - ARCAnnotationProvenanceSourceMDKind, (inst), \ - const_cast<Value*>(ptr), (old), (new)) -/// Use this macro to annotate a sequence state change when processing -/// instructions top down. -#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \ - GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \ - ARCAnnotationProvenanceSourceMDKind, (inst), \ - const_cast<Value*>(ptr), (old), (new)) - -#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \ - do { \ - if (EnableARCAnnotations) { \ - for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \ - E = (_states)._direction##_ptr_end(); I != E; ++I) { \ - Value *Ptr = const_cast<Value*>(I->first); \ - Sequence Seq = I->second.GetSeq(); \ - GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \ - } \ - } \ - } while (0) - -#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \ - Entrance, bottom_up) -#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \ - Terminator, bottom_up) -#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \ - Entrance, top_down) -#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \ - Terminator, top_down) - -#else // !ARC_ANNOTATION -// If annotations are off, noop. 
-#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) -#define ANNOTATE_TOPDOWN(inst, ptr, old, new) -#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock) -#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock) -#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock) -#define ANNOTATE_TOPDOWN_BBEND(states, basicblock) -#endif // !ARC_ANNOTATION - namespace { + /// \brief The main ARC optimization pass. class ObjCARCOpt : public FunctionPass { bool Changed; ProvenanceAnalysis PA; + + /// A cache of references to runtime entry point constants. ARCRuntimeEntryPoints EP; + /// A cache of MDKinds that can be passed into other functions to propagate + /// MDKind identifiers. + ARCMDKindCache MDKindCache; + // This is used to track if a pointer is stored into an alloca. DenseSet<const Value *> MultiOwnersSet; @@ -1076,24 +486,6 @@ namespace { /// is in fact used in the current function. unsigned UsedInThisFunction; - /// The Metadata Kind for clang.imprecise_release metadata. - unsigned ImpreciseReleaseMDKind; - - /// The Metadata Kind for clang.arc.copy_on_escape metadata. - unsigned CopyOnEscapeMDKind; - - /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. - unsigned NoObjCARCExceptionsMDKind; - -#ifdef ARC_ANNOTATIONS - /// The Metadata Kind for llvm.arc.annotation.bottomup metadata. - unsigned ARCAnnotationBottomUpMDKind; - /// The Metadata Kind for llvm.arc.annotation.topdown metadata. - unsigned ARCAnnotationTopDownMDKind; - /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata. - unsigned ARCAnnotationProvenanceSourceMDKind; -#endif // ARC_ANNOATIONS - bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, ARCInstKind &Class); @@ -1102,47 +494,41 @@ namespace { void CheckForCFGHazards(const BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, BBState &MyStates) const; - bool VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, + bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB, + BlotMapVector<Value *, RRInfo> &Retains, BBState &MyStates); bool VisitBottomUp(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains); + BlotMapVector<Value *, RRInfo> &Retains); bool VisitInstructionTopDown(Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates); bool VisitTopDown(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, DenseMap<Value *, RRInfo> &Releases); - bool Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, + bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases); void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, + BlotMapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts, - Module *M); - - bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M, - SmallVectorImpl<Instruction *> &NewRetains, - SmallVectorImpl<Instruction *> &NewReleases, - SmallVectorImpl<Instruction *> &DeadInsts, - RRInfo &RetainsToMove, - RRInfo &ReleasesToMove, - Value *Arg, - bool KnownSafe, - bool &AnyPairsCompletelyEliminated); + SmallVectorImpl<Instruction *> &DeadInsts, Module 
*M); + + bool + PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, Module *M, + SmallVectorImpl<Instruction *> &NewRetains, + SmallVectorImpl<Instruction *> &NewReleases, + SmallVectorImpl<Instruction *> &DeadInsts, + RRInfo &RetainsToMove, RRInfo &ReleasesToMove, + Value *Arg, bool KnownSafe, + bool &AnyPairsCompletelyEliminated); bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M); + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, Module *M); void OptimizeWeakCalls(Function &F); @@ -1238,7 +624,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { "objc_retain since the operand is not a return value.\n" "Old = " << *RetainRV << "\n"); - Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Retain); + Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain); cast<CallInst>(RetainRV)->setCalledFunction(NewDecl); DEBUG(dbgs() << "New = " << *RetainRV << "\n"); @@ -1274,7 +660,7 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, "Old = " << *AutoreleaseRV << "\n"); CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV); - Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Autorelease); + Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease); AutoreleaseRVCI->setCalledFunction(NewDecl); AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. Class = ARCInstKind::Autorelease; @@ -1380,10 +766,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // Create the declaration lazily. LLVMContext &C = Inst->getContext(); - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release); CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "", Call); - NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None)); + NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), + MDNode::get(C, None)); DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " "since x is otherwise unused.\nOld: " << *Call << "\nNew: " @@ -1547,7 +934,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { /// no CFG hazards by checking the states of various bottom up pointers. static void CheckForUseCFGHazard(const Sequence SuccSSeq, const bool SuccSRRIKnownSafe, - PtrState &S, + TopDownPtrState &S, bool &SomeSuccHasSame, bool &AllSuccsHaveSame, bool &NotAllSeqEqualButKnownSafe, @@ -1585,7 +972,7 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq, /// pointers. static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, const bool SuccSRRIKnownSafe, - PtrState &S, + TopDownPtrState &S, bool &SomeSuccHasSame, bool &AllSuccsHaveSame, bool &NotAllSeqEqualButKnownSafe) { @@ -1618,9 +1005,9 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, BBState &MyStates) const { // If any top-down local-use or possible-dec has a succ which is earlier in // the sequence, forget it. - for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), - E = MyStates.top_down_ptr_end(); I != E; ++I) { - PtrState &S = I->second; + for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end(); + I != E; ++I) { + TopDownPtrState &S = I->second; const Sequence Seq = I->second.GetSeq(); // We only care about S_Retain, S_CanRelease, and S_Use. 
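Taken together, the ARCRuntimeEntryPointKind and ARCMDKindCache changes above reduce each rewrite site to one lazy-lookup pattern. A minimal sketch of that pattern, assuming EP.init(&M) and MDKindCache.init(&M) have already run for the current module, and that Call is some autorelease call being turned into a release (as in the OptimizeIndividualCalls hunk above); this is an illustration, not additional patch content:

  // Lazily get (or create) the objc_release declaration for this module.
  Constant *ReleaseDecl = EP.get(ARCRuntimeEntryPointKind::Release);

  // Build the replacement call and tag it as an imprecise release; the
  // metadata kind id is looked up once and then cached by ARCMDKindCache.
  LLVMContext &C = Call->getContext();
  CallInst *NewCall =
      CallInst::Create(ReleaseDecl, Call->getArgOperand(0), "", Call);
  NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
                       MDNode::get(C, None));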
@@ -1646,7 +1033,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, const DenseMap<const BasicBlock *, BBState>::iterator BBI = BBStates.find(*SI); assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); const Sequence SuccSSeq = SuccS.GetSeq(); // If bottom up, the pointer is in an S_None state, clear the sequence @@ -1705,44 +1092,21 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, } } -bool -ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, - BBState &MyStates) { +bool ObjCARCOpt::VisitInstructionBottomUp( + Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains, + BBState &MyStates) { bool NestingDetected = false; ARCInstKind Class = GetARCInstKind(Inst); const Value *Arg = nullptr; - DEBUG(dbgs() << "Class: " << Class << "\n"); + DEBUG(dbgs() << " Class: " << Class << "\n"); switch (Class) { case ARCInstKind::Release: { Arg = GetArgRCIdentityRoot(Inst); - PtrState &S = MyStates.getPtrBottomUpState(Arg); - - // If we see two releases in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second release, which may allow us to - // eliminate the first release too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. - if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) { - DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n"); - NestingDetected = true; - } - - MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release; - ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq); - S.ResetSequenceProgress(NewSeq); - S.SetReleaseMetadata(ReleaseMetadata); - S.SetKnownSafe(S.HasKnownPositiveRefCount()); - S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall()); - S.InsertCall(Inst); - S.SetKnownPositiveRefCount(); + BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg); + NestingDetected |= S.InitBottomUp(MDKindCache, Inst); break; } case ARCInstKind::RetainBlock: @@ -1753,35 +1117,16 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case ARCInstKind::Retain: case ARCInstKind::RetainRV: { Arg = GetArgRCIdentityRoot(Inst); - - PtrState &S = MyStates.getPtrBottomUpState(Arg); - S.SetKnownPositiveRefCount(); - - Sequence OldSeq = S.GetSeq(); - switch (OldSeq) { - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an - // imprecise release, clear our reverse insertion points. - if (OldSeq != S_Use || S.IsTrackingImpreciseReleases()) - S.ClearReverseInsertPts(); - // FALL THROUGH - case S_CanRelease: - // Don't do retain+release tracking for ARCInstKind::RetainRV, - // because it's - // better to let it remain as the first instruction after a call. - if (Class != ARCInstKind::RetainRV) + BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg); + if (S.MatchWithRetain()) { + // Don't do retain+release tracking for ARCInstKind::RetainRV, because + // it's better to let it remain as the first instruction after a call. 
+ if (Class != ARCInstKind::RetainRV) { + DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n"); Retains[Inst] = S.GetRRInfo(); + } S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); } - ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq()); // A retain moving bottom up can be a use. break; } @@ -1807,9 +1152,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // in the presence of allocas we only unconditionally remove pointers if // both our retain and our release are KnownSafe. if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) { - BBState::ptr_iterator I = MyStates.findPtrBottomUpState( - GetRCIdentityRoot(SI->getValueOperand())); + const DataLayout &DL = BB->getModule()->getDataLayout(); + if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand(), DL)) { + auto I = MyStates.findPtrBottomUpState( + GetRCIdentityRoot(SI->getValueOperand())); if (I != MyStates.bottom_up_ptr_end()) MultiOwnersSet.insert(I->first); } @@ -1821,90 +1167,26 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // Consider any other possible effects of this instruction on each // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), - ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { + for (auto MI = MyStates.bottom_up_ptr_begin(), + ME = MyStates.bottom_up_ptr_end(); + MI != ME; ++MI) { const Value *Ptr = MI->first; if (Ptr == Arg) continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); + BottomUpPtrState &S = MI->second; - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr - << "\n"); - S.ClearKnownPositiveRefCount(); - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq()); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } + if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class)) + continue; - // Check for possible direct uses. - switch (Seq) { - case S_Release: - case S_MovableRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr - << "\n"); - assert(!S.HasReverseInsertPts()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. - if (isa<InvokeInst>(Inst)) - S.InsertReverseInsertPt(BB->getFirstInsertionPt()); - else - S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); - S.SetSeq(S_Use); - ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); - } else if (Seq == S_Release && IsUser(Class)) { - DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr - << "\n"); - // Non-movable releases depend on any possible objc pointer use. - S.SetSeq(S_Stop); - ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop); - assert(!S.HasReverseInsertPts()); - // As above; handle invoke specially. 
- if (isa<InvokeInst>(Inst)) - S.InsertReverseInsertPt(BB->getFirstInsertionPt()); - else - S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); - } - break; - case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr - << "\n"); - S.SetSeq(S_Use); - ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); - } - break; - case S_CanRelease: - case S_Use: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } + S.HandlePotentialUse(BB, Inst, Ptr, PA, Class); } return NestingDetected; } -bool -ObjCARCOpt::VisitBottomUp(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains) { +bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains) { DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n"); @@ -1929,9 +1211,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, } } - // If ARC Annotations are enabled, output the current state of pointers at the - // bottom of the basic block. - ANNOTATE_BOTTOMUP_BBEND(MyStates, BB); + DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n" + << "Performing Dataflow:\n"); // Visit all the instructions, bottom-up. for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { @@ -1941,7 +1222,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (isa<InvokeInst>(Inst)) continue; - DEBUG(dbgs() << "Visiting " << *Inst << "\n"); + DEBUG(dbgs() << " Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); } @@ -1956,9 +1237,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); } - // If ARC Annotations are enabled, output the current state of pointers at the - // top of the basic block. - ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB); + DEBUG(llvm::dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n"); return NestingDetected; } @@ -1971,144 +1250,63 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, ARCInstKind Class = GetARCInstKind(Inst); const Value *Arg = nullptr; + DEBUG(llvm::dbgs() << " Class: " << Class << "\n"); + switch (Class) { case ARCInstKind::RetainBlock: // In OptimizeIndividualCalls, we have strength reduced all optimizable // objc_retainBlocks to objc_retains. Thus at this point any - // objc_retainBlocks that we see are not optimizable. + // objc_retainBlocks that we see are not optimizable. We need to break since + // a retain can be a potential use. break; case ARCInstKind::Retain: case ARCInstKind::RetainRV: { Arg = GetArgRCIdentityRoot(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - - // Don't do retain+release tracking for ARCInstKind::RetainRV, because - // it's - // better to let it remain as the first instruction after a call. - if (Class != ARCInstKind::RetainRV) { - // If we see two retains in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second retain, which may allow us to - // eliminate the first retain too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. 
- if (S.GetSeq() == S_Retain) - NestingDetected = true; - - ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain); - S.ResetSequenceProgress(S_Retain); - S.SetKnownSafe(S.HasKnownPositiveRefCount()); - S.InsertCall(Inst); - } - - S.SetKnownPositiveRefCount(); - + TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); + NestingDetected |= S.InitTopDown(Class, Inst); // A retain can be a potential use; procede to the generic checking // code below. break; } case ARCInstKind::Release: { Arg = GetArgRCIdentityRoot(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - S.ClearKnownPositiveRefCount(); - - Sequence OldSeq = S.GetSeq(); - - MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - - switch (OldSeq) { - case S_Retain: - case S_CanRelease: - if (OldSeq == S_Retain || ReleaseMetadata != nullptr) - S.ClearReverseInsertPts(); - // FALL THROUGH - case S_Use: - S.SetReleaseMetadata(ReleaseMetadata); - S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall()); + TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); + // Try to form a tentative pair in between this release instruction and the + // top down pointers that we are tracking. + if (S.MatchWithRelease(MDKindCache, Inst)) { + // If we succeed, copy S's RRInfo into the Release -> {Retain Set + // Map}. Then we clear S. + DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n"); Releases[Inst] = S.GetRRInfo(); - ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None); S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); } break; } case ARCInstKind::AutoreleasepoolPop: // Conservatively, clear MyStates for all known pointers. MyStates.clearTopDownPointers(); - return NestingDetected; + return false; case ARCInstKind::AutoreleasepoolPush: case ARCInstKind::None: - // These are irrelevant. - return NestingDetected; + // These can not be uses of + return false; default: break; } // Consider any other possible effects of this instruction on each // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), - ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { + for (auto MI = MyStates.top_down_ptr_begin(), + ME = MyStates.top_down_ptr_end(); + MI != ME; ++MI) { const Value *Ptr = MI->first; if (Ptr == Arg) continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr - << "\n"); - S.ClearKnownPositiveRefCount(); - switch (Seq) { - case S_Retain: - S.SetSeq(S_CanRelease); - ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease); - assert(!S.HasReverseInsertPts()); - S.InsertReverseInsertPt(Inst); - - // One call can't cause a transition from S_Retain to S_CanRelease - // and S_CanRelease to S_Use. If we've made the first transition, - // we're done. - continue; - case S_Use: - case S_CanRelease: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } + TopDownPtrState &S = MI->second; + if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class)) + continue; - // Check for possible direct uses. 
- switch (Seq) { - case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr - << "\n"); - S.SetSeq(S_Use); - ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use); - } - break; - case S_Retain: - case S_Use: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } + S.HandlePotentialUse(Inst, Ptr, PA, Class); } return NestingDetected; @@ -2140,27 +1338,22 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, } } - // If ARC Annotations are enabled, output the current state of pointers at the - // top of the basic block. - ANNOTATE_TOPDOWN_BBSTART(MyStates, BB); + DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n" + << "Performing Dataflow:\n"); // Visit all the instructions, top-down. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; - DEBUG(dbgs() << "Visiting " << *Inst << "\n"); + DEBUG(dbgs() << " Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); } - // If ARC Annotations are enabled, output the current state of pointers at the - // bottom of the basic block. - ANNOTATE_TOPDOWN_BBEND(MyStates, BB); - -#ifdef ARC_ANNOTATIONS - if (!(EnableARCAnnotations && DisableCheckForCFGHazards)) -#endif + DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n" + << BBStates[BB] << "\n\n"); CheckForCFGHazards(BB, BBStates, MyStates); + DEBUG(llvm::dbgs() << "Final State:\n" << BBStates[BB] << "\n"); return NestingDetected; } @@ -2246,11 +1439,10 @@ ComputePostOrders(Function &F, } // Visit the function both top-down and bottom-up. -bool -ObjCARCOpt::Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { +bool ObjCARCOpt::Visit(Function &F, + DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases) { // Use reverse-postorder traversals, because we magically know that loops // will be well behaved, i.e. they won't repeatedly call retain on a single @@ -2260,7 +1452,7 @@ ObjCARCOpt::Visit(Function &F, SmallVector<BasicBlock *, 16> PostOrder; SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; ComputePostOrders(F, PostOrder, ReverseCFGPostOrder, - NoObjCARCExceptionsMDKind, + MDKindCache.get(ARCMDKindID::NoObjCARCExceptions), BBStates); // Use reverse-postorder on the reverse CFG for bottom-up. @@ -2281,10 +1473,9 @@ ObjCARCOpt::Visit(Function &F, } /// Move the calls in RetainsToMove and ReleasesToMove. -void ObjCARCOpt::MoveCalls(Value *Arg, - RRInfo &RetainsToMove, +void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, + BlotMapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, SmallVectorImpl<Instruction *> &DeadInsts, Module *M) { @@ -2297,7 +1488,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) { Value *MyArg = ArgTy == ParamTy ? 
Arg : new BitCastInst(Arg, ParamTy, "", InsertPt); - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt); Call->setDoesNotThrow(); Call->setTailCall(); @@ -2308,11 +1499,11 @@ void ObjCARCOpt::MoveCalls(Value *Arg, for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) { Value *MyArg = ArgTy == ParamTy ? Arg : new BitCastInst(Arg, ParamTy, "", InsertPt); - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release); CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt); // Attach a clang.imprecise_release metadata tag, if appropriate. if (MDNode *M = ReleasesToMove.ReleaseMetadata) - Call->setMetadata(ImpreciseReleaseMDKind, M); + Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M); Call->setDoesNotThrow(); if (ReleasesToMove.IsTailCallRelease) Call->setTailCall(); @@ -2335,20 +1526,15 @@ void ObjCARCOpt::MoveCalls(Value *Arg, } -bool -ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> - &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M, - SmallVectorImpl<Instruction *> &NewRetains, - SmallVectorImpl<Instruction *> &NewReleases, - SmallVectorImpl<Instruction *> &DeadInsts, - RRInfo &RetainsToMove, - RRInfo &ReleasesToMove, - Value *Arg, - bool KnownSafe, - bool &AnyPairsCompletelyEliminated) { +bool ObjCARCOpt::PairUpRetainsAndReleases( + DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, Module *M, + SmallVectorImpl<Instruction *> &NewRetains, + SmallVectorImpl<Instruction *> &NewReleases, + SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove, + RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe, + bool &AnyPairsCompletelyEliminated) { // If a pair happens in a region where it is known that the reference count // is already incremented, we can similarly ignore possible decrements unless // we are dealing with a retainable object with multiple provenance sources. 
@@ -2369,15 +1555,14 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> for (SmallVectorImpl<Instruction *>::const_iterator NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { Instruction *NewRetain = *NI; - MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain); + auto It = Retains.find(NewRetain); assert(It != Retains.end()); const RRInfo &NewRetainRRI = It->second; KnownSafeTD &= NewRetainRRI.KnownSafe; MultipleOwners = MultipleOwners || MultiOwnersSet.count(GetArgRCIdentityRoot(NewRetain)); for (Instruction *NewRetainRelease : NewRetainRRI.Calls) { - DenseMap<Value *, RRInfo>::const_iterator Jt = - Releases.find(NewRetainRelease); + auto Jt = Releases.find(NewRetainRelease); if (Jt == Releases.end()) return false; const RRInfo &NewRetainReleaseRRI = Jt->second; @@ -2446,15 +1631,13 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> for (SmallVectorImpl<Instruction *>::const_iterator NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) { Instruction *NewRelease = *NI; - DenseMap<Value *, RRInfo>::const_iterator It = - Releases.find(NewRelease); + auto It = Releases.find(NewRelease); assert(It != Releases.end()); const RRInfo &NewReleaseRRI = It->second; KnownSafeBU &= NewReleaseRRI.KnownSafe; CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted; for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) { - MapVector<Value *, RRInfo>::const_iterator Jt = - Retains.find(NewReleaseRetain); + auto Jt = Retains.find(NewReleaseRetain); if (Jt == Retains.end()) return false; const RRInfo &NewReleaseRetainRRI = Jt->second; @@ -2506,11 +1689,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (NewRetains.empty()) break; } - // If the pointer is known incremented in 1 direction and we do not have - // MultipleOwners, we can safely remove the retain/releases. Otherwise we need - // to be known safe in both directions. - bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) || - ((KnownSafeTD || KnownSafeBU) && !MultipleOwners); + // We can only remove pointers if we are known safe in both directions. + bool UnconditionallySafe = KnownSafeTD && KnownSafeBU; if (UnconditionallySafe) { RetainsToMove.ReverseInsertPts.clear(); ReleasesToMove.ReverseInsertPts.clear(); @@ -2540,12 +1720,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (OldDelta != 0) return false; -#ifdef ARC_ANNOTATIONS - // Do not move calls if ARC annotations are requested. - if (EnableARCAnnotations) - return false; -#endif // ARC_ANNOTATIONS - Changed = true; assert(OldCount != 0 && "Unreachable code?"); NumRRs += OldCount - NewCount; @@ -2558,12 +1732,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> /// Identify pairings between the retains and releases, and delete and/or move /// them. -bool -ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> - &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M) { +bool ObjCARCOpt::PerformCodePlacement( + DenseMap<const BasicBlock *, BBState> &BBStates, + BlotMapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, Module *M) { DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n"); bool AnyPairsCompletelyEliminated = false; @@ -2574,8 +1746,9 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> SmallVector<Instruction *, 8> DeadInsts; // Visit each retain. 
- for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), - E = Retains.end(); I != E; ++I) { + for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), + E = Retains.end(); + I != E; ++I) { Value *V = I->first; if (!V) continue; // blotted @@ -2602,11 +1775,10 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> // Connect the dots between the top-down-collected RetainsToMove and // bottom-up-collected ReleasesToMove to form sets of related calls. NewRetains.push_back(Retain); - bool PerformMoveCalls = - ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains, - NewReleases, DeadInsts, RetainsToMove, - ReleasesToMove, Arg, KnownSafe, - AnyPairsCompletelyEliminated); + bool PerformMoveCalls = PairUpRetainsAndReleases( + BBStates, Retains, Releases, M, NewRetains, NewReleases, DeadInsts, + RetainsToMove, ReleasesToMove, Arg, KnownSafe, + AnyPairsCompletelyEliminated); if (PerformMoveCalls) { // Ok, everything checks out and we're all set. Let's move/delete some @@ -2678,7 +1850,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { Changed = true; // If the load has a builtin retain, insert a plain retain for it. if (Class == ARCInstKind::LoadWeakRetained) { - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call); CI->setTailCall(); } @@ -2707,7 +1879,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { Changed = true; // If the load has a builtin retain, insert a plain retain for it. if (Class == ARCInstKind::LoadWeakRetained) { - Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain); + Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call); CI->setTailCall(); } @@ -2795,7 +1967,7 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) { // map stays valid when we get around to rewriting code and calls get // replaced by arguments. DenseMap<Value *, RRInfo> Releases; - MapVector<Value *, RRInfo> Retains; + BlotMapVector<Value *, RRInfo> Retains; // This is used during the traversal of the function to track the // states for each identified object at each block. @@ -2828,8 +2000,7 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain, if (DepInsts.size() != 1) return false; - CallInst *Call = - dyn_cast_or_null<CallInst>(*DepInsts.begin()); + auto *Call = dyn_cast_or_null<CallInst>(*DepInsts.begin()); // Check that the pointer is the return value of the call. if (!Call || Arg != Call) @@ -2857,8 +2028,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, if (DepInsts.size() != 1) return nullptr; - CallInst *Retain = - dyn_cast_or_null<CallInst>(*DepInsts.begin()); + auto *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin()); // Check that we found a retain with the same argument. if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) || @@ -2883,8 +2053,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, if (DepInsts.size() != 1) return nullptr; - CallInst *Autorelease = - dyn_cast_or_null<CallInst>(*DepInsts.begin()); + auto *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin()); if (!Autorelease) return nullptr; ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease); @@ -2999,28 +2168,13 @@ bool ObjCARCOpt::doInitialization(Module &M) { if (!Run) return false; - // Identify the imprecise release metadata kind. 
- ImpreciseReleaseMDKind = - M.getContext().getMDKindID("clang.imprecise_release"); - CopyOnEscapeMDKind = - M.getContext().getMDKindID("clang.arc.copy_on_escape"); - NoObjCARCExceptionsMDKind = - M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); -#ifdef ARC_ANNOTATIONS - ARCAnnotationBottomUpMDKind = - M.getContext().getMDKindID("llvm.arc.annotation.bottomup"); - ARCAnnotationTopDownMDKind = - M.getContext().getMDKindID("llvm.arc.annotation.topdown"); - ARCAnnotationProvenanceSourceMDKind = - M.getContext().getMDKindID("llvm.arc.annotation.provenancesource"); -#endif // ARC_ANNOTATIONS - // Intuitively, objc_retain and others are nocapture, however in practice // they are not, because they return their argument value. And objc_release // calls finalizers which can have arbitrary side effects. + MDKindCache.init(&M); // Initialize our runtime entry point cache. - EP.Initialize(&M); + EP.init(&M); return false; } diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp index 410abfc..15ad8dc 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -32,20 +32,22 @@ using namespace llvm::objcarc; bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { + const DataLayout &DL = A->getModule()->getDataLayout(); // If the values are Selects with the same condition, we can do a more precise // check: just check for relations between the values on corresponding arms. if (const SelectInst *SB = dyn_cast<SelectInst>(B)) if (A->getCondition() == SB->getCondition()) - return related(A->getTrueValue(), SB->getTrueValue()) || - related(A->getFalseValue(), SB->getFalseValue()); + return related(A->getTrueValue(), SB->getTrueValue(), DL) || + related(A->getFalseValue(), SB->getFalseValue(), DL); // Check both arms of the Select node individually. - return related(A->getTrueValue(), B) || - related(A->getFalseValue(), B); + return related(A->getTrueValue(), B, DL) || + related(A->getFalseValue(), B, DL); } bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { + const DataLayout &DL = A->getModule()->getDataLayout(); // If the values are PHIs in the same block, we can do a more precise as well // as efficient check: just check for relations between the values on // corresponding edges. @@ -53,7 +55,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A, if (PNB->getParent() == A->getParent()) { for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) if (related(A->getIncomingValue(i), - PNB->getIncomingValueForBlock(A->getIncomingBlock(i)))) + PNB->getIncomingValueForBlock(A->getIncomingBlock(i)), DL)) return true; return false; } @@ -62,7 +64,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A, SmallPtrSet<const Value *, 4> UniqueSrc; for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) { const Value *PV1 = A->getIncomingValue(i); - if (UniqueSrc.insert(PV1).second && related(PV1, B)) + if (UniqueSrc.insert(PV1).second && related(PV1, B, DL)) return true; } @@ -103,11 +105,11 @@ static bool IsStoredObjCPointer(const Value *P) { return false; } -bool ProvenanceAnalysis::relatedCheck(const Value *A, - const Value *B) { +bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B, + const DataLayout &DL) { // Skip past provenance pass-throughs. - A = GetUnderlyingObjCPtr(A); - B = GetUnderlyingObjCPtr(B); + A = GetUnderlyingObjCPtr(A, DL); + B = GetUnderlyingObjCPtr(B, DL); // Quick check. 
if (A == B) @@ -159,8 +161,8 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, return true; } -bool ProvenanceAnalysis::related(const Value *A, - const Value *B) { +bool ProvenanceAnalysis::related(const Value *A, const Value *B, + const DataLayout &DL) { // Begin by inserting a conservative value into the map. If the insertion // fails, we have the answer already. If it succeeds, leave it there until we // compute the real answer to guard against recursive queries. @@ -170,7 +172,7 @@ bool ProvenanceAnalysis::related(const Value *A, if (!Pair.second) return Pair.first->second; - bool Result = relatedCheck(A, B); + bool Result = relatedCheck(A, B, DL); CachedResults[ValuePairTy(A, B)] = Result; return Result; } diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h index 4b5f4d8..0ac41d3 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h @@ -30,6 +30,7 @@ namespace llvm { class Value; class AliasAnalysis; + class DataLayout; class PHINode; class SelectInst; } @@ -53,7 +54,7 @@ class ProvenanceAnalysis { typedef DenseMap<ValuePairTy, bool> CachedResultsTy; CachedResultsTy CachedResults; - bool relatedCheck(const Value *A, const Value *B); + bool relatedCheck(const Value *A, const Value *B, const DataLayout &DL); bool relatedSelect(const SelectInst *A, const Value *B); bool relatedPHI(const PHINode *A, const Value *B); @@ -67,7 +68,7 @@ public: AliasAnalysis *getAA() const { return AA; } - bool related(const Value *A, const Value *B); + bool related(const Value *A, const Value *B, const DataLayout &DL); void clear() { CachedResults.clear(); diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp index d836632..0be75af 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -65,6 +66,7 @@ bool PAEval::runOnFunction(Function &F) { ProvenanceAnalysis PA; PA.setAA(&getAnalysis<AliasAnalysis>()); + const DataLayout &DL = F.getParent()->getDataLayout(); for (Value *V1 : Values) { StringRef NameV1 = getName(V1); @@ -73,7 +75,7 @@ bool PAEval::runOnFunction(Function &F) { if (NameV1 >= NameV2) continue; errs() << NameV1 << " and " << NameV2; - if (PA.related(V1, V2)) + if (PA.related(V1, V2, DL)) errs() << " are related.\n"; else errs() << " are not related.\n"; diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp new file mode 100644 index 0000000..ae20e7e --- /dev/null +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -0,0 +1,404 @@ +//===--- PtrState.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "PtrState.h" +#include "DependencyAnalysis.h" +#include "ObjCARC.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::objcarc; + +#define DEBUG_TYPE "objc-arc-ptr-state" + +//===----------------------------------------------------------------------===// +// Utility +//===----------------------------------------------------------------------===// + +raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, const Sequence S) { + switch (S) { + case S_None: + return OS << "S_None"; + case S_Retain: + return OS << "S_Retain"; + case S_CanRelease: + return OS << "S_CanRelease"; + case S_Use: + return OS << "S_Use"; + case S_Release: + return OS << "S_Release"; + case S_MovableRelease: + return OS << "S_MovableRelease"; + case S_Stop: + return OS << "S_Stop"; + } + llvm_unreachable("Unknown sequence type."); +} + +//===----------------------------------------------------------------------===// +// Sequence +//===----------------------------------------------------------------------===// + +static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { + // The easy cases. + if (A == B) + return A; + if (A == S_None || B == S_None) + return S_None; + + if (A > B) + std::swap(A, B); + if (TopDown) { + // Choose the side which is further along in the sequence. + if ((A == S_Retain || A == S_CanRelease) && + (B == S_CanRelease || B == S_Use)) + return B; + } else { + // Choose the side which is further along in the sequence. + if ((A == S_Use || A == S_CanRelease) && + (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease)) + return A; + // If both sides are releases, choose the more conservative one. + if (A == S_Stop && (B == S_Release || B == S_MovableRelease)) + return A; + if (A == S_Release && B == S_MovableRelease) + return A; + } + + return S_None; +} + +//===----------------------------------------------------------------------===// +// RRInfo +//===----------------------------------------------------------------------===// + +void RRInfo::clear() { + KnownSafe = false; + IsTailCallRelease = false; + ReleaseMetadata = nullptr; + Calls.clear(); + ReverseInsertPts.clear(); + CFGHazardAfflicted = false; +} + +bool RRInfo::Merge(const RRInfo &Other) { + // Conservatively merge the ReleaseMetadata information. + if (ReleaseMetadata != Other.ReleaseMetadata) + ReleaseMetadata = nullptr; + + // Conservatively merge the boolean state. + KnownSafe &= Other.KnownSafe; + IsTailCallRelease &= Other.IsTailCallRelease; + CFGHazardAfflicted |= Other.CFGHazardAfflicted; + + // Merge the call sets. + Calls.insert(Other.Calls.begin(), Other.Calls.end()); + + // Merge the insert point sets. If there are any differences, + // that makes this a partial merge. 
+ bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size(); + for (Instruction *Inst : Other.ReverseInsertPts) + Partial |= ReverseInsertPts.insert(Inst).second; + return Partial; +} + +//===----------------------------------------------------------------------===// +// PtrState +//===----------------------------------------------------------------------===// + +void PtrState::SetKnownPositiveRefCount() { + DEBUG(dbgs() << " Setting Known Positive.\n"); + KnownPositiveRefCount = true; +} + +void PtrState::ClearKnownPositiveRefCount() { + DEBUG(dbgs() << " Clearing Known Positive.\n"); + KnownPositiveRefCount = false; +} + +void PtrState::SetSeq(Sequence NewSeq) { + DEBUG(dbgs() << " Old: " << GetSeq() << "; New: " << NewSeq << "\n"); + Seq = NewSeq; +} + +void PtrState::ResetSequenceProgress(Sequence NewSeq) { + DEBUG(dbgs() << " Resetting sequence progress.\n"); + SetSeq(NewSeq); + Partial = false; + RRI.clear(); +} + +void PtrState::Merge(const PtrState &Other, bool TopDown) { + Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown); + KnownPositiveRefCount &= Other.KnownPositiveRefCount; + + // If we're not in a sequence (anymore), drop all associated state. + if (Seq == S_None) { + Partial = false; + RRI.clear(); + } else if (Partial || Other.Partial) { + // If we're doing a merge on a path that's previously seen a partial + // merge, conservatively drop the sequence, to avoid doing partial + // RR elimination. If the branch predicates for the two merge differ, + // mixing them is unsafe. + ClearSequenceProgress(); + } else { + // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this + // point, we know that currently we are not partial. Stash whether or not + // the merge operation caused us to undergo a partial merging of reverse + // insertion points. + Partial = RRI.Merge(Other.RRI); + } +} + +//===----------------------------------------------------------------------===// +// BottomUpPtrState +//===----------------------------------------------------------------------===// + +bool BottomUpPtrState::InitBottomUp(ARCMDKindCache &Cache, Instruction *I) { + // If we see two releases in a row on the same pointer. If so, make + // a note, and we'll cicle back to revisit it after we've + // hopefully eliminated the second release, which may allow us to + // eliminate the first release too. + // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. + bool NestingDetected = false; + if (GetSeq() == S_Release || GetSeq() == S_MovableRelease) { + DEBUG(dbgs() << " Found nested releases (i.e. a release pair)\n"); + NestingDetected = true; + } + + MDNode *ReleaseMetadata = + I->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease)); + Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release; + ResetSequenceProgress(NewSeq); + SetReleaseMetadata(ReleaseMetadata); + SetKnownSafe(HasKnownPositiveRefCount()); + SetTailCallRelease(cast<CallInst>(I)->isTailCall()); + InsertCall(I); + SetKnownPositiveRefCount(); + return NestingDetected; +} + +bool BottomUpPtrState::MatchWithRetain() { + SetKnownPositiveRefCount(); + + Sequence OldSeq = GetSeq(); + switch (OldSeq) { + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an + // imprecise release, clear our reverse insertion points. 
+ if (OldSeq != S_Use || IsTrackingImpreciseReleases()) + ClearReverseInsertPts(); + // FALL THROUGH + case S_CanRelease: + return true; + case S_None: + return false; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + llvm_unreachable("Sequence unknown enum value"); +} + +bool BottomUpPtrState::HandlePotentialAlterRefCount(Instruction *Inst, + const Value *Ptr, + ProvenanceAnalysis &PA, + ARCInstKind Class) { + Sequence S = GetSeq(); + + // Check for possible releases. + if (!CanAlterRefCount(Inst, Ptr, PA, Class)) + return false; + + DEBUG(dbgs() << " CanAlterRefCount: Seq: " << S << "; " << *Ptr + << "\n"); + switch (S) { + case S_Use: + SetSeq(S_CanRelease); + return true; + case S_CanRelease: + case S_Release: + case S_MovableRelease: + case S_Stop: + case S_None: + return false; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + llvm_unreachable("Sequence unknown enum value"); +} + +void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, + const Value *Ptr, + ProvenanceAnalysis &PA, + ARCInstKind Class) { + // Check for possible direct uses. + switch (GetSeq()) { + case S_Release: + case S_MovableRelease: + if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr + << "\n"); + assert(!HasReverseInsertPts()); + // If this is an invoke instruction, we're scanning it as part of + // one of its successor blocks, since we can't insert code after it + // in its own block, and we don't want to split critical edges. + if (isa<InvokeInst>(Inst)) + InsertReverseInsertPt(BB->getFirstInsertionPt()); + else + InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); + SetSeq(S_Use); + } else if (Seq == S_Release && IsUser(Class)) { + DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; " + << *Ptr << "\n"); + // Non-movable releases depend on any possible objc pointer use. + SetSeq(S_Stop); + assert(!HasReverseInsertPts()); + // As above; handle invoke specially. + if (isa<InvokeInst>(Inst)) + InsertReverseInsertPt(BB->getFirstInsertionPt()); + else + InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); + } + break; + case S_Stop: + if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << " PreciseStopUse: Seq: " << GetSeq() << "; " + << *Ptr << "\n"); + SetSeq(S_Use); + } + break; + case S_CanRelease: + case S_Use: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } +} + +//===----------------------------------------------------------------------===// +// TopDownPtrState +//===----------------------------------------------------------------------===// + +bool TopDownPtrState::InitTopDown(ARCInstKind Kind, Instruction *I) { + bool NestingDetected = false; + // Don't do retain+release tracking for ARCInstKind::RetainRV, because + // it's + // better to let it remain as the first instruction after a call. + if (Kind != ARCInstKind::RetainRV) { + // If we see two retains in a row on the same pointer. If so, make + // a note, and we'll cicle back to revisit it after we've + // hopefully eliminated the second retain, which may allow us to + // eliminate the first retain too. + // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. 
+ if (GetSeq() == S_Retain) + NestingDetected = true; + + ResetSequenceProgress(S_Retain); + SetKnownSafe(HasKnownPositiveRefCount()); + InsertCall(I); + } + + SetKnownPositiveRefCount(); + return NestingDetected; +} + +bool TopDownPtrState::MatchWithRelease(ARCMDKindCache &Cache, + Instruction *Release) { + ClearKnownPositiveRefCount(); + + Sequence OldSeq = GetSeq(); + + MDNode *ReleaseMetadata = + Release->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease)); + + switch (OldSeq) { + case S_Retain: + case S_CanRelease: + if (OldSeq == S_Retain || ReleaseMetadata != nullptr) + ClearReverseInsertPts(); + // FALL THROUGH + case S_Use: + SetReleaseMetadata(ReleaseMetadata); + SetTailCallRelease(cast<CallInst>(Release)->isTailCall()); + return true; + case S_None: + return false; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in bottom up state!"); + } + llvm_unreachable("Sequence unknown enum value"); +} + +bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst, + const Value *Ptr, + ProvenanceAnalysis &PA, + ARCInstKind Class) { + // Check for possible releases. + if (!CanAlterRefCount(Inst, Ptr, PA, Class)) + return false; + + DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr + << "\n"); + ClearKnownPositiveRefCount(); + switch (GetSeq()) { + case S_Retain: + SetSeq(S_CanRelease); + assert(!HasReverseInsertPts()); + InsertReverseInsertPt(Inst); + + // One call can't cause a transition from S_Retain to S_CanRelease + // and S_CanRelease to S_Use. If we've made the first transition, + // we're done. + return true; + case S_Use: + case S_CanRelease: + case S_None: + return false; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + llvm_unreachable("covered switch is not covered!?"); +} + +void TopDownPtrState::HandlePotentialUse(Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, + ARCInstKind Class) { + // Check for possible direct uses. + switch (GetSeq()) { + case S_CanRelease: + if (!CanUse(Inst, Ptr, PA, Class)) + return; + DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr + << "\n"); + SetSeq(S_Use); + return; + case S_Retain: + case S_Use: + case S_None: + return; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } +} diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h new file mode 100644 index 0000000..e45e1ea --- /dev/null +++ b/lib/Transforms/ObjCARC/PtrState.h @@ -0,0 +1,210 @@ +//===--- PtrState.h - ARC State for a Ptr -------------------*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains declarations for the ARC state associated with a ptr. It +// is only used by the ARC Sequence Dataflow computation. By separating this +// from the actual dataflow, it is easier to consider the mechanics of the ARC +// optimization separate from the actual predicates being used. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H +#define LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H + +#include "ARCInstKind.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" + +namespace llvm { +namespace objcarc { + +class ARCMDKindCache; +class ProvenanceAnalysis; + +/// \enum Sequence +/// +/// \brief A sequence of states that a pointer may go through in which an +/// objc_retain and objc_release are actually needed. +enum Sequence { + S_None, + S_Retain, ///< objc_retain(x). + S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement. + S_Use, ///< any use of x. + S_Stop, ///< like S_Release, but code motion is stopped. + S_Release, ///< objc_release(x). + S_MovableRelease ///< objc_release(x), !clang.imprecise_release. +}; + +raw_ostream &operator<<(raw_ostream &OS, + const Sequence S) LLVM_ATTRIBUTE_UNUSED; + +/// \brief Unidirectional information about either a +/// retain-decrement-use-release sequence or release-use-decrement-retain +/// reverse sequence. +struct RRInfo { + /// After an objc_retain, the reference count of the referenced + /// object is known to be positive. Similarly, before an objc_release, the + /// reference count of the referenced object is known to be positive. If + /// there are retain-release pairs in code regions where the retain count + /// is known to be positive, they can be eliminated, regardless of any side + /// effects between them. + /// + /// Also, a retain+release pair nested within another retain+release + /// pair all on the known same pointer value can be eliminated, regardless + /// of any intervening side effects. + /// + /// KnownSafe is true when either of these conditions is satisfied. + bool KnownSafe; + + /// True of the objc_release calls are all marked with the "tail" keyword. + bool IsTailCallRelease; + + /// If the Calls are objc_release calls and they all have a + /// clang.imprecise_release tag, this is the metadata tag. + MDNode *ReleaseMetadata; + + /// For a top-down sequence, the set of objc_retains or + /// objc_retainBlocks. For bottom-up, the set of objc_releases. + SmallPtrSet<Instruction *, 2> Calls; + + /// The set of optimal insert positions for moving calls in the opposite + /// sequence. + SmallPtrSet<Instruction *, 2> ReverseInsertPts; + + /// If this is true, we cannot perform code motion but can still remove + /// retain/release pairs. + bool CFGHazardAfflicted; + + RRInfo() + : KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr), + CFGHazardAfflicted(false) {} + + void clear(); + + /// Conservatively merge the two RRInfo. Returns true if a partial merge has + /// occurred, false otherwise. + bool Merge(const RRInfo &Other); +}; + +/// \brief This class summarizes several per-pointer runtime properties which +/// are propogated through the flow graph. +class PtrState { +protected: + /// True if the reference count is known to be incremented. + bool KnownPositiveRefCount; + + /// True if we've seen an opportunity for partial RR elimination, such as + /// pushing calls into a CFG triangle or into one side of a CFG diamond. + bool Partial; + + /// The current position in the sequence. + unsigned char Seq : 8; + + /// Unidirectional information about the current sequence. 
+ RRInfo RRI; + + PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {} + +public: + bool IsKnownSafe() const { return RRI.KnownSafe; } + + void SetKnownSafe(const bool NewValue) { RRI.KnownSafe = NewValue; } + + bool IsTailCallRelease() const { return RRI.IsTailCallRelease; } + + void SetTailCallRelease(const bool NewValue) { + RRI.IsTailCallRelease = NewValue; + } + + bool IsTrackingImpreciseReleases() const { + return RRI.ReleaseMetadata != nullptr; + } + + const MDNode *GetReleaseMetadata() const { return RRI.ReleaseMetadata; } + + void SetReleaseMetadata(MDNode *NewValue) { RRI.ReleaseMetadata = NewValue; } + + bool IsCFGHazardAfflicted() const { return RRI.CFGHazardAfflicted; } + + void SetCFGHazardAfflicted(const bool NewValue) { + RRI.CFGHazardAfflicted = NewValue; + } + + void SetKnownPositiveRefCount(); + void ClearKnownPositiveRefCount(); + + bool HasKnownPositiveRefCount() const { return KnownPositiveRefCount; } + + void SetSeq(Sequence NewSeq); + + Sequence GetSeq() const { return static_cast<Sequence>(Seq); } + + void ClearSequenceProgress() { ResetSequenceProgress(S_None); } + + void ResetSequenceProgress(Sequence NewSeq); + void Merge(const PtrState &Other, bool TopDown); + + void InsertCall(Instruction *I) { RRI.Calls.insert(I); } + + void InsertReverseInsertPt(Instruction *I) { RRI.ReverseInsertPts.insert(I); } + + void ClearReverseInsertPts() { RRI.ReverseInsertPts.clear(); } + + bool HasReverseInsertPts() const { return !RRI.ReverseInsertPts.empty(); } + + const RRInfo &GetRRInfo() const { return RRI; } +}; + +struct BottomUpPtrState : PtrState { + BottomUpPtrState() : PtrState() {} + + /// (Re-)Initialize this bottom up pointer returning true if we detected a + /// pointer with nested releases. + bool InitBottomUp(ARCMDKindCache &Cache, Instruction *I); + + /// Return true if this set of releases can be paired with a release. Modifies + /// state appropriately to reflect that the matching occured if it is + /// successful. + /// + /// It is assumed that one has already checked that the RCIdentity of the + /// retain and the RCIdentity of this ptr state are the same. + bool MatchWithRetain(); + + void HandlePotentialUse(BasicBlock *BB, Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, ARCInstKind Class); + bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, ARCInstKind Class); +}; + +struct TopDownPtrState : PtrState { + TopDownPtrState() : PtrState() {} + + /// (Re-)Initialize this bottom up pointer returning true if we detected a + /// pointer with nested releases. + bool InitTopDown(ARCInstKind Kind, Instruction *I); + + /// Return true if this set of retains can be paired with the given + /// release. Modifies state appropriately to reflect that the matching + /// occured. 
+ bool MatchWithRelease(ARCMDKindCache &Cache, Instruction *Release); + + void HandlePotentialUse(Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, ARCInstKind Class); + + bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, ARCInstKind Class); +}; + +} // end namespace objcarc +} // end namespace llvm + +#endif diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 5c74885..5aa2b97 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -23,15 +23,15 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -71,7 +71,6 @@ struct AlignmentFromAssumptions : public FunctionPass { ScalarEvolution *SE; DominatorTree *DT; - const DataLayout *DL; bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV, const SCEV *&OffSCEV); @@ -123,7 +122,7 @@ static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV, // If the displacement is not an exact multiple, but the remainder is a // constant, then return this remainder (but only if it is a power of 2). - uint64_t DiffUnitsAbs = abs64(DiffUnits); + uint64_t DiffUnitsAbs = std::abs(DiffUnits); if (isPowerOf2_64(DiffUnitsAbs)) return (unsigned) DiffUnitsAbs; } @@ -316,7 +315,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { continue; if (Instruction *K = dyn_cast<Instruction>(J)) - if (isValidAssumeForContext(ACall, K, DL, DT)) + if (isValidAssumeForContext(ACall, K, DT)) WorkList.push_back(K); } @@ -400,7 +399,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { Visited.insert(J); for (User *UJ : J->users()) { Instruction *K = cast<Instruction>(UJ); - if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT)) + if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT)) WorkList.push_back(K); } } @@ -413,8 +412,6 @@ bool AlignmentFromAssumptions::runOnFunction(Function &F) { auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? 
&DLP->getDataLayout() : nullptr; NewDestAlignments.clear(); NewSrcAlignments.clear(); diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index ed803cd..cf30f39 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -20,6 +20,7 @@ transforms_scalar_SRC_FILES := \ LoopDeletion.cpp \ LoopIdiomRecognize.cpp \ LoopInstSimplify.cpp \ + LoopInterchange.cpp \ LoopRerollPass.cpp \ LoopRotation.cpp \ LoopStrengthReduce.cpp \ diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index c7bd79d..09c605e 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -64,7 +64,6 @@ struct BDCE : public FunctionPass { APInt &KnownZero2, APInt &KnownOne2); AssumptionCache *AC; - const DataLayout *DL; DominatorTree *DT; }; } @@ -95,20 +94,21 @@ void BDCE::determineLiveOperandBits(const Instruction *UserI, // however, want to do this twice, so we cache the result in APInts that live // in the caller. For the two-relevant-operands case, both operand values are // provided here. - auto ComputeKnownBits = [&](unsigned BitWidth, const Value *V1, - const Value *V2) { - KnownZero = APInt(BitWidth, 0); - KnownOne = APInt(BitWidth, 0); - computeKnownBits(const_cast<Value*>(V1), KnownZero, KnownOne, DL, 0, AC, - UserI, DT); - - if (V2) { - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); - computeKnownBits(const_cast<Value*>(V2), KnownZero2, KnownOne2, DL, 0, AC, - UserI, DT); - } - }; + auto ComputeKnownBits = + [&](unsigned BitWidth, const Value *V1, const Value *V2) { + const DataLayout &DL = I->getModule()->getDataLayout(); + KnownZero = APInt(BitWidth, 0); + KnownOne = APInt(BitWidth, 0); + computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0, + AC, UserI, DT); + + if (V2) { + KnownZero2 = APInt(BitWidth, 0); + KnownOne2 = APInt(BitWidth, 0); + computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL, + 0, AC, UserI, DT); + } + }; switch (UserI->getOpcode()) { default: break; @@ -263,7 +263,6 @@ bool BDCE::runOnFunction(Function& F) { return false; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - DL = F.getParent()->getDataLayout(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DenseMap<Instruction *, APInt> AliveBits; diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index d297eb1..d12fdb7 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_library(LLVMScalarOpts LoopDeletion.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp + LoopInterchange.cpp LoopRerollPass.cpp LoopRotation.cpp LoopStrengthReduce.cpp diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index e3aab4b..4288742 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <tuple> using namespace llvm; diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 29d4e05..c974ebb 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/Constant.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" #include 
"llvm/IR/Instruction.h" #include "llvm/Pass.h" @@ -68,8 +67,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { WorkList.insert(&*i); } bool Changed = false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; + const DataLayout &DL = F.getParent()->getDataLayout(); TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 5a3b5cf..912d527 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -126,8 +127,9 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { Changed = true; } - // FIXME: Provide DL, TLI, DT, AT to SimplifyInstruction. - if (Value *V = SimplifyInstruction(P)) { + // FIXME: Provide TLI, DT, AT to SimplifyInstruction. + const DataLayout &DL = BB->getModule()->getDataLayout(); + if (Value *V = SimplifyInstruction(P, DL)) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c2ce1d5..cb8981b 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -33,7 +34,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -78,7 +79,8 @@ namespace { bool HandleFree(CallInst *F); bool handleEndBlock(BasicBlock &BB); void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, - SmallSetVector<Value*, 16> &DeadStackObjects); + SmallSetVector<Value *, 16> &DeadStackObjects, + const DataLayout &DL); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -194,18 +196,12 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { /// describe the memory operations for this instruction. static AliasAnalysis::Location getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { - const DataLayout *DL = AA.getDataLayout(); if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) return AA.getLocation(SI); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { // memcpy/memmove/memset. AliasAnalysis::Location Loc = AA.getLocationForDest(MI); - // If we don't have target data around, an unknown size in Location means - // that we should use the size of the pointee type. This isn't valid for - // memset/memcpy, which writes more than an i8. - if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr) - return AliasAnalysis::Location(); return Loc; } @@ -215,11 +211,6 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { switch (II->getIntrinsicID()) { default: return AliasAnalysis::Location(); // Unhandled intrinsic. 
case Intrinsic::init_trampoline: - // If we don't have target data around, an unknown size in Location means - // that we should use the size of the pointee type. This isn't valid for - // init.trampoline, which writes more than an i8. - if (!DL) return AliasAnalysis::Location(); - // FIXME: We don't know the size of the trampoline, so we can't really // handle it here. return AliasAnalysis::Location(II->getArgOperand(0)); @@ -321,9 +312,10 @@ static Value *getStoredPointerOperand(Instruction *I) { return CS.getArgument(0); } -static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { +static uint64_t getPointerSize(const Value *V, const DataLayout &DL, + const TargetLibraryInfo *TLI) { uint64_t Size; - if (getObjectSize(V, Size, AA.getDataLayout(), AA.getTargetLibraryInfo())) + if (getObjectSize(V, Size, DL, TLI)) return Size; return AliasAnalysis::UnknownSize; } @@ -343,10 +335,9 @@ namespace { /// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, const AliasAnalysis::Location &Earlier, - AliasAnalysis &AA, - int64_t &EarlierOff, - int64_t &LaterOff) { - const DataLayout *DL = AA.getDataLayout(); + const DataLayout &DL, + const TargetLibraryInfo *TLI, + int64_t &EarlierOff, int64_t &LaterOff) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -367,7 +358,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || - Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr) + Earlier.Size == AliasAnalysis::UnknownSize) return OverwriteUnknown; // Check to see if the later store is to the entire object (either a global, @@ -382,7 +373,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, return OverwriteUnknown; // If the "Later" store is to a recognizable object, get its size. - uint64_t ObjectSize = getPointerSize(UO2, AA); + uint64_t ObjectSize = getPointerSize(UO2, DL, TLI); if (ObjectSize != AliasAnalysis::UnknownSize) if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size) return OverwriteComplete; @@ -560,8 +551,10 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, - DepWriteOffset, InstWriteOffset); + const DataLayout &DL = BB.getModule()->getDataLayout(); + OverwriteResult OR = + isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(), + DepWriteOffset, InstWriteOffset); if (OR == OverwriteComplete) { DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite << "\n KILLER: " << *Inst << '\n'); @@ -655,6 +648,7 @@ bool DSE::HandleFree(CallInst *F) { AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0)); SmallVector<BasicBlock *, 16> Blocks; Blocks.push_back(F->getParent()); + const DataLayout &DL = F->getModule()->getDataLayout(); while (!Blocks.empty()) { BasicBlock *BB = Blocks.pop_back_val(); @@ -668,7 +662,7 @@ bool DSE::HandleFree(CallInst *F) { break; Value *DepPointer = - GetUnderlyingObject(getStoredPointerOperand(Dependency)); + GetUnderlyingObject(getStoredPointerOperand(Dependency), DL); // Check for aliasing. 
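The recurring change in this patch is that DataLayout is no longer an optional analysis: every Function, BasicBlock, and Instruction can reach it through its Module, which is why the "if we don't have target data around" bail-outs above can simply be deleted. A minimal sketch of the access pattern the rewritten passes rely on (the helper name layoutFor is illustrative, not part of the patch):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Illustrative helpers: the three ways the rewritten passes reach the
    // layout without querying DataLayoutPass.
    static const DataLayout &layoutFor(Function &F) {
      return F.getParent()->getDataLayout();   // Function -> Module
    }
    static const DataLayout &layoutFor(BasicBlock &BB) {
      return BB.getModule()->getDataLayout();  // BasicBlock -> Module
    }
    static const DataLayout &layoutFor(Instruction &I) {
      return I.getModule()->getDataLayout();   // Instruction -> Module
    }
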
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) @@ -728,6 +722,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (AI->hasByValOrInAllocaAttr()) DeadStackObjects.insert(AI); + const DataLayout &DL = BB.getModule()->getDataLayout(); + // Scan the basic block backwards for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){ --BBI; @@ -736,7 +732,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts SmallVector<Value *, 4> Pointers; - GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers); + GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL); // Stores to stack values are valid candidates for removal. bool AllDead = true; @@ -799,8 +795,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // the call is live. DeadStackObjects.remove_if([&](Value *I) { // See if the call site touches the value. - AliasAnalysis::ModRefResult A = - AA->getModRefInfo(CS, I, getPointerSize(I, *AA)); + AliasAnalysis::ModRefResult A = AA->getModRefInfo( + CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo())); return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref; }); @@ -835,7 +831,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Remove any allocas from the DeadPointer set that are loaded, as this // makes any stores above the access live. - RemoveAccessedObjects(LoadedLoc, DeadStackObjects); + RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL); // If all of the allocas were clobbered by the access then we're not going // to find anything else to process. @@ -850,8 +846,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { /// of the stack objects in the DeadStackObjects set. If so, they become live /// because the location is being loaded. void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, - SmallSetVector<Value*, 16> &DeadStackObjects) { - const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr); + SmallSetVector<Value *, 16> &DeadStackObjects, + const DataLayout &DL) { + const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL); // A constant can't be in the dead pointer set. if (isa<Constant>(UnderlyingPointer)) @@ -867,7 +864,8 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, // Remove objects that could alias LoadedLoc. DeadStackObjects.remove_if([&](Value *I) { // See if the loaded location could alias the stack location. 
- AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA)); + AliasAnalysis::Location StackLoc( + I, getPointerSize(I, DL, AA->getTargetLibraryInfo())); return !AA->isNoAlias(StackLoc, LoadedLoc); }); } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 9309623..d5b9e03 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -27,7 +28,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include <deque> @@ -263,7 +264,6 @@ namespace { class EarlyCSE { public: Function &F; - const DataLayout *DL; const TargetLibraryInfo &TLI; const TargetTransformInfo &TTI; DominatorTree &DT; @@ -308,11 +308,10 @@ public: unsigned CurrentGeneration; /// \brief Set up the EarlyCSE runner for a particular function. - EarlyCSE(Function &F, const DataLayout *DL, const TargetLibraryInfo &TLI, + EarlyCSE(Function &F, const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, DominatorTree &DT, AssumptionCache &AC) - : F(F), DL(DL), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) { - } + : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {} bool run(); @@ -469,6 +468,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { Instruction *LastStore = nullptr; bool Changed = false; + const DataLayout &DL = BB->getModule()->getDataLayout(); // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. @@ -685,14 +685,12 @@ bool EarlyCSE::run() { PreservedAnalyses EarlyCSEPass::run(Function &F, AnalysisManager<Function> *AM) { - const DataLayout *DL = F.getParent()->getDataLayout(); - auto &TLI = AM->getResult<TargetLibraryAnalysis>(F); auto &TTI = AM->getResult<TargetIRAnalysis>(F); auto &DT = AM->getResult<DominatorTreeAnalysis>(F); auto &AC = AM->getResult<AssumptionAnalysis>(F); - EarlyCSE CSE(F, DL, TLI, TTI, DT, AC); + EarlyCSE CSE(F, TLI, TTI, DT, AC); if (!CSE.run()) return PreservedAnalyses::all(); @@ -724,14 +722,12 @@ public: if (skipOptnoneFunction(F)) return false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - auto *DL = DLP ? 
&DLP->getDataLayout() : nullptr; auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); - EarlyCSE CSE(F, DL, TLI, TTI, DT, AC); + EarlyCSE CSE(F, TLI, TTI, DT, AC); return CSE.run(); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 73a1f25..c73e60f 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -45,7 +46,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -584,14 +585,13 @@ namespace { /// Emit code into this block to adjust the value defined here to the /// specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const; + Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const; }; class GVN : public FunctionPass { bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; - const DataLayout *DL; const TargetLibraryInfo *TLI; AssumptionCache *AC; SetVector<BasicBlock *> DeadBlocks; @@ -630,7 +630,6 @@ namespace { InstrsToErase.push_back(I); } - const DataLayout *getDataLayout() const { return DL; } DominatorTree &getDominatorTree() const { return *DT; } AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } MemoryDependenceAnalysis &getMemDep() const { return *MD; } @@ -956,8 +955,9 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&DL); - Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &DL); + Value *StoreBase = + GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL); if (StoreBase != LoadBase) return -1; @@ -1021,13 +1021,13 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, /// This function is called when we have a /// memdep query of a load that ends up being a clobbering store. static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr, - StoreInst *DepSI, - const DataLayout &DL) { + StoreInst *DepSI) { // Cannot handle reading from store of first-class aggregate yet. if (DepSI->getValueOperand()->getType()->isStructTy() || DepSI->getValueOperand()->getType()->isArrayTy()) return -1; + const DataLayout &DL = DepSI->getModule()->getDataLayout(); Value *StorePtr = DepSI->getPointerOperand(); uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()); return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, @@ -1052,11 +1052,11 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, // then we should widen it! 
int64_t LoadOffs = 0; const Value *LoadBase = - GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &DL); + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL); unsigned LoadSize = DL.getTypeStoreSize(LoadTy); - unsigned Size = MemoryDependenceAnalysis:: - getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, DL); + unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize( + LoadBase, LoadOffs, LoadSize, DepLI); if (Size == 0) return -1; return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL); @@ -1086,7 +1086,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, Constant *Src = dyn_cast<Constant>(MTI->getSource()); if (!Src) return -1; - GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL)); + GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL)); if (!GV || !GV->isConstant()) return -1; // See if the access is within the bounds of the transfer. @@ -1104,7 +1104,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); - if (ConstantFoldLoadFromConstPtr(Src, &DL)) + if (ConstantFoldLoadFromConstPtr(Src, DL)) return Offset; return -1; } @@ -1157,7 +1157,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy, Instruction *InsertPt, GVN &gvn) { - const DataLayout &DL = *gvn.getDataLayout(); + const DataLayout &DL = SrcVal->getModule()->getDataLayout(); // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to // widen SrcVal out to a larger load. unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()); @@ -1265,7 +1265,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); - return ConstantFoldLoadFromConstPtr(Src, &DL); + return ConstantFoldLoadFromConstPtr(Src, DL); } @@ -1281,7 +1281,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) { assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block"); - return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); + return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn); } // Otherwise, we have to construct SSA form. @@ -1289,8 +1289,6 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, SSAUpdater SSAUpdate(&NewPHIs); SSAUpdate.Initialize(LI->getType(), LI->getName()); - Type *LoadTy = LI->getType(); - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { const AvailableValueInBlock &AV = ValuesPerBlock[i]; BasicBlock *BB = AV.BB; @@ -1298,7 +1296,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (SSAUpdate.HasValueForBlock(BB)) continue; - SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn)); } // Perform PHI construction. 
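The hunk above ends inside GVN's ConstructSSAForLoadSet, where each block's adjusted value is registered with an SSAUpdater before PHI construction. A condensed sketch of that idiom follows; the block/value pair list and the final GetValueInMiddleOfBlock query are assumptions about code not shown in this hunk:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    #include <utility>
    using namespace llvm;

    // Sketch of the SSAUpdater idiom: register one available value per block,
    // then ask for the value at the load's block; SSAUpdater inserts any PHIs
    // that are needed and records them in NewPHIs.
    static Value *
    buildSSAForLoad(LoadInst *LI,
                    ArrayRef<std::pair<BasicBlock *, Value *>> AvailableVals) {
      SmallVector<PHINode *, 8> NewPHIs;
      SSAUpdater Updater(&NewPHIs);
      Updater.Initialize(LI->getType(), LI->getName());
      for (const auto &BV : AvailableVals)
        if (!Updater.HasValueForBlock(BV.first))
          Updater.AddAvailableValue(BV.first, BV.second);
      return Updater.GetValueInMiddleOfBlock(LI->getParent());
    }
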
@@ -1326,16 +1324,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } -Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { +Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI, + GVN &gvn) const { Value *Res; + Type *LoadTy = LI->getType(); + const DataLayout &DL = LI->getModule()->getDataLayout(); if (isSimpleValue()) { Res = getSimpleValue(); if (Res->getType() != LoadTy) { - const DataLayout *DL = gvn.getDataLayout(); - assert(DL && "Need target data to handle type mismatch case"); - Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), - *DL); - + Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), DL); + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); @@ -1353,10 +1351,8 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c << *Res << '\n' << "\n\n\n"); } } else if (isMemIntrinValue()) { - const DataLayout *DL = gvn.getDataLayout(); - assert(DL && "Need target data to handle type mismatch case"); - Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, - LoadTy, BB->getTerminator(), *DL); + Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, + BB->getTerminator(), DL); DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); @@ -1383,6 +1379,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, // dependencies that produce an unknown value for the load (such as a call // that could potentially clobber the load). unsigned NumDeps = Deps.size(); + const DataLayout &DL = LI->getModule()->getDataLayout(); for (unsigned i = 0, e = NumDeps; i != e; ++i) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); @@ -1409,9 +1406,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, // read by the load, we can extract the bits we need for the load from the // stored value. if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { - if (DL && Address) { - int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, - DepSI, *DL); + if (Address) { + int Offset = + AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, DepSI->getValueOperand(), @@ -1428,9 +1425,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { // If this is a clobber and L is the first instruction in its block, then // we have the first instruction in the entry block. - if (DepLI != LI && Address && DL) { - int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address, - DepLI, *DL); + if (DepLI != LI && Address) { + int Offset = + AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, @@ -1443,9 +1440,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. 
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { - if (DL && Address) { + if (Address) { int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, - DepMI, *DL); + DepMI, DL); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, Offset)); @@ -1484,8 +1481,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (S->getValueOperand()->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), - LI->getType(), *DL)) { + if (!CanCoerceMustAliasedValueToLoad(S->getValueOperand(), + LI->getType(), DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1501,7 +1498,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (LD->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) { + if (!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1613,6 +1610,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check if the load can safely be moved to all the unavailable predecessors. bool CanDoPRE = true; + const DataLayout &DL = LI->getModule()->getDataLayout(); SmallVector<Instruction*, 8> NewInsts; for (auto &PredLoad : PredLoads) { BasicBlock *UnavailablePred = PredLoad.first; @@ -1833,10 +1831,11 @@ bool GVN::processLoad(LoadInst *L) { // ... to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); + const DataLayout &DL = L->getModule()->getDataLayout(); // If we have a clobber and target data is around, see if this is a clobber // that we can fix up through code synthesis. - if (Dep.isClobber() && DL) { + if (Dep.isClobber()) { // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1849,12 +1848,11 @@ bool GVN::processLoad(LoadInst *L) { // access code. Value *AvailVal = nullptr; if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { - int Offset = AnalyzeLoadFromClobberingStore(L->getType(), - L->getPointerOperand(), - DepSI, *DL); + int Offset = AnalyzeLoadFromClobberingStore( + L->getType(), L->getPointerOperand(), DepSI); if (Offset != -1) AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, - L->getType(), L, *DL); + L->getType(), L, DL); } // Check to see if we have something like this: @@ -1867,9 +1865,8 @@ bool GVN::processLoad(LoadInst *L) { if (DepLI == L) return false; - int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), - L->getPointerOperand(), - DepLI, *DL); + int Offset = AnalyzeLoadFromClobberingLoad( + L->getType(), L->getPointerOperand(), DepLI, DL); if (Offset != -1) AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); } @@ -1877,11 +1874,10 @@ bool GVN::processLoad(LoadInst *L) { // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. 
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { - int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), - L->getPointerOperand(), - DepMI, *DL); + int Offset = AnalyzeLoadFromClobberingMemInst( + L->getType(), L->getPointerOperand(), DepMI, DL); if (Offset != -1) - AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *DL); + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, DL); } if (AvailVal) { @@ -1932,17 +1928,13 @@ bool GVN::processLoad(LoadInst *L) { // actually have the same type. See if we know how to reuse the stored // value (depending on its type). if (StoredVal->getType() != L->getType()) { - if (DL) { - StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), - L, *DL); - if (!StoredVal) - return false; - - DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal - << '\n' << *L << "\n\n\n"); - } - else + StoredVal = + CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, DL); + if (!StoredVal) return false; + + DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + << '\n' << *L << "\n\n\n"); } // Remove it! @@ -1961,17 +1953,12 @@ bool GVN::processLoad(LoadInst *L) { // the same type. See if we know how to reuse the previously loaded value // (depending on its type). if (DepLI->getType() != L->getType()) { - if (DL) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), - L, *DL); - if (!AvailableVal) - return false; - - DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal - << "\n" << *L << "\n\n\n"); - } - else + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, DL); + if (!AvailableVal) return false; + + DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + << "\n" << *L << "\n\n\n"); } // Remove it! @@ -2239,6 +2226,7 @@ bool GVN::processInstruction(Instruction *I) { // to value numbering it. Value numbering often exposes redundancies, for // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. + const DataLayout &DL = I->getModule()->getDataLayout(); if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { I->replaceAllUsesWith(V); if (MD && V->getType()->getScalarType()->isPointerTy()) @@ -2357,8 +2345,6 @@ bool GVN::runOnFunction(Function& F) { if (!NoLoads) MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? 
&DLP->getDataLayout() : nullptr; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index f99ebbc..51e8041 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -44,7 +45,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -73,7 +73,6 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; - const DataLayout *DL; TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; @@ -82,8 +81,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), - DL(nullptr), Changed(false) { + IndVarSimplify() + : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -663,14 +662,14 @@ namespace { /// extended by this sign or zero extend operation. This is used to determine /// the final width of the IV before actually widening it. static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, - const DataLayout *DL, const TargetTransformInfo *TTI) { + const TargetTransformInfo *TTI) { bool IsSigned = Cast->getOpcode() == Instruction::SExt; if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) return; Type *Ty = Cast->getType(); uint64_t Width = SE->getTypeSizeInBits(Ty); - if (DL && !DL->isLegalInteger(Width)) + if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) return; // Cast is either an sext or zext up to this point. @@ -1201,7 +1200,6 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { namespace { class IndVarSimplifyVisitor : public IVVisitor { ScalarEvolution *SE; - const DataLayout *DL; const TargetTransformInfo *TTI; PHINode *IVPhi; @@ -1209,9 +1207,9 @@ namespace { WideIVInfo WI; IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, - const DataLayout *DL, const TargetTransformInfo *TTI, + const TargetTransformInfo *TTI, const DominatorTree *DTree) - : SE(SCEV), DL(DL), TTI(TTI), IVPhi(IV) { + : SE(SCEV), TTI(TTI), IVPhi(IV) { DT = DTree; WI.NarrowIV = IVPhi; if (ReduceLiveIVs) @@ -1219,9 +1217,7 @@ namespace { } // Implement the interface used by simplifyUsersOfIV. - void visitCast(CastInst *Cast) override { - visitIVCast(Cast, WI, SE, DL, TTI); - } + void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } }; } @@ -1255,7 +1251,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, PHINode *CurrIV = LoopPhis.pop_back_val(); // Information about sign/zero extensions of CurrIV. 
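The IndVarSimplify changes above make the widening heuristic consult the module's DataLayout directly through isLegalInteger. A small self-contained illustration of what that predicate answers, using a typical x86-64 layout string (the string and the chosen widths are examples, not taken from this patch):

    #include "llvm/IR/DataLayout.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // The "n8:16:32:64" component lists the integer widths the target
      // supports natively; isLegalInteger checks against that list.
      DataLayout DL("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
      assert(DL.isLegalInteger(64));    // widening an IV to i64 is acceptable
      assert(!DL.isLegalInteger(128));  // i128 is not a native width here
      return 0;
    }
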
- IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, TTI, DT); + IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor); @@ -1521,9 +1517,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. /// This is difficult in general for SCEV because of potential overflow. But we /// could at least handle constant BECounts. -static PHINode * -FindLoopCounter(Loop *L, const SCEV *BECount, - ScalarEvolution *SE, DominatorTree *DT, const DataLayout *DL) { +static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount, + ScalarEvolution *SE, DominatorTree *DT) { uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); Value *Cond = @@ -1552,7 +1547,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount, // AR may be wider than BECount. With eq/ne tests overflow is immaterial. // AR may not be a narrower type, or we may never exit. uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); - if (PhiWidth < BCWidth || (DL && !DL->isLegalInteger(PhiWidth))) + if (PhiWidth < BCWidth || + !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth)) continue; const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); @@ -1705,51 +1701,15 @@ LinearFunctionTestReplace(Loop *L, // compare against the post-incremented value, otherwise we must compare // against the preincremented value. if (L->getExitingBlock() == L->getLoopLatch()) { + // Add one to the "backedge-taken" count to get the trip count. + // This addition may overflow, which is valid as long as the comparison is + // truncated to BackedgeTakenCount->getType(). + IVCount = SE->getAddExpr(BackedgeTakenCount, + SE->getConstant(BackedgeTakenCount->getType(), 1)); // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - llvm::Value *IncrementedIndvar = - IndVar->getIncomingValueForBlock(L->getExitingBlock()); - const auto *IncrementedIndvarSCEV = - cast<SCEVAddRecExpr>(SE->getSCEV(IncrementedIndvar)); - // It is unsafe to use the incremented indvar if it has a wrapping flag, we - // don't want to compare against a poison value. Check the SCEV that - // corresponds to the incremented indvar, the SCEVExpander will only insert - // flags in the IR if the SCEV originally had wrapping flags. - // FIXME: In theory, SCEV could drop flags even though they exist in IR. - // A more robust solution would involve getting a new expression for - // CmpIndVar by applying non-NSW/NUW AddExprs. - auto WrappingFlags = - ScalarEvolution::setFlags(SCEV::FlagNUW, SCEV::FlagNSW); - const SCEV *IVInit = IncrementedIndvarSCEV->getStart(); - if (SE->getTypeSizeInBits(IVInit->getType()) > - SE->getTypeSizeInBits(IVCount->getType())) - IVInit = SE->getTruncateExpr(IVInit, IVCount->getType()); - unsigned BitWidth = SE->getTypeSizeInBits(IVCount->getType()); - Type *WideTy = IntegerType::get(SE->getContext(), BitWidth + 1); - // Check if InitIV + BECount+1 requires sign/zero extension. - // If not, clear the corresponding flag from WrappingFlags because it is not - // necessary for those flags in the IncrementedIndvarSCEV expression. 
- if (SE->getSignExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount), - WideTy) == - SE->getAddExpr(SE->getSignExtendExpr(IVInit, WideTy), - SE->getSignExtendExpr(BackedgeTakenCount, WideTy))) - WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNSW); - if (SE->getZeroExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount), - WideTy) == - SE->getAddExpr(SE->getZeroExtendExpr(IVInit, WideTy), - SE->getZeroExtendExpr(BackedgeTakenCount, WideTy))) - WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNUW); - if (!ScalarEvolution::maskFlags(IncrementedIndvarSCEV->getNoWrapFlags(), - WrappingFlags)) { - // Add one to the "backedge-taken" count to get the trip count. - // This addition may overflow, which is valid as long as the comparison is - // truncated to BackedgeTakenCount->getType(). - IVCount = - SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); - CmpIndVar = IncrementedIndvar; - } + CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); } Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE); @@ -1932,12 +1892,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); TLI = TLIP ? &TLIP->getTLI() : nullptr; auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); DeadInsts.clear(); Changed = false; @@ -1949,7 +1908,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. - SCEVExpander Rewriter(*SE, "indvars"); + SCEVExpander Rewriter(*SE, DL, "indvars"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif @@ -1978,7 +1937,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) { - PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, DL); + PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not // express SCEVExpander's dependencies, such as LoopSimplify. 
Instead any diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 8559e63..cbdacad 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -42,7 +42,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Optional.h" - #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -51,27 +50,23 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" - #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/Verifier.h" - +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" - +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Transforms/Utils/UnrollLoop.h" - -#include "llvm/Pass.h" - #include <array> using namespace llvm; @@ -82,6 +77,9 @@ static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden, + cl::init(false)); + static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal", cl::Hidden, cl::init(10)); @@ -96,23 +94,41 @@ namespace { /// /// and /// -/// 2. a condition that is provably true for some range of values taken by the -/// containing loop's induction variable. -/// -/// Currently all inductive range checks are branches conditional on an -/// expression of the form +/// 2. a condition that is provably true for some contiguous range of values +/// taken by the containing loop's induction variable. /// -/// 0 <= (Offset + Scale * I) < Length -/// -/// where `I' is the canonical induction variable of a loop to which Offset and -/// Scale are loop invariant, and Length is >= 0. Currently the 'false' branch -/// is considered cold, looking at profiling data to verify that is a TODO. - class InductiveRangeCheck { + // Classifies a range check + enum RangeCheckKind : unsigned { + // Range check of the form "0 <= I". + RANGE_CHECK_LOWER = 1, + + // Range check of the form "I < L" where L is known positive. + RANGE_CHECK_UPPER = 2, + + // The logical and of the RANGE_CHECK_LOWER and RANGE_CHECK_UPPER + // conditions. + RANGE_CHECK_BOTH = RANGE_CHECK_LOWER | RANGE_CHECK_UPPER, + + // Unrecognized range check condition. 
+ RANGE_CHECK_UNKNOWN = (unsigned)-1 + }; + + static const char *rangeCheckKindToStr(RangeCheckKind); + const SCEV *Offset; const SCEV *Scale; Value *Length; BranchInst *Branch; + RangeCheckKind Kind; + + static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI, + ScalarEvolution &SE, Value *&Index, + Value *&Length); + + static InductiveRangeCheck::RangeCheckKind + parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition, + const SCEV *&Index, Value *&UpperLimit); InductiveRangeCheck() : Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { } @@ -124,13 +140,17 @@ public: void print(raw_ostream &OS) const { OS << "InductiveRangeCheck:\n"; + OS << " Kind: " << rangeCheckKindToStr(Kind) << "\n"; OS << " Offset: "; Offset->print(OS); OS << " Scale: "; Scale->print(OS); OS << " Length: "; - Length->print(OS); - OS << " Branch: "; + if (Length) + Length->print(OS); + else + OS << "(null)"; + OS << "\n Branch: "; getBranch()->print(OS); OS << "\n"; } @@ -207,160 +227,156 @@ char InductiveRangeCheckElimination::ID = 0; INITIALIZE_PASS(InductiveRangeCheckElimination, "irce", "Inductive range check elimination", false, false) -static bool IsLowerBoundCheck(Value *Check, Value *&IndexV) { - using namespace llvm::PatternMatch; +const char *InductiveRangeCheck::rangeCheckKindToStr( + InductiveRangeCheck::RangeCheckKind RCK) { + switch (RCK) { + case InductiveRangeCheck::RANGE_CHECK_UNKNOWN: + return "RANGE_CHECK_UNKNOWN"; - ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - Value *LHS = nullptr, *RHS = nullptr; + case InductiveRangeCheck::RANGE_CHECK_UPPER: + return "RANGE_CHECK_UPPER"; - if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) - return false; + case InductiveRangeCheck::RANGE_CHECK_LOWER: + return "RANGE_CHECK_LOWER"; + + case InductiveRangeCheck::RANGE_CHECK_BOTH: + return "RANGE_CHECK_BOTH"; + } + + llvm_unreachable("unknown range check type!"); +} + +/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` +/// cannot +/// be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and set +/// `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value +/// being +/// range checked, and set `Length` to the upper limit `Index` is being range +/// checked with if (and only if) the range check type is stronger or equal to +/// RANGE_CHECK_UPPER. 
+/// +InductiveRangeCheck::RangeCheckKind +InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, + ScalarEvolution &SE, Value *&Index, + Value *&Length) { + + auto IsNonNegativeAndNotLoopVarying = [&SE, L](Value *V) { + const SCEV *S = SE.getSCEV(V); + if (isa<SCEVCouldNotCompute>(S)) + return false; + + return SE.getLoopDisposition(S, L) == ScalarEvolution::LoopInvariant && + SE.isKnownNonNegative(S); + }; + + using namespace llvm::PatternMatch; + + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); switch (Pred) { default: - return false; + return RANGE_CHECK_UNKNOWN; case ICmpInst::ICMP_SLE: std::swap(LHS, RHS); // fallthrough case ICmpInst::ICMP_SGE: - if (!match(RHS, m_ConstantInt<0>())) - return false; - IndexV = LHS; - return true; + if (match(RHS, m_ConstantInt<0>())) { + Index = LHS; + return RANGE_CHECK_LOWER; + } + return RANGE_CHECK_UNKNOWN; case ICmpInst::ICMP_SLT: std::swap(LHS, RHS); // fallthrough case ICmpInst::ICMP_SGT: - if (!match(RHS, m_ConstantInt<-1>())) - return false; - IndexV = LHS; - return true; - } -} - -static bool IsUpperBoundCheck(Value *Check, Value *Index, Value *&UpperLimit) { - using namespace llvm::PatternMatch; - - ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - Value *LHS = nullptr, *RHS = nullptr; - - if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) - return false; + if (match(RHS, m_ConstantInt<-1>())) { + Index = LHS; + return RANGE_CHECK_LOWER; + } - switch (Pred) { - default: - return false; + if (IsNonNegativeAndNotLoopVarying(LHS)) { + Index = RHS; + Length = LHS; + return RANGE_CHECK_UPPER; + } + return RANGE_CHECK_UNKNOWN; - case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_ULT: std::swap(LHS, RHS); // fallthrough - case ICmpInst::ICMP_SLT: - if (LHS != Index) - return false; - UpperLimit = RHS; - return true; - case ICmpInst::ICMP_UGT: - std::swap(LHS, RHS); - // fallthrough - case ICmpInst::ICMP_ULT: - if (LHS != Index) - return false; - UpperLimit = RHS; - return true; + if (IsNonNegativeAndNotLoopVarying(LHS)) { + Index = RHS; + Length = LHS; + return RANGE_CHECK_BOTH; + } + return RANGE_CHECK_UNKNOWN; } + + llvm_unreachable("default clause returns!"); } -/// Split a condition into something semantically equivalent to (0 <= I < -/// Limit), both comparisons signed and Len loop invariant on L and positive. -/// On success, return true and set Index to I and UpperLimit to Limit. Return -/// false on failure (we may still write to UpperLimit and Index on failure). -/// It does not try to interpret I as a loop index. -/// -static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE, +/// Parses an arbitrary condition into a range check. `Length` is set only if +/// the range check is recognized to be `RANGE_CHECK_UPPER` or stronger. +InductiveRangeCheck::RangeCheckKind +InductiveRangeCheck::parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition, const SCEV *&Index, - Value *&UpperLimit) { - - // TODO: currently this catches some silly cases like comparing "%idx slt 1". - // Our transformations are still correct, but less likely to be profitable in - // those cases. We have to come up with some heuristics that pick out the - // range checks that are more profitable to clone a loop for. This function - // in general can be made more robust. 
- + Value *&Length) { using namespace llvm::PatternMatch; Value *A = nullptr; Value *B = nullptr; - ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - - // In these early checks we assume that the matched UpperLimit is positive. - // We'll verify that fact later, before returning true. if (match(Condition, m_And(m_Value(A), m_Value(B)))) { - Value *IndexV = nullptr; - Value *ExpectedUpperBoundCheck = nullptr; + Value *IndexA = nullptr, *IndexB = nullptr; + Value *LengthA = nullptr, *LengthB = nullptr; + ICmpInst *ICmpA = dyn_cast<ICmpInst>(A), *ICmpB = dyn_cast<ICmpInst>(B); - if (IsLowerBoundCheck(A, IndexV)) - ExpectedUpperBoundCheck = B; - else if (IsLowerBoundCheck(B, IndexV)) - ExpectedUpperBoundCheck = A; - else - return false; + if (!ICmpA || !ICmpB) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit)) - return false; + auto RCKindA = parseRangeCheckICmp(L, ICmpA, SE, IndexA, LengthA); + auto RCKindB = parseRangeCheckICmp(L, ICmpB, SE, IndexB, LengthB); - Index = SE.getSCEV(IndexV); + if (RCKindA == InductiveRangeCheck::RANGE_CHECK_UNKNOWN || + RCKindB == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - if (isa<SCEVCouldNotCompute>(Index)) - return false; + if (IndexA != IndexB) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) { - switch (Pred) { - default: - return false; + if (LengthA != nullptr && LengthB != nullptr && LengthA != LengthB) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - case ICmpInst::ICMP_SGT: - std::swap(A, B); - // fall through - case ICmpInst::ICMP_SLT: - UpperLimit = B; - Index = SE.getSCEV(A); - if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index)) - return false; - break; + Index = SE.getSCEV(IndexA); + if (isa<SCEVCouldNotCompute>(Index)) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; - case ICmpInst::ICMP_UGT: - std::swap(A, B); - // fall through - case ICmpInst::ICMP_ULT: - UpperLimit = B; - Index = SE.getSCEV(A); - if (isa<SCEVCouldNotCompute>(Index)) - return false; - break; - } - } else { - return false; + Length = LengthA == nullptr ? 
LengthB : LengthA; + + return (InductiveRangeCheck::RangeCheckKind)(RCKindA | RCKindB); } - const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit); - if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) || - !SE.isKnownNonNegative(UpperLimitSCEV)) - return false; + if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) { + Value *IndexVal = nullptr; - if (SE.getLoopDisposition(UpperLimitSCEV, L) != - ScalarEvolution::LoopInvariant) { - DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName() - << " "; - dbgs() << " UpperLimit is not loop invariant: " - << UpperLimit->getName() << "\n";); - return false; + auto RCKind = parseRangeCheckICmp(L, ICI, SE, IndexVal, Length); + + if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; + + Index = SE.getSCEV(IndexVal); + if (isa<SCEVCouldNotCompute>(Index)) + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; + + return RCKind; } - return true; + return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; } @@ -380,10 +396,15 @@ InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, Value *Length = nullptr; const SCEV *IndexSCEV = nullptr; - if (!SplitRangeCheckCondition(L, SE, BI->getCondition(), IndexSCEV, Length)) + auto RCKind = InductiveRangeCheck::parseRangeCheck(L, SE, BI->getCondition(), + IndexSCEV, Length); + + if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) return nullptr; - assert(IndexSCEV && Length && "contract with SplitRangeCheckCondition!"); + assert(IndexSCEV && "contract with SplitRangeCheckCondition!"); + assert((!(RCKind & InductiveRangeCheck::RANGE_CHECK_UPPER) || Length) && + "contract with SplitRangeCheckCondition!"); const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV); bool IsAffineIndex = @@ -397,6 +418,7 @@ InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, IRC->Offset = IndexAddRec->getStart(); IRC->Scale = IndexAddRec->getStepRecurrence(SE); IRC->Branch = BI; + IRC->Kind = RCKind; return IRC; } @@ -685,30 +707,40 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP } } - auto IsInductionVar = [&SE](const SCEVAddRecExpr *AR, bool &IsIncreasing) { - if (!AR->isAffine()) - return false; + auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) { + if (AR->getNoWrapFlags(SCEV::FlagNSW)) + return true; IntegerType *Ty = cast<IntegerType>(AR->getType()); IntegerType *WideTy = IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2); - // Currently we only work with induction variables that have been proved to - // not wrap. This restriction can potentially be lifted in the future. - const SCEVAddRecExpr *ExtendAfterOp = dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); - if (!ExtendAfterOp) - return false; + if (ExtendAfterOp) { + const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); + const SCEV *ExtendedStep = + SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); - const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); - const SCEV *ExtendedStep = - SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); + bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart && + ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; + + if (NoSignedWrap) + return true; + } + + // We may have proved this when computing the sign extension above. 
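HasNoSignedWrap above uses the usual SCEV trick: sign-extend the recurrence to twice the width and check that this agrees with stepping the sign-extended start by the sign-extended stride. A self-contained illustration of the underlying arithmetic in plain integers (an i8 induction variable with start 100 and step 30, chosen as an example):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t Start = 100, Step = 30;
      // Step in the narrow type, then widen the result: the narrow add wraps,
      // since 100 + 30 = 130 does not fit in a signed 8-bit value and becomes
      // -126 under the usual two's complement conversion.
      int16_t NarrowThenWiden = static_cast<int8_t>(Start + Step);
      // Widen first, then step: this is what the sign-extended recurrence
      // built from the sign-extended start and step computes (130).
      int16_t WidenThenStep = static_cast<int16_t>(Start) + Step;
      // The two disagree, so the narrow recurrence signed-wraps; only when
      // they agree on every iteration can the addrec be treated as NSW.
      std::printf("%d vs %d\n", NarrowThenWiden, WidenThenStep);
      return 0;
    }
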
+ return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap; + }; + + auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) { + if (!AR->isAffine()) + return false; - bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart && - ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; + // Currently we only work with induction variables that have been proved to + // not wrap. This restriction can potentially be lifted in the future. - if (!NoSignedWrap) + if (!HasNoSignedWrap(AR)) return false; if (const SCEVConstant *StepExpr = @@ -791,9 +823,10 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP "loop variant exit count doesn't make sense!"); assert(!L.contains(LatchExit) && "expected an exit block!"); - - Value *IndVarStartV = SCEVExpander(SE, "irce").expandCodeFor( - IndVarStart, IndVarTy, &*Preheader->rbegin()); + const DataLayout &DL = Preheader->getModule()->getDataLayout(); + Value *IndVarStartV = + SCEVExpander(SE, DL, "irce") + .expandCodeFor(IndVarStart, IndVarTy, &*Preheader->rbegin()); IndVarStartV->setName("indvar.start"); LoopStructure Result; @@ -831,12 +864,35 @@ LoopConstrainer::calculateSubRanges() const { const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt); bool Increasing = MainLoopStructure.IndVarIncreasing; + // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the // range of values the induction variable takes. - const SCEV *Smallest = - Increasing ? Start : SE.getAddExpr(End, SE.getSCEV(One)); - const SCEV *Greatest = - Increasing ? End : SE.getAddExpr(Start, SE.getSCEV(One)); + + const SCEV *Smallest = nullptr, *Greatest = nullptr; + + if (Increasing) { + Smallest = Start; + Greatest = End; + } else { + // These two computations may sign-overflow. Here is why that is okay: + // + // We know that the induction variable does not sign-overflow on any + // iteration except the last one, and it starts at `Start` and ends at + // `End`, decrementing by one every time. + // + // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the + // induction variable is decreasing we know that that the smallest value + // the loop body is actually executed with is `INT_SMIN` == `Smallest`. + // + // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In + // that case, `Clamp` will always return `Smallest` and + // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`) + // will be an empty range. Returning an empty range is always safe. + // + + Smallest = SE.getAddExpr(End, SE.getSCEV(One)); + Greatest = SE.getAddExpr(Start, SE.getSCEV(One)); + } auto Clamp = [this, Smallest, Greatest](const SCEV *S) { return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S)); @@ -1132,7 +1188,7 @@ bool LoopConstrainer::run() { IntegerType *IVTy = cast<IntegerType>(MainLoopStructure.IndVarNext->getType()); - SCEVExpander Expander(SE, "irce"); + SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce"); Instruction *InsertPt = OriginalPreheader->getTerminator(); // It would have been better to make `PreLoop' and `PostLoop' @@ -1293,8 +1349,19 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE, const SCEV *M = SE.getMinusSCEV(C, A); const SCEV *Begin = SE.getNegativeSCEV(M); - const SCEV *End = SE.getMinusSCEV(SE.getSCEV(getLength()), M); + const SCEV *UpperLimit = nullptr; + + // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L". + // We can potentially do much better here. 
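The comment above describes how a lower-bound-only check is strengthened: when no Length value exists, the largest signed value of the induction variable's type stands in as the upper limit, so "0 <= I" is treated as "0 <= I < SINT_MAX" via APInt::getSignedMaxValue. A small, LLVM-free illustration of the constant being synthesized (bit width 32 chosen as an example):

    #include <cstdint>
    #include <limits>

    // Mirrors APInt::getSignedMaxValue(BitWidth) for widths up to 63 bits:
    // a zero sign bit followed by all ones.
    static int64_t syntheticUpperLimit(unsigned BitWidth) {
      return (int64_t{1} << (BitWidth - 1)) - 1;
    }

    int main() {
      // For an i32 induction variable the missing length becomes 0x7fffffff.
      return syntheticUpperLimit(32) == std::numeric_limits<int32_t>::max() ? 0 : 1;
    }
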
+ if (Value *V = getLength()) { + UpperLimit = SE.getSCEV(V); + } else { + assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!"); + unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth(); + UpperLimit = SE.getConstant(APInt::getSignedMaxValue(BitWidth)); + } + const SCEV *End = SE.getMinusSCEV(UpperLimit, M); return InductiveRangeCheck::Range(Begin, End); } @@ -1344,12 +1411,18 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { if (RangeChecks.empty()) return false; - DEBUG(dbgs() << "irce: looking at loop "; L->print(dbgs()); - dbgs() << "irce: loop has " << RangeChecks.size() - << " inductive range checks: \n"; - for (InductiveRangeCheck *IRC : RangeChecks) - IRC->print(dbgs()); - ); + auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) { + OS << "irce: looking at loop "; L->print(OS); + OS << "irce: loop has " << RangeChecks.size() + << " inductive range checks: \n"; + for (InductiveRangeCheck *IRC : RangeChecks) + IRC->print(OS); + }; + + DEBUG(PrintRecognizedRangeChecks(dbgs())); + + if (PrintRangeChecks) + PrintRecognizedRangeChecks(errs()); const char *FailureReason = nullptr; Optional<LoopStructure> MaybeLoopStructure = diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 8b54abd..83ac915 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" @@ -32,7 +33,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -78,7 +78,6 @@ namespace { /// revectored to the false side of the second if. /// class JumpThreading : public FunctionPass { - const DataLayout *DL; TargetLibraryInfo *TLI; LazyValueInfo *LVI; #ifdef NDEBUG @@ -159,8 +158,6 @@ bool JumpThreading::runOnFunction(Function &F) { return false; DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); LVI = &getAnalysis<LazyValueInfo>(); @@ -505,6 +502,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, assert(Preference == WantInteger && "Compares only produce integers"); PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0)); if (PN && PN->getParent() == BB) { + const DataLayout &DL = PN->getModule()->getDataLayout(); // We can do this simplification if any comparisons fold to true or false. // See if any do. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -709,7 +707,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Run constant folding to see if we can reduce the condition to a simple // constant. 
if (Instruction *I = dyn_cast<Instruction>(Condition)) { - Value *SimpleVal = ConstantFoldInstruction(I, DL, TLI); + Value *SimpleVal = + ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI); if (SimpleVal) { I->replaceAllUsesWith(SimpleVal); I->eraseFromParent(); @@ -1521,7 +1520,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. - SimplifyInstructionsInBlock(NewBB, DL, TLI); + SimplifyInstructionsInBlock(NewBB, TLI); // Threaded an edge! ++NumThreads; @@ -1586,7 +1585,6 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, BasicBlock::iterator BI = BB->begin(); for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); - // Clone the non-phi instructions of BB into PredBB, keeping track of the // mapping and using it to remap operands in the cloned instructions. for (; BI != BB->end(); ++BI) { @@ -1603,7 +1601,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // If this instruction can be simplified after the operands are updated, // just use the simplified value instead. This frequently happens due to // phi translation. - if (Value *IV = SimplifyInstruction(New, DL)) { + if (Value *IV = + SimplifyInstruction(New, BB->getModule()->getDataLayout())) { delete New; ValueMapping[BI] = IV; } else { diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 14af38b..1333b02 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -52,7 +53,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -76,21 +76,21 @@ static bool isNotUsedInLoop(Instruction &I, Loop *CurLoop); static bool hoist(Instruction &I, BasicBlock *Preheader); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST ); -static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT, - Loop *CurLoop, LICMSafetyInfo * SafetyInfo); -static bool isSafeToExecuteUnconditionally(Instruction &Inst,DominatorTree *DT, - const DataLayout *DL, Loop *CurLoop, - LICMSafetyInfo * SafetyInfo); +static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT, + Loop *CurLoop, LICMSafetyInfo *SafetyInfo); +static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT, + Loop *CurLoop, + LICMSafetyInfo *SafetyInfo); static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, const AAMDNodes &AAInfo, AliasSetTracker *CurAST); static Instruction *CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, LoopInfo *LI); -static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, - DominatorTree *DT, const DataLayout *DL, - Loop *CurLoop, AliasSetTracker *CurAST, - LICMSafetyInfo * SafetyInfo); +static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, + DominatorTree *DT, Loop 
*CurLoop, + AliasSetTracker *CurAST, + LICMSafetyInfo *SafetyInfo); namespace { struct LICM : public LoopPass { @@ -130,7 +130,6 @@ namespace { LoopInfo *LI; // Current LoopInfo DominatorTree *DT; // Dominator Tree for the current Loop. - const DataLayout *DL; // DataLayout for constant folding. TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. // State that is updated as we process loops. @@ -181,8 +180,6 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -235,10 +232,10 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // instructions, we perform another pass to hoist them out of the loop. // if (L->hasDedicatedExits()) - Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, DL, TLI, - CurLoop, CurAST, &SafetyInfo); + Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop, + CurAST, &SafetyInfo); if (Preheader) - Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, DL, TLI, + Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop, CurAST, &SafetyInfo); // Now that all loop invariants have been removed from the loop, promote any @@ -291,10 +288,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { /// first order w.r.t the DominatorTree. This allows us to visit uses before /// definitions, allowing us to sink a loop body in one pass without iteration. /// -bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, const DataLayout *DL, - TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LICMSafetyInfo * SafetyInfo) { +bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, + DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, + AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && @@ -311,8 +307,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= sinkRegion(Children[i], AA, LI, DT, DL, TLI, CurLoop, - CurAST, SafetyInfo); + Changed |= + sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB,CurLoop,LI)) return Changed; @@ -336,8 +332,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. // - if (isNotUsedInLoop(I, CurLoop) && - canSinkOrHoistInst(I, AA, DT, DL, CurLoop, CurAST, SafetyInfo)) { + if (isNotUsedInLoop(I, CurLoop) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) { ++II; Changed |= sink(I, LI, DT, CurLoop, CurAST); } @@ -350,10 +346,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. 
/// -bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, const DataLayout *DL, - TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { +bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, + DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, + AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && CurAST != nullptr && @@ -372,7 +367,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to just // fold it. - if (Constant *C = ConstantFoldInstruction(&I, DL, TLI)) { + if (Constant *C = ConstantFoldInstruction( + &I, I.getModule()->getDataLayout(), TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); CurAST->copyValue(&I, C); CurAST->deleteValue(&I); @@ -385,16 +381,16 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. // - if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, DL, CurLoop, CurAST, SafetyInfo) && - isSafeToExecuteUnconditionally(I, DT, DL, CurLoop, SafetyInfo)) + if (CurLoop->hasLoopInvariantOperands(&I) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo) && + isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo)) Changed |= hoist(I, CurLoop->getLoopPreheader()); } const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= hoistRegion(Children[i], AA, LI, DT, DL, TLI, CurLoop, - CurAST, SafetyInfo); + Changed |= + hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); return Changed; } @@ -424,10 +420,9 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) { /// canSinkOrHoistInst - Return true if the hoister and sinker can handle this /// instruction. /// -bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, - DominatorTree *DT, const DataLayout *DL, - Loop *CurLoop, AliasSetTracker *CurAST, - LICMSafetyInfo * SafetyInfo) { +bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, + Loop *CurLoop, AliasSetTracker *CurAST, + LICMSafetyInfo *SafetyInfo) { // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { if (!LI->isUnordered()) @@ -487,7 +482,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, !isa<InsertValueInst>(I)) return false; - return isSafeToExecuteUnconditionally(I, DT, DL, CurLoop, SafetyInfo); + return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo); } /// Returns true if a PHINode is a trivially replaceable with an @@ -643,10 +638,10 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) { /// or if it is a trapping instruction and is guaranteed to execute. /// static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT, - const DataLayout *DL, Loop *CurLoop, - LICMSafetyInfo * SafetyInfo) { + Loop *CurLoop, + LICMSafetyInfo *SafetyInfo) { // If it is not a trapping instruction, it is always safe to hoist. 
- if (isSafeToSpeculativelyExecute(&Inst, DL)) + if (isSafeToSpeculativelyExecute(&Inst)) return true; return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 11e4d76..1f33f72 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -12,17 +12,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" - #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/TargetFolder.h" -#include "llvm/Pass.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -52,13 +52,10 @@ struct LoadPOPPair { class LoadCombine : public BasicBlockPass { LLVMContext *C; - const DataLayout *DL; AliasAnalysis *AA; public: - LoadCombine() - : BasicBlockPass(ID), - C(nullptr), DL(nullptr), AA(nullptr) { + LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) { initializeSROAPass(*PassRegistry::getPassRegistry()); } @@ -85,12 +82,6 @@ private: bool LoadCombine::doInitialization(Function &F) { DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); C = &F.getContext(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) { - DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n"); - return false; - } - DL = &DLP->getDataLayout(); return true; } @@ -100,9 +91,10 @@ PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) { POP.Offset = 0; while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) { if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) { - unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType()); + auto &DL = LI.getModule()->getDataLayout(); + unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType()); APInt Offset(BitWidth, 0); - if (GEP->accumulateConstantOffset(*DL, Offset)) + if (GEP->accumulateConstantOffset(DL, Offset)) POP.Offset += Offset.getZExtValue(); else // Can't handle GEPs with variable indices. @@ -145,7 +137,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) { if (PrevOffset == -1ull) { BaseLoad = L.Load; PrevOffset = L.POP.Offset; - PrevSize = DL->getTypeStoreSize(L.Load->getType()); + PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( + L.Load->getType()); AggregateLoads.push_back(L); continue; } @@ -164,7 +157,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) { // FIXME: We may want to handle this case. 
continue; PrevOffset = L.POP.Offset; - PrevSize = DL->getTypeStoreSize(L.Load->getType()); + PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( + L.Load->getType()); AggregateLoads.push_back(L); } if (combineLoads(AggregateLoads)) @@ -215,7 +209,8 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) { for (const auto &L : Loads) { Builder->SetInsertPoint(L.Load); Value *V = Builder->CreateExtractInteger( - *DL, NewLoad, cast<IntegerType>(L.Load->getType()), + L.Load->getModule()->getDataLayout(), NewLoad, + cast<IntegerType>(L.Load->getType()), L.POP.Offset - Loads[0].POP.Offset, "combine.extract"); L.Load->replaceAllUsesWith(V); } @@ -225,13 +220,13 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) { } bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { - if (skipOptnoneFunction(BB) || !DL) + if (skipOptnoneFunction(BB)) return false; AA = &getAnalysis<AliasAnalysis>(); - IRBuilder<true, TargetFolder> - TheBuilder(BB.getContext(), TargetFolder(DL)); + IRBuilder<true, TargetFolder> TheBuilder( + BB.getContext(), TargetFolder(BB.getModule()->getDataLayout())); Builder = &TheBuilder; DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 243c624..7bc2917 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -47,6 +47,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" @@ -56,7 +57,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -130,7 +130,6 @@ namespace { class LoopIdiomRecognize : public LoopPass { Loop *CurLoop; - const DataLayout *DL; DominatorTree *DT; ScalarEvolution *SE; TargetLibraryInfo *TLI; @@ -139,7 +138,10 @@ namespace { static char ID; explicit LoopIdiomRecognize() : LoopPass(ID) { initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr; + DT = nullptr; + SE = nullptr; + TLI = nullptr; + TTI = nullptr; } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -179,14 +181,6 @@ namespace { AU.addRequired<TargetTransformInfoWrapperPass>(); } - const DataLayout *getDataLayout() { - if (DL) - return DL; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - return DL; - } - DominatorTree *getDominatorTree() { return DT ? DT : (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree()); @@ -625,10 +619,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() { if (BECst->getValue()->getValue() == 0) return false; - // We require target data for now. - if (!getDataLayout()) - return false; - // set DT (void)getDominatorTree(); @@ -742,7 +732,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { Value *StorePtr = SI->getPointerOperand(); // Reject stores that are so large that they overflow an unsigned. 
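  // In other words, the check below requires the stored type to be a whole
  // number of bytes and its size in bits to fit in 32 bits, presumably so the
  // byte count later derived from it cannot overflow an unsigned.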
- uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); + auto &DL = CurLoop->getHeader()->getModule()->getDataLayout(); + uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType()); if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) return false; @@ -917,7 +908,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; - + auto &DL = CurLoop->getHeader()->getModule()->getDataLayout(); unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); // If we're allowed to form a memset, and the stored value would be acceptable @@ -928,9 +919,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, CurLoop->isLoopInvariant(SplatValue)) { // Keep and use SplatValue. PatternValue = nullptr; - } else if (DestAS == 0 && - TLI->has(LibFunc::memset_pattern16) && - (PatternValue = getMemSetPatternValue(StoredVal, *DL))) { + } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) && + (PatternValue = getMemSetPatternValue(StoredVal, DL))) { // Don't create memset_pattern16s with address spaces. // It looks like we can use PatternValue! SplatValue = nullptr; @@ -945,7 +935,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE, "loop-idiom"); + SCEVExpander Expander(*SE, DL, "loop-idiom"); Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS); @@ -1005,7 +995,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, - GlobalValue::InternalLinkage, + GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern"); GV->setUnnamedAddr(true); // Ok to merge these. GV->setAlignment(16); @@ -1042,7 +1032,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE, "loop-idiom"); + const DataLayout &DL = Preheader->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "loop-idiom"); // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 6dc600e..e125026 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -77,8 +77,6 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? 
&DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( @@ -110,6 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { WorklistItem Item = VisitStack.pop_back_val(); BasicBlock *BB = Item.getPointer(); bool IsSubloopHeader = Item.getInt(); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); // Simplify instructions in the current basic block. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp new file mode 100644 index 0000000..f7626c5 --- /dev/null +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -0,0 +1,1154 @@ +//===- LoopInterchange.cpp - Loop interchange pass------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This Pass handles loop interchange transform. +// This pass interchanges loops to provide a more cache-friendly memory access +// patterns. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-interchange" + +namespace { + +typedef SmallVector<Loop *, 8> LoopVector; + +// TODO: Check if we can use a sparse matrix here. +typedef std::vector<std::vector<char>> CharMatrix; + +// Maximum number of dependencies that can be handled in the dependency matrix. +static const unsigned MaxMemInstrCount = 100; + +// Maximum loop depth supported. 
+static const unsigned MaxLoopNestDepth = 10; + +struct LoopInterchange; + +#ifdef DUMP_DEP_MATRICIES +void printDepMatrix(CharMatrix &DepMatrix) { + for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) { + std::vector<char> Vec = *I; + for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II) + DEBUG(dbgs() << *II << " "); + DEBUG(dbgs() << "\n"); + } +} +#endif + +bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, + DependenceAnalysis *DA) { + typedef SmallVector<Value *, 16> ValueVector; + ValueVector MemInstr; + + if (Level > MaxLoopNestDepth) { + DEBUG(dbgs() << "Cannot handle loops of depth greater than " + << MaxLoopNestDepth << "\n"); + return false; + } + + // For each block. + for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end(); + BB != BE; ++BB) { + // Scan the BB and collect legal loads and stores. + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; + ++I) { + Instruction *Ins = dyn_cast<Instruction>(I); + if (!Ins) + return false; + LoadInst *Ld = dyn_cast<LoadInst>(I); + StoreInst *St = dyn_cast<StoreInst>(I); + if (!St && !Ld) + continue; + if (Ld && !Ld->isSimple()) + return false; + if (St && !St->isSimple()) + return false; + MemInstr.push_back(I); + } + } + + DEBUG(dbgs() << "Found " << MemInstr.size() + << " Loads and Stores to analyze\n"); + + ValueVector::iterator I, IE, J, JE; + + for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { + for (J = I, JE = MemInstr.end(); J != JE; ++J) { + std::vector<char> Dep; + Instruction *Src = dyn_cast<Instruction>(*I); + Instruction *Des = dyn_cast<Instruction>(*J); + if (Src == Des) + continue; + if (isa<LoadInst>(Src) && isa<LoadInst>(Des)) + continue; + if (auto D = DA->depends(Src, Des, true)) { + DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des + << "\n"); + if (D->isFlow()) { + // TODO: Handle Flow dependence.Check if it is sufficient to populate + // the Dependence Matrix with the direction reversed. + DEBUG(dbgs() << "Flow dependence not handled"); + return false; + } + if (D->isAnti()) { + DEBUG(dbgs() << "Found Anti dependence \n"); + unsigned Levels = D->getLevels(); + char Direction; + for (unsigned II = 1; II <= Levels; ++II) { + const SCEV *Distance = D->getDistance(II); + const SCEVConstant *SCEVConst = + dyn_cast_or_null<SCEVConstant>(Distance); + if (SCEVConst) { + const ConstantInt *CI = SCEVConst->getValue(); + if (CI->isNegative()) + Direction = '<'; + else if (CI->isZero()) + Direction = '='; + else + Direction = '>'; + Dep.push_back(Direction); + } else if (D->isScalar(II)) { + Direction = 'S'; + Dep.push_back(Direction); + } else { + unsigned Dir = D->getDirection(II); + if (Dir == Dependence::DVEntry::LT || + Dir == Dependence::DVEntry::LE) + Direction = '<'; + else if (Dir == Dependence::DVEntry::GT || + Dir == Dependence::DVEntry::GE) + Direction = '>'; + else if (Dir == Dependence::DVEntry::EQ) + Direction = '='; + else + Direction = '*'; + Dep.push_back(Direction); + } + } + while (Dep.size() != Level) { + Dep.push_back('I'); + } + + DepMatrix.push_back(Dep); + if (DepMatrix.size() > MaxMemInstrCount) { + DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount + << " dependencies inside loop\n"); + return false; + } + } + } + } + } + + // We don't have a DepMatrix to check legality return false + if (DepMatrix.size() == 0) + return false; + return true; +} + +// A loop is moved from index 'from' to an index 'to'. Update the Dependence +// matrix by exchanging the two columns. 
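// Each row of the matrix records one dependence, with one direction character
// per loop level ('<', '>', '=', 'S' for scalar, 'I' for independent, '*' for
// unknown). e.g. for a hypothetical 2-deep nest with the single row
//   ['<', '=']
// moving the inner loop (column 1) to the outer position (column 0) swaps the
// two columns, giving ['=', '<'] -- the same dependence expressed in the new
// loop order, still legal because the leftmost non-'=' entry is '<'.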
+void interChangeDepedencies(CharMatrix &DepMatrix, unsigned FromIndx, + unsigned ToIndx) { + unsigned numRows = DepMatrix.size(); + for (unsigned i = 0; i < numRows; ++i) { + char TmpVal = DepMatrix[i][ToIndx]; + DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx]; + DepMatrix[i][FromIndx] = TmpVal; + } +} + +// Checks if outermost non '=','S'or'I' dependence in the dependence matrix is +// '>' +bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, + unsigned Column) { + for (unsigned i = 0; i <= Column; ++i) { + if (DepMatrix[Row][i] == '<') + return false; + if (DepMatrix[Row][i] == '>') + return true; + } + // All dependencies were '=','S' or 'I' + return false; +} + +// Checks if no dependence exist in the dependency matrix in Row before Column. +bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, + unsigned Column) { + for (unsigned i = 0; i < Column; ++i) { + if (DepMatrix[Row][i] != '=' || DepMatrix[Row][i] != 'S' || + DepMatrix[Row][i] != 'I') + return false; + } + return true; +} + +bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, + unsigned OuterLoopId, char InnerDep, char OuterDep) { + + if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) + return false; + + if (InnerDep == OuterDep) + return true; + + // It is legal to interchange if and only if after interchange no row has a + // '>' direction as the leftmost non-'='. + + if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I') + return true; + + if (InnerDep == '<') + return true; + + if (InnerDep == '>') { + // If OuterLoopId represents outermost loop then interchanging will make the + // 1st dependency as '>' + if (OuterLoopId == 0) + return false; + + // If all dependencies before OuterloopId are '=','S'or 'I'. Then + // interchanging will result in this row having an outermost non '=' + // dependency of '>' + if (!containsNoDependence(DepMatrix, Row, OuterLoopId)) + return true; + } + + return false; +} + +// Checks if it is legal to interchange 2 loops. +// [Theorm] A permutation of the loops in a perfect nest is legal if and only if +// the direction matrix, after the same permutation is applied to its columns, +// has no ">" direction as the leftmost non-"=" direction in any row. +bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, + unsigned OuterLoopId) { + + unsigned NumRows = DepMatrix.size(); + // For each row check if it is valid to interchange. + for (unsigned Row = 0; Row < NumRows; ++Row) { + char InnerDep = DepMatrix[Row][InnerLoopId]; + char OuterDep = DepMatrix[Row][OuterLoopId]; + if (InnerDep == '*' || OuterDep == '*') + return false; + else if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, + OuterDep)) + return false; + } + return true; +} + +static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { + + DEBUG(dbgs() << "Calling populateWorklist called\n"); + LoopVector LoopList; + Loop *CurrentLoop = &L; + std::vector<Loop *> vec = CurrentLoop->getSubLoopsVector(); + while (vec.size() != 0) { + // The current loop has multiple subloops in it hence it is not tightly + // nested. + // Discard all loops above it added into Worklist. 
+ if (vec.size() != 1) { + LoopList.clear(); + return; + } + LoopList.push_back(CurrentLoop); + CurrentLoop = *(vec.begin()); + vec = CurrentLoop->getSubLoopsVector(); + } + LoopList.push_back(CurrentLoop); + V.push_back(LoopList); +} + +static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { + PHINode *InnerIndexVar = L->getCanonicalInductionVariable(); + if (InnerIndexVar) + return InnerIndexVar; + if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr) + return nullptr; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *PhiVar = cast<PHINode>(I); + Type *PhiTy = PhiVar->getType(); + if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && + !PhiTy->isPointerTy()) + return nullptr; + const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar)); + if (!AddRec || !AddRec->isAffine()) + continue; + const SCEV *Step = AddRec->getStepRecurrence(*SE); + const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); + if (!C) + continue; + // Found the induction variable. + // FIXME: Handle loops with more than one induction variable. Note that, + // currently, legality makes sure we have only one induction variable. + return PhiVar; + } + return nullptr; +} + +/// LoopInterchangeLegality checks if it is legal to interchange the loop. +class LoopInterchangeLegality { +public: + LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, + LoopInterchange *Pass) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass) {} + + /// Check if the loops can be interchanged. + bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, + CharMatrix &DepMatrix); + /// Check if the loop structure is understood. We do not handle triangular + /// loops for now. + bool isLoopStructureUnderstood(PHINode *InnerInductionVar); + + bool currentLimitations(); + +private: + bool tightlyNested(Loop *Outer, Loop *Inner); + + Loop *OuterLoop; + Loop *InnerLoop; + + /// Scev analysis. + ScalarEvolution *SE; + LoopInterchange *CurrentPass; +}; + +/// LoopInterchangeProfitability checks if it is profitable to interchange the +/// loop. +class LoopInterchangeProfitability { +public: + LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} + + /// Check if the loop interchange is profitable + bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, + CharMatrix &DepMatrix); + +private: + int getInstrOrderCost(); + + Loop *OuterLoop; + Loop *InnerLoop; + + /// Scev analysis. + ScalarEvolution *SE; +}; + +/// LoopInterchangeTransform interchanges the loop +class LoopInterchangeTransform { +public: + LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, + LoopInfo *LI, DominatorTree *DT, + LoopInterchange *Pass, BasicBlock *LoopNestExit) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), + LoopExit(LoopNestExit) {} + + /// Interchange OuterLoop and InnerLoop. + bool transform(); + void restructureLoops(Loop *InnerLoop, Loop *OuterLoop); + void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); + +private: + void splitInnerLoopLatch(Instruction *); + void splitOuterLoopLatch(); + void splitInnerLoopHeader(); + bool adjustLoopLinks(); + void adjustLoopPreheaders(); + void adjustOuterLoopPreheader(); + void adjustInnerLoopPreheader(); + bool adjustLoopBranches(); + + Loop *OuterLoop; + Loop *InnerLoop; + + /// Scev analysis. 
+ ScalarEvolution *SE; + LoopInfo *LI; + DominatorTree *DT; + BasicBlock *LoopExit; +}; + +// Main LoopInterchange Pass +struct LoopInterchange : public FunctionPass { + static char ID; + ScalarEvolution *SE; + LoopInfo *LI; + DependenceAnalysis *DA; + DominatorTree *DT; + LoopInterchange() + : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) { + initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<ScalarEvolution>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addRequired<DependenceAnalysis>(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + } + + bool runOnFunction(Function &F) override { + SE = &getAnalysis<ScalarEvolution>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DA = &getAnalysis<DependenceAnalysis>(); + auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + // Build up a worklist of loop pairs to analyze. + SmallVector<LoopVector, 8> Worklist; + + for (Loop *L : *LI) + populateWorklist(*L, Worklist); + + DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n"); + bool Changed = true; + while (!Worklist.empty()) { + LoopVector LoopList = Worklist.pop_back_val(); + Changed = processLoopList(LoopList); + } + return Changed; + } + + bool isComputableLoopNest(LoopVector LoopList) { + for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) { + Loop *L = *I; + const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); + if (ExitCountOuter == SE->getCouldNotCompute()) { + DEBUG(dbgs() << "Couldn't compute Backedge count\n"); + return false; + } + if (L->getNumBackEdges() != 1) { + DEBUG(dbgs() << "NumBackEdges is not equal to 1\n"); + return false; + } + if (!L->getExitingBlock()) { + DEBUG(dbgs() << "Loop Doesn't have unique exit block\n"); + return false; + } + } + return true; + } + + unsigned selectLoopForInterchange(LoopVector LoopList) { + // TODO: Add a better heuristic to select the loop to be interchanged based + // on the dependece matrix. Currently we select the innermost loop. + return LoopList.size() - 1; + } + + bool processLoopList(LoopVector LoopList) { + bool Changed = false; + bool containsLCSSAPHI = false; + CharMatrix DependencyMatrix; + if (LoopList.size() < 2) { + DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); + return false; + } + if (!isComputableLoopNest(LoopList)) { + DEBUG(dbgs() << "Not vaild loop candidate for interchange\n"); + return false; + } + Loop *OuterMostLoop = *(LoopList.begin()); + + DEBUG(dbgs() << "Processing LoopList of size = " << LoopList.size() + << "\n"); + + if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(), + OuterMostLoop, DA)) { + DEBUG(dbgs() << "Populating Dependency matrix failed\n"); + return false; + } +#ifdef DUMP_DEP_MATRICIES + DEBUG(dbgs() << "Dependence before inter change \n"); + printDepMatrix(DependencyMatrix); +#endif + + BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch(); + BranchInst *OuterMostLoopLatchBI = + dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator()); + if (!OuterMostLoopLatchBI) + return false; + + // Since we currently do not handle LCSSA PHI's any failure in loop + // condition will now branch to LoopNestExit. + // TODO: This should be removed once we handle LCSSA PHI nodes. + + // Get the Outermost loop exit. 
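    // The code below picks, out of the outermost latch branch's successors,
    // the one that is not the outermost header (the header successor is the
    // backedge), e.g.
    //   br i1 %cond, label %outer.header, label %nest.exit  ; names illustrative
    // so %nest.exit becomes LoopNestExit.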
+ BasicBlock *LoopNestExit; + if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader()) + LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1); + else + LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0); + + for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) { + Loop *L = *I; + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Header = L->getHeader(); + if (Latch && Latch != Header && isa<PHINode>(Latch->begin())) { + containsLCSSAPHI = true; + break; + } + } + + // TODO: Handle lcssa PHI's. Currently LCSSA PHI's are not handled. Handle + // the same by splitting the loop latch and adjusting loop links + // accordingly. + if (containsLCSSAPHI) + return false; + + unsigned SelecLoopId = selectLoopForInterchange(LoopList); + // Move the selected loop outwards to the best posible position. + for (unsigned i = SelecLoopId; i > 0; i--) { + bool Interchanged = + processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); + if (!Interchanged) + return Changed; + // Loops interchanged reflect the same in LoopList + std::swap(LoopList[i - 1], LoopList[i]); + + // Update the DependencyMatrix + interChangeDepedencies(DependencyMatrix, i, i - 1); + +#ifdef DUMP_DEP_MATRICIES + DEBUG(dbgs() << "Dependence after inter change \n"); + printDepMatrix(DependencyMatrix); +#endif + Changed |= Interchanged; + } + return Changed; + } + + bool processLoop(LoopVector LoopList, unsigned InnerLoopId, + unsigned OuterLoopId, BasicBlock *LoopNestExit, + std::vector<std::vector<char>> &DependencyMatrix) { + + DEBUG(dbgs() << "Processing Innder Loop Id = " << InnerLoopId + << " and OuterLoopId = " << OuterLoopId << "\n"); + Loop *InnerLoop = LoopList[InnerLoopId]; + Loop *OuterLoop = LoopList[OuterLoopId]; + + LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this); + if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { + DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n"); + return false; + } + DEBUG(dbgs() << "Loops are legal to interchange\n"); + LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE); + if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { + DEBUG(dbgs() << "Interchanging Loops not profitable\n"); + return false; + } + + LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this, + LoopNestExit); + LIT.transform(); + DEBUG(dbgs() << "Loops interchanged\n"); + return true; + } +}; + +} // end of namespace + +static bool containsUnsafeInstructions(BasicBlock *BB) { + for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { + if (I->mayHaveSideEffects() || I->mayReadFromMemory()) + return true; + } + return false; +} + +bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { + BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + + DEBUG(dbgs() << "Checking if Loops are Tightly Nested\n"); + + // A perfectly nested loop will not have any branch in between the outer and + // inner block i.e. outer header will branch to either inner preheader and + // outerloop latch. 
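  // e.g. a tightly nested pair looks like
  //   for (int i = 0; i < N; ++i)
  //     for (int j = 0; j < M; ++j)
  //       A[j][i] = A[j][i] + k;
  // where the outer header branches only to the inner preheader or the outer
  // latch; any other code between the two loops makes the nest imperfect.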
+ BranchInst *outerLoopHeaderBI = + dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); + if (!outerLoopHeaderBI) + return false; + unsigned num = outerLoopHeaderBI->getNumSuccessors(); + for (unsigned i = 0; i < num; i++) { + if (outerLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader && + outerLoopHeaderBI->getSuccessor(i) != OuterLoopLatch) + return false; + } + + DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n"); + // We do not have any basic block in between now make sure the outer header + // and outer loop latch doesnt contain any unsafe instructions. + if (containsUnsafeInstructions(OuterLoopHeader) || + containsUnsafeInstructions(OuterLoopLatch)) + return false; + + DEBUG(dbgs() << "Loops are perfectly nested \n"); + // We have a perfect loop nest. + return true; +} + +static unsigned getPHICount(BasicBlock *BB) { + unsigned PhiCount = 0; + for (auto I = BB->begin(); isa<PHINode>(I); ++I) + PhiCount++; + return PhiCount; +} + +bool LoopInterchangeLegality::isLoopStructureUnderstood( + PHINode *InnerInduction) { + + unsigned Num = InnerInduction->getNumOperands(); + BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); + for (unsigned i = 0; i < Num; ++i) { + Value *Val = InnerInduction->getOperand(i); + if (isa<Constant>(Val)) + continue; + Instruction *I = dyn_cast<Instruction>(Val); + if (!I) + return false; + // TODO: Handle triangular loops. + // e.g. for(int i=0;i<N;i++) + // for(int j=i;j<N;j++) + unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); + if (InnerInduction->getIncomingBlock(IncomBlockIndx) == + InnerLoopPreheader && + !OuterLoop->isLoopInvariant(I)) { + return false; + } + } + return true; +} + +// This function indicates the current limitations in the transform as a result +// of which we do not proceed. +bool LoopInterchangeLegality::currentLimitations() { + + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); + BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); + BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + + PHINode *InnerInductionVar; + PHINode *OuterInductionVar; + + // We currently handle only 1 induction variable inside the loop. We also do + // not handle reductions as of now. + if (getPHICount(InnerLoopHeader) > 1) + return true; + + if (getPHICount(OuterLoopHeader) > 1) + return true; + + InnerInductionVar = getInductionVariable(InnerLoop, SE); + OuterInductionVar = getInductionVariable(OuterLoop, SE); + + if (!OuterInductionVar || !InnerInductionVar) { + DEBUG(dbgs() << "Induction variable not found\n"); + return true; + } + + // TODO: Triangular loops are not handled for now. + if (!isLoopStructureUnderstood(InnerInductionVar)) { + DEBUG(dbgs() << "Loop structure not understood by pass\n"); + return true; + } + + // TODO: Loops with LCSSA PHI's are currently not handled. + if (isa<PHINode>(OuterLoopLatch->begin())) { + DEBUG(dbgs() << "Found and LCSSA PHI in outer loop latch\n"); + return true; + } + if (InnerLoopLatch != InnerLoopHeader && + isa<PHINode>(InnerLoopLatch->begin())) { + DEBUG(dbgs() << "Found and LCSSA PHI in inner loop latch\n"); + return true; + } + + // TODO: Current limitation: Since we split the inner loop latch at the point + // were induction variable is incremented (induction.next); We cannot have + // more than 1 user of induction.next since it would result in broken code + // after split. + // e.g. 
+ // for(i=0;i<N;i++) { + // for(j = 0;j<M;j++) { + // A[j+1][i+2] = A[j][i]+k; + // } + // } + bool FoundInduction = false; + Instruction *InnerIndexVarInc = nullptr; + if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) + InnerIndexVarInc = + dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); + else + InnerIndexVarInc = + dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); + + if (!InnerIndexVarInc) + return true; + + // Since we split the inner loop latch on this induction variable. Make sure + // we do not have any instruction between the induction variable and branch + // instruction. + + for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend(); + I != E && !FoundInduction; ++I) { + if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I)) + continue; + const Instruction &Ins = *I; + // We found an instruction. If this is not induction variable then it is not + // safe to split this loop latch. + if (!Ins.isIdenticalTo(InnerIndexVarInc)) + return true; + else + FoundInduction = true; + } + // The loop latch ended and we didnt find the induction variable return as + // current limitation. + if (!FoundInduction) + return true; + + return false; +} + +bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, + unsigned OuterLoopId, + CharMatrix &DepMatrix) { + + if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { + DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId + << "and OuterLoopId = " << OuterLoopId + << "due to dependence\n"); + return false; + } + + // Create unique Preheaders if we already do not have one. + BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + + // Create a unique outer preheader - + // 1) If OuterLoop preheader is not present. + // 2) If OuterLoop Preheader is same as OuterLoop Header + // 3) If OuterLoop Preheader is same as Header of the previous loop. + // 4) If OuterLoop Preheader is Entry node. + if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() || + isa<PHINode>(OuterLoopPreHeader->begin()) || + !OuterLoopPreHeader->getUniquePredecessor()) { + OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass); + } + + if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() || + InnerLoopPreHeader == OuterLoop->getHeader()) { + InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass); + } + + // Check if the loops are tightly nested. + if (!tightlyNested(OuterLoop, InnerLoop)) { + DEBUG(dbgs() << "Loops not tightly nested\n"); + return false; + } + + // TODO: The loops could not be interchanged due to current limitations in the + // transform module. 
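  // In summary, currentLimitations() reports a limitation when: a loop header
  // has more than one PHI (so reductions are not handled), no induction
  // variable can be found, the loop structure is triangular, an LCSSA PHI sits
  // in a latch, or the inner latch contains anything besides the induction
  // increment and the compare/branch/trunc that consume it.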
+ if (currentLimitations()) { + DEBUG(dbgs() << "Not legal because of current transform limitation\n"); + return false; + } + + return true; +} + +int LoopInterchangeProfitability::getInstrOrderCost() { + unsigned GoodOrder, BadOrder; + BadOrder = GoodOrder = 0; + for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end(); + BI != BE; ++BI) { + for (auto I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) { + const Instruction &Ins = *I; + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { + unsigned NumOp = GEP->getNumOperands(); + bool FoundInnerInduction = false; + bool FoundOuterInduction = false; + for (unsigned i = 0; i < NumOp; ++i) { + const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); + if (!AR) + continue; + + // If we find the inner induction after an outer induction e.g. + // for(int i=0;i<N;i++) + // for(int j=0;j<N;j++) + // A[i][j] = A[i-1][j-1]+k; + // then it is a good order. + if (AR->getLoop() == InnerLoop) { + // We found an InnerLoop induction after OuterLoop induction. It is + // a good order. + FoundInnerInduction = true; + if (FoundOuterInduction) { + GoodOrder++; + break; + } + } + // If we find the outer induction after an inner induction e.g. + // for(int i=0;i<N;i++) + // for(int j=0;j<N;j++) + // A[j][i] = A[j-1][i-1]+k; + // then it is a bad order. + if (AR->getLoop() == OuterLoop) { + // We found an OuterLoop induction after InnerLoop induction. It is + // a bad order. + FoundOuterInduction = true; + if (FoundInnerInduction) { + BadOrder++; + break; + } + } + } + } + } + } + return GoodOrder - BadOrder; +} + +static bool isProfitabileForVectorization(unsigned InnerLoopId, + unsigned OuterLoopId, + CharMatrix &DepMatrix) { + // TODO: Improve this heuristic to catch more cases. + // If the inner loop is loop independent or doesn't carry any dependency it is + // profitable to move this to outer position. + unsigned Row = DepMatrix.size(); + for (unsigned i = 0; i < Row; ++i) { + if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I') + return false; + // TODO: We need to improve this heuristic. + if (DepMatrix[i][OuterLoopId] != '=') + return false; + } + // If outer loop has dependence and inner loop is loop independent then it is + // profitable to interchange to enable parallelism. + return true; +} + +bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, + unsigned OuterLoopId, + CharMatrix &DepMatrix) { + + // TODO: Add Better Profitibility checks. + // e.g + // 1) Construct dependency matrix and move the one with no loop carried dep + // inside to enable vectorization. + + // This is rough cost estimation algorithm. It counts the good and bad order + // of induction variables in the instruction and allows reordering if number + // of bad orders is more than good. + int Cost = 0; + Cost += getInstrOrderCost(); + DEBUG(dbgs() << "Cost = " << Cost << "\n"); + if (Cost < 0) + return true; + + // It is not profitable as per current cache profitibility model. But check if + // we can move this loop outside to improve parallelism. 
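  // e.g. with a single dependence row of ['=', 'S'] (outer column '=', inner
  // column 'S'), the check passes: the inner loop carries no dependence and
  // every outer entry is '=', so interchanging can expose parallelism even
  // though the cache cost model above was not in favour.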
+ bool ImprovesPar = + isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix); + return ImprovesPar; +} + +void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, + Loop *InnerLoop) { + for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E; + ++I) { + if (*I == InnerLoop) { + OuterLoop->removeChildLoop(I); + return; + } + } + assert(false && "Couldn't find loop"); +} + +void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop, + Loop *OuterLoop) { + Loop *OuterLoopParent = OuterLoop->getParentLoop(); + if (OuterLoopParent) { + // Remove the loop from its parent loop. + removeChildLoop(OuterLoopParent, OuterLoop); + removeChildLoop(OuterLoop, InnerLoop); + OuterLoopParent->addChildLoop(InnerLoop); + } else { + removeChildLoop(OuterLoop, InnerLoop); + LI->changeTopLevelLoop(OuterLoop, InnerLoop); + } + + for (Loop::iterator I = InnerLoop->begin(), E = InnerLoop->end(); I != E; ++I) + OuterLoop->addChildLoop(InnerLoop->removeChildLoop(I)); + + InnerLoop->addChildLoop(OuterLoop); +} + +bool LoopInterchangeTransform::transform() { + + DEBUG(dbgs() << "transform\n"); + bool Transformed = false; + Instruction *InnerIndexVar; + + if (InnerLoop->getSubLoops().size() == 0) { + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + DEBUG(dbgs() << "Calling Split Inner Loop\n"); + PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); + if (!InductionPHI) { + DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); + return false; + } + + if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) + InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1)); + else + InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0)); + + // + // Split at the place were the induction variable is + // incremented/decremented. + // TODO: This splitting logic may not work always. Fix this. + splitInnerLoopLatch(InnerIndexVar); + DEBUG(dbgs() << "splitInnerLoopLatch Done\n"); + + // Splits the inner loops phi nodes out into a seperate basic block. + splitInnerLoopHeader(); + DEBUG(dbgs() << "splitInnerLoopHeader Done\n"); + } + + Transformed |= adjustLoopLinks(); + if (!Transformed) { + DEBUG(dbgs() << "adjustLoopLinks Failed\n"); + return false; + } + + restructureLoops(InnerLoop, OuterLoop); + return true; +} + +void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { + BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); + BasicBlock *InnerLoopLatchPred = InnerLoopLatch; + InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI); +} + +void LoopInterchangeTransform::splitOuterLoopLatch() { + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch; + OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock, + OuterLoopLatch->getFirstNonPHI(), DT, LI); +} + +void LoopInterchangeTransform::splitInnerLoopHeader() { + + // Split the inner loop header out. 
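  // SplitBlock at getFirstNonPHI() leaves only the PHI nodes (the induction
  // PHI) in the original header and moves the remaining instructions into a
  // fresh successor block, roughly:
  //   inner.header:        ; PHIs only, then an unconditional branch
  //   inner.header.split:  ; the rest of the old header body
  // (block names illustrative) so the header can be re-wired independently of
  // its body when the branches are adjusted.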
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); + SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); + + DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & " + "InnerLoopHeader \n"); +} + +/// \brief Move all instructions except the terminator from FromBB right before +/// InsertBefore +static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { + auto &ToList = InsertBefore->getParent()->getInstList(); + auto &FromList = FromBB->getInstList(); + + ToList.splice(InsertBefore, FromList, FromList.begin(), + FromBB->getTerminator()); +} + +void LoopInterchangeTransform::adjustOuterLoopPreheader() { + BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); + BasicBlock *InnerPreHeader = InnerLoop->getLoopPreheader(); + + moveBBContents(OuterLoopPreHeader, InnerPreHeader->getTerminator()); +} + +void LoopInterchangeTransform::adjustInnerLoopPreheader() { + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *OuterHeader = OuterLoop->getHeader(); + + moveBBContents(InnerLoopPreHeader, OuterHeader->getTerminator()); +} + +bool LoopInterchangeTransform::adjustLoopBranches() { + + DEBUG(dbgs() << "adjustLoopBranches called\n"); + // Adjust the loop preheader + BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); + BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); + BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); + BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); + BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); + BasicBlock *InnerLoopLatchPredecessor = + InnerLoopLatch->getUniquePredecessor(); + BasicBlock *InnerLoopLatchSuccessor; + BasicBlock *OuterLoopLatchSuccessor; + + BranchInst *OuterLoopLatchBI = + dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); + BranchInst *InnerLoopLatchBI = + dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); + BranchInst *OuterLoopHeaderBI = + dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); + BranchInst *InnerLoopHeaderBI = + dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); + + if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || + !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || + !InnerLoopHeaderBI) + return false; + + BranchInst *InnerLoopLatchPredecessorBI = + dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); + BranchInst *OuterLoopPredecessorBI = + dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); + + if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) + return false; + BasicBlock *InnerLoopHeaderSucessor = InnerLoopHeader->getUniqueSuccessor(); + if (!InnerLoopHeaderSucessor) + return false; + + // Adjust Loop Preheader and headers + + unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors(); + for (unsigned i = 0; i < NumSucc; ++i) { + if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader) + OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader); + } + + NumSucc = OuterLoopHeaderBI->getNumSuccessors(); + for (unsigned i = 0; i < NumSucc; ++i) { + if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) + OuterLoopHeaderBI->setSuccessor(i, LoopExit); + else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) + OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSucessor); + } + + BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); + InnerLoopHeaderBI->eraseFromParent(); 
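  // At this point the header/preheader edges have been redirected: the block
  // that used to enter the outer preheader now enters the inner preheader,
  // the outer header exits to the nest exit or to the split-off inner header
  // body, and the inner header now branches unconditionally to the old outer
  // preheader.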
+ + // -------------Adjust loop latches----------- + if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) + InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); + else + InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); + + NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors(); + for (unsigned i = 0; i < NumSucc; ++i) { + if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch) + InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor); + } + + if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) + OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); + else + OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); + + if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor) + InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor); + else + InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor); + + if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) { + OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch); + } else { + OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch); + } + + return true; +} +void LoopInterchangeTransform::adjustLoopPreheaders() { + + // We have interchanged the preheaders so we need to interchange the data in + // the preheader as well. + // This is because the content of inner preheader was previously executed + // inside the outer loop. + BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); + BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); + BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); + BranchInst *InnerTermBI = + cast<BranchInst>(InnerLoopPreHeader->getTerminator()); + + BasicBlock *HeaderSplit = + SplitBlock(OuterLoopHeader, OuterLoopHeader->getTerminator(), DT, LI); + Instruction *InsPoint = HeaderSplit->getFirstNonPHI(); + // These instructions should now be executed inside the loop. + // Move instruction into a new block after outer header. + moveBBContents(InnerLoopPreHeader, InsPoint); + // These instructions were not executed previously in the loop so move them to + // the older inner loop preheader. + moveBBContents(OuterLoopPreHeader, InnerTermBI); +} + +bool LoopInterchangeTransform::adjustLoopLinks() { + + // Adjust all branches in the inner and outer loop. 
+ bool Changed = adjustLoopBranches(); + if (Changed) + adjustLoopPreheaders(); + return Changed; +} + +char LoopInterchange::ID = 0; +INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", + "Interchanges loops for cache reuse", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) + +INITIALIZE_PASS_END(LoopInterchange, "loop-interchange", + "Interchanges loops for cache reuse", false, false) + +Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); } diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index fdf7e3b..ed103e6 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -30,7 +31,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -160,7 +160,6 @@ namespace { AliasAnalysis *AA; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; TargetLibraryInfo *TLI; DominatorTree *DT; @@ -367,10 +366,8 @@ namespace { struct DAGRootTracker { DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV, ScalarEvolution *SE, AliasAnalysis *AA, - TargetLibraryInfo *TLI, const DataLayout *DL) - : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), - DL(DL), IV(IV) { - } + TargetLibraryInfo *TLI) + : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {} /// Stage 1: Find all the DAG roots for the induction variable. bool findRoots(); @@ -416,7 +413,6 @@ namespace { ScalarEvolution *SE; AliasAnalysis *AA; TargetLibraryInfo *TLI; - const DataLayout *DL; // The loop induction variable. Instruction *IV; @@ -1131,7 +1127,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // needed because otherwise isSafeToSpeculativelyExecute returns // false on PHI nodes. if (!isa<PHINode>(I) && !isSimpleLoadStore(I) && - !isSafeToSpeculativelyExecute(I, DL)) + !isSafeToSpeculativelyExecute(I)) // Intervening instructions cause side effects. FutureSideEffects = true; } @@ -1161,11 +1157,10 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // side effects, and this instruction might also, then we can't reorder // them, and this matching fails. As an exception, we allow the alias // set tracker to handle regular (simple) load/store dependencies. - if (FutureSideEffects && - ((!isSimpleLoadStore(BaseInst) && - !isSafeToSpeculativelyExecute(BaseInst, DL)) || - (!isSimpleLoadStore(RootInst) && - !isSafeToSpeculativelyExecute(RootInst, DL)))) { + if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) && + !isSafeToSpeculativelyExecute(BaseInst)) || + (!isSimpleLoadStore(RootInst) && + !isSafeToSpeculativelyExecute(RootInst)))) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << " vs. 
" << *RootInst << " (side effects prevent reordering)\n"); @@ -1272,6 +1267,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { ++J; } + const DataLayout &DL = Header->getModule()->getDataLayout(); // We need to create a new induction variable for each different BaseInst. for (auto &DRS : RootSets) { @@ -1284,7 +1280,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { SE->getConstant(RealIVSCEV->getType(), 1), L, SCEV::FlagAnyWrap)); { // Limit the lifetime of SCEVExpander. - SCEVExpander Expander(*SE, "reroll"); + SCEVExpander Expander(*SE, DL, "reroll"); Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); for (auto &KV : Uses) { @@ -1324,7 +1320,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { } } - SimplifyInstructionsInBlock(Header, DL, TLI); + SimplifyInstructionsInBlock(Header, TLI); DeleteDeadPHIs(Header, TLI); } @@ -1448,7 +1444,7 @@ void LoopReroll::ReductionTracker::replaceSelected() { bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions) { - DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DL); + DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI); if (!DAGRoots.findRoots()) return false; @@ -1477,8 +1473,6 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolution>(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); BasicBlock *Header = L->getHeader(); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 4d12349..a675e12 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -24,8 +24,10 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -412,6 +414,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); @@ -442,8 +446,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - // FIXME: Provide DL, TLI, DT, AC to SimplifyInstruction. - Value *V = SimplifyInstruction(C); + // FIXME: Provide TLI, DT, AC to SimplifyInstruction. + Value *V = SimplifyInstruction(C, DL); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. 
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 318065e..8445d5f 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -3825,7 +3826,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) && (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale)) - .ule(abs64(NewF.BaseOffset))) + .ule(std::abs(NewF.BaseOffset))) continue; // OK, looks good. @@ -3856,7 +3857,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { J != JE; ++J) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J)) if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt( - abs64(NewF.BaseOffset)) && + std::abs(NewF.BaseOffset)) && (C->getValue()->getValue() + NewF.BaseOffset).countTrailingZeros() >= countTrailingZeros<uint64_t>(NewF.BaseOffset)) @@ -4823,7 +4824,8 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, // we can remove them after we are done working. SmallVector<WeakVH, 16> DeadInsts; - SCEVExpander Rewriter(SE, "lsr"); + SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), + "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif @@ -5093,7 +5095,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { SmallVector<WeakVH, 16> DeadInsts; - SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr"); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 924be16..600cbde 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -23,14 +24,13 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/IR/InstVisitor.h" -#include "llvm/Analysis/InstructionSimplify.h" #include <climits> using namespace llvm; @@ -259,6 +259,7 @@ static bool isLoadFromConstantInitializer(Value *V) { return false; } +namespace { struct FindConstantPointers { bool LoadCanBeConstantFolded; bool IndexIsConstant; @@ -356,11 +357,12 @@ class UnrollAnalyzer : public InstVisitor<UnrollAnalyzer, bool> { if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; Value *SimpleV = nullptr; + const DataLayout &DL = I.getModule()->getDataLayout(); if (auto FI = dyn_cast<FPMathOperator>(&I)) SimpleV = - SimplifyFPBinOp(I.getOpcode(), 
LHS, RHS, FI->getFastMathFlags()); + SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL); else - SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS); + SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL); if (SimpleV && CountedInstructions.insert(&I).second) NumberOfOptimizedInstructions += TTI.getUserCost(&I); @@ -540,6 +542,7 @@ public: return NumberOfOptimizedInstructions; } }; +} // namespace // Complete loop unrolling can make some loads constant, and we need to know if // that would expose any further optimization opportunities. @@ -619,6 +622,11 @@ static bool HasUnrollDisablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable"); } +// Returns true if the loop has an runtime unroll(disable) pragma. +static bool HasRuntimeUnrollDisablePragma(const Loop *L) { + return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); +} + // If loop has an unroll_count pragma return the (necessarily // positive) value from the pragma. Otherwise return 0. static unsigned UnrollCountPragmaValue(const Loop *L) { @@ -807,6 +815,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime; + if (HasRuntimeUnrollDisablePragma(L)) { + AllowRuntime = false; + } if (Unrolling == Partial) { bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; if (!AllowPartial && !CountSetExplicitly) { diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 987dc96..988d2af 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -1082,6 +1083,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, /// pass. /// void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); while (!Worklist.empty()) { Instruction *I = Worklist.back(); Worklist.pop_back(); @@ -1104,7 +1106,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // See if instruction simplification can hack this up. This is common for // things like "select false, X, Y" after unswitching made the condition be // 'false'. TODO: update the domtree properly so we can pass it here. 
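[Editorial note] The comment above describes the folds that become available once unswitching pins a loop-invariant condition to a constant in each cloned body. A scalar C++ analogue of the "select false, X, Y" case, with invented names and purely for illustration:

int pick(bool Cond, int X, int Y) { return Cond ? X : Y; }

// In the clone where the unswitched condition is known false, the select
// collapses; SimplifyInstruction performs the equivalent folding on the IR.
int specializedForFalse(int X, int Y) {
  const bool Cond = false;   // pinned by unswitching
  return pick(Cond, X, Y);   // folds to plain Y
}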
- if (Value *V = SimplifyInstruction(I)) + if (Value *V = SimplifyInstruction(I, DL)) if (LI->replacementPreservesLCSSAForm(I, V)) { ReplaceUsesOfWith(I, V, Worklist, L, LPM); continue; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 006b885..2b5a078 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -28,7 +29,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include <list> using namespace llvm; @@ -41,7 +41,8 @@ STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); STATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, - bool &VariableIdxFound, const DataLayout &TD){ + bool &VariableIdxFound, + const DataLayout &DL) { // Skip over the first indices. gep_type_iterator GTI = gep_type_begin(GEP); for (unsigned i = 1; i != Idx; ++i, ++GTI) @@ -57,13 +58,13 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, // Handle struct indices, which add their field offset to the pointer. if (StructType *STy = dyn_cast<StructType>(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); continue; } // Otherwise, we have a sequential type like an array or vector. Multiply // the index by the ElementSize. - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()); Offset += Size*OpC->getSExtValue(); } @@ -74,7 +75,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, /// constant offset, and return that constant offset. For example, Ptr1 might /// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, - const DataLayout &TD) { + const DataLayout &DL) { Ptr1 = Ptr1->stripPointerCasts(); Ptr2 = Ptr2->stripPointerCasts(); @@ -92,12 +93,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, // If one pointer is a GEP and the other isn't, then see if the GEP is a // constant offset from the base, as in "P" and "gep P, 1". 
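[Editorial note] IsPointerOffset, above, is only touched to rename its DataLayout parameter; it answers the question in its doc comment: are two pointers a known constant number of bytes apart? The plain C++ arithmetic it models, assuming a 4-byte int and an invented array purely for the example:

#include <cstddef>

int A[64];

// Byte distance from &A[42] to &A[40]; with 4-byte ints this is -8,
// matching the example in the doc comment.
std::ptrdiff_t offsetExample() {
  const char *P1 = reinterpret_cast<const char *>(&A[42]);
  const char *P2 = reinterpret_cast<const char *>(&A[40]);
  return P2 - P1;   // -2 * sizeof(int) == -8 here
}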
if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { - Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); + Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL); return !VariableIdxFound; } if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { - Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); + Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL); return !VariableIdxFound; } @@ -115,8 +116,8 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) break; - int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); - int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); + int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL); + int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL); if (VariableIdxFound) return false; Offset = Offset2-Offset1; @@ -150,12 +151,11 @@ struct MemsetRange { /// TheStores - The actual stores that make up this range. SmallVector<Instruction*, 16> TheStores; - bool isProfitableToUseMemset(const DataLayout &TD) const; - + bool isProfitableToUseMemset(const DataLayout &DL) const; }; } // end anon namespace -bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { +bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { // If we found more than 4 stores to merge or 16 bytes, use memset. if (TheStores.size() >= 4 || End-Start >= 16) return true; @@ -183,7 +183,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { // size. If so, check to see whether we will end up actually reducing the // number of stores used. unsigned Bytes = unsigned(End-Start); - unsigned MaxIntSize = TD.getLargestLegalIntTypeSize(); + unsigned MaxIntSize = DL.getLargestLegalIntTypeSize(); if (MaxIntSize == 0) MaxIntSize = 1; unsigned NumPointerStores = Bytes / MaxIntSize; @@ -314,14 +314,12 @@ namespace { class MemCpyOpt : public FunctionPass { MemoryDependenceAnalysis *MD; TargetLibraryInfo *TLI; - const DataLayout *DL; public: static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); MD = nullptr; TLI = nullptr; - DL = nullptr; } bool runOnFunction(Function &F) override; @@ -377,13 +375,13 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", /// attempts to merge them together into a memcpy/memset. Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { - if (!DL) return nullptr; + const DataLayout &DL = StartInst->getModule()->getDataLayout(); // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. - MemsetRanges Ranges(*DL); + MemsetRanges Ranges(DL); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { @@ -406,8 +404,8 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Check to see if this store is to a constant offset from the start ptr. 
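[Editorial note] tryMergingIntoMemset, whose hunks continue below, scans forward from one splatable store, collects further stores of the same byte at known constant offsets, and emits a single memset once MemsetRange::isProfitableToUseMemset is satisfied. A source-level picture of the rewrite, with invented function names, for illustration only:

#include <cstring>

// Before: a run of adjacent stores of the same byte value.
void clearBefore(unsigned char *P) {
  P[0] = 0; P[1] = 0; P[2] = 0; P[3] = 0;
  P[4] = 0; P[5] = 0; P[6] = 0; P[7] = 0;
}

// After: MemCpyOpt forms the equivalent of one library call.
void clearAfter(unsigned char *P) {
  std::memset(P, 0, 8);
}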
int64_t Offset; - if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), - Offset, *DL)) + if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, + DL)) break; Ranges.addStore(Offset, NextStore); @@ -420,7 +418,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; - if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *DL)) + if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL)) break; Ranges.addMemSet(Offset, MSI); @@ -452,7 +450,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, if (Range.TheStores.size() == 1) continue; // If it is profitable to lower this range to memset, do so now. - if (!Range.isProfitableToUseMemset(*DL)) + if (!Range.isProfitableToUseMemset(DL)) continue; // Otherwise, we do want to transform this! Create a new memset. @@ -464,7 +462,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); - Alignment = DL->getABITypeAlignment(EltType); + Alignment = DL.getABITypeAlignment(EltType); } AMemSet = @@ -494,8 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; - - if (!DL) return false; + const DataLayout &DL = SI->getModule()->getDataLayout(); // Detect cases where we're performing call slot forwarding, but // happen to be using a load-store pair to implement it, rather than @@ -525,16 +522,16 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (C) { unsigned storeAlign = SI->getAlignment(); if (!storeAlign) - storeAlign = DL->getABITypeAlignment(SI->getOperand(0)->getType()); + storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); unsigned loadAlign = LI->getAlignment(); if (!loadAlign) - loadAlign = DL->getABITypeAlignment(LI->getType()); + loadAlign = DL.getABITypeAlignment(LI->getType()); - bool changed = performCallSlotOptzn(LI, - SI->getPointerOperand()->stripPointerCasts(), - LI->getPointerOperand()->stripPointerCasts(), - DL->getTypeStoreSize(SI->getOperand(0)->getType()), - std::min(storeAlign, loadAlign), C); + bool changed = performCallSlotOptzn( + LI, SI->getPointerOperand()->stripPointerCasts(), + LI->getPointerOperand()->stripPointerCasts(), + DL.getTypeStoreSize(SI->getOperand(0)->getType()), + std::min(storeAlign, loadAlign), C); if (changed) { MD->removeInstruction(SI); SI->eraseFromParent(); @@ -606,15 +603,13 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (!srcAlloca) return false; - // Check that all of src is copied to dest. 
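[Editorial note] performCallSlotOptzn, which continues below, implements call-slot forwarding: when a call fills a local temporary whose entire contents are then memcpy'd to the real destination, the call can be redirected to write the destination directly. A source-level sketch of the pattern; the struct and the producer function are assumptions made up for the illustration:

#include <cstring>

struct S { char Bytes[64]; };
void producer(S *Out);   // assumed: writes all of *Out and never reads it

void beforeForwarding(S *Dest) {
  S Tmp;
  producer(&Tmp);                        // call fills the temporary
  std::memcpy(Dest, &Tmp, sizeof(S));    // whole temporary copied out
}

void afterForwarding(S *Dest) {
  producer(Dest);                        // forwarded: write Dest directly
}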
- if (!DL) return false; - ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) return false; - uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) * - srcArraySize->getZExtValue(); + const DataLayout &DL = cpy->getModule()->getDataLayout(); + uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) * + srcArraySize->getZExtValue(); if (cpyLen < srcSize) return false; @@ -628,8 +623,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (!destArraySize) return false; - uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) * - destArraySize->getZExtValue(); + uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) * + destArraySize->getZExtValue(); if (destSize < srcSize) return false; @@ -648,7 +643,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, return false; } - uint64_t destSize = DL->getTypeAllocSize(StructTy); + uint64_t destSize = DL.getTypeAllocSize(StructTy); if (destSize < srcSize) return false; } @@ -659,7 +654,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, // Check that dest points to memory that is at least as aligned as src. unsigned srcAlign = srcAlloca->getAlignment(); if (!srcAlign) - srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType()); + srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType()); bool isDestSufficientlyAligned = srcAlign <= cpyAlign; // If dest is not aligned enough and we can't increase its alignment then // bail out. @@ -959,12 +954,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { /// processByValArgument - This is called on every byval argument in call sites. bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { - if (!DL) return false; - + const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); // Find out what feeds this byval argument. Value *ByValArg = CS.getArgument(ArgNo); Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); - uint64_t ByValSize = DL->getTypeAllocSize(ByValTy); + uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); MemDepResult DepInfo = MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), true, CS.getInstruction(), @@ -997,8 +991,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { *CS->getParent()->getParent()); DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); if (MDep->getAlignment() < ByValAlign && - getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &AC, - CS.getInstruction(), &DT) < ByValAlign) + getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, + CS.getInstruction(), &AC, &DT) < ByValAlign) return false; // Verify that the copied-from memory doesn't change in between the memcpy and @@ -1077,8 +1071,6 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis<MemoryDependenceAnalysis>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? 
&DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); // If we don't have at least memset and memcpy, there is little point of doing diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 8fad63f..73f4296 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -81,12 +81,13 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include <vector> diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 98016b4..307cc73 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -321,10 +321,8 @@ unsigned Reassociate::getRank(Value *V) { // If this is a not or neg instruction, do not count it for rank. This // assures us that X and ~X will have the same rank. - Type *Ty = V->getType(); - if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) || - (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) && - !BinaryOperator::isFNeg(I))) + if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) && + !BinaryOperator::isFNeg(I)) ++Rank; DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n"); @@ -351,7 +349,7 @@ void Reassociate::canonicalizeOperands(Instruction *I) { static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore); else { BinaryOperator *Res = @@ -363,7 +361,7 @@ static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name, static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore); else { BinaryOperator *Res = @@ -375,7 +373,7 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, static BinaryOperator *CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateNeg(S1, Name, InsertBefore); else { BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore); @@ -388,8 +386,8 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name, /// static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { Type *Ty = Neg->getType(); - Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty) - : ConstantFP::get(Ty, -1.0); + Constant *NegOne = Ty->isIntOrIntVectorTy() ? + ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0); BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg); Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op. 
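[Editorial note] LowerNegateToMultiply, just above, now also accepts integer vector types. The identity it relies on, -x == x * -1 in two's complement, holds per lane exactly as it does for scalars. A small self-checking illustration in plain C++ rather than LLVM IR:

#include <cassert>
#include <cstdint>

int main() {
  // Scalar: negation is multiplication by -1.
  for (int32_t X : {0, 1, -7, 1 << 30})
    assert(-X == X * -1);

  // Lane-wise: the same identity on each element, which is what permitting
  // isIntOrIntVectorTy() in the helpers above enables for <N x iM> values.
  int32_t V[4] = {3, -4, 5, -6};
  for (int32_t Lane : V)
    assert(-Lane == Lane * -1);
  return 0;
}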
@@ -872,7 +870,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, Constant *Undef = UndefValue::get(I->getType()); NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), Undef, Undef, "", I); - if (NewOp->getType()->isFloatingPointTy()) + if (NewOp->getType()->isFPOrFPVectorTy()) NewOp->setFastMathFlags(I->getFastMathFlags()); } else { NewOp = NodesToRewrite.pop_back_val(); @@ -1520,8 +1518,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Insert a new multiply. Type *Ty = TheOp->getType(); - Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound) - : ConstantFP::get(Ty, NumFound); + Constant *C = Ty->isIntOrIntVectorTy() ? + ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound); Instruction *Mul = CreateMul(TheOp, C, "factor", I, I); // Now that we have inserted a multiply, optimize it. This allows us to @@ -1661,7 +1659,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // from an expression will drop a use of maxocc, and this can cause // RemoveFactorFromExpression on successive values to behave differently. Instruction *DummyInst = - I->getType()->isIntegerTy() + I->getType()->isIntOrIntVectorTy() ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); @@ -1792,7 +1790,7 @@ static Value *buildMultiplyTree(IRBuilder<> &Builder, Value *LHS = Ops.pop_back_val(); do { - if (LHS->getType()->isIntegerTy()) + if (LHS->getType()->isIntOrIntVectorTy()) LHS = Builder.CreateMul(LHS, Ops.pop_back_val()); else LHS = Builder.CreateFMul(LHS, Ops.pop_back_val()); @@ -2090,8 +2088,9 @@ void Reassociate::OptimizeInst(Instruction *I) { if (I->isCommutative()) canonicalizeOperands(I); - // Don't optimize vector instructions. - if (I->getType()->isVectorTy()) + // TODO: We should optimize vector Xor instructions, but they are + // currently unsupported. + if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor) return; // Don't optimize floating point instructions that don't have unsafe algebra. @@ -2170,9 +2169,6 @@ void Reassociate::OptimizeInst(Instruction *I) { } void Reassociate::ReassociateExpression(BinaryOperator *I) { - assert(!I->getType()->isVectorTy() && - "Reassociation of vector instructions is not supported."); - // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector<RepeatedValue, 8> Tree; diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index ca9ab54..f5d21ff 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -548,9 +548,6 @@ public: } PhiState(Value *b) : status(Base), base(b) {} PhiState() : status(Unknown), base(nullptr) {} - PhiState(const PhiState &other) : status(other.status), base(other.base) { - assert(status != Base || base); - } Status getStatus() const { return status; } Value *getBase() const { return base; } @@ -684,12 +681,19 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, states[def] = PhiState(); // Recursively fill in all phis & selects reachable from the initial one // for which we don't already know a definite base value for - // PERF: Yes, this is as horribly inefficient as it looks. + // TODO: This should be rewritten with a worklist bool done = false; while (!done) { done = true; + // Since we're adding elements to 'states' as we run, we can't keep + // iterators into the set. 
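[Editorial note] The comment above is the heart of the fix that follows: a DenseMap may reallocate as new entries are added, so the loop first snapshots the keys and then iterates the snapshot. The same discipline with a standard container (std::unordered_map standing in for DenseMap; the helper name is invented, illustrative only):

#include <string>
#include <unordered_map>
#include <vector>

void growWhileVisiting(std::unordered_map<std::string, int> &States) {
  // Snapshot the keys first: inserting while iterating States could rehash
  // and invalidate the iterators a range-for over it would hold.
  std::vector<std::string> Keys;
  Keys.reserve(States.size());
  for (const auto &KV : States)
    Keys.push_back(KV.first);

  for (const std::string &K : Keys)
    States.emplace(K + ".derived", States[K] + 1);   // safe: iterating Keys
}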
+ SmallVector<Value*, 16> Keys; + Keys.reserve(states.size()); for (auto Pair : states) { - Value *v = Pair.first; + Value *V = Pair.first; + Keys.push_back(V); + } + for (Value *v : Keys) { assert(!isKnownBaseResult(v) && "why did it get added?"); if (PHINode *phi = dyn_cast<PHINode>(v)) { assert(phi->getNumIncomingValues() > 0 && @@ -730,10 +734,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // have reached conflict state. The current version seems too conservative. bool progress = true; - size_t oldSize = 0; while (progress) { - oldSize = states.size(); +#ifndef NDEBUG + size_t oldSize = states.size(); +#endif progress = false; + // We're only changing keys in this loop, thus safe to keep iterators for (auto Pair : states) { MeetPhiStates calculateMeet(states); Value *v = Pair.first; @@ -768,46 +774,58 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, } // Insert Phis for all conflicts + // We want to keep naming deterministic in the loop that follows, so + // sort the keys before iteration. This is useful in allowing us to + // write stable tests. Note that there is no invalidation issue here. + SmallVector<Value*, 16> Keys; + Keys.reserve(states.size()); for (auto Pair : states) { - Instruction *v = cast<Instruction>(Pair.first); - PhiState state = Pair.second; + Value *V = Pair.first; + Keys.push_back(V); + } + std::sort(Keys.begin(), Keys.end(), order_by_name); + // TODO: adjust naming patterns to avoid this order of iteration dependency + for (Value *V : Keys) { + Instruction *v = cast<Instruction>(V); + PhiState state = states[V]; assert(!isKnownBaseResult(v) && "why did it get added?"); assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); - if (state.isConflict()) { - if (isa<PHINode>(v)) { - int num_preds = - std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); - assert(num_preds > 0 && "how did we reach here"); - PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); - NewInsertedDefs.insert(phi); - // Add metadata marking this as a base value - auto *const_1 = ConstantInt::get( - Type::getInt32Ty( - v->getParent()->getParent()->getParent()->getContext()), - 1); - auto MDConst = ConstantAsMetadata::get(const_1); - MDNode *md = MDNode::get( - v->getParent()->getParent()->getParent()->getContext(), MDConst); - phi->setMetadata("is_base_value", md); - states[v] = PhiState(PhiState::Conflict, phi); - } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) { - // The undef will be replaced later - UndefValue *undef = UndefValue::get(sel->getType()); - SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, - undef, "base_select", sel); - NewInsertedDefs.insert(basesel); - // Add metadata marking this as a base value - auto *const_1 = ConstantInt::get( - Type::getInt32Ty( - v->getParent()->getParent()->getParent()->getContext()), - 1); - auto MDConst = ConstantAsMetadata::get(const_1); - MDNode *md = MDNode::get( - v->getParent()->getParent()->getParent()->getContext(), MDConst); - basesel->setMetadata("is_base_value", md); - states[v] = PhiState(PhiState::Conflict, basesel); - } else - llvm_unreachable("unknown conflict type"); + if (!state.isConflict()) + continue; + + if (isa<PHINode>(v)) { + int num_preds = + std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); + assert(num_preds > 0 && "how did we reach here"); + PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); + NewInsertedDefs.insert(phi); + // Add metadata marking this as a base 
value + auto *const_1 = ConstantInt::get( + Type::getInt32Ty( + v->getParent()->getParent()->getParent()->getContext()), + 1); + auto MDConst = ConstantAsMetadata::get(const_1); + MDNode *md = MDNode::get( + v->getParent()->getParent()->getParent()->getContext(), MDConst); + phi->setMetadata("is_base_value", md); + states[v] = PhiState(PhiState::Conflict, phi); + } else { + SelectInst *sel = cast<SelectInst>(v); + // The undef will be replaced later + UndefValue *undef = UndefValue::get(sel->getType()); + SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, + undef, "base_select", sel); + NewInsertedDefs.insert(basesel); + // Add metadata marking this as a base value + auto *const_1 = ConstantInt::get( + Type::getInt32Ty( + v->getParent()->getParent()->getParent()->getContext()), + 1); + auto MDConst = ConstantAsMetadata::get(const_1); + MDNode *md = MDNode::get( + v->getParent()->getParent()->getParent()->getContext(), MDConst); + basesel->setMetadata("is_base_value", md); + states[v] = PhiState(PhiState::Conflict, basesel); } } @@ -818,97 +836,98 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(!isKnownBaseResult(v) && "why did it get added?"); assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); - if (state.isConflict()) { - if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) { - PHINode *phi = cast<PHINode>(v); - unsigned NumPHIValues = phi->getNumIncomingValues(); - for (unsigned i = 0; i < NumPHIValues; i++) { - Value *InVal = phi->getIncomingValue(i); - BasicBlock *InBB = phi->getIncomingBlock(i); - - // If we've already seen InBB, add the same incoming value - // we added for it earlier. The IR verifier requires phi - // nodes with multiple entries from the same basic block - // to have the same incoming value for each of those - // entries. If we don't do this check here and basephi - // has a different type than base, we'll end up adding two - // bitcasts (and hence two distinct values) as incoming - // values for the same basic block. - - int blockIndex = basephi->getBasicBlockIndex(InBB); - if (blockIndex != -1) { - Value *oldBase = basephi->getIncomingValue(blockIndex); - basephi->addIncoming(oldBase, InBB); + if (!state.isConflict()) + continue; + + if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) { + PHINode *phi = cast<PHINode>(v); + unsigned NumPHIValues = phi->getNumIncomingValues(); + for (unsigned i = 0; i < NumPHIValues; i++) { + Value *InVal = phi->getIncomingValue(i); + BasicBlock *InBB = phi->getIncomingBlock(i); + + // If we've already seen InBB, add the same incoming value + // we added for it earlier. The IR verifier requires phi + // nodes with multiple entries from the same basic block + // to have the same incoming value for each of those + // entries. If we don't do this check here and basephi + // has a different type than base, we'll end up adding two + // bitcasts (and hence two distinct values) as incoming + // values for the same basic block. + + int blockIndex = basephi->getBasicBlockIndex(InBB); + if (blockIndex != -1) { + Value *oldBase = basephi->getIncomingValue(blockIndex); + basephi->addIncoming(oldBase, InBB); #ifndef NDEBUG - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. - assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - assert(NewInsertedDefs.count(base) && - "should have already added this in a prev. 
iteration!"); - } - - // In essense this assert states: the only way two - // values incoming from the same basic block may be - // different is by being different bitcasts of the same - // value. A cleanup that remains TODO is changing - // findBaseOrBDV to return an llvm::Value of the correct - // type (and still remain pure). This will remove the - // need to add bitcasts. - assert(base->stripPointerCasts() == oldBase->stripPointerCasts() && - "sanity -- findBaseOrBDV should be pure!"); -#endif - continue; - } - - // Find either the defining value for the PHI or the normal base for - // a non-phi node Value *base = findBaseOrBDV(InVal, cache); if (!isKnownBaseResult(base)) { // Either conflict or base. assert(states.count(base)); base = states[base].getBase(); assert(base != nullptr && "unknown PhiState!"); + assert(NewInsertedDefs.count(base) && + "should have already added this in a prev. iteration!"); } - assert(base && "can't be null"); - // Must use original input BB since base may not be Instruction - // The cast is needed since base traversal may strip away bitcasts - if (base->getType() != basephi->getType()) { - base = new BitCastInst(base, basephi->getType(), "cast", - InBB->getTerminator()); - NewInsertedDefs.insert(base); - } - basephi->addIncoming(base, InBB); + + // In essense this assert states: the only way two + // values incoming from the same basic block may be + // different is by being different bitcasts of the same + // value. A cleanup that remains TODO is changing + // findBaseOrBDV to return an llvm::Value of the correct + // type (and still remain pure). This will remove the + // need to add bitcasts. + assert(base->stripPointerCasts() == oldBase->stripPointerCasts() && + "sanity -- findBaseOrBDV should be pure!"); +#endif + continue; } - assert(basephi->getNumIncomingValues() == NumPHIValues); - } else if (SelectInst *basesel = dyn_cast<SelectInst>(state.getBase())) { - SelectInst *sel = cast<SelectInst>(v); - // Operand 1 & 2 are true, false path respectively. TODO: refactor to - // something more safe and less hacky. - for (int i = 1; i <= 2; i++) { - Value *InVal = sel->getOperand(i); - // Find either the defining value for the PHI or the normal base for - // a non-phi node - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. - assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - } - assert(base && "can't be null"); - // Must use original input BB since base may not be Instruction - // The cast is needed since base traversal may strip away bitcasts - if (base->getType() != basesel->getType()) { - base = new BitCastInst(base, basesel->getType(), "cast", basesel); - NewInsertedDefs.insert(base); - } - basesel->setOperand(i, base); + + // Find either the defining value for the PHI or the normal base for + // a non-phi node + Value *base = findBaseOrBDV(InVal, cache); + if (!isKnownBaseResult(base)) { + // Either conflict or base. 
+ assert(states.count(base)); + base = states[base].getBase(); + assert(base != nullptr && "unknown PhiState!"); } - } else - llvm_unreachable("unexpected conflict type"); + assert(base && "can't be null"); + // Must use original input BB since base may not be Instruction + // The cast is needed since base traversal may strip away bitcasts + if (base->getType() != basephi->getType()) { + base = new BitCastInst(base, basephi->getType(), "cast", + InBB->getTerminator()); + NewInsertedDefs.insert(base); + } + basephi->addIncoming(base, InBB); + } + assert(basephi->getNumIncomingValues() == NumPHIValues); + } else { + SelectInst *basesel = cast<SelectInst>(state.getBase()); + SelectInst *sel = cast<SelectInst>(v); + // Operand 1 & 2 are true, false path respectively. TODO: refactor to + // something more safe and less hacky. + for (int i = 1; i <= 2; i++) { + Value *InVal = sel->getOperand(i); + // Find either the defining value for the PHI or the normal base for + // a non-phi node + Value *base = findBaseOrBDV(InVal, cache); + if (!isKnownBaseResult(base)) { + // Either conflict or base. + assert(states.count(base)); + base = states[base].getBase(); + assert(base != nullptr && "unknown PhiState!"); + } + assert(base && "can't be null"); + // Must use original input BB since base may not be Instruction + // The cast is needed since base traversal may strip away bitcasts + if (base->getType() != basesel->getType()) { + base = new BitCastInst(base, basesel->getType(), "cast", basesel); + NewInsertedDefs.insert(base); + } + basesel->setOperand(i, base); + } } } @@ -964,7 +983,13 @@ static void findBasePointers(const StatepointLiveSetTy &live, DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, DominatorTree *DT, DefiningValueMapTy &DVCache, DenseSet<llvm::Value *> &NewInsertedDefs) { - for (Value *ptr : live) { + // For the naming of values inserted to be deterministic - which makes for + // much cleaner and more stable tests - we need to assign an order to the + // live values. DenseSets do not provide a deterministic order across runs. + SmallVector<Value*, 64> Temp; + Temp.insert(Temp.end(), live.begin(), live.end()); + std::sort(Temp.begin(), Temp.end(), order_by_name); + for (Value *ptr : Temp) { Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs); assert(base && "failed to find base pointer"); PointerToBase[ptr] = base; @@ -993,10 +1018,19 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, findBasePointers(result.liveset, PointerToBase, &DT, DVCache, NewInsertedDefs); if (PrintBasePointers) { + // Note: Need to print these in a stable order since this is checked in + // some tests. errs() << "Base Pairs (w/o Relocation):\n"; + SmallVector<Value*, 64> Temp; + Temp.reserve(PointerToBase.size()); for (auto Pair : PointerToBase) { - errs() << " derived %" << Pair.first->getName() << " base %" - << Pair.second->getName() << "\n"; + Temp.push_back(Pair.first); + } + std::sort(Temp.begin(), Temp.end(), order_by_name); + for (Value *Ptr : Temp) { + Value *Base = PointerToBase[Ptr]; + errs() << " derived %" << Ptr->getName() << " base %" + << Base->getName() << "\n"; } } @@ -1131,11 +1165,11 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) { /// statepointToken - statepoint instruction to which relocates should be /// bound. /// Builder - Llvm IR builder to be used to construct new calls. 
-void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables, - const int liveStart, - ArrayRef<llvm::Value *> basePtrs, - Instruction *statepointToken, IRBuilder<> Builder) { - +static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables, + const int liveStart, + ArrayRef<llvm::Value *> basePtrs, + Instruction *statepointToken, + IRBuilder<> Builder) { SmallVector<Instruction *, 64> NewDefs; NewDefs.reserve(liveVariables.size()); @@ -1559,8 +1593,18 @@ static void relocationViaAlloca( // store must be inserted after load, otherwise store will be in alloca's // use list and an extra load will be inserted before it StoreInst *store = new StoreInst(def, alloca); - if (isa<Instruction>(def)) { - store->insertAfter(cast<Instruction>(def)); + if (Instruction *inst = dyn_cast<Instruction>(def)) { + if (InvokeInst *invoke = dyn_cast<InvokeInst>(inst)) { + // InvokeInst is a TerminatorInst so the store need to be inserted + // into its normal destination block. + BasicBlock *normalDest = invoke->getNormalDest(); + store->insertBefore(normalDest->getFirstNonPHI()); + } else { + assert(!inst->isTerminator() && + "The only TerminatorInst that can produce a value is " + "InvokeInst which is handled above."); + store->insertAfter(inst); + } } else { assert((isa<Argument>(def) || isa<GlobalVariable>(def) || (isa<Constant>(def) && cast<Constant>(def)->isNullValue())) && diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 05b9608..875a007 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -35,7 +36,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -154,7 +154,7 @@ namespace { /// Constant Propagation. /// class SCCPSolver : public InstVisitor<SCCPSolver> { - const DataLayout *DL; + const DataLayout &DL; const TargetLibraryInfo *TLI; SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable. DenseMap<Value*, LatticeVal> ValueState; // The state each value is in. @@ -206,8 +206,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { typedef std::pair<BasicBlock*, BasicBlock*> Edge; DenseSet<Edge> KnownFeasibleEdges; public: - SCCPSolver(const DataLayout *DL, const TargetLibraryInfo *tli) - : DL(DL), TLI(tli) {} + SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli) + : DL(DL), TLI(tli) {} /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. @@ -1561,8 +1561,7 @@ bool SCCP::runOnFunction(Function &F) { return false; DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); - const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? 
&DLP->getDataLayout() : nullptr; + const DataLayout &DL = F.getParent()->getDataLayout(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); SCCPSolver Solver(DL, TLI); @@ -1691,8 +1690,7 @@ static bool AddressIsTaken(const GlobalValue *GV) { } bool IPSCCP::runOnModule(Module &M) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; + const DataLayout &DL = M.getDataLayout(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); SCCPSolver Solver(DL, TLI); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index f69c750..06b000f 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -247,7 +247,7 @@ public: /// hold. void insert(ArrayRef<Slice> NewSlices) { int OldSize = Slices.size(); - std::move(NewSlices.begin(), NewSlices.end(), std::back_inserter(Slices)); + Slices.append(NewSlices.begin(), NewSlices.end()); auto SliceI = Slices.begin() + OldSize; std::sort(SliceI, Slices.end()); std::inplace_merge(Slices.begin(), SliceI, Slices.end()); @@ -701,6 +701,7 @@ private: // by writing out the code here where we have tho underlying allocation // size readily available. APInt GEPOffset = Offset; + const DataLayout &DL = GEPI.getModule()->getDataLayout(); for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI); GTI != GTE; ++GTI) { @@ -750,6 +751,7 @@ private: if (!IsOffsetKnown) return PI.setAborted(&LI); + const DataLayout &DL = LI.getModule()->getDataLayout(); uint64_t Size = DL.getTypeStoreSize(LI.getType()); return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } @@ -761,6 +763,7 @@ private: if (!IsOffsetKnown) return PI.setAborted(&SI); + const DataLayout &DL = SI.getModule()->getDataLayout(); uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); // If this memory access can be shown to *statically* extend outside the @@ -898,6 +901,7 @@ private: SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; Visited.insert(Root); Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); + const DataLayout &DL = Root->getModule()->getDataLayout(); // If there are no loads or stores, the access is dead. We mark that as // a size zero access. Size = 0; @@ -1194,7 +1198,6 @@ class SROA : public FunctionPass { const bool RequiresDomTree; LLVMContext *C; - const DataLayout *DL; DominatorTree *DT; AssumptionCache *AC; @@ -1243,7 +1246,7 @@ class SROA : public FunctionPass { public: SROA(bool RequiresDomTree = true) : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr), - DL(nullptr), DT(nullptr) { + DT(nullptr) { initializeSROAPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -1349,7 +1352,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B, /// /// FIXME: This should be hoisted into a generic utility, likely in /// Transforms/Util/Local.h -static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) { +static bool isSafePHIToSpeculate(PHINode &PN) { // For now, we can only do this promotion if the load is in the same block // as the PHI, and if there are no stores between the phi and load. // TODO: Allow recursive phi users. 
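[Editorial note] isSafePHIToSpeculate here, and isSafeSelectToSpeculate further below, guard the rewrite where a load of a merged pointer is replaced by loads on each incoming path whose results are then merged. A source-level analogue with invented names; the "after" form is only legal when both loads are known not to trap, which is exactly what these checks establish:

// Before: merge the addresses, then load once.
int loadMerged(bool C, int *A, int *B) {
  int *P = C ? A : B;   // address select/PHI
  return *P;
}

// After speculation: load on each incoming path, then merge the values.
int loadSpeculated(bool C, int *A, int *B) {
  int VA = *A;          // hoisted load, assumed safe to execute
  int VB = *B;
  return C ? VA : VB;   // value select/PHI
}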
@@ -1381,6 +1384,8 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) { if (!HaveLoad) return false; + const DataLayout &DL = PN.getModule()->getDataLayout(); + // We can only transform this if it is safe to push the loads into the // predecessor blocks. The only thing to watch out for is that we can't put // a possibly trapping load in the predecessor if it is a critical edge. @@ -1403,7 +1408,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) { // is already a load in the block, then we can move the load to the pred // block. if (InVal->isDereferenceablePointer(DL) || - isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL)) + isSafeToLoadUnconditionally(InVal, TI, MaxAlign)) continue; return false; @@ -1468,10 +1473,10 @@ static void speculatePHINodeLoads(PHINode &PN) { /// /// We can do this to a select if its only uses are loads and if the operand /// to the select can be loaded unconditionally. -static bool isSafeSelectToSpeculate(SelectInst &SI, - const DataLayout *DL = nullptr) { +static bool isSafeSelectToSpeculate(SelectInst &SI) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); + const DataLayout &DL = SI.getModule()->getDataLayout(); bool TDerefable = TValue->isDereferenceablePointer(DL); bool FDerefable = FValue->isDereferenceablePointer(DL); @@ -1484,10 +1489,10 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, // absolutely (e.g. allocas) or at this point because we can see other // accesses to it. if (!TDerefable && - !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment(), DL)) + !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment())) return false; if (!FDerefable && - !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment(), DL)) + !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment())) return false; } @@ -3699,6 +3704,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // them to the alloca slices. 
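[Editorial note] presplitLoadsAndStores, whose hunks follow, breaks loads and stores that span several slices of an alloca into one access per slice, so each slice can be rewritten and promoted independently. A struct-level analogue of the idea, with invented names, for illustration only:

#include <cstring>

struct Pair { int A, B; };

// Before: one wide access covering both fields.
void copyWhole(Pair &Dst, const Pair &Src) {
  std::memcpy(&Dst, &Src, sizeof(Pair));
}

// After pre-splitting: one access per slice, so each field can become its
// own promoted scalar.
void copySplit(Pair &Dst, const Pair &Src) {
  Dst.A = Src.A;
  Dst.B = Src.B;
}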
SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap; std::vector<LoadInst *> SplitLoads; + const DataLayout &DL = AI.getModule()->getDataLayout(); for (LoadInst *LI : Loads) { SplitLoads.clear(); @@ -3724,10 +3730,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); LoadInst *PLoad = IRB.CreateAlignedLoad( - getAdjustedPtr(IRB, *DL, BasePtr, - APInt(DL->getPointerSizeInBits(), PartOffset), + getAdjustedPtr(IRB, DL, BasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), PartPtrTy, BasePtr->getName() + "."), - getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false, + getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); // Append this load onto the list of split loads so we can find it later @@ -3777,10 +3783,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr, - APInt(DL->getPointerSizeInBits(), PartOffset), + PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), PartPtrTy, StoreBasePtr->getName() + "."), - getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false); + getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); (void)PStore; DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); } @@ -3857,20 +3863,20 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { } else { IRB.SetInsertPoint(BasicBlock::iterator(LI)); PLoad = IRB.CreateAlignedLoad( - getAdjustedPtr(IRB, *DL, LoadBasePtr, - APInt(DL->getPointerSizeInBits(), PartOffset), + getAdjustedPtr(IRB, DL, LoadBasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), PartPtrTy, LoadBasePtr->getName() + "."), - getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false, + getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); } // And store this partition. IRB.SetInsertPoint(BasicBlock::iterator(SI)); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr, - APInt(DL->getPointerSizeInBits(), PartOffset), + PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(), PartOffset), PartPtrTy, StoreBasePtr->getName() + "."), - getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false); + getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); // Now build a new slice for the alloca. NewSlices.push_back( @@ -3970,25 +3976,26 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // won't always succeed, in which case we fall back to a legal integer type // or an i8 array of an appropriate size. 
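[Editorial note] The hunk below implements the fallback described in the comment above. Condensed as a reading aid only (the helper name is invented, and it omits one refinement where a partition type that is an array of integers may still be replaced by a single legal integer), the selection ladder is:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cstdint>

using namespace llvm;

// Reading aid only: pick a type for one partition of PartitionSize bytes.
static Type *pickSliceType(const DataLayout &DL, LLVMContext &C,
                           Type *CommonUseTy, Type *TypePartitionTy,
                           uint64_t PartitionSize) {
  if (CommonUseTy && DL.getTypeAllocSize(CommonUseTy) >= PartitionSize)
    return CommonUseTy;                            // a type all users share
  if (TypePartitionTy)
    return TypePartitionTy;                        // a piece of the alloca's type
  if (DL.isLegalInteger(PartitionSize * 8))
    return Type::getIntNTy(C, PartitionSize * 8);  // a target-legal integer
  return ArrayType::get(Type::getInt8Ty(C), PartitionSize);  // [N x i8]
}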
Type *SliceTy = nullptr; + const DataLayout &DL = AI.getModule()->getDataLayout(); if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset())) - if (DL->getTypeAllocSize(CommonUseTy) >= P.size()) + if (DL.getTypeAllocSize(CommonUseTy) >= P.size()) SliceTy = CommonUseTy; if (!SliceTy) - if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(), + if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), P.beginOffset(), P.size())) SliceTy = TypePartitionTy; if ((!SliceTy || (SliceTy->isArrayTy() && SliceTy->getArrayElementType()->isIntegerTy())) && - DL->isLegalInteger(P.size() * 8)) + DL.isLegalInteger(P.size() * 8)) SliceTy = Type::getIntNTy(*C, P.size() * 8); if (!SliceTy) SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size()); - assert(DL->getTypeAllocSize(SliceTy) >= P.size()); + assert(DL.getTypeAllocSize(SliceTy) >= P.size()); - bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, *DL); + bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL); VectorType *VecTy = - IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, *DL); + IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL); if (VecTy) SliceTy = VecTy; @@ -4010,12 +4017,12 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // The minimum alignment which users can rely on when the explicit // alignment is omitted or zero is that required by the ABI for this // type. - Alignment = DL->getABITypeAlignment(AI.getAllocatedType()); + Alignment = DL.getABITypeAlignment(AI.getAllocatedType()); } Alignment = MinAlign(Alignment, P.beginOffset()); // If we will get at least this much alignment from the type alone, leave // the alloca's alignment unconstrained. - if (Alignment <= DL->getABITypeAlignment(SliceTy)) + if (Alignment <= DL.getABITypeAlignment(SliceTy)) Alignment = 0; NewAI = new AllocaInst( SliceTy, nullptr, Alignment, @@ -4035,7 +4042,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, SmallPtrSet<PHINode *, 8> PHIUsers; SmallPtrSet<SelectInst *, 8> SelectUsers; - AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, P.beginOffset(), + AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, PHIUsers, SelectUsers); bool Promotable = true; @@ -4057,7 +4064,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(), E = PHIUsers.end(); I != E; ++I) - if (!isSafePHIToSpeculate(**I, DL)) { + if (!isSafePHIToSpeculate(**I)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); @@ -4066,7 +4073,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(), E = SelectUsers.end(); I != E; ++I) - if (!isSafeSelectToSpeculate(**I, DL)) { + if (!isSafeSelectToSpeculate(**I)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); @@ -4110,6 +4117,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { unsigned NumPartitions = 0; bool Changed = false; + const DataLayout &DL = AI.getModule()->getDataLayout(); // First try to pre-split loads and stores. Changed |= presplitLoadsAndStores(AI, AS); @@ -4127,7 +4135,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // confident that the above handling of splittable loads and stores is // completely sufficient before we forcibly disable the remaining handling. 
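[Editorial note] Back in rewritePartition, earlier in the hunk above, the replacement alloca's alignment is derived from the original alignment and the slice's start offset. As a reading aid, the same logic extracted into a standalone helper (name and factoring are assumptions for the sketch):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

// Reading aid only: alignment chosen for a new alloca that replaces the
// slice starting at ByteOffset of the original alloca.
static unsigned sliceAlignment(const DataLayout &DL, Type *OrigTy,
                               Type *SliceTy, unsigned ExplicitAlign,
                               uint64_t ByteOffset) {
  uint64_t Align =
      ExplicitAlign ? ExplicitAlign : DL.getABITypeAlignment(OrigTy);
  Align = MinAlign(Align, ByteOffset);   // an offset can only weaken alignment
  // If the slice type already guarantees at least this much, leave the new
  // alloca's alignment implicit (0 means "use the ABI default").
  return Align <= DL.getABITypeAlignment(SliceTy) ? 0 : unsigned(Align);
}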
if (S.beginOffset() == 0 && - S.endOffset() >= DL->getTypeAllocSize(AI.getAllocatedType())) + S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType())) continue; if (isa<LoadInst>(S.getUse()->getUser()) || isa<StoreInst>(S.getUse()->getUser())) { @@ -4155,7 +4163,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { Changed = true; if (NewAI != &AI) { uint64_t SizeOfByte = 8; - uint64_t AllocaSize = DL->getTypeSizeInBits(NewAI->getAllocatedType()); + uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType()); // Don't include any padding. uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte); Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size)); @@ -4236,21 +4244,22 @@ bool SROA::runOnAlloca(AllocaInst &AI) { AI.eraseFromParent(); return true; } + const DataLayout &DL = AI.getModule()->getDataLayout(); // Skip alloca forms that this analysis can't handle. if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() || - DL->getTypeAllocSize(AI.getAllocatedType()) == 0) + DL.getTypeAllocSize(AI.getAllocatedType()) == 0) return false; bool Changed = false; // First, split any FCA loads and stores touching this alloca to promote // better splitting and promotion opportunities. - AggLoadStoreRewriter AggRewriter(*DL); + AggLoadStoreRewriter AggRewriter(DL); Changed |= AggRewriter.rewrite(AI); // Build the slices using a recursive instruction-visiting builder. - AllocaSlices AS(*DL, AI); + AllocaSlices AS(DL, AI); DEBUG(AS.print(dbgs())); if (AS.isEscaped()) return Changed; @@ -4423,12 +4432,6 @@ bool SROA::runOnFunction(Function &F) { DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) { - DEBUG(dbgs() << " Skipping SROA -- no target data!\n"); - return false; - } - DL = &DLP->getDataLayout(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index c7232a9..3e7cf04 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -217,6 +217,9 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) { /// \returns The profiled weight of I. unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) { DebugLoc DLoc = Inst.getDebugLoc(); + if (DLoc.isUnknown()) + return 0; + unsigned Lineno = DLoc.getLine(); if (Lineno < HeaderLineno) return 0; diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 621633b..6cc8411 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopDeletionPass(Registry); initializeLoopAccessAnalysisPass(Registry); initializeLoopInstSimplifyPass(Registry); + initializeLoopInterchangePass(Registry); initializeLoopRotatePass(Registry); initializeLoopStrengthReducePass(Registry); initializeLoopRerollPass(Registry); @@ -209,7 +210,6 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { void LLVMAddVerifierPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createVerifierPass()); - // FIXME: should this also add createDebugInfoVerifierPass()? 
} void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 5c49a55..acd8585 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -89,7 +89,6 @@ namespace { private: bool HasDomTree; - const DataLayout *DL; /// DeadInsts - Keep track of instructions we have made dead, so that /// we can remove them after we are done working. @@ -159,9 +158,10 @@ namespace { void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, Type *MemOpType, bool isStore, AllocaInfo &Info, Instruction *TheAccess, bool AllowWholeAccess); - bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size); - uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, - Type *&IdxTy); + bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size, + const DataLayout &DL); + uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy, + const DataLayout &DL); void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); @@ -699,9 +699,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // If the source and destination are both to the same alloca, then this is // a noop copy-to-self, just delete it. Otherwise, emit a load and store // as appropriate. - AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &DL, 0)); + AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0)); - if (GetUnderlyingObject(MTI->getSource(), &DL, 0) != OrigAI) { + if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) { // Dest must be OrigAI, change this to be a load from the original // pointer (bitcasted), then a store to our new alloca. assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); @@ -717,7 +717,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); SrcVal->setAlignment(MTI->getAlignment()); Builder.CreateStore(SrcVal, NewAI); - } else if (GetUnderlyingObject(MTI->getDest(), &DL, 0) != OrigAI) { + } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) { // Src must be OrigAI, change this to be a load from NewAI then a store // through the original dest pointer (bitcasted). assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); @@ -1032,17 +1032,8 @@ bool SROA::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; - bool Changed = performPromotion(F); - // FIXME: ScalarRepl currently depends on DataLayout more than it - // theoretically needs to. It should be refactored in order to support - // target-independent IR. Until this is done, just skip the actual - // scalar-replacement portion of this pass. - if (!DL) return Changed; - while (1) { bool LocalChange = performScalarRepl(F); if (!LocalChange) break; // No need to repromote if no scalarrepl @@ -1148,7 +1139,8 @@ public: /// /// We can do this to a select if its only uses are loads and if the operand to /// the select can be loaded unconditionally. 
-static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { +static bool isSafeSelectToSpeculate(SelectInst *SI) { + const DataLayout &DL = SI->getModule()->getDataLayout(); bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL); bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL); @@ -1158,11 +1150,13 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { // Both operands to the select need to be dereferencable, either absolutely // (e.g. allocas) or at this point because we can see other accesses to it. - if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI, - LI->getAlignment(), DL)) + if (!TDerefable && + !isSafeToLoadUnconditionally(SI->getTrueValue(), LI, + LI->getAlignment())) return false; - if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI, - LI->getAlignment(), DL)) + if (!FDerefable && + !isSafeToLoadUnconditionally(SI->getFalseValue(), LI, + LI->getAlignment())) return false; } @@ -1185,7 +1179,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { /// /// We can do this to a select if its only uses are loads and if the operand to /// the select can be loaded unconditionally. -static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { +static bool isSafePHIToSpeculate(PHINode *PN) { // For now, we can only do this promotion if the load is in the same block as // the PHI, and if there are no stores between the phi and load. // TODO: Allow recursive phi users. @@ -1209,6 +1203,8 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { MaxAlign = std::max(MaxAlign, LI->getAlignment()); } + const DataLayout &DL = PN->getModule()->getDataLayout(); + // Okay, we know that we have one or more loads in the same block as the PHI. // We can transform this if it is safe to push the loads into the predecessor // blocks. The only thing to watch out for is that we can't put a possibly @@ -1234,7 +1230,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { // If this pointer is always safe to load, or if we can prove that there is // already a load in the block, then we can move the load to the pred block. if (InVal->isDereferenceablePointer(DL) || - isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL)) + isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign)) continue; return false; @@ -1248,7 +1244,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { /// direct (non-volatile) loads and stores to it. If the alloca is close but /// not quite there, this will transform the code to allow promotion. As such, /// it is a non-pure predicate. -static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { +static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) { SetVector<Instruction*, SmallVector<Instruction*, 4>, SmallPtrSet<Instruction*, 4> > InstsToRewrite; for (User *U : AI->users()) { @@ -1279,7 +1275,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { // If it is safe to turn "load (select c, AI, ptr)" into a select of two // loads, then we can transform this by rewriting the select. 
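The rewrite described in the comment above replaces one conditional load with two unconditional ones, which is why both operands must be provably dereferenceable (or already loaded on every path). A source-level analogy with hypothetical names; the pass itself performs this on IR, not C++:

// Before: one load through whichever pointer the condition selects.
int loadThroughSelect(bool C, int *A, int *P) {
  int *Q = C ? A : P;   // corresponds to: select C, AI, ptr
  return *Q;            // corresponds to: load (select ...)
}

// After: load both, then select the value. Only safe when *A and *P can be
// read unconditionally (e.g. A is an alloca, P is known dereferenceable).
int selectOfLoads(bool C, int *A, int *P) {
  int ValA = *A;
  int ValP = *P;
  return C ? ValA : ValP;
}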
- if (!isSafeSelectToSpeculate(SI, DL)) + if (!isSafeSelectToSpeculate(SI)) return false; InstsToRewrite.insert(SI); @@ -1294,7 +1290,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads // in the pred blocks, then we can transform this by rewriting the PHI. - if (!isSafePHIToSpeculate(PN, DL)) + if (!isSafePHIToSpeculate(PN)) return false; InstsToRewrite.insert(PN); @@ -1416,6 +1412,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { bool SROA::performPromotion(Function &F) { std::vector<AllocaInst*> Allocas; + const DataLayout &DL = F.getParent()->getDataLayout(); DominatorTree *DT = nullptr; if (HasDomTree) DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -1479,6 +1476,7 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { // bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; + const DataLayout &DL = F.getParent()->getDataLayout(); // Scan the entry basic block, adding allocas to the worklist. BasicBlock &BB = F.getEntryBlock(); @@ -1508,7 +1506,7 @@ bool SROA::performScalarRepl(Function &F) { // transform the allocation instruction if it is an array allocation // (allocations OF arrays are ok though), and an allocation of a scalar // value cannot be decomposed at all. - uint64_t AllocaSize = DL->getTypeAllocSize(AI->getAllocatedType()); + uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType()); // Do not promote [0 x %struct]. if (AllocaSize == 0) continue; @@ -1531,8 +1529,9 @@ bool SROA::performScalarRepl(Function &F) { // promoted itself. If so, we don't want to transform it needlessly. Note // that we can't just check based on the type: the alloca may be of an i32 // but that has pointer arithmetic to set byte 3 of it or something. - if (AllocaInst *NewAI = ConvertToScalarInfo( - (unsigned)AllocaSize, *DL, ScalarLoadThreshold).TryConvert(AI)) { + if (AllocaInst *NewAI = + ConvertToScalarInfo((unsigned)AllocaSize, DL, ScalarLoadThreshold) + .TryConvert(AI)) { NewAI->takeName(AI); AI->eraseFromParent(); ++NumConverted; @@ -1610,6 +1609,7 @@ void SROA::DeleteDeadInstructions() { /// referenced by this instruction. 
void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info) { + const DataLayout &DL = I->getModule()->getDataLayout(); for (Use &U : I->uses()) { Instruction *User = cast<Instruction>(U.getUser()); @@ -1632,8 +1632,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, if (!LI->isSimple()) return MarkUnsafe(Info, User); Type *LIType = LI->getType(); - isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType), - LIType, false, Info, LI, true /*AllowWholeAccess*/); + isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info, + LI, true /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { @@ -1642,8 +1642,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, return MarkUnsafe(Info, User); Type *SIType = SI->getOperand(0)->getType(); - isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType), - SIType, true, Info, SI, true /*AllowWholeAccess*/); + isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info, + SI, true /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { if (II->getIntrinsicID() != Intrinsic::lifetime_start && @@ -1675,6 +1675,7 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, if (!Info.CheckedPHIs.insert(PN).second) return; + const DataLayout &DL = I->getModule()->getDataLayout(); for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); @@ -1691,8 +1692,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, if (!LI->isSimple()) return MarkUnsafe(Info, UI); Type *LIType = LI->getType(); - isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType), - LIType, false, Info, LI, false /*AllowWholeAccess*/); + isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info, + LI, false /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { @@ -1701,8 +1702,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, return MarkUnsafe(Info, UI); Type *SIType = SI->getOperand(0)->getType(); - isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType), - SIType, true, Info, SI, false /*AllowWholeAccess*/); + isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info, + SI, false /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) { isSafePHISelectUseForScalarRepl(UI, Offset, Info); @@ -1746,9 +1747,11 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, // constant part of the offset. if (NonConstant) Indices.pop_back(); - Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices); - if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, - NonConstantIdxSize)) + + const DataLayout &DL = GEPI->getModule()->getDataLayout(); + Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices); + if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize, + DL)) MarkUnsafe(Info, GEPI); } @@ -1803,9 +1806,10 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, Type *MemOpType, bool isStore, AllocaInfo &Info, Instruction *TheAccess, bool AllowWholeAccess) { + const DataLayout &DL = TheAccess->getModule()->getDataLayout(); // Check if this is a load/store of the entire alloca. 
if (Offset == 0 && AllowWholeAccess && - MemSize == DL->getTypeAllocSize(Info.AI->getAllocatedType())) { + MemSize == DL.getTypeAllocSize(Info.AI->getAllocatedType())) { // This can be safe for MemIntrinsics (where MemOpType is 0) and integer // loads/stores (which are essentially the same as the MemIntrinsics with // regard to copying padding between elements). But, if an alloca is @@ -1828,7 +1832,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, } // Check if the offset/size correspond to a component within the alloca type. Type *T = Info.AI->getAllocatedType(); - if (TypeHasComponent(T, Offset, MemSize)) { + if (TypeHasComponent(T, Offset, MemSize, DL)) { Info.hasSubelementAccess = true; return; } @@ -1838,24 +1842,25 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, /// TypeHasComponent - Return true if T has a component type with the /// specified offset and size. If Size is zero, do not check the size. -bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { +bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size, + const DataLayout &DL) { Type *EltTy; uint64_t EltSize; if (StructType *ST = dyn_cast<StructType>(T)) { - const StructLayout *Layout = DL->getStructLayout(ST); + const StructLayout *Layout = DL.getStructLayout(ST); unsigned EltIdx = Layout->getElementContainingOffset(Offset); EltTy = ST->getContainedType(EltIdx); - EltSize = DL->getTypeAllocSize(EltTy); + EltSize = DL.getTypeAllocSize(EltTy); Offset -= Layout->getElementOffset(EltIdx); } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { EltTy = AT->getElementType(); - EltSize = DL->getTypeAllocSize(EltTy); + EltSize = DL.getTypeAllocSize(EltTy); if (Offset >= AT->getNumElements() * EltSize) return false; Offset %= EltSize; } else if (VectorType *VT = dyn_cast<VectorType>(T)) { EltTy = VT->getElementType(); - EltSize = DL->getTypeAllocSize(EltTy); + EltSize = DL.getTypeAllocSize(EltTy); if (Offset >= VT->getNumElements() * EltSize) return false; Offset %= EltSize; @@ -1867,7 +1872,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { // Check if the component spans multiple elements. if (Offset + Size > EltSize) return false; - return TypeHasComponent(EltTy, Offset, Size); + return TypeHasComponent(EltTy, Offset, Size, DL); } /// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite @@ -1876,6 +1881,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { /// instruction. void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, SmallVectorImpl<AllocaInst *> &NewElts) { + const DataLayout &DL = I->getModule()->getDataLayout(); for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) { Use &TheUse = *UI++; Instruction *User = cast<Instruction>(TheUse.getUser()); @@ -1893,8 +1899,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); uint64_t MemSize = Length->getZExtValue(); - if (Offset == 0 && - MemSize == DL->getTypeAllocSize(AI->getAllocatedType())) + if (Offset == 0 && MemSize == DL.getTypeAllocSize(AI->getAllocatedType())) RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); // Otherwise the intrinsic can only touch a single element and the // address operand will be updated, so nothing else needs to be done. 
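TypeHasComponent above (and FindElementAndOffset further down) reduce to the same step: map a byte offset to an element index plus a remaining offset inside that element, then recurse. A simplified, self-contained sketch of the homogeneous (array/vector) case; the element size comes from DataLayout in the real code, and splitSequentialOffset is an illustrative name:

#include <cassert>
#include <cstdint>
#include <utility>

// Decompose a byte offset into (element index, offset within that element)
// for a sequence of EltSize-byte elements.
std::pair<uint64_t, uint64_t> splitSequentialOffset(uint64_t Offset,
                                                    uint64_t EltSize) {
  uint64_t Idx = Offset / EltSize;
  return {Idx, Offset - Idx * EltSize};
}

int main() {
  // Byte 10 of an array of 4-byte elements is byte 2 of element 2, so an
  // access of size <= 2 starting there stays inside one component.
  auto R = splitSequentialOffset(10, 4);
  assert(R.first == 2 && R.second == 2);
  return 0;
}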
@@ -1930,8 +1935,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, LI->replaceAllUsesWith(Insert); DeadInsts.push_back(LI); } else if (LIType->isIntegerTy() && - DL->getTypeAllocSize(LIType) == - DL->getTypeAllocSize(AI->getAllocatedType())) { + DL.getTypeAllocSize(LIType) == + DL.getTypeAllocSize(AI->getAllocatedType())) { // If this is a load of the entire alloca to an integer, rewrite it. RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); } @@ -1957,8 +1962,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, } DeadInsts.push_back(SI); } else if (SIType->isIntegerTy() && - DL->getTypeAllocSize(SIType) == - DL->getTypeAllocSize(AI->getAllocatedType())) { + DL.getTypeAllocSize(SIType) == + DL.getTypeAllocSize(AI->getAllocatedType())) { // If this is a store of the entire alloca from an integer, rewrite it. RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); } @@ -2001,7 +2006,8 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, Type *T = AI->getAllocatedType(); uint64_t EltOffset = 0; Type *IdxTy; - uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy); + uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, + BC->getModule()->getDataLayout()); Instruction *Val = NewElts[Idx]; if (Val->getType() != BC->getDestTy()) { Val = new BitCastInst(Val, BC->getDestTy(), "", BC); @@ -2016,11 +2022,12 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, /// Sets T to the type of the element and Offset to the offset within that /// element. IdxTy is set to the type of the index result to be used in a /// GEP instruction. -uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, - Type *&IdxTy) { +uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy, + const DataLayout &DL) { uint64_t Idx = 0; + if (StructType *ST = dyn_cast<StructType>(T)) { - const StructLayout *Layout = DL->getStructLayout(ST); + const StructLayout *Layout = DL.getStructLayout(ST); Idx = Layout->getElementContainingOffset(Offset); T = ST->getContainedType(Idx); Offset -= Layout->getElementOffset(Idx); @@ -2028,7 +2035,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, return Idx; } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { T = AT->getElementType(); - uint64_t EltSize = DL->getTypeAllocSize(T); + uint64_t EltSize = DL.getTypeAllocSize(T); Idx = Offset / EltSize; Offset -= Idx * EltSize; IdxTy = Type::getInt64Ty(T->getContext()); @@ -2036,7 +2043,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, } VectorType *VT = cast<VectorType>(T); T = VT->getElementType(); - uint64_t EltSize = DL->getTypeAllocSize(T); + uint64_t EltSize = DL.getTypeAllocSize(T); Idx = Offset / EltSize; Offset -= Idx * EltSize; IdxTy = Type::getInt64Ty(T->getContext()); @@ -2049,6 +2056,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVectorImpl<AllocaInst *> &NewElts) { uint64_t OldOffset = Offset; + const DataLayout &DL = GEPI->getModule()->getDataLayout(); SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); // If the GEP was dynamic then it must have been a dynamic vector lookup. 
// In this case, it must be the last GEP operand which is dynamic so keep that @@ -2057,19 +2065,19 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, Value* NonConstantIdx = nullptr; if (!GEPI->hasAllConstantIndices()) NonConstantIdx = Indices.pop_back_val(); - Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices); + Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices); RewriteForScalarRepl(GEPI, AI, Offset, NewElts); Type *T = AI->getAllocatedType(); Type *IdxTy; - uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy); + uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy, DL); if (GEPI->getOperand(0) == AI) OldIdx = ~0ULL; // Force the GEP to be rewritten. T = AI->getAllocatedType(); uint64_t EltOffset = Offset; - uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy); + uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, DL); // If this GEP does not move the pointer across elements of the alloca // being split, then it does not needs to be rewritten. @@ -2080,7 +2088,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector<Value*, 8> NewArgs; NewArgs.push_back(Constant::getNullValue(i32Ty)); while (EltOffset != 0) { - uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy); + uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy, DL); NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx)); } if (NonConstantIdx) { @@ -2114,9 +2122,10 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, // Put matching lifetime markers on everything from Offset up to // Offset+OldSize. Type *AIType = AI->getAllocatedType(); + const DataLayout &DL = II->getModule()->getDataLayout(); uint64_t NewOffset = Offset; Type *IdxTy; - uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy); + uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy, DL); IRBuilder<> Builder(II); uint64_t Size = OldSize->getLimitedValue(); @@ -2129,7 +2138,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); IdxTy = NewElts[Idx]->getAllocatedType(); - uint64_t EltSize = DL->getTypeAllocSize(IdxTy) - NewOffset; + uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset; if (EltSize > Size) { EltSize = Size; Size = 0; @@ -2145,7 +2154,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, for (; Idx != NewElts.size() && Size; ++Idx) { IdxTy = NewElts[Idx]->getAllocatedType(); - uint64_t EltSize = DL->getTypeAllocSize(IdxTy); + uint64_t EltSize = DL.getTypeAllocSize(IdxTy); if (EltSize > Size) { EltSize = Size; Size = 0; @@ -2221,6 +2230,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, bool SROADest = MI->getRawDest() == Inst; Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); + const DataLayout &DL = MI->getModule()->getDataLayout(); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. 
@@ -2237,10 +2247,10 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); Type *OtherTy = OtherPtrTy->getElementType(); if (StructType *ST = dyn_cast<StructType>(OtherTy)) { - EltOffset = DL->getStructLayout(ST)->getElementOffset(i); + EltOffset = DL.getStructLayout(ST)->getElementOffset(i); } else { Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); - EltOffset = DL->getTypeAllocSize(EltTy)*i; + EltOffset = DL.getTypeAllocSize(EltTy) * i; } // The alignment of the other pointer is the guaranteed alignment of the @@ -2281,7 +2291,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, Type *ValTy = EltTy->getScalarType(); // Construct an integer with the right value. - unsigned EltSize = DL->getTypeSizeInBits(ValTy); + unsigned EltSize = DL.getTypeSizeInBits(ValTy); APInt OneVal(EltSize, CI->getZExtValue()); APInt TotalVal(OneVal); // Set each byte. @@ -2311,7 +2321,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // this element. } - unsigned EltSize = DL->getTypeAllocSize(EltTy); + unsigned EltSize = DL.getTypeAllocSize(EltTy); if (!EltSize) continue; @@ -2345,12 +2355,13 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); Type *AllocaEltTy = AI->getAllocatedType(); - uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy); + const DataLayout &DL = SI->getModule()->getDataLayout(); + uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy); IRBuilder<> Builder(SI); // Handle tail padding by extending the operand - if (DL->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) + if (DL.getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = Builder.CreateZExt(SrcVal, IntegerType::get(SI->getContext(), AllocaSizeBits)); @@ -2360,15 +2371,15 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { - const StructLayout *Layout = DL->getStructLayout(EltSTy); + const StructLayout *Layout = DL.getStructLayout(EltSTy); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Get the number of bits to shift SrcVal to get the value. Type *FieldTy = EltSTy->getElementType(i); uint64_t Shift = Layout->getElementOffsetInBits(i); - if (DL->isBigEndian()) - Shift = AllocaSizeBits-Shift-DL->getTypeAllocSizeInBits(FieldTy); + if (DL.isBigEndian()) + Shift = AllocaSizeBits - Shift - DL.getTypeAllocSizeInBits(FieldTy); Value *EltVal = SrcVal; if (Shift) { @@ -2377,7 +2388,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, } // Truncate down to an integer of the right size. - uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy); + uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy); // Ignore zero sized fields like {}, they obviously contain no data. 
if (FieldSizeBits == 0) continue; @@ -2402,12 +2413,12 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, } else { ArrayType *ATy = cast<ArrayType>(AllocaEltTy); Type *ArrayEltTy = ATy->getElementType(); - uint64_t ElementOffset = DL->getTypeAllocSizeInBits(ArrayEltTy); - uint64_t ElementSizeBits = DL->getTypeSizeInBits(ArrayEltTy); + uint64_t ElementOffset = DL.getTypeAllocSizeInBits(ArrayEltTy); + uint64_t ElementSizeBits = DL.getTypeSizeInBits(ArrayEltTy); uint64_t Shift; - if (DL->isBigEndian()) + if (DL.isBigEndian()) Shift = AllocaSizeBits-ElementOffset; else Shift = 0; @@ -2441,7 +2452,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, } new StoreInst(EltVal, DestField, SI); - if (DL->isBigEndian()) + if (DL.isBigEndian()) Shift -= ElementOffset; else Shift += ElementOffset; @@ -2459,7 +2470,8 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // Extract each element out of the NewElts according to its structure offset // and form the result value. Type *AllocaEltTy = AI->getAllocatedType(); - uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy); + const DataLayout &DL = LI->getModule()->getDataLayout(); + uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy); DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); @@ -2469,10 +2481,10 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, const StructLayout *Layout = nullptr; uint64_t ArrayEltBitOffset = 0; if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { - Layout = DL->getStructLayout(EltSTy); + Layout = DL.getStructLayout(EltSTy); } else { Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); - ArrayEltBitOffset = DL->getTypeAllocSizeInBits(ArrayEltTy); + ArrayEltBitOffset = DL.getTypeAllocSizeInBits(ArrayEltTy); } Value *ResultVal = @@ -2484,7 +2496,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, Value *SrcField = NewElts[i]; Type *FieldTy = cast<PointerType>(SrcField->getType())->getElementType(); - uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy); + uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy); // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; @@ -2515,7 +2527,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, else // Array case. Shift = i*ArrayEltBitOffset; - if (DL->isBigEndian()) + if (DL.isBigEndian()) Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); if (Shift) { @@ -2532,7 +2544,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, } // Handle tail padding by truncating the result - if (DL->getTypeSizeInBits(LI->getType()) != AllocaSizeBits) + if (DL.getTypeSizeInBits(LI->getType()) != AllocaSizeBits) ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); LI->replaceAllUsesWith(ResultVal); @@ -2589,13 +2601,15 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return false; } + const DataLayout &DL = AI->getModule()->getDataLayout(); + // Okay, we know all the users are promotable. If the aggregate is a memcpy // source and destination, we have to be careful. In particular, the memcpy // could be moving around elements that live in structure padding of the LLVM // types, but may actually be used. In these cases, we refuse to promote the // struct. 
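The padding concern behind the check that follows is easy to reproduce at the source level: the bytes between fields are moved by a whole-aggregate memcpy but belong to no field, so splitting the aggregate into per-field allocas would lose them. A small illustration with a hypothetical struct (layout shown for common 32/64-bit ABIs):

#include <cstddef>
#include <cstdio>

struct Padded {
  char C; // 1 byte
  // typically 3 bytes of padding here
  int I;  // 4 bytes
};

int main() {
  // If offsetof(Padded, I) > sizeof(char), the gap is padding that a memcpy
  // of the whole struct copies but no field load/store ever touches.
  std::printf("sizeof=%zu offsetof(I)=%zu\n", sizeof(Padded),
              offsetof(Padded, I));
  return 0;
}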
if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getAllocatedType(), *DL)) + HasPadding(AI->getAllocatedType(), DL)) return false; // If the alloca never has an access to just *part* of it, but is accessed diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 6036c09..a457cba 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -165,7 +165,7 @@ private: void gather(Instruction *, const ValueVector &); bool canTransferMetadata(unsigned Kind); void transferMetadata(Instruction *, const ValueVector &); - bool getVectorLayout(Type *, unsigned, VectorLayout &); + bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &); bool finish(); template<typename T> bool splitBinary(Instruction &, const T &); @@ -173,7 +173,6 @@ private: ScatterMap Scattered; GatherList Gathered; unsigned ParallelLoopAccessMDKind; - const DataLayout *DL; bool ScalarizeLoadStore; }; @@ -248,8 +247,6 @@ bool Scalarizer::doInitialization(Module &M) { } bool Scalarizer::runOnFunction(Function &F) { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { BasicBlock *BB = BBI; for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { @@ -345,10 +342,7 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) { // Try to fill in Layout from Ty, returning true on success. Alignment is // the alignment of the vector, or 0 if the ABI default should be used. bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, - VectorLayout &Layout) { - if (!DL) - return false; - + VectorLayout &Layout, const DataLayout &DL) { // Make sure we're dealing with a vector. Layout.VecTy = dyn_cast<VectorType>(Ty); if (!Layout.VecTy) @@ -356,15 +350,15 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, // Check that we're dealing with full-byte elements. 
Layout.ElemTy = Layout.VecTy->getElementType(); - if (DL->getTypeSizeInBits(Layout.ElemTy) != - DL->getTypeStoreSizeInBits(Layout.ElemTy)) + if (DL.getTypeSizeInBits(Layout.ElemTy) != + DL.getTypeStoreSizeInBits(Layout.ElemTy)) return false; if (Alignment) Layout.VecAlign = Alignment; else - Layout.VecAlign = DL->getABITypeAlignment(Layout.VecTy); - Layout.ElemSize = DL->getTypeStoreSize(Layout.ElemTy); + Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy); + Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy); return true; } @@ -456,7 +450,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { Indices.resize(NumIndices); for (unsigned J = 0; J < NumIndices; ++J) Indices[J] = Ops[J][I]; - Res[I] = Builder.CreateGEP(Base[I], Indices, + Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices, GEPI.getName() + ".i" + Twine(I)); if (GEPI.isInBounds()) if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I])) @@ -595,7 +589,8 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) { return false; VectorLayout Layout; - if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout)) + if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout, + LI.getModule()->getDataLayout())) return false; unsigned NumElems = Layout.VecTy->getNumElements(); @@ -619,7 +614,8 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) { VectorLayout Layout; Value *FullValue = SI.getValueOperand(); - if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout)) + if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout, + SI.getModule()->getDataLayout())) return false; unsigned NumElems = Layout.VecTy->getNumElements(); diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index bffe8df..1a04d74 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -199,18 +199,15 @@ class ConstantOffsetExtractor { /// new index representing the remainder (equal to the original index minus /// the constant offset), or nullptr if we cannot extract a constant offset. /// \p Idx The given GEP index - /// \p DL The datalayout of the module /// \p GEP The given GEP - static Value *Extract(Value *Idx, const DataLayout *DL, - GetElementPtrInst *GEP); + static Value *Extract(Value *Idx, GetElementPtrInst *GEP); /// Looks for a constant offset from the given GEP index without extracting /// it. It returns the numeric value of the extracted constant offset (0 if /// failed). The meaning of the arguments are the same as Extract. - static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP); + static int64_t Find(Value *Idx, GetElementPtrInst *GEP); private: - ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt) - : DL(Layout), IP(InsertionPt) {} + ConstantOffsetExtractor(Instruction *InsertionPt) : IP(InsertionPt) {} /// Searches the expression that computes V for a non-zero constant C s.t. /// V can be reassociated into the form V' + C. If the searching is /// successful, returns C and update UserChain as a def-use chain from C to V; @@ -294,8 +291,6 @@ class ConstantOffsetExtractor { /// A data structure used in rebuildWithoutConstOffset. Contains all /// sext/zext instructions along UserChain. SmallVector<CastInst *, 16> ExtInsts; - /// The data layout of the module. Used in ComputeKnownBits. - const DataLayout *DL; Instruction *IP; /// Insertion position of cloned instructions. 
}; @@ -312,19 +307,10 @@ class SeparateConstOffsetFromGEP : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.setPreservesCFG(); } - bool doInitialization(Module &M) override { - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (DLP == nullptr) - report_fatal_error("data layout missing"); - DL = &DLP->getDataLayout(); - return false; - } - bool runOnFunction(Function &F) override; private: @@ -372,7 +358,6 @@ class SeparateConstOffsetFromGEP : public FunctionPass { /// Verified in @i32_add in split-gep.ll bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP); - const DataLayout *DL; const TargetMachine *TM; /// Whether to lower a GEP with multiple indices into arithmetic operations or /// multiple GEPs with a single index. @@ -386,7 +371,6 @@ INITIALIZE_PASS_BEGIN( "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DataLayoutPass) INITIALIZE_PASS_END( SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, @@ -647,9 +631,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) { return BO; } -Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL, - GetElementPtrInst *GEP) { - ConstantOffsetExtractor Extractor(DL, GEP); +Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP) { + ConstantOffsetExtractor Extractor(GEP); // Find a non-zero constant offset first. APInt ConstantOffset = Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, @@ -660,10 +643,9 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL, return Extractor.rebuildWithoutConstOffset(); } -int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL, - GetElementPtrInst *GEP) { +int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) { // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative. 
- return ConstantOffsetExtractor(DL, GEP) + return ConstantOffsetExtractor(GEP) .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, GEP->isInBounds()) .getSExtValue(); @@ -674,6 +656,7 @@ void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne, IntegerType *IT = cast<IntegerType>(V->getType()); KnownOne = APInt(IT->getBitWidth(), 0); KnownZero = APInt(IT->getBitWidth(), 0); + const DataLayout &DL = IP->getModule()->getDataLayout(); llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0); } @@ -689,7 +672,8 @@ bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const { bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize( GetElementPtrInst *GEP) { bool Changed = false; - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + const DataLayout &DL = GEP->getModule()->getDataLayout(); + Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); gep_type_iterator GTI = gep_type_begin(*GEP); for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; ++I, ++GTI) { @@ -710,18 +694,19 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP, NeedsExtraction = false; int64_t AccumulativeByteOffset = 0; gep_type_iterator GTI = gep_type_begin(*GEP); + const DataLayout &DL = GEP->getModule()->getDataLayout(); for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { if (isa<SequentialType>(*GTI)) { // Tries to extract a constant offset from this GEP index. int64_t ConstantOffset = - ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP); + ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP); if (ConstantOffset != 0) { NeedsExtraction = true; // A GEP may have multiple indices. We accumulate the extracted // constant offset to a byte offset, and later offset the remainder of // the original GEP with this byte offset. AccumulativeByteOffset += - ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType()); + ConstantOffset * DL.getTypeAllocSize(GTI.getIndexedType()); } } else if (LowerGEP) { StructType *StTy = cast<StructType>(*GTI); @@ -730,7 +715,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP, if (Field != 0) { NeedsExtraction = true; AccumulativeByteOffset += - DL->getStructLayout(StTy)->getElementOffset(Field); + DL.getStructLayout(StTy)->getElementOffset(Field); } } } @@ -740,7 +725,8 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP, void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); - Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); + const DataLayout &DL = Variadic->getModule()->getDataLayout(); + Type *IntPtrTy = DL.getIntPtrType(Variadic->getType()); Type *I8PtrTy = Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace()); @@ -760,7 +746,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( continue; APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), - DL->getTypeAllocSize(GTI.getIndexedType())); + DL.getTypeAllocSize(GTI.getIndexedType())); // Scale the index by element size. 
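Each sequential GEP index contributes index * DL.getTypeAllocSize(indexed type) bytes, and the power-of-two case handled just below is emitted as a shift rather than a multiply. A standalone sketch of that scaling with the element size as an assumed input; scaleIndex is an illustrative name:

#include <cassert>
#include <cstdint>

// Byte contribution of one sequential GEP index; a power-of-two element size
// can be applied as a left shift instead of a multiply.
uint64_t scaleIndex(uint64_t Index, uint64_t EltSize) {
  if (EltSize != 0 && (EltSize & (EltSize - 1)) == 0) { // power of two
    unsigned Shift = 0;
    while ((1ULL << Shift) != EltSize)
      ++Shift;
    return Index << Shift;
  }
  return Index * EltSize;
}

int main() {
  assert(scaleIndex(3, 4) == 12);  // i32 array: i * 4 == i << 2
  assert(scaleIndex(2, 12) == 24); // 12-byte struct elements: plain multiply
  return 0;
}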
if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { @@ -791,7 +777,8 @@ void SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); - Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); + const DataLayout &DL = Variadic->getModule()->getDataLayout(); + Type *IntPtrTy = DL.getIntPtrType(Variadic->getType()); Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy); gep_type_iterator GTI = gep_type_begin(*Variadic); @@ -807,7 +794,7 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, continue; APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), - DL->getTypeAllocSize(GTI.getIndexedType())); + DL.getTypeAllocSize(GTI.getIndexedType())); // Scale the index by element size. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { @@ -880,8 +867,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { if (isa<SequentialType>(*GTI)) { // Splits this GEP index into a variadic part and a constant offset, and // uses the variadic part as the new index. - Value *NewIdx = - ConstantOffsetExtractor::Extract(GEP->getOperand(I), DL, GEP); + Value *NewIdx = ConstantOffsetExtractor::Extract(GEP->getOperand(I), GEP); if (NewIdx != nullptr) { GEP->setOperand(I, NewIdx); } @@ -958,15 +944,17 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned = // unsigned.. Therefore, we cast ElementTypeSizeOfGEP to signed because it is // used with unsigned integers later. + const DataLayout &DL = GEP->getModule()->getDataLayout(); int64_t ElementTypeSizeOfGEP = static_cast<int64_t>( - DL->getTypeAllocSize(GEP->getType()->getElementType())); - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + DL.getTypeAllocSize(GEP->getType()->getElementType())); + Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) { // Very likely. As long as %gep is natually aligned, the byte offset we // extracted should be a multiple of sizeof(*%gep). int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP; - NewGEP = GetElementPtrInst::Create( - NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP); + NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, + ConstantInt::get(IntPtrTy, Index, true), + GEP->getName(), GEP); } else { // Unlikely but possible. For example, // #pragma pack(1) @@ -986,8 +974,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { GEP->getPointerAddressSpace()); NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP); NewGEP = GetElementPtrInst::Create( - NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), - "uglygep", GEP); + Type::getInt8Ty(GEP->getContext()), NewGEP, + ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep", + GEP); if (GEP->getType() != I8PtrTy) NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP); } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index fb8fe38..8566cd9 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -127,7 +127,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { /// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. 
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, - const DataLayout *DL, AssumptionCache *AC, + AssumptionCache *AC, unsigned BonusInstThreshold) { bool Changed = false; bool LocalChange = true; @@ -137,7 +137,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, DL, AC)) { + if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) { LocalChange = true; ++NumSimpl; } @@ -148,11 +148,10 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, } static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, - const DataLayout *DL, AssumptionCache *AC, - int BonusInstThreshold) { + AssumptionCache *AC, int BonusInstThreshold) { bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold); + EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold); // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -166,7 +165,7 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, return true; do { - EverChanged = iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold); + EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold); EverChanged |= removeUnreachableBlocks(F); } while (EverChanged); @@ -181,11 +180,10 @@ SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold) PreservedAnalyses SimplifyCFGPass::run(Function &F, AnalysisManager<Function> *AM) { - auto *DL = F.getParent()->getDataLayout(); auto &TTI = AM->getResult<TargetIRAnalysis>(F); auto &AC = AM->getResult<AssumptionAnalysis>(F); - if (!simplifyFunctionCFG(F, TTI, DL, &AC, BonusInstThreshold)) + if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); @@ -207,9 +205,7 @@ struct CFGSimplifyPass : public FunctionPass { &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; - return simplifyFunctionCFG(F, TTI, DL, AC, BonusInstThreshold); + return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold); } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index d0ee0a6..b169d56 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -35,7 +36,6 @@ namespace { DominatorTree *DT; LoopInfo *LI; AliasAnalysis *AA; - const DataLayout *DL; public: static char ID; // Pass identification @@ -100,8 +100,6 @@ bool Sinking::runOnFunction(Function &F) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); AA = &getAnalysis<AliasAnalysis>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? 
&DLP->getDataLayout() : nullptr; bool MadeChange, EverMadeChange = false; @@ -196,7 +194,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst, if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. - if (!isSafeToSpeculativelyExecute(Inst, DL)) + if (!isSafeToSpeculativelyExecute(Inst)) return false; // We don't want to sink across a critical edge if we don't dominate the diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 4edc86c..e71031c 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -15,19 +15,30 @@ // // There are many optimizations we can perform in the domain of SLSR. This file // for now contains only an initial step. Specifically, we look for strength -// reduction candidate in the form of +// reduction candidates in two forms: // -// (B + i) * S +// Form 1: (B + i) * S +// Form 2: &B[i * S] // -// where B and S are integer constants or variables, and i is a constant -// integer. If we found two such candidates +// where S is an integer variable, and i is a constant integer. If we found two +// candidates // -// S1: X = (B + i) * S S2: Y = (B + i') * S +// S1: X = (B + i) * S +// S2: Y = (B + i') * S +// +// or +// +// S1: X = &B[i * S] +// S2: Y = &B[i' * S] // // and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with // // Y = X + (i' - i) * S // +// or +// +// Y = &X[(i' - i) * S] +// // where (i' - i) * S is folded to the extent possible. When S2 has multiple // bases, we pick the one that is closest to S2, or S2's "immediate" basis. // @@ -35,8 +46,6 @@ // // - Handle candidates in the form of B + i * S // -// - Handle candidates in the form of pointer arithmetics. e.g., B[i * S] -// // - Floating point arithmetics when fast math is enabled. // // - SLSR may decrease ILP at the architecture level. Targets that are very @@ -45,6 +54,10 @@ #include <vector> #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" @@ -58,14 +71,30 @@ using namespace PatternMatch; namespace { class StraightLineStrengthReduce : public FunctionPass { - public: +public: // SLSR candidate. Such a candidate must be in the form of // (Base + Index) * Stride + // or + // Base[..][Index * Stride][..] struct Candidate : public ilist_node<Candidate> { - Candidate(Value *B = nullptr, ConstantInt *Idx = nullptr, - Value *S = nullptr, Instruction *I = nullptr) - : Base(B), Index(Idx), Stride(S), Ins(I), Basis(nullptr) {} - Value *Base; + enum Kind { + Invalid, // reserved for the default constructor + Mul, // (B + i) * S + GEP, // &B[..][i * S][..] + }; + + Candidate() + : CandidateKind(Invalid), Base(nullptr), Index(nullptr), + Stride(nullptr), Ins(nullptr), Basis(nullptr) {} + Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, + Instruction *I) + : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I), + Basis(nullptr) {} + Kind CandidateKind; + const SCEV *Base; + // Note that Index and Stride of a GEP candidate may not have the same + // integer type. In that case, during rewriting, Stride will be + // sign-extended or truncated to Index's type. 
ConstantInt *Index; Value *Stride; // The instruction this candidate corresponds to. It helps us to rewrite a @@ -90,33 +119,70 @@ class StraightLineStrengthReduce : public FunctionPass { static char ID; - StraightLineStrengthReduce() : FunctionPass(ID), DT(nullptr) { + StraightLineStrengthReduce() + : FunctionPass(ID), DL(nullptr), DT(nullptr), TTI(nullptr) { initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<ScalarEvolution>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); // We do not modify the shape of the CFG. AU.setPreservesCFG(); } + bool doInitialization(Module &M) override { + DL = &M.getDataLayout(); + return false; + } + bool runOnFunction(Function &F) override; - private: +private: // Returns true if Basis is a basis for C, i.e., Basis dominates C and they // share the same base and stride. bool isBasisFor(const Candidate &Basis, const Candidate &C); // Checks whether I is in a candidate form. If so, adds all the matching forms // to Candidates, and tries to find the immediate basis for each of them. void allocateCandidateAndFindBasis(Instruction *I); - // Given that I is in the form of "(B + Idx) * S", adds this form to - // Candidates, and finds its immediate basis. - void allocateCandidateAndFindBasis(Value *B, ConstantInt *Idx, Value *S, + // Allocate candidates and find bases for Mul instructions. + void allocateCandidateAndFindBasisForMul(Instruction *I); + // Splits LHS into Base + Index and, if succeeds, calls + // allocateCandidateAndFindBasis. + void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS, + Instruction *I); + // Allocate candidates and find bases for GetElementPtr instructions. + void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP); + // A helper function that scales Idx with ElementSize before invoking + // allocateCandidateAndFindBasis. + void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, + Value *S, uint64_t ElementSize, + Instruction *I); + // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate + // basis. + void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B, + ConstantInt *Idx, Value *S, Instruction *I); // Rewrites candidate C with respect to Basis. void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis); + // A helper function that factors ArrayIdx to a product of a stride and a + // constant index, and invokes allocateCandidateAndFindBasis with the + // factorings. + void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, + GetElementPtrInst *GEP); + // Emit code that computes the "bump" from Basis to C. If the candidate is a + // GEP and the bump is not divisible by the element size of the GEP, this + // function sets the BumpWithUglyGEP flag to notify its caller to bump the + // basis using an ugly GEP. + static Value *emitBump(const Candidate &Basis, const Candidate &C, + IRBuilder<> &Builder, const DataLayout *DL, + bool &BumpWithUglyGEP); + const DataLayout *DL; DominatorTree *DT; + ScalarEvolution *SE; + TargetTransformInfo *TTI; ilist<Candidate> Candidates; // Temporarily holds all instructions that are unlinked (but not deleted) by // rewriteCandidateWithBasis. 
These instructions will be actually removed @@ -129,6 +195,8 @@ char StraightLineStrengthReduce::ID = 0; INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) @@ -141,9 +209,47 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, return (Basis.Ins != C.Ins && // skip the same instruction // Basis must dominate C in order to rewrite C with respect to Basis. DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) && - // They share the same base and stride. + // They share the same base, stride, and candidate kind. Basis.Base == C.Base && - Basis.Stride == C.Stride); + Basis.Stride == C.Stride && + Basis.CandidateKind == C.CandidateKind); +} + +static bool isCompletelyFoldable(GetElementPtrInst *GEP, + const TargetTransformInfo *TTI, + const DataLayout *DL) { + GlobalVariable *BaseGV = nullptr; + int64_t BaseOffset = 0; + bool HasBaseReg = false; + int64_t Scale = 0; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand())) + BaseGV = GV; + else + HasBaseReg = true; + + gep_type_iterator GTI = gep_type_begin(GEP); + for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) { + if (isa<SequentialType>(*GTI)) { + int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) { + BaseOffset += ConstIdx->getSExtValue() * ElementSize; + } else { + // Needs scale register. + if (Scale != 0) { + // No addressing mode takes two scale registers. + return false; + } + Scale = ElementSize; + } + } else { + StructType *STy = cast<StructType>(*GTI); + uint64_t Field = cast<ConstantInt>(*I)->getZExtValue(); + BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field); + } + } + return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV, + BaseOffset, HasBaseReg, Scale); } // TODO: We currently implement an algorithm whose time complexity is linear to @@ -153,11 +259,17 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, // table is indexed by the base and the stride of a candidate. Therefore, // finding the immediate basis of a candidate boils down to one hash-table look // up. -void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Value *B, - ConstantInt *Idx, - Value *S, - Instruction *I) { - Candidate C(B, Idx, S, I); +void StraightLineStrengthReduce::allocateCandidateAndFindBasis( + Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, + Instruction *I) { + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If &B[Idx * S] fits into an addressing mode, do not turn it into + // non-free computation. + if (isCompletelyFoldable(GEP, TTI, DL)) + return; + } + + Candidate C(CT, B, Idx, S, I); // Try to compute the immediate basis of C. unsigned NumIterations = 0; // Limit the scan radius to avoid running forever. 
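Concretely, the Mul form rewrites a dominated candidate in terms of its basis: given S1: X = (B + i) * S and S2: Y = (B + i') * S with S1 dominating S2, S2 becomes Y = X + (i' - i) * S. A source-level before/after with the hypothetical values i = 1 and i' = 3:

// Before strength reduction: two multiplies sharing base B and stride S.
long slsrBefore(long B, long S) {
  long X = (B + 1) * S; // S1, the basis
  long Y = (B + 3) * S; // S2, dominated by S1
  return X + Y;
}

// After: Y is rewritten against X, trading a multiply for cheaper arithmetic
// when (i' - i) * S folds well.
long slsrAfter(long B, long S) {
  long X = (B + 1) * S;
  long Y = X + 2 * S;   // same value as (B + 3) * S
  return X + Y;
}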
@@ -176,60 +288,209 @@ void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Value *B, } void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Mul: + allocateCandidateAndFindBasisForMul(I); + break; + case Instruction::GetElementPtr: + allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I)); + break; + } +} + +void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( + Value *LHS, Value *RHS, Instruction *I) { Value *B = nullptr; ConstantInt *Idx = nullptr; - // "(Base + Index) * Stride" must be a Mul instruction at the first hand. - if (I->getOpcode() == Instruction::Mul) { - if (IntegerType *ITy = dyn_cast<IntegerType>(I->getType())) { - Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); - for (unsigned Swapped = 0; Swapped < 2; ++Swapped) { - // Only handle the canonical operand ordering. - if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) { - // If LHS is in the form of "Base + Index", then I is in the form of - // "(Base + Index) * RHS". - allocateCandidateAndFindBasis(B, Idx, RHS, I); - } else { - // Otherwise, at least try the form (LHS + 0) * RHS. - allocateCandidateAndFindBasis(LHS, ConstantInt::get(ITy, 0), RHS, I); - } - // Swap LHS and RHS so that we also cover the cases where LHS is the - // stride. - if (LHS == RHS) - break; - std::swap(LHS, RHS); - } - } + // Only handle the canonical operand ordering. + if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) { + // If LHS is in the form of "Base + Index", then I is in the form of + // "(Base + Index) * RHS". + allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); + } else { + // Otherwise, at least try the form (LHS + 0) * RHS. + ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0); + allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, + I); + } +} + +void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( + Instruction *I) { + // Try matching (B + i) * S. + // TODO: we could extend SLSR to float and vector types. + if (!isa<IntegerType>(I->getType())) + return; + + Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); + allocateCandidateAndFindBasisForMul(LHS, RHS, I); + if (LHS != RHS) { + // Symmetrically, try to split RHS to Base + Index. + allocateCandidateAndFindBasisForMul(RHS, LHS, I); + } +} + +void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( + const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize, + Instruction *I) { + // I = B + sext(Idx *nsw S) *nsw ElementSize + // = B + (sext(Idx) * ElementSize) * sext(S) + // Casting to IntegerType is safe because we skipped vector GEPs. + IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType())); + ConstantInt *ScaledIdx = ConstantInt::get( + IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true); + allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); +} + +void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx, + const SCEV *Base, + uint64_t ElementSize, + GetElementPtrInst *GEP) { + // At least, ArrayIdx = ArrayIdx *s 1. + allocateCandidateAndFindBasisForGEP( + Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1), + ArrayIdx, ElementSize, GEP); + Value *LHS = nullptr; + ConstantInt *RHS = nullptr; + // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4). + // + // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx + // itself. 
This would allow us to handle the shl case for free. However, + // matching SCEVs has two issues: + // + // 1. this would complicate rewriting because the rewriting procedure + // would have to translate SCEVs back to IR instructions. This translation + // is difficult when LHS is further evaluated to a composite SCEV. + // + // 2. ScalarEvolution is designed to be control-flow oblivious. It tends + // to strip nsw/nuw flags which are critical for SLSR to trace into + // sext'ed multiplication. + if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) { + // SLSR is currently unsafe if i * S may overflow. + // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize + allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP); + } +} + +void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( + GetElementPtrInst *GEP) { + // TODO: handle vector GEPs + if (GEP->getType()->isVectorTy()) + return; + + const SCEV *GEPExpr = SE->getSCEV(GEP); + Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) { + if (!isa<SequentialType>(*GTI++)) + continue; + Value *ArrayIdx = *I; + // Compute the byte offset of this index. + uint64_t ElementSize = DL->getTypeAllocSize(*GTI); + const SCEV *ElementSizeExpr = SE->getSizeOfExpr(IntPtrTy, *GTI); + const SCEV *ArrayIdxExpr = SE->getSCEV(ArrayIdx); + ArrayIdxExpr = SE->getTruncateOrSignExtend(ArrayIdxExpr, IntPtrTy); + const SCEV *LocalOffset = + SE->getMulExpr(ArrayIdxExpr, ElementSizeExpr, SCEV::FlagNSW); + // The base of this candidate equals GEPExpr less the byte offset of this + // index. + const SCEV *Base = SE->getMinusSCEV(GEPExpr, LocalOffset); + factorArrayIndex(ArrayIdx, Base, ElementSize, GEP); + // When ArrayIdx is the sext of a value, we try to factor that value as + // well. Handling this case is important because array indices are + // typically sign-extended to the pointer size. + Value *TruncatedArrayIdx = nullptr; + if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx)))) + factorArrayIndex(TruncatedArrayIdx, Base, ElementSize, GEP); } } +// A helper function that unifies the bitwidth of A and B. +static void unifyBitWidth(APInt &A, APInt &B) { + if (A.getBitWidth() < B.getBitWidth()) + A = A.sext(B.getBitWidth()); + else if (A.getBitWidth() > B.getBitWidth()) + B = B.sext(A.getBitWidth()); +} + +Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis, + const Candidate &C, + IRBuilder<> &Builder, + const DataLayout *DL, + bool &BumpWithUglyGEP) { + APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue(); + unifyBitWidth(Idx, BasisIdx); + APInt IndexOffset = Idx - BasisIdx; + + BumpWithUglyGEP = false; + if (Basis.CandidateKind == Candidate::GEP) { + APInt ElementSize( + IndexOffset.getBitWidth(), + DL->getTypeAllocSize( + cast<GetElementPtrInst>(Basis.Ins)->getType()->getElementType())); + APInt Q, R; + APInt::sdivrem(IndexOffset, ElementSize, Q, R); + if (R.getSExtValue() == 0) + IndexOffset = Q; + else + BumpWithUglyGEP = true; + } + // Compute Bump = C - Basis = (i' - i) * S. + // Common case 1: if (i' - i) is 1, Bump = S. + if (IndexOffset.getSExtValue() == 1) + return C.Stride; + // Common case 2: if (i' - i) is -1, Bump = -S. + if (IndexOffset.getSExtValue() == -1) + return Builder.CreateNeg(C.Stride); + // Otherwise, Bump = (i' - i) * sext/trunc(S). 
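The arithmetic in emitBump is the core of the rewrite: the bump is (i' - i) * S, where the two indices are first sign-extended to a common width, and for GEP candidates, whose indices were pre-scaled to bytes when the candidate was created, the byte distance is converted back to an element count when the element size divides it evenly and is otherwise flagged so the caller bumps through an i8* ("ugly") GEP. A small stand-alone C++ sketch of that decision follows, with illustrative names, 64-bit integers standing in for APInt, and the +1/-1 fast paths from the surrounding comments omitted.

#include <cstdint>

struct BumpInfo {
  int64_t Factor;        // multiplier applied to the stride S
  bool BumpWithUglyGEP;  // true when Factor is a raw byte count
};

// CandidateIdx and BasisIdx are the stored indices (already byte-scaled for
// GEP candidates); ElementSize is the allocation size of the basis GEP's
// pointee type.
BumpInfo computeBump(int64_t CandidateIdx, int64_t BasisIdx, bool IsGEP,
                     int64_t ElementSize) {
  int64_t IndexOffset = CandidateIdx - BasisIdx; // i' - i
  if (IsGEP) {
    if (IndexOffset % ElementSize == 0)
      return {IndexOffset / ElementSize, false}; // step in whole elements
    return {IndexOffset, true};                  // step in bytes via an i8* GEP
  }
  return {IndexOffset, false};
}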
+ ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset); + Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType()); + return Builder.CreateMul(ExtendedStride, Delta); +} + void StraightLineStrengthReduce::rewriteCandidateWithBasis( const Candidate &C, const Candidate &Basis) { + assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base && + C.Stride == Basis.Stride); + // An instruction can correspond to multiple candidates. Therefore, instead of // simply deleting an instruction when we rewrite it, we mark its parent as // nullptr (i.e. unlink it) so that we can skip the candidates whose // instruction is already rewritten. if (!C.Ins->getParent()) return; - assert(C.Base == Basis.Base && C.Stride == Basis.Stride); - // Basis = (B + i) * S - // C = (B + i') * S - // ==> - // C = Basis + (i' - i) * S + IRBuilder<> Builder(C.Ins); - ConstantInt *IndexOffset = ConstantInt::get( - C.Ins->getContext(), C.Index->getValue() - Basis.Index->getValue()); - Value *Reduced; - // TODO: preserve nsw/nuw in some cases. - if (IndexOffset->isOne()) { - // If (i' - i) is 1, fold C into Basis + S. - Reduced = Builder.CreateAdd(Basis.Ins, C.Stride); - } else if (IndexOffset->isMinusOne()) { - // If (i' - i) is -1, fold C into Basis - S. - Reduced = Builder.CreateSub(Basis.Ins, C.Stride); - } else { - Value *Bump = Builder.CreateMul(C.Stride, IndexOffset); + bool BumpWithUglyGEP; + Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP); + Value *Reduced = nullptr; // equivalent to but weaker than C.Ins + switch (C.CandidateKind) { + case Candidate::Mul: Reduced = Builder.CreateAdd(Basis.Ins, Bump); - } + break; + case Candidate::GEP: + { + Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); + if (BumpWithUglyGEP) { + // C = (char *)Basis + Bump + unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); + Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); + Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); + // We only considered inbounds GEP as candidates. + Reduced = Builder.CreateInBoundsGEP(Reduced, Bump); + Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); + } else { + // C = gep Basis, Bump + // Canonicalize bump to pointer size. + Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); + Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump); + } + } + break; + default: + llvm_unreachable("C.CandidateKind is invalid"); + }; Reduced->takeName(C.Ins); C.Ins->replaceAllUsesWith(Reduced); C.Ins->dropAllReferences(); @@ -243,15 +504,15 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = &getAnalysis<ScalarEvolution>(); // Traverse the dominator tree in the depth-first order. This order makes sure // all bases of a candidate are in Candidates when we process it. for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT); node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) { - BasicBlock *B = node->getBlock(); - for (auto I = B->begin(); I != B->end(); ++I) { - allocateCandidateAndFindBasis(I); - } + for (auto &I : *node->getBlock()) + allocateCandidateAndFindBasis(&I); } // Rewrite candidates in the reverse depth-first order. 
This order makes sure diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index aaf6f9a..6c3ce58 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -9,8 +9,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" @@ -18,6 +18,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 715ddeb..9eef132 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -54,8 +54,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -87,7 +87,6 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { const TargetTransformInfo *TTI; - const DataLayout *DL; static char ID; // Pass identification, replacement for typeid TailCallElim() : FunctionPass(ID) { @@ -159,8 +158,6 @@ bool TailCallElim::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; - DL = F.getParent()->getDataLayout(); - bool AllCallsAreTailCalls = false; bool Modified = markTails(F, AllCallsAreTailCalls); if (AllCallsAreTailCalls) @@ -392,10 +389,9 @@ bool TailCallElim::runTRE(Function &F) { SmallVector<PHINode*, 8> ArgumentPHIs; bool MadeChange = false; - // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls - // marked with the 'tail' attribute, because doing so would cause the stack - // size to increase (real TRE would deallocate variable sized allocas, TRE - // doesn't). + // If false, we cannot perform TRE on tail calls marked with the 'tail' + // attribute, because doing so would cause the stack size to increase (real + // TRE would deallocate variable sized allocas, TRE doesn't). bool CanTRETailMarkedCall = CanTRE(F); // Change any tail recursive calls to loops. @@ -404,28 +400,19 @@ bool TailCallElim::runTRE(Function &F) { // alloca' is changed from being a static alloca to being a dynamic alloca. // Until this is resolved, disable this transformation if that would ever // happen. This bug is PR962. - SmallVector<BasicBlock*, 8> BBToErase; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { + BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB. 
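The rewritten loop header in runTRE above relies on a small but important idiom: the iterator is advanced before the current block is processed, because FoldReturnAndProcessPred may erase that block, which previously forced the pass to defer deletions in a separate BBToErase list. Here is a tiny stand-alone C++ illustration of the same idiom over a std::list; all names are made up for the example.

#include <iostream>
#include <list>
#include <string>

int main() {
  std::list<std::string> Blocks = {"entry", "then", "dead", "exit"};
  for (auto It = Blocks.begin(), End = Blocks.end(); It != End; /* in loop */) {
    auto Cur = It++;        // advance first; the element at Cur may go away
    if (*Cur == "dead")
      Blocks.erase(Cur);    // safe: erasing Cur does not invalidate It
    else
      std::cout << *Cur << '\n';
  }
  return 0;
}

The same reasoning is why the cleanup in FoldReturnAndProcessPred can now call eraseFromParent directly instead of merely emptying the block, as the later hunk in this file shows.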
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, !CanTRETailMarkedCall); - if (!Change && BB->getFirstNonPHIOrDbg() == Ret) { + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, !CanTRETailMarkedCall); - // FoldReturnAndProcessPred may have emptied some BB. Remember to - // erase them. - if (Change && BB->empty()) - BBToErase.push_back(BB); - - } MadeChange |= Change; } } - for (auto BB: BBToErase) - BB->eraseFromParent(); - // If we eliminated any tail recursions, it's possible that we inserted some // silly PHI nodes which just merge an initial value (the incoming operand) // with themselves. Check to see if we did and clean up our mess if so. This @@ -435,7 +422,7 @@ bool TailCallElim::runTRE(Function &F) { PHINode *PN = ArgumentPHIs[i]; // If the PHI Node is a dynamic constant, replace it with the value it is. - if (Value *PNV = SimplifyInstruction(PN)) { + if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) { PN->replaceAllUsesWith(PNV); PN->eraseFromParent(); } @@ -445,7 +432,7 @@ bool TailCallElim::runTRE(Function &F) { } -/// CanMoveAboveCall - Return true if it is safe to move the specified +/// Return true if it is safe to move the specified /// instruction from after the call to before the call, assuming that all /// instructions between the call and this instruction are movable. /// @@ -464,7 +451,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // being loaded from. if (CI->mayWriteToMemory() || !isSafeToLoadUnconditionally(L->getPointerOperand(), L, - L->getAlignment(), DL)) + L->getAlignment())) return false; } } @@ -480,13 +467,11 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { return true; } -// isDynamicConstant - Return true if the specified value is the same when the -// return would exit as it was when the initial iteration of the recursive -// function was executed. -// -// We currently handle static constants and arguments that are not modified as -// part of the recursion. -// +/// Return true if the specified value is the same when the return would exit +/// as it was when the initial iteration of the recursive function was executed. +/// +/// We currently handle static constants and arguments that are not modified as +/// part of the recursion. static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { if (isa<Constant>(V)) return true; // Static constants are always dyn consts @@ -518,10 +503,9 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { return false; } -// getCommonReturnValue - Check to see if the function containing the specified -// tail call consistently returns the same runtime-constant value at all exit -// points except for IgnoreRI. If so, return the returned value. -// +/// Check to see if the function containing the specified tail call consistently +/// returns the same runtime-constant value at all exit points except for +/// IgnoreRI. If so, return the returned value. 
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { Function *F = CI->getParent()->getParent(); Value *ReturnedValue = nullptr; @@ -545,10 +529,9 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { return ReturnedValue; } -/// CanTransformAccumulatorRecursion - If the specified instruction can be -/// transformed using accumulator recursion elimination, return the constant -/// which is the start of the accumulator value. Otherwise return null. -/// +/// If the specified instruction can be transformed using accumulator recursion +/// elimination, return the constant which is the start of the accumulator +/// value. Otherwise return null. Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { if (!I->isAssociative() || !I->isCommutative()) return nullptr; @@ -836,14 +819,11 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB, ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred); // Cleanup: if all predecessors of BB have been eliminated by - // FoldReturnIntoUncondBranch, we would like to delete it, but we - // can not just nuke it as it is being used as an iterator by our caller. - // Just empty it, and the caller will erase it when it is safe to do so. - // It is important to empty it, because the ret instruction in there is - // still using a value which EliminateRecursiveTailCall will attempt - // to remove. + // FoldReturnIntoUncondBranch, delete it. It is important to empty it, + // because the ret instruction in there is still using a value which + // EliminateRecursiveTailCall will attempt to remove. if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) - BB->getInstList().clear(); + BB->eraseFromParent(); EliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 762a83f..671cbfe 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -33,7 +33,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. This always returns an integer value of size intptr_t. -Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strlen)) return nullptr; @@ -45,12 +45,9 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrLen = M->getOrInsertFunction("strlen", - AttributeSet::get(M->getContext(), - AS), - TD->getIntPtrType(Context), - B.getInt8PtrTy(), - nullptr); + Constant *StrLen = M->getOrInsertFunction( + "strlen", AttributeSet::get(M->getContext(), AS), + DL.getIntPtrType(Context), B.getInt8PtrTy(), nullptr); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -62,7 +59,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, /// specified pointer. Ptr is required to be some pointer type, MaxLen must /// be of size_t type, and the return value has 'intptr_t' type. 
Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strnlen)) return nullptr; @@ -73,13 +70,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = M->getOrInsertFunction("strnlen", - AttributeSet::get(M->getContext(), - AS), - TD->getIntPtrType(Context), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - nullptr); + Constant *StrNLen = + M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS), + DL.getIntPtrType(Context), B.getInt8PtrTy(), + DL.getIntPtrType(Context), nullptr); CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -91,7 +85,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strchr)) return nullptr; @@ -114,9 +108,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, } /// EmitStrNCmp - Emit a call to the strncmp function to the builder. -Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, - IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strncmp)) return nullptr; @@ -128,13 +121,9 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *StrNCmp = M->getOrInsertFunction("strncmp", - AttributeSet::get(M->getContext(), - AS), - B.getInt32Ty(), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), nullptr); + Value *StrNCmp = M->getOrInsertFunction( + "strncmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr); CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "strncmp"); @@ -147,8 +136,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI, - StringRef Name) { + const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strcpy)) return nullptr; @@ -170,8 +158,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. 
-Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strncpy)) return nullptr; @@ -198,7 +185,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const DataLayout *TD, + IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcpy_chk)) return nullptr; @@ -208,13 +195,10 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttributeSet::get(M->getContext(), AS), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), nullptr); + Value *MemCpy = M->getOrInsertFunction( + "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), nullptr); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize); @@ -225,9 +209,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. -Value *llvm::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memchr)) return nullptr; @@ -236,13 +219,9 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemChr = M->getOrInsertFunction("memchr", - AttributeSet::get(M->getContext(), AS), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - B.getInt32Ty(), - TD->getIntPtrType(Context), - nullptr); + Value *MemChr = M->getOrInsertFunction( + "memchr", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), + B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context), nullptr); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) @@ -252,9 +231,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, } /// EmitMemCmp - Emit a call to the memcmp function. 
-Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcmp)) return nullptr; @@ -266,12 +244,9 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCmp = M->getOrInsertFunction("memcmp", - AttributeSet::get(M->getContext(), AS), - B.getInt32Ty(), - B.getInt8PtrTy(), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), nullptr); + Value *MemCmp = M->getOrInsertFunction( + "memcmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), + B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr); CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "memcmp"); @@ -339,7 +314,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) return nullptr; @@ -361,7 +336,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. -Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) return nullptr; @@ -386,7 +361,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an integer and File is a pointer to FILE. Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) return nullptr; @@ -419,7 +394,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const DataLayout *TD, const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) return nullptr; @@ -450,9 +425,8 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. 
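All of the Emit* helpers being updated in this file follow one shape, and the signature change simply threads a `const DataLayout &` through it so that size-typed arguments can use the target's intptr_t. The condensed sketch below shows that shape for a strlen-style call; it is an illustration only, not the file's implementation: it omits the TargetLibraryInfo availability check and the attribute sets visible in the hunks, and it assumes Ptr is already an i8*.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static Value *emitStrLenLike(Value *Ptr, IRBuilder<> &B, const DataLayout &DL) {
  Module *M = B.GetInsertBlock()->getParent()->getParent();
  LLVMContext &Context = B.GetInsertBlock()->getContext();
  // Declare (or reuse) `iN @strlen(i8*)`, where iN is the target's intptr_t.
  Constant *StrLen = M->getOrInsertFunction("strlen", DL.getIntPtrType(Context),
                                            B.getInt8PtrTy(), nullptr);
  CallInst *CI = B.CreateCall(StrLen, Ptr, "strlen");
  // Match the callee's calling convention, as the real helpers do.
  if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
    CI->setCallingConv(F->getCallingConv());
  return CI;
}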
-Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const DataLayout *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, + const DataLayout &DL, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) return nullptr; @@ -466,21 +440,18 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FWriteName, - AttributeSet::get(M->getContext(), AS), - TD->getIntPtrType(Context), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), - File->getType(), nullptr); + F = M->getOrInsertFunction( + FWriteName, AttributeSet::get(M->getContext(), AS), + DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), File->getType(), nullptr); else - F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context), - B.getInt8PtrTy(), - TD->getIntPtrType(Context), - TD->getIntPtrType(Context), - File->getType(), nullptr); - CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(Context), 1), File); + F = M->getOrInsertFunction(FWriteName, DL.getIntPtrType(Context), + B.getInt8PtrTy(), DL.getIntPtrType(Context), + DL.getIntPtrType(Context), File->getType(), + nullptr); + CallInst *CI = + B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(DL.getIntPtrType(Context), 1), File); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 09279b6..f04ea9c 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -34,7 +34,7 @@ #include <map> using namespace llvm; -// CloneBasicBlock - See comments in Cloning.h +/// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -202,7 +202,7 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, } } -/// CloneFunction - Return a copy of the specified function, but without +/// Return a copy of the specified function, but without /// embedding the function into another module. Also, any references specified /// in the VMap are changed to refer to their mapped value instead of the /// original one. If any of the arguments to the function are in the VMap, @@ -250,8 +250,7 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, namespace { - /// PruningFunctionCloner - This class is a private class used to implement - /// the CloneAndPruneFunctionInto method. + /// This is a private class used to implement CloneAndPruneFunctionInto. 
struct PruningFunctionCloner { Function *NewFunc; const Function *OldFunc; @@ -259,23 +258,18 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - const DataLayout *DL; CloningDirector *Director; ValueMapTypeRemapper *TypeMapper; ValueMaterializer *Materializer; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, - ValueToValueMapTy &valueMap, - bool moduleLevelChanges, - const char *nameSuffix, - ClonedCodeInfo *codeInfo, - const DataLayout *DL, + ValueToValueMapTy &valueMap, bool moduleLevelChanges, + const char *nameSuffix, ClonedCodeInfo *codeInfo, CloningDirector *Director) - : NewFunc(newFunc), OldFunc(oldFunc), - VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), - NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL), - Director(Director) { + : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), + ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), + CodeInfo(codeInfo), Director(Director) { // These are optional components. The Director may return null. if (Director) { TypeMapper = Director->getTypeRemapper(); @@ -286,7 +280,7 @@ namespace { } } - /// CloneBlock - The specified block is found to be reachable, clone it and + /// The specified block is found to be reachable, clone it and /// anything that it can reach. void CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, @@ -294,7 +288,7 @@ namespace { }; } -/// CloneBlock - The specified block is found to be reachable, clone it and +/// The specified block is found to be reachable, clone it and /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, @@ -360,7 +354,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If we can simplify this instruction to some other value, simply add // a mapping to that value rather than inserting a new instruction into // the basic block. - if (Value *V = SimplifyInstruction(NewInst, DL)) { + if (Value *V = + SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. if (Value *MappedV = VMap.lookup(V)) @@ -397,6 +392,14 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // terminator into the new basic block in this case. if (Action == CloningDirector::StopCloningBB) return; + if (Action == CloningDirector::CloneSuccessors) { + // If the director says to skip with a terminate instruction, we still + // need to clone this block's successors. + const TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + ToClone.push_back(TI->getSuccessor(i)); + return; + } assert(Action != CloningDirector::SkipInstruction && "SkipInstruction is not valid for terminators."); } @@ -455,10 +458,9 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } } -/// CloneAndPruneIntoFromInst - This works like CloneAndPruneFunctionInto, except -/// that it does not clone the entire function. Instead it starts at an -/// instruction provided by the caller and copies (and prunes) only the code -/// reachable from that instruction. +/// This works like CloneAndPruneFunctionInto, except that it does not clone the +/// entire function. Instead it starts at an instruction provided by the caller +/// and copies (and prunes) only the code reachable from that instruction. 
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, @@ -466,7 +468,6 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const DataLayout *DL, CloningDirector *Director) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -488,7 +489,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - NameSuffix, CodeInfo, DL, Director); + NameSuffix, CodeInfo, Director); const BasicBlock *StartingBB; if (StartingInst) StartingBB = StartingInst->getParent(); @@ -523,11 +524,18 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Handle PHI nodes specially, as we have to remove references to dead // blocks. - for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) - if (const PHINode *PN = dyn_cast<PHINode>(I)) - PHIToResolve.push_back(PN); - else + for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { + // PHI nodes may have been remapped to non-PHI nodes by the caller or + // during the cloning process. + if (const PHINode *PN = dyn_cast<PHINode>(I)) { + if (isa<PHINode>(VMap[PN])) + PHIToResolve.push_back(PN); + else + break; + } else { break; + } + } // Finally, remap the terminator instructions, as those can't be remapped // until all BBs are mapped. @@ -626,10 +634,10 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // node). for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx) if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]])) - recursivelySimplifyInstruction(PN, DL); + recursivelySimplifyInstruction(PN); // Now that the inlined function body has been fully constructed, go through - // and zap unconditional fall-through branches. This happen all the time when + // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]); @@ -680,7 +688,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Do not increment I, iteratively merge all things this block branches to. } - // Make a final pass over the basic blocks from theh old function to gather + // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]), @@ -691,7 +699,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } -/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, +/// This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. 
The /// effect of this is to copy significantly less code in cases where (for /// example) a function call with constant arguments is inlined, and those @@ -704,9 +712,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const DataLayout *DL, Instruction *TheCall) { - CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), - VMap, ModuleLevelChanges, Returns, NameSuffix, - CodeInfo, DL, nullptr); + CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap, + ModuleLevelChanges, Returns, NameSuffix, CodeInfo, + nullptr); } diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index e70a7d6..ab89b41 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -332,11 +332,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, DEBUG(dbgs() << **i << ", "); DEBUG(dbgs() << ")\n"); + StructType *StructTy; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - PointerType *StructPtr = - PointerType::getUnqual(StructType::get(M->getContext(), paramTy)); + StructTy = StructType::get(M->getContext(), paramTy); paramTy.clear(); - paramTy.push_back(StructPtr); + paramTy.push_back(PointerType::getUnqual(StructTy)); } FunctionType *funcType = FunctionType::get(RetTy, paramTy, false); @@ -364,8 +364,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI); RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); } else RewriteVal = AI++; @@ -447,6 +447,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } } + StructType *StructArgTy = nullptr; AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector<Type*> ArgTypes; @@ -455,7 +456,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, ArgTypes.push_back((*v)->getType()); // Allocate a struct at the beginning of this function - Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); + StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); Struct = new AllocaInst(StructArgTy, nullptr, "structArg", codeReplacer->getParent()->begin()->begin()); @@ -465,9 +466,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(Struct, Idx, - "gep_" + StructValues[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); codeReplacer->getInstList().push_back(GEP); StoreInst *SI = new StoreInst(StructValues[i], GEP); codeReplacer->getInstList().push_back(SI); @@ -491,9 +491,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = 
ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP - = GetElementPtrInst::Create(Struct, Idx, - "gep_reload_" + outputs[i]->getName()); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); codeReplacer->getInstList().push_back(GEP); Output = GEP; } else { @@ -606,10 +605,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut+out); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(OAI, Idx, - "gep_" + outputs[out]->getName(), - NTRet); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(), + NTRet); new StoreInst(outputs[out], GEP, NTRet); } else { new StoreInst(outputs[out], OAI, NTRet); diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp index 26875e8..dc95089 100644 --- a/lib/Transforms/Utils/CtorUtils.cpp +++ b/lib/Transforms/Utils/CtorUtils.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/BitVector.h" #include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/ADT/BitVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "ctor_utils" diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index c2ef1ac..df3e1d4 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -89,7 +89,7 @@ namespace { CallerLPad = cast<LandingPadInst>(I); } - /// getOuterResumeDest - The outer unwind destination is the target of + /// The outer unwind destination is the target of /// unwind edges introduced for calls within the inlined function. BasicBlock *getOuterResumeDest() const { return OuterResumeDest; @@ -99,17 +99,16 @@ namespace { LandingPadInst *getLandingPadInst() const { return CallerLPad; } - /// forwardResume - Forward the 'resume' instruction to the caller's landing - /// pad block. When the landing pad block has only one predecessor, this is + /// Forward the 'resume' instruction to the caller's landing pad block. + /// When the landing pad block has only one predecessor, this is /// a simple branch. When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. void forwardResume(ResumeInst *RI, SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); - /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind - /// destination block for the given basic block, using the values for the - /// original invoke's source block. + /// Add incoming-PHI values to the unwind destination block for the given + /// basic block, using the values for the original invoke's source block. void addIncomingPHIValuesFor(BasicBlock *BB) const { addIncomingPHIValuesForInto(BB, OuterResumeDest); } @@ -124,7 +123,7 @@ namespace { }; } -/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts. +/// Get or create a target for the branch from ResumeInsts. 
BasicBlock *InvokeInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; @@ -159,8 +158,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { return InnerResumeDest; } -/// forwardResume - Forward the 'resume' instruction to the caller's landing pad -/// block. When the landing pad block has only one predecessor, this is a simple +/// Forward the 'resume' instruction to the caller's landing pad block. +/// When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. void InvokeInliningInfo::forwardResume(ResumeInst *RI, @@ -178,9 +177,9 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, RI->eraseFromParent(); } -/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into -/// an invoke, we have to turn all of the calls that can throw into -/// invokes. This function analyze BB to see if there are any calls, and if so, +/// When we inline a basic block into an invoke, +/// we have to turn all of the calls that can throw into invokes. +/// This function analyze BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, @@ -228,7 +227,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, } } -/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls +/// If we inlined an invoke site, we need to convert calls /// in the body of the inlined function into invokes. /// /// II is the invoke instruction being inlined. FirstNewBlock is the first @@ -279,8 +278,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InvokeDest->removePredecessor(II->getParent()); } -/// CloneAliasScopeMetadata - When inlining a function that contains noalias -/// scope metadata, this metadata needs to be cloned so that the inlined blocks +/// When inlining a function that contains noalias scope metadata, +/// this metadata needs to be cloned so that the inlined blocks /// have different "unqiue scopes" at every call site. Were this not done, then /// aliasing scopes from a function inlined into a caller multiple times could /// not be differentiated (and this would lead to miscompiles because the @@ -391,12 +390,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { } } -/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then -/// add new alias scopes for each noalias argument, tag the mapped noalias +/// If the inlined function has noalias arguments, +/// then add new alias scopes for each noalias argument, tag the mapped noalias /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, - const DataLayout *DL, AliasAnalysis *AA) { + const DataLayout &DL, AliasAnalysis *AA) { if (!EnableNoAliasConversion) return; @@ -622,8 +621,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, /// If the inlined function has non-byval align arguments, then /// add @llvm.assume-based alignment assumptions to preserve this information. 
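AddAlignmentAssumptions, whose updated body follows, only needs the DataLayout to phrase a simple fact: an `align N` parameter attribute promises that the low bits of the pointer's address are zero, and emitting an @llvm.assume for it is only worthwhile when the caller cannot already prove at least that alignment. A conceptual, non-LLVM sketch of that predicate and guard (names are illustrative):

#include <cstdint>

// What an alignment assumption asserts: Align is a power of two and the
// address has its low log2(Align) bits clear.
static bool hasPromisedAlignment(uintptr_t Addr, uint64_t Align) {
  return (Addr & (Align - 1)) == 0;
}

// Mirrors the guard in the function below: skip the assumption when the
// known alignment already covers what the attribute promises.
static bool shouldEmitAssumption(uint64_t AttrAlign, uint64_t KnownAlign) {
  return KnownAlign < AttrAlign;
}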
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { - if (!PreserveAlignmentAssumptions || !IFI.DL) + if (!PreserveAlignmentAssumptions) return; + auto &DL = CS.getCaller()->getParent()->getDataLayout(); // To avoid inserting redundant assumptions, we should check for assumptions // already in the caller. To do this, we might need a DT of the caller. @@ -645,20 +645,20 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. Value *Arg = CS.getArgument(I->getArgNo()); - if (getKnownAlignment(Arg, IFI.DL, + if (getKnownAlignment(Arg, DL, CS.getInstruction(), &IFI.ACT->getAssumptionCache(*CalledFunc), - CS.getInstruction(), &DT) >= Align) + &DT) >= Align) continue; - IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg, - Align); + IRBuilder<>(CS.getInstruction()) + .CreateAlignmentAssumption(DL, Arg, Align); } } } -/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee -/// into the caller, update the specified callgraph to reflect the changes we -/// made. Note that it's possible that not all code was copied over, so only +/// Once we have cloned code over from a callee into the caller, +/// update the specified callgraph to reflect the changes we made. +/// Note that it's possible that not all code was copied over, so only /// some edges of the callgraph may remain. static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, @@ -693,8 +693,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. Instruction *NewCall = dyn_cast<Instruction>(VMI->second); - if (!NewCall) continue; + if (!NewCall) + continue; + // We do not treat intrinsic calls like real function calls because we + // expect them to become inline code; do not add an edge for an intrinsic. + CallSite CS = CallSite(NewCall); + if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic()) + continue; + // Remember that this call site got inlined for the client of // InlineFunction. IFI.InlinedCalls.push_back(NewCall); @@ -726,11 +733,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, Type *AggTy = cast<PointerType>(Src->getType())->getElementType(); IRBuilder<> Builder(InsertBlock->begin()); - Value *Size; - if (IFI.DL == nullptr) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = Builder.getInt64(IFI.DL->getTypeStoreSize(AggTy)); + Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer @@ -738,7 +741,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); } -/// HandleByValArgument - When inlining a call site that has a byval argument, +/// When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, @@ -759,11 +762,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. 
return Arg; + const DataLayout &DL = Caller->getParent()->getDataLayout(); + // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.DL, - &IFI.ACT->getAssumptionCache(*Caller), - TheCall) >= ByValAlignment) + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, + &IFI.ACT->getAssumptionCache(*Caller)) >= + ByValAlignment) return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad @@ -771,10 +776,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, } // Create the alloca. If we have DataLayout, use nice alignment. - unsigned Align = 1; - if (IFI.DL) - Align = IFI.DL->getPrefTypeAlignment(AggTy); - + unsigned Align = + Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); + // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the // pointer inside the callee). @@ -789,8 +793,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, return NewAlloca; } -// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime -// intrinsic. +// Check whether this Value is used by a lifetime intrinsic. static bool isUsedByLifetimeMarker(Value *V) { for (User *U : V->users()) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { @@ -805,7 +808,7 @@ static bool isUsedByLifetimeMarker(Value *V) { return false; } -// hasLifetimeMarkers - Check whether the given alloca already has +// Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { Type *Ty = AI->getType(); @@ -862,7 +865,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last); } -/// fixupLineNumbers - Update inlined instructions' line numbers to +/// Update inlined instructions' line numbers to /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { @@ -920,10 +923,9 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } -/// InlineFunction - This function inlines the called function into the basic -/// block of the caller. This returns false if it is not possible to inline -/// this call. The program is still in a well defined state if this occurs -/// though. +/// This function inlines the called function into the basic block of the +/// caller. This returns false if it is not possible to inline this call. +/// The program is still in a well defined state if this occurs though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now @@ -1008,6 +1010,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Keep a list of pair (dst, src) to emit byval initializations. SmallVector<std::pair<Value*, Value*>, 4> ByValInit; + auto &DL = Caller->getParent()->getDataLayout(); + assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -1042,9 +1046,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. 
- CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, IFI.DL, TheCall); + &InlinedFunctionInfo, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -1065,7 +1069,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CloneAliasScopeMetadata(CS, VMap); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, IFI.DL, IFI.AA); + AddAliasScopeMetadata(CS, VMap, DL, IFI.AA); // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. @@ -1173,18 +1177,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, ConstantInt *AllocaSize = nullptr; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { - if (IFI.DL) { - Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType); - uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); - assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); - // Check that array size doesn't saturate uint64_t and doesn't - // overflow when it's multiplied by type size. - if (AllocaArraySize != ~0ULL && - UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { - AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), - AllocaArraySize * AllocaTypeSize); - } + auto &DL = Caller->getParent()->getDataLayout(); + Type *AllocaType = AI->getAllocatedType(); + uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (AllocaArraySize != ~0ULL && + UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); } } @@ -1445,7 +1448,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { - if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr, + auto &DL = Caller->getParent()->getDataLayout(); + if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, &IFI.ACT->getAssumptionCache(*Caller))) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 4830568..bd15f9e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -17,8 +17,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -417,7 +417,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. 
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool MadeChange = false; @@ -434,7 +434,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, Instruction *Inst = BI++; WeakVH BIHandle(BI); - if (recursivelySimplifyInstruction(Inst, TD, TLI)) { + if (recursivelySimplifyInstruction(Inst, TLI)) { MadeChange = true; if (BIHandle != BI) BI = BB->begin(); @@ -464,8 +464,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, /// /// .. and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the and to 0. -void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, - DataLayout *TD) { +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // This only adjusts blocks with PHI nodes. if (!isa<PHINode>(BB->begin())) return; @@ -480,7 +479,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); Value *OldPhiIt = PhiIt; - if (!recursivelySimplifyInstruction(PN, TD)) + if (!recursivelySimplifyInstruction(PN)) continue; // If recursive simplification ended up deleting the next PHI node we would @@ -900,13 +899,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// their preferred alignment from the beginning. /// static unsigned enforceKnownAlignment(Value *V, unsigned Align, - unsigned PrefAlign, const DataLayout *TD) { + unsigned PrefAlign, + const DataLayout &DL) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { // If the preferred alignment is greater than the natural stack alignment // then don't round up. This avoids dynamic stack realignment. - if (TD && TD->exceedsNaturalStackAlignment(PrefAlign)) + if (DL.exceedsNaturalStackAlignment(PrefAlign)) return Align; // If there is a requested alignment and if this is an alloca, round up. if (AI->getAlignment() >= PrefAlign) @@ -945,13 +945,13 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, /// and it is more than the alignment of the ultimate object, see if we can /// increase the alignment of the ultimate object, making this check succeed. unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, - const DataLayout *DL, - AssumptionCache *AC, + const DataLayout &DL, const Instruction *CxtI, + AssumptionCache *AC, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = DL ? 
DL->getPointerTypeSizeInBits(V->getType()) : 64; + unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index a0f8268..90dfaba 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -57,8 +57,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -209,10 +211,11 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, DominatorTree *DT, AssumptionCache *AC) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -476,7 +479,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, /// explicit if they accepted the analysis directly and then updated it. static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, Pass *PP, const DataLayout *DL, + ScalarEvolution *SE, Pass *PP, AssumptionCache *AC) { bool Changed = false; ReprocessLoop: @@ -608,13 +611,15 @@ ReprocessLoop: } } + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + // Scan over the PHI nodes in the loop header. Since they now have only two // incoming values (the loop is canonicalized), we may have simplified the PHI // down to 'X = phi [X, Y]', which should be replaced with 'Y'. PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); @@ -676,7 +681,8 @@ ReprocessLoop: // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI, DL)) continue; + if (!FoldBranchToCommonDest(BI)) + continue; // Success. The block is now dead, so remove it from the loop, // update the dominator tree and delete it. @@ -714,7 +720,7 @@ ReprocessLoop: bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, AliasAnalysis *AA, ScalarEvolution *SE, - const DataLayout *DL, AssumptionCache *AC) { + AssumptionCache *AC) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. 
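Several hunks above route SimplifyInstruction through the module's DataLayout (findPHIToPartitionLoops, the header-PHI sweep in simplifyOneLoop). A sketch of that calling convention as a standalone helper, assuming the post-patch SimplifyInstruction signature shown in this diff; the function name is illustrative:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Fold away degenerate PHIs at the top of BB, pulling the DataLayout from the
// parent module instead of threading it through every caller.
static bool foldTrivialPHIs(BasicBlock *BB, DominatorTree *DT,
                            AssumptionCache *AC) {
  const DataLayout &DL = BB->getModule()->getDataLayout();
  bool Changed = false;
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I);) {
    PHINode *PN = cast<PHINode>(I);
    ++I;
    if (Value *V = SimplifyInstruction(PN, DL, /*TLI=*/nullptr, DT, AC)) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}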
@@ -731,7 +737,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, while (!Worklist.empty()) Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, - SE, PP, DL, AC); + SE, PP, AC); return Changed; } @@ -749,7 +755,6 @@ namespace { DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; AssumptionCache *AC; bool runOnFunction(Function &F) override; @@ -797,13 +802,11 @@ bool LoopSimplify::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = getAnalysisIfAvailable<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AC); + Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC); return Changed; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index accb731..6b3aa02 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -26,8 +26,8 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -500,6 +500,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. + const DataLayout &DL = Header->getModule()->getDataLayout(); const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(), BBE = NewLoopBlocks.end(); BB != BBE; ++BB) @@ -508,7 +509,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); - else if (Value *V = SimplifyInstruction(Inst)) + else if (Value *V = SimplifyInstruction(Inst, DL)) if (LI->replacementPreservesLCSSAForm(Inst, V)) { Inst->replaceAllUsesWith(V); (*BB)->getInstList().erase(Inst); @@ -531,9 +532,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { - DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; - simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC); // LCSSA must be performed on the outermost affected loop. 
The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 91b688c..381d8fc 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" @@ -339,10 +340,11 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI); BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI); BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); + const DataLayout &DL = Header->getModule()->getDataLayout(); // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % (loop unroll factor + 1) - SCEVExpander Expander(*SE, "loop-unroll"); + SCEVExpander Expander(*SE, DL, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index b3bdae4..e0e0e90 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -14,17 +14,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/CFG.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include <algorithm> using namespace llvm; @@ -175,11 +175,16 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, // Remove additional occurences coming from condensed cases and keep the // number of incoming values equal to the number of branches to SuccBB. + SmallVector<unsigned, 8> Indices; for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx) if (PN->getIncomingBlock(Idx) == OrigBB) { - PN->removeIncomingValue(Idx); + Indices.push_back(Idx); LocalNumMergedCases--; } + // Remove incoming values in the reverse order to prevent invalidating + // *successive* index. + for (auto III = Indices.rbegin(), IIE = Indices.rend(); III != IIE; ++III) + PN->removeIncomingValue(*III); } } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index dabadb7..4b34b19 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <queue> @@ -667,6 +668,8 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } + const DataLayout &DL = F.getParent()->getDataLayout(); + // Remove alloca's dbg.declare instrinsics from the function. 
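The LowerSwitch::fixPhis hunk above stops calling PN->removeIncomingValue(Idx) inside the forward scan, because each removal shifts every later index; it now records the indices and erases from the back. That idiom in isolation (DeadPred and the helper name are hypothetical):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Remove every incoming value of PN that arrives from DeadPred, erasing in
// reverse so earlier removals cannot invalidate the indices still queued.
static void removeIncomingFrom(PHINode *PN, BasicBlock *DeadPred) {
  SmallVector<unsigned, 8> ToRemove;
  for (unsigned Idx = 0, E = PN->getNumIncomingValues(); Idx != E; ++Idx)
    if (PN->getIncomingBlock(Idx) == DeadPred)
      ToRemove.push_back(Idx);
  for (auto It = ToRemove.rbegin(), End = ToRemove.rend(); It != End; ++It)
    PN->removeIncomingValue(*It);
}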
for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) @@ -691,7 +694,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AC)) { + if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index c057b06..955ce30 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -155,7 +156,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. - if (Value *V = SimplifyInstruction(InsertedPHI)) { + if (Value *V = + SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) { InsertedPHI->eraseFromParent(); return V; } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 3248a83..c7c0ca6 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -110,8 +110,8 @@ namespace { class SimplifyCFGOpt { const TargetTransformInfo &TTI; + const DataLayout &DL; unsigned BonusInstThreshold; - const DataLayout *const DL; AssumptionCache *AC; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, @@ -131,9 +131,9 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold, - const DataLayout *DL, AssumptionCache *AC) - : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AC(AC) {} + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, + unsigned BonusInstThreshold, AssumptionCache *AC) + : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {} bool run(BasicBlock *BB); }; } @@ -223,9 +223,9 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, /// given instruction, which is assumed to be safe to speculate. TCC_Free means /// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively /// expensive. -static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL, +static unsigned ComputeSpeculationCost(const User *I, const TargetTransformInfo &TTI) { - assert(isSafeToSpeculativelyExecute(I, DL) && + assert(isSafeToSpeculativelyExecute(I) && "Instruction is not safe to speculatively execute!"); return TTI.getUserCost(I); } @@ -249,7 +249,6 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL, static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl<Instruction*> *AggressiveInsts, unsigned &CostRemaining, - const DataLayout *DL, const TargetTransformInfo &TTI) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { @@ -283,10 +282,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". 
Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!isSafeToSpeculativelyExecute(I, DL)) + if (!isSafeToSpeculativelyExecute(I)) return false; - unsigned Cost = ComputeSpeculationCost(I, DL, TTI); + unsigned Cost = ComputeSpeculationCost(I, TTI); if (Cost > CostRemaining) return false; @@ -296,7 +295,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL, TTI)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -305,15 +304,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) { +static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); - if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy()) + if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy()) return CI; // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType())); + IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -346,16 +345,16 @@ namespace { /// while for a chain of '&&' it will build the set elements that make the test /// fail. struct ConstantComparesGatherer { - + const DataLayout &DL; Value *CompValue; /// Value found for the switch comparison Value *Extra; /// Extra clause to be checked before the switch SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch unsigned UsedICmps; /// Number of comparisons matched in the and/or chain /// Construct and compute the result for the comparison instruction Cond - ConstantComparesGatherer(Instruction *Cond, const DataLayout *DL) - : CompValue(nullptr), Extra(nullptr), UsedICmps(0) { - gather(Cond, DL); + ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) + : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) { + gather(Cond); } /// Prevent copy @@ -380,7 +379,7 @@ private: /// against is placed in CompValue. /// If CompValue is already set, the function is expected to fail if a match /// is found but the value compared to is different. - bool matchInstruction(Instruction *I, const DataLayout *DL, bool isEQ) { + bool matchInstruction(Instruction *I, bool isEQ) { // If this is an icmp against a constant, handle this as one of the cases. ICmpInst *ICI; ConstantInt *C; @@ -422,8 +421,8 @@ private: } // If we have "x ult 3", for example, then we can add 0,1,2 to the set. - ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), - C->getValue()); + ConstantRange Span = ConstantRange::makeAllowedICmpRegion( + ICI->getPredicate(), C->getValue()); // Shift the range if the compare is fed by an add. This is the range // compare idiom as emitted by instcombine. 
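The gatherer hunk above switches to ConstantRange::makeAllowedICmpRegion; for a compare such as "x ult 3" the allowed region is [0, 3), which is exactly the {0, 1, 2} set the gatherer feeds into the switch. A tiny hedged illustration: the header path and the 32-bit width are assumptions, and the single-value ConstantRange constructor stands in for the APInt the real code passes.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Values satisfying (x ult 3): lower bound 0, exclusive upper bound 3.
static ConstantRange ultThreeRegion() {
  return ConstantRange::makeAllowedICmpRegion(ICmpInst::ICMP_ULT,
                                              ConstantRange(APInt(32, 3)));
}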
@@ -462,7 +461,7 @@ private: /// the value being compared, and stick the list constants into the Vals /// vector. /// One "Extra" case is allowed to differ from the other. - void gather(Value *V, const DataLayout *DL) { + void gather(Value *V) { Instruction *I = dyn_cast<Instruction>(V); bool isEQ = (I->getOpcode() == Instruction::Or); @@ -484,7 +483,7 @@ private: } // Try to match the current instruction - if (matchInstruction(I, DL, isEQ)) + if (matchInstruction(I, isEQ)) // Match succeed, continue the loop continue; } @@ -532,15 +531,16 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { CV = SI->getCondition(); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL)) CV = ICI->getOperand(0); + } // Unwrap any lossless ptrtoint cast. - if (DL && CV) { + if (CV) { if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) { Value *Ptr = PTII->getPointerOperand(); - if (PTII->getType() == DL->getIntPtrType(Ptr->getType())) + if (PTII->getType() == DL.getIntPtrType(Ptr->getType())) CV = Ptr; } } @@ -981,8 +981,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { - assert(DL && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()), + CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr"); } @@ -1053,7 +1052,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I); /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL, +static bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into @@ -1145,9 +1144,9 @@ HoistTerminator: passingValueIsAlwaysUndefined(BB2V, PN)) return Changed; - if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL)) + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) return Changed; - if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL)) + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) return Changed; } } @@ -1467,7 +1466,6 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// /// \returns true if the conditional block is removed. static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, - const DataLayout *DL, const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); @@ -1511,14 +1509,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return false; // Don't hoist the instruction if it's unsafe or expensive. 
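isValueEqualityComparison above still looks through ptrtoint, but the losslessness test now uses the always-available DataLayout instead of being skipped when no layout was registered. The test as a standalone helper (name illustrative):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Look through a ptrtoint cast only when its result type is exactly the
// pointer-sized integer for that pointer's address space; anything narrower
// or wider is not a lossless reinterpretation of the pointer.
static Value *stripLosslessPtrToInt(Value *V, const DataLayout &DL) {
  if (auto *PTI = dyn_cast<PtrToIntInst>(V)) {
    Value *Ptr = PTI->getPointerOperand();
    if (PTI->getType() == DL.getIntPtrType(Ptr->getType()))
      return Ptr;
  }
  return V;
}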
- if (!isSafeToSpeculativelyExecute(I, DL) && - !(HoistCondStores && - (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, - EndBB)))) + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore( + I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I, DL, TTI) > PHINodeFoldingThreshold * - TargetTransformInfo::TCC_Basic) + ComputeSpeculationCost(I, TTI) > + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) return false; // Store the store speculation candidate. @@ -1574,11 +1571,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL))) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL, TTI) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL, TTI) : 0; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0; unsigned MaxCost = 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; if (OrigCost + ThenCost > MaxCost) @@ -1688,7 +1685,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -1786,8 +1783,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL, - const TargetTransformInfo &TTI) { +static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, + const DataLayout &DL) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -1830,9 +1827,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL, } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, - MaxCostVal0, DL, TTI) || + MaxCostVal0, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, - MaxCostVal1, DL, TTI)) + MaxCostVal1, TTI)) return false; } @@ -2052,8 +2049,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. 
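Both speculation paths above price each hoisted instruction with TTI.getUserCost and stop once a fixed budget derived from PHINodeFoldingThreshold is exhausted. A compressed sketch of that accounting, with an invented helper name and an ArrayRef standing in for the per-block walk:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// True if all of Insts can be speculated within the 2 * Threshold * TCC_Basic
// budget used by SpeculativelyExecuteBB above.
static bool fitsSpeculationBudget(ArrayRef<Instruction *> Insts,
                                  const TargetTransformInfo &TTI,
                                  unsigned Threshold) {
  unsigned Budget = 2 * Threshold * TargetTransformInfo::TCC_Basic;
  unsigned Cost = 0;
  for (Instruction *I : Insts) {
    Cost += TTI.getUserCost(I);
    if (Cost > Budget)
      return false;
  }
  return true;
}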
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL, - unsigned BonusInstThreshold) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { BasicBlock *BB = BI->getParent(); Instruction *Cond = nullptr; @@ -2109,7 +2105,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL, // Ignore dbg intrinsics. if (isa<DbgInfoIntrinsic>(I)) continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I, DL)) + if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I)) return false; // I has only one use and can be executed unconditionally. Instruction *User = dyn_cast<Instruction>(I->user_back()); @@ -2702,8 +2698,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt( - ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, const DataLayout *DL, AssumptionCache *AC) { + ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL, + const TargetTransformInfo &TTI, unsigned BonusInstThreshold, + AssumptionCache *AC) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -2736,7 +2733,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -2752,7 +2749,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -2808,8 +2805,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( /// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, - IRBuilder<> &Builder) { +static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, + const DataLayout &DL) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (!Cond) return false; @@ -2884,10 +2881,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, Builder.SetInsertPoint(BI); // Convert pointer to int before we switch. if (CompVal->getType()->isPointerTy()) { - assert(DL && "Cannot switch on pointer without DataLayout"); - CompVal = Builder.CreatePtrToInt(CompVal, - DL->getIntPtrType(CompVal->getType()), - "magicptr"); + CompVal = Builder.CreatePtrToInt( + CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr"); } // Create the new switch instruction now. @@ -3246,8 +3241,8 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch /// and use it to remove dead cases. 
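EliminateDeadSwitchCases (its parameters reordered below now that DataLayout is mandatory) is built on computeKnownBits: a case constant that disagrees with a bit already known about the condition can never be taken. A hedged sketch of that test, using the post-patch computeKnownBits signature visible earlier in this diff; the helper name is illustrative:

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// True if case value CaseVal contradicts the known bits of the switch
// condition, i.e. the case is unreachable and can be dropped.
static bool caseIsDead(SwitchInst *SI, const APInt &CaseVal,
                       const DataLayout &DL, AssumptionCache *AC) {
  Value *Cond = SI->getCondition();
  unsigned Bits = Cond->getType()->getIntegerBitWidth();
  APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
  computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI);
  // A set bit that is known zero, or a clear bit that is known one, rules
  // the case out.
  return (CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne;
}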
-static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL, - AssumptionCache *AC) { +static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, + const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); @@ -3398,9 +3393,8 @@ static Constant *LookupConstant(Value *V, /// constant or can be replaced by constants from the ConstantPool. Returns the /// resulting constant on success, 0 otherwise. static Constant * -ConstantFold(Instruction *I, - const SmallDenseMap<Value *, Constant *> &ConstantPool, - const DataLayout *DL) { +ConstantFold(Instruction *I, const DataLayout &DL, + const SmallDenseMap<Value *, Constant *> &ConstantPool) { if (SelectInst *Select = dyn_cast<SelectInst>(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) @@ -3420,9 +3414,10 @@ ConstantFold(Instruction *I, return nullptr; } - if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], COps[1], DL); + } return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL); } @@ -3432,12 +3427,10 @@ ConstantFold(Instruction *I, /// destionations CaseDest corresponding to value CaseVal (0 for the default /// case), of a switch instruction SI. static bool -GetCaseResults(SwitchInst *SI, - ConstantInt *CaseVal, - BasicBlock *CaseDest, +GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, - SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res, - const DataLayout *DL) { + SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res, + const DataLayout &DL) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); @@ -3456,7 +3449,7 @@ GetCaseResults(SwitchInst *SI, } else if (isa<DbgInfoIntrinsic>(I)) { // Skip debug intrinsic. continue; - } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) { + } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) { // Instruction is side-effect free and constant. // If the instruction has uses outside this block or a phi node slot for @@ -3527,11 +3520,11 @@ static void MapCaseToResult(ConstantInt *CaseVal, // results for the PHI node of the common destination block for a switch // instruction. Returns false if multiple PHI nodes have been found or if // there is not a common destination block for the switch. -static bool InitializeUniqueCases( - SwitchInst *SI, const DataLayout *DL, PHINode *&PHI, - BasicBlock *&CommonDest, - SwitchCaseResultVectorTy &UniqueResults, - Constant *&DefaultResult) { +static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, + BasicBlock *&CommonDest, + SwitchCaseResultVectorTy &UniqueResults, + Constant *&DefaultResult, + const DataLayout &DL) { for (auto &I : SI->cases()) { ConstantInt *CaseVal = I.getCaseValue(); @@ -3638,15 +3631,15 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. 
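The SwitchToSelect path described above only fires for two distinct results: once InitializeUniqueCases proves the PHI sees one constant for the gathered case values and another for the default, the switch collapses to an icmp plus select. The single-case shape, with hypothetical names:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Build the value that replaces the PHI when the switch only selects between
// ResultC (for CaseVal) and DefaultC (for everything else).
static Value *buildSelectForSwitch(IRBuilder<> &Builder, Value *Cond,
                                   ConstantInt *CaseVal, Constant *ResultC,
                                   Constant *DefaultC) {
  Value *Cmp = Builder.CreateICmpEQ(Cond, CaseVal, "switch.selectcmp");
  return Builder.CreateSelect(Cmp, ResultC, DefaultC, "switch.select");
}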
static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout *DL, AssumptionCache *AC) { + AssumptionCache *AC, const DataLayout &DL) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; BasicBlock *CommonDest = nullptr; Constant *DefaultResult; SwitchCaseResultVectorTy UniqueResults; // Collect all the cases that will deliver the same value from the switch. - if (!InitializeUniqueCases(SI, DL, PHI, CommonDest, UniqueResults, - DefaultResult)) + if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, + DL)) return false; // Selects choose between maximum two values. if (UniqueResults.size() != 2) @@ -3673,12 +3666,10 @@ namespace { /// SwitchLookupTable - Create a lookup table to use as a switch replacement /// with the contents of Values, using DefaultValue to fill any holes in the /// table. - SwitchLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, - Constant *DefaultValue, - const DataLayout *DL); + SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL); /// BuildLookup - Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup table. @@ -3686,8 +3677,7 @@ namespace { /// WouldFitInRegister - Return true if a table with TableSize elements of /// type ElementType would fit in a target-legal register. - static bool WouldFitInRegister(const DataLayout *DL, - uint64_t TableSize, + static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, const Type *ElementType); private: @@ -3729,12 +3719,10 @@ namespace { }; } -SwitchLookupTable::SwitchLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, - Constant *DefaultValue, - const DataLayout *DL) +SwitchLookupTable::SwitchLookupTable( + Module &M, uint64_t TableSize, ConstantInt *Offset, + const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values, + Constant *DefaultValue, const DataLayout &DL) : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) { assert(Values.size() && "Can't build lookup table without values!"); @@ -3904,11 +3892,9 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { llvm_unreachable("Unknown lookup table kind!"); } -bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL, +bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, const Type *ElementType) { - if (!DL) - return false; const IntegerType *IT = dyn_cast<IntegerType>(ElementType); if (!IT) return false; @@ -3918,17 +3904,16 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL, // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. if (TableSize >= UINT_MAX/IT->getBitWidth()) return false; - return DL->fitsInLegalInteger(TableSize * IT->getBitWidth()); + return DL.fitsInLegalInteger(TableSize * IT->getBitWidth()); } /// ShouldBuildLookupTable - Determine whether a lookup table should be built /// for this switch, based on the number of cases, size of the table and the /// types of the results. 
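WouldFitInRegister above gates the bitmap form of the lookup table: all TableSize entries of the element type must fit into one target-legal integer, and the width product must not overflow the unsigned argument of fitsInLegalInteger. The check in isolation (helper name illustrative):

#include <climits>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// A bitmap-encoded lookup table is only possible when TableSize * element
// width is a legal integer width for the target.
static bool bitmapTableFits(const DataLayout &DL, uint64_t TableSize,
                            const IntegerType *IT) {
  if (TableSize >= UINT_MAX / IT->getBitWidth())
    return false; // the width product would overflow
  return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
}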
-static bool ShouldBuildLookupTable(SwitchInst *SI, - uint64_t TableSize, - const TargetTransformInfo &TTI, - const DataLayout *DL, - const SmallDenseMap<PHINode*, Type*>& ResultTypes) { +static bool +ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, + const TargetTransformInfo &TTI, const DataLayout &DL, + const SmallDenseMap<PHINode *, Type *> &ResultTypes) { if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) return false; // TableSize overflowed, or mul below might overflow. @@ -4051,10 +4036,9 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. -static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder, - const TargetTransformInfo &TTI, - const DataLayout* DL) { +static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // Only build lookup table when we have a target that supports it. @@ -4125,14 +4109,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, // or a bitmask that fits in a register. SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), - &CommonDest, DefaultResultsList, DL); + &CommonDest, DefaultResultsList, DL); bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { // As an extra penalty for the validity test we require more cases. if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). return false; - if (!(DL && DL->fitsInLegalInteger(TableSize))) + if (!DL.fitsInLegalInteger(TableSize)) return false; } @@ -4290,12 +4274,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -4305,25 +4289,25 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Remove unreachable cases. 
- if (EliminateDeadSwitchCases(SI, DL, AC)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (EliminateDeadSwitchCases(SI, AC, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - if (SwitchToSelect(SI, Builder, DL, AC)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SwitchToSelect(SI, Builder, AC, DL)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; - if (SwitchToLookupTable(SI, Builder, TTI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SwitchToLookupTable(SI, Builder, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -4360,11 +4344,87 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } return Changed; } +/// Given an block with only a single landing pad and a unconditional branch +/// try to find another basic block which this one can be merged with. This +/// handles cases where we have multiple invokes with unique landing pads, but +/// a shared handler. +/// +/// We specifically choose to not worry about merging non-empty blocks +/// here. That is a PRE/scheduling problem and is best solved elsewhere. In +/// practice, the optimizer produces empty landing pad blocks quite frequently +/// when dealing with exception dense code. (see: instcombine, gvn, if-else +/// sinking in this file) +/// +/// This is primarily a code size optimization. We need to avoid performing +/// any transform which might inhibit optimization (such as our ability to +/// specialize a particular handler via tail commoning). We do this by not +/// merging any blocks which require us to introduce a phi. Since the same +/// values are flowing through both blocks, we don't loose any ability to +/// specialize. If anything, we make such specialization more likely. +/// +/// TODO - This transformation could remove entries from a phi in the target +/// block when the inputs in the phi are the same for the two blocks being +/// merged. In some cases, this could result in removal of the PHI entirely. +static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, + BasicBlock *BB) { + auto Succ = BB->getUniqueSuccessor(); + assert(Succ); + // If there's a phi in the successor block, we'd likely have to introduce + // a phi into the merged landing pad block. + if (isa<PHINode>(*Succ->begin())) + return false; + + for (BasicBlock *OtherPred : predecessors(Succ)) { + if (BB == OtherPred) + continue; + BasicBlock::iterator I = OtherPred->begin(); + LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I); + if (!LPad2 || !LPad2->isIdenticalTo(LPad)) + continue; + for (++I; isa<DbgInfoIntrinsic>(I); ++I) {} + BranchInst *BI2 = dyn_cast<BranchInst>(I); + if (!BI2 || !BI2->isIdenticalTo(BI)) + continue; + + // We've found an identical block. Update our predeccessors to take that + // path instead and make ourselves dead. 
+ SmallSet<BasicBlock *, 16> Preds; + Preds.insert(pred_begin(BB), pred_end(BB)); + for (BasicBlock *Pred : Preds) { + InvokeInst *II = cast<InvokeInst>(Pred->getTerminator()); + assert(II->getNormalDest() != BB && + II->getUnwindDest() == BB && "unexpected successor"); + II->setUnwindDest(OtherPred); + } + + // The debug info in OtherPred doesn't cover the merged control flow that + // used to go through BB. We need to delete it or update it. + for (auto I = OtherPred->begin(), E = OtherPred->end(); + I != E;) { + Instruction &Inst = *I; I++; + if (isa<DbgInfoIntrinsic>(Inst)) + Inst.eraseFromParent(); + } + + SmallSet<BasicBlock *, 16> Succs; + Succs.insert(succ_begin(BB), succ_end(BB)); + for (BasicBlock *Succ : Succs) { + Succ->removePredecessor(BB); + } + + IRBuilder<> Builder(BI); + Builder.CreateUnreachable(); + BI->eraseFromParent(); + return true; + } + return false; +} + bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); @@ -4384,17 +4444,26 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, - BonusInstThreshold, DL, AC)) + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, + BonusInstThreshold, AC)) return true; } + // See if we can merge an empty landing pad block with another which is + // equivalent. + if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) { + for (++I; isa<DbgInfoIntrinsic>(I); ++I) {} + if (I->isTerminator() && + TryToMergeLandingPad(LPad, BI, BB)) + return true; + } + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. - if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (FoldBranchToCommonDest(BI, BonusInstThreshold)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -4409,7 +4478,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -4419,26 +4488,26 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } else if (&*I == cast<Instruction>(BI->getCondition())){ ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } } // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. 
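The new TryToMergeLandingPad above keeps its CFG surgery deliberately small: once an identical empty landing pad is found, every invoke unwinding into the now-redundant block is redirected and the block is sealed with unreachable. Reduced to its core as a hypothetical helper (the real code also deduplicates predecessors with a SmallSet and deletes debug intrinsics in the surviving pad):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Point every invoke that unwinds to DeadPad at SurvivingPad instead, detach
// DeadPad from its successors, and terminate it with unreachable.
static void redirectUnwindEdges(BasicBlock *DeadPad, BasicBlock *SurvivingPad) {
  SmallVector<BasicBlock *, 16> Preds(pred_begin(DeadPad), pred_end(DeadPad));
  for (BasicBlock *Pred : Preds)
    cast<InvokeInst>(Pred->getTerminator())->setUnwindDest(SurvivingPad);

  SmallVector<BasicBlock *, 4> Succs(succ_begin(DeadPad), succ_end(DeadPad));
  for (BasicBlock *Succ : Succs)
    Succ->removePredecessor(DeadPad);

  TerminatorInst *OldTerm = DeadPad->getTerminator();
  IRBuilder<> Builder(OldTerm);
  Builder.CreateUnreachable();
  OldTerm->eraseFromParent();
}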
- if (SimplifyBranchOnICmpChain(BI, DL, Builder)) + if (SimplifyBranchOnICmpChain(BI, Builder, DL)) return true; // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (FoldBranchToCommonDest(BI, BonusInstThreshold)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -4446,16 +4515,16 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI, DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (HoistThenElseCodeToIf(BI, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -4463,8 +4532,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL, TTI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; } // If this is a branch on a phi node in the current block, thread control @@ -4472,14 +4541,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, DL)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; } @@ -4591,7 +4660,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // eliminate it, do so now. 
if (PHINode *PN = dyn_cast<PHINode>(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, DL, TTI); + Changed |= FoldTwoEntryPHINode(PN, TTI, DL); Builder.SetInsertPoint(BB->getTerminator()); if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -4623,7 +4692,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - unsigned BonusInstThreshold, const DataLayout *DL, - AssumptionCache *AC) { - return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AC).run(BB); + unsigned BonusInstThreshold, AssumptionCache *AC) { + return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), + BonusInstThreshold, AC).run(BB); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 6a5d885..8bfc5fb 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -270,95 +270,57 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, Value *IVOperand) { - // Currently we only handle instructions of the form "add <indvar> <value>" - unsigned Op = BO->getOpcode(); - if (Op != Instruction::Add) + // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`. + if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) return false; - // If BO is already both nuw and nsw then there is nothing left to do - if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap()) + const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *, + SCEV::NoWrapFlags); + + switch (BO->getOpcode()) { + default: return false; - IntegerType *IT = cast<IntegerType>(IVOperand->getType()); - Value *OtherOperand = nullptr; - if (BO->getOperand(0) == IVOperand) { - OtherOperand = BO->getOperand(1); - } else { - assert(BO->getOperand(1) == IVOperand && "only other use!"); - OtherOperand = BO->getOperand(0); + case Instruction::Add: + GetExprForBO = &ScalarEvolution::getAddExpr; + break; + + case Instruction::Sub: + GetExprForBO = &ScalarEvolution::getMinusSCEV; + break; + + case Instruction::Mul: + GetExprForBO = &ScalarEvolution::getMulExpr; + break; } - bool Changed = false; - const SCEV *OtherOpSCEV = SE->getSCEV(OtherOperand); - if (OtherOpSCEV == SE->getCouldNotCompute()) - return false; + unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2); + const SCEV *LHS = SE->getSCEV(BO->getOperand(0)); + const SCEV *RHS = SE->getSCEV(BO->getOperand(1)); - const SCEV *IVOpSCEV = SE->getSCEV(IVOperand); - const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0); + bool Changed = false; - if (!BO->hasNoSignedWrap()) { - // Upgrade the add to an "add nsw" if we can prove that it will never - // sign-overflow or sign-underflow. - - const SCEV *SignedMax = - SE->getConstant(APInt::getSignedMaxValue(IT->getBitWidth())); - const SCEV *SignedMin = - SE->getConstant(APInt::getSignedMinValue(IT->getBitWidth())); - - // The addition "IVOperand + OtherOp" does not sign-overflow if the result - // is sign-representable in 2's complement in the given bit-width. - // - // If OtherOp is SLT 0, then for an IVOperand in [SignedMin - OtherOp, - // SignedMax], "IVOperand + OtherOp" is in [SignedMin, SignedMax + OtherOp]. - // Everything in [SignedMin, SignedMax + OtherOp] is representable since - // SignedMax + OtherOp is at least -1. 
- // - // If OtherOp is SGE 0, then for an IVOperand in [SignedMin, SignedMax - - // OtherOp], "IVOperand + OtherOp" is in [SignedMin + OtherOp, SignedMax]. - // Everything in [SignedMin + OtherOp, SignedMax] is representable since - // SignedMin + OtherOp is at most -1. - // - // It follows that for all values of IVOperand in [SignedMin - smin(0, - // OtherOp), SignedMax - smax(0, OtherOp)] the result of the add is - // representable (i.e. there is no sign-overflow). - - const SCEV *UpperDelta = SE->getSMaxExpr(ZeroSCEV, OtherOpSCEV); - const SCEV *UpperLimit = SE->getMinusSCEV(SignedMax, UpperDelta); - - bool NeverSignedOverflows = - SE->isKnownPredicate(ICmpInst::ICMP_SLE, IVOpSCEV, UpperLimit); - - if (NeverSignedOverflows) { - const SCEV *LowerDelta = SE->getSMinExpr(ZeroSCEV, OtherOpSCEV); - const SCEV *LowerLimit = SE->getMinusSCEV(SignedMin, LowerDelta); - - bool NeverSignedUnderflows = - SE->isKnownPredicate(ICmpInst::ICMP_SGE, IVOpSCEV, LowerLimit); - if (NeverSignedUnderflows) { - BO->setHasNoSignedWrap(true); - Changed = true; - } + if (!BO->hasNoUnsignedWrap()) { + const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy); + const SCEV *OpAfterExtend = (SE->*GetExprForBO)( + SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy), + SCEV::FlagAnyWrap); + if (ExtendAfterOp == OpAfterExtend) { + BO->setHasNoUnsignedWrap(); + SE->forgetValue(BO); + Changed = true; } } - if (!BO->hasNoUnsignedWrap()) { - // Upgrade the add computing "IVOperand + OtherOp" to an "add nuw" if we can - // prove that it will never unsigned-overflow (i.e. the result will always - // be representable in the given bit-width). - // - // "IVOperand + OtherOp" is unsigned-representable in 2's complement iff it - // does not produce a carry. "IVOperand + OtherOp" produces no carry iff - // IVOperand ULE (UnsignedMax - OtherOp). - - const SCEV *UnsignedMax = - SE->getConstant(APInt::getMaxValue(IT->getBitWidth())); - const SCEV *UpperLimit = SE->getMinusSCEV(UnsignedMax, OtherOpSCEV); - - bool NeverUnsignedOverflows = - SE->isKnownPredicate(ICmpInst::ICMP_ULE, IVOpSCEV, UpperLimit); - - if (NeverUnsignedOverflows) { - BO->setHasNoUnsignedWrap(true); + if (!BO->hasNoSignedWrap()) { + const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy); + const SCEV *OpAfterExtend = (SE->*GetExprForBO)( + SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy), + SCEV::FlagAnyWrap); + if (ExtendAfterOp == OpAfterExtend) { + BO->setHasNoSignedWrap(); + SE->forgetValue(BO); Changed = true; } } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 55a4455..c499c87 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -51,8 +51,7 @@ namespace { const DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? 
&DLP->getDataLayout() : nullptr; + const DataLayout &DL = F.getParent()->getDataLayout(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); AssumptionCache *AC = diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index fb1d83f..5867d65 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -120,12 +120,12 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, /// string/memory copying library function \p Func. /// Acceptable functions are st[rp][n]?cpy, memove, memcpy, and memset. /// Their fortified (_chk) counterparts are also accepted. -static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func, - const DataLayout *DL) { +static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func) { + const DataLayout &DL = F->getParent()->getDataLayout(); FunctionType *FT = F->getFunctionType(); LLVMContext &Context = F->getContext(); Type *PCharTy = Type::getInt8PtrTy(Context); - Type *SizeTTy = DL ? DL->getIntPtrType(Context) : nullptr; + Type *SizeTTy = DL.getIntPtrType(Context); unsigned NumParams = FT->getNumParams(); // All string libfuncs return the same type as the first parameter. @@ -208,10 +208,6 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) { if (Len == 0) return Dst; - // These optimizations require DataLayout. - if (!DL) - return nullptr; - return emitStrLenMemCpy(Src, Dst, Len, B); } @@ -230,9 +226,9 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy( - CpyDst, Src, - ConstantInt::get(DL->getIntPtrType(Src->getContext()), Len + 1), 1); + B.CreateMemCpy(CpyDst, Src, + ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1), + 1); return Dst; } @@ -269,10 +265,6 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) { if (SrcLen == 0 || Len == 0) return Dst; - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // We don't optimize this case if (Len < SrcLen) return nullptr; @@ -297,24 +289,20 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); if (!CharC) { - // These optimizations require DataLayout. - if (!DL) - return nullptr; - uint64_t Len = GetStringLength(SrcStr); if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32. return nullptr; - return EmitMemChr( - SrcStr, CI->getArgOperand(1), // include nul. - ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), B, DL, TLI); + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), + B, DL, TLI); } // Otherwise, the character is a constant, see if the first argument is // a string literal. If so, we can constant fold. 
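Most of the string folds above and below need a length constant of the target's size_t type; with DataLayout guaranteed, that is a single lookup rather than a bail-out. The pattern as a hypothetical helper:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Length constant of the target's pointer-sized integer type, as used when
// strcat/strcpy/strchr are lowered to memcpy or memchr in these hunks.
static ConstantInt *getSizeTConstant(LLVMContext &Ctx, const DataLayout &DL,
                                     uint64_t Len) {
  return ConstantInt::get(DL.getIntPtrType(Ctx), Len);
}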
StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { - if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p) + if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); return nullptr; } @@ -350,8 +338,8 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) - if (DL && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, DL, TLI); + if (CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, TLI); return nullptr; } @@ -398,12 +386,8 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { - // These optimizations require DataLayout. - if (!DL) - return nullptr; - return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(DL->getIntPtrType(CI->getContext()), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), std::min(Len1, Len2)), B, DL, TLI); } @@ -435,7 +419,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); - if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI); StringRef Str1, Str2; @@ -462,17 +446,13 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); if (Len == 0) @@ -481,7 +461,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(Dst, Src, - ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), 1); + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1); return Dst; } @@ -490,11 +470,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { // Verify the "stpcpy" function prototype. FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy, DL)) - return nullptr; - - // These optimizations require DataLayout. - if (!DL) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); @@ -509,9 +485,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { return nullptr; Type *PT = FT->getParamType(0); - Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); + Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); Value *DstEnd = - B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1)); + B.CreateGEP(Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. 
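To make the strcpy/stpcpy folds in this hunk concrete, here is an illustration using plain libc calls rather than the memcpy intrinsic the simplifier actually emits; the helper names are invented. Note that in the patch GetStringLength returns the length including the nul, so the GEP index Len - 1 is the same dst + strlen(src) shown below.

#include <cassert>
#include <cstring>

// strcpy(dst, src) with a known source length is just a fixed-size copy that
// includes the terminating nul; the call returns dst.
static char *strcpy_as_memcpy(char *dst, const char *src, size_t src_strlen) {
  std::memcpy(dst, src, src_strlen + 1); // +1 copies the nul byte
  return dst;
}

// stpcpy differs only in its return value: a pointer to the copied nul,
// i.e. dst + strlen(src).
static char *stpcpy_as_memcpy(char *dst, const char *src, size_t src_strlen) {
  std::memcpy(dst, src, src_strlen + 1);
  return dst + src_strlen;
}

int main() {
  char a[16], b[16];
  assert(strcpy_as_memcpy(a, "wxyz", 4) == a && std::strcmp(a, "wxyz") == 0);
  char *end = stpcpy_as_memcpy(b, "wxyz", 4);
  assert(std::strcmp(b, "wxyz") == 0 && *end == '\0' && end == b + 4);
  return 0;
}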
@@ -523,7 +499,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy)) return nullptr; Value *Dst = CI->getArgOperand(0); @@ -551,17 +527,13 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { if (Len == 0) return Dst; // strncpy(x, y, 0) -> x - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // Let strncpy handle the zero padding if (Len > SrcLen + 1) return nullptr; Type *PT = FT->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] - B.CreateMemCpy(Dst, Src, ConstantInt::get(DL->getIntPtrType(PT), Len), 1); + B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); return Dst; } @@ -629,8 +601,8 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { } // strpbrk(s, "a") -> strchr(s, 'a') - if (DL && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI); + if (HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TLI); return nullptr; } @@ -706,7 +678,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) { } // strcspn(s, "") -> strlen(s) - if (DL && HasS2 && S2.empty()) + if (HasS2 && S2.empty()) return EmitStrLen(CI->getArgOperand(0), B, DL, TLI); return nullptr; @@ -725,7 +697,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 - if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI); if (!StrLen) return nullptr; @@ -767,12 +739,98 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) { // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) { - Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI); + Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI); return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; } return nullptr; } +Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy(32) || + !FT->getParamType(2)->isIntegerTy() || + !FT->getReturnType()->isPointerTy()) + return nullptr; + + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + + // memchr(x, y, 0) -> null + if (LenC && LenC->isNullValue()) + return Constant::getNullValue(CI->getType()); + + // From now on we need at least constant length and string. + StringRef Str; + if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false)) + return nullptr; + + // Truncate the string to LenC. If Str is smaller than LenC we will still only + // scan the string, as reading past the end of it is undefined and we can just + // return null if we don't find the char. 
+ Str = Str.substr(0, LenC->getZExtValue()); + + // If the char is variable but the input str and length are not we can turn + // this memchr call into a simple bit field test. Of course this only works + // when the return value is only checked against null. + // + // It would be really nice to reuse switch lowering here but we can't change + // the CFG at this point. + // + // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0 + // after bounds check. + if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) { + unsigned char Max = + *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()), + reinterpret_cast<const unsigned char *>(Str.end())); + + // Make sure the bit field we're about to create fits in a register on the + // target. + // FIXME: On a 64 bit architecture this prevents us from using the + // interesting range of alpha ascii chars. We could do better by emitting + // two bitfields or shifting the range by 64 if no lower chars are used. + if (!DL.fitsInLegalInteger(Max + 1)) + return nullptr; + + // For the bit field use a power-of-2 type with at least 8 bits to avoid + // creating unnecessary illegal types. + unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max)); + + // Now build the bit field. + APInt Bitfield(Width, 0); + for (char C : Str) + Bitfield.setBit((unsigned char)C); + Value *BitfieldC = B.getInt(Bitfield); + + // First check that the bit field access is within bounds. + Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType()); + Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width), + "memchr.bounds"); + + // Create code that checks if the given bit is set in the field. + Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C); + Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits"); + + // Finally merge both checks and cast to pointer type. The inttoptr + // implicitly zexts the i1 to intptr type. + return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType()); + } + + // Check if all arguments are constants. If so, we can constant fold. + if (!CharC) + return nullptr; + + // Compute the offset. + size_t I = Str.find(CharC->getSExtValue() & 0xFF); + if (I == StringRef::npos) // Didn't find the char. memchr returns null. + return Constant::getNullValue(CI->getType()); + + // memchr(s+n,c,l) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "memchr"); +} + Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); FunctionType *FT = Callee->getFunctionType(); @@ -827,11 +885,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. - if (!DL) - return nullptr; - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy)) return nullptr; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) @@ -842,11 +897,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. 
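A self-contained model of the bit-field fold added in optimizeMemChr above, in plain C++ rather than IRBuilder calls; the field width of 16 is chosen by hand here, where the patch computes NextPowerOf2 of the largest character in the string:

#include <cassert>
#include <cstdint>
#include <cstring>

// memchr("\r\n", c, 2) != nullptr, rewritten as a bounds check plus a bit test.
// Bits 10 ('\n') and 13 ('\r') are set; any c outside [0, 16) cannot occur in
// the two-character string, which is what the bounds check encodes.
static bool memchr_crlf_as_bittest(unsigned char c) {
  const uint16_t field = (1u << '\r') | (1u << '\n');
  return c < 16 && ((field >> c) & 1u) != 0;
}

int main() {
  for (int c = 0; c < 256; ++c) {
    bool viaMemchr = std::memchr("\r\n", c, 2) != nullptr;
    assert(viaMemchr == memchr_crlf_as_bittest(static_cast<unsigned char>(c)));
  }
  return 0;
}

The patch only performs this rewrite when the memchr result is used purely in a null/non-null comparison and when the bit field fits in a legal integer register for the target.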
- if (!DL) - return nullptr; - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove)) return nullptr; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) @@ -857,11 +909,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. - if (!DL) - return nullptr; - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset)) return nullptr; // memset(p, v, n) -> llvm.memset(p, v, n, 1) @@ -1521,7 +1570,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI); + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); @@ -1534,7 +1583,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - Value *NewCI = EmitPutS(GV, B, DL, TLI); + Value *NewCI = EmitPutS(GV, B, TLI); return (CI->use_empty() || !NewCI) ? NewCI : ConstantInt::get(CI->getType(), FormatStr.size() + 1); @@ -1544,7 +1593,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TLI); if (CI->use_empty() || !Res) return Res; @@ -1554,7 +1603,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) { // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - return EmitPutS(CI->getArgOperand(1), B, DL, TLI); + return EmitPutS(CI->getArgOperand(1), B, TLI); } return nullptr; } @@ -1600,16 +1649,11 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { if (FormatStr[i] == '%') return nullptr; // we found a format specifier, bail out. - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - B.CreateMemCpy( - CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(DL->getIntPtrType(CI->getContext()), - FormatStr.size() + 1), - 1); // Copy the null byte. + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size() + 1), + 1); // Copy the null byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1634,10 +1678,6 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { } if (FormatStr[1] == 's') { - // These optimizations require DataLayout. 
- if (!DL) - return nullptr; - // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; @@ -1702,13 +1742,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) { if (FormatStr[i] == '%') // Could handle %% -> % if we cared. return nullptr; // We found a format specifier. - // These optimizations require DataLayout. - if (!DL) - return nullptr; - return EmitFWrite( CI->getArgOperand(1), - ConstantInt::get(DL->getIntPtrType(CI->getContext()), FormatStr.size()), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()), CI->getArgOperand(0), B, DL, TLI); } @@ -1723,14 +1759,14 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; - return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); + return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; - return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI); } return nullptr; } @@ -1790,7 +1826,7 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) { // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TLI); return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; } @@ -1802,10 +1838,6 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // These optimizations require DataLayout. - if (!DL) - return nullptr; - // Require two pointers. Also, we can't optimize if return value is used. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || @@ -1820,7 +1852,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { // Known to have no uses (see above). 
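The stdio folds in the hunks above, and the fputs lowering that continues just below, all rely on simple equivalences between formatted and unformatted output calls. A short illustration with the plain libc functions (no checking harness, output goes to stdout):

#include <cstdio>

int main() {
  // printf("%s\n", s) -> puts(s): puts appends the newline itself.
  std::puts("hello"); // same bytes on stdout as printf("%s\n", "hello")

  // printf("%c", c) -> putchar(c).
  std::putchar('x'); // same as printf("%c", 'x')

  // fwrite(p, 1, 1, f) -> fputc(p[0], f): a one-byte write is a single char.
  const char *p = "A";
  std::fputc(p[0], stdout); // same as fwrite(p, 1, 1, stdout)

  // fprintf(f, "%s", str) -> fputs(str, f): no format specifiers are expanded.
  std::fputs("done\n", stdout); // same as fprintf(stdout, "%s", "done\n")
  return 0;
}

The return values of the replacement functions are not identical to the originals, which is why several of these folds (the fwrite case above explicitly) are guarded on the call's result being unused or are followed by a cast/constant of the expected value.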
return EmitFWrite( CI->getArgOperand(0), - ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len - 1), + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1), CI->getArgOperand(1), B, DL, TLI); } @@ -1839,7 +1871,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) { if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI); + Value *Res = EmitPutChar(B.getInt32('\n'), B, TLI); if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); @@ -1906,6 +1938,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeStrCSpn(CI, Builder); case LibFunc::strstr: return optimizeStrStr(CI, Builder); + case LibFunc::memchr: + return optimizeMemChr(CI, Builder); case LibFunc::memcmp: return optimizeMemCmp(CI, Builder); case LibFunc::memcpy: @@ -2089,9 +2123,9 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { } LibCallSimplifier::LibCallSimplifier( - const DataLayout *DL, const TargetLibraryInfo *TLI, + const DataLayout &DL, const TargetLibraryInfo *TLI, function_ref<void(Instruction *, Value *)> Replacer) - : FortifiedSimplifier(DL, TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false), + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false), Replacer(Replacer) {} void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { @@ -2187,7 +2221,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2201,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> & Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2215,7 +2249,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk, DL)) + if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { @@ -2231,8 +2265,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, LibFunc::Func Func) { Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); + const DataLayout &DL = CI->getModule()->getDataLayout(); - if (!checkStringCopyLibFuncSignature(Callee, Func, DL)) + if (!checkStringCopyLibFuncSignature(Callee, Func)) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1), @@ -2250,7 +2285,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. 
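For readers unfamiliar with the fortified _chk family: these variants carry the destination object's size as an extra argument and abort when a write would overflow it. The isFortifiedCallFoldable test used in the hunk that continues below amounts to proving that this runtime check can never fire. A simplified model of that condition, my own helper rather than the LLVM routine:

#include <cassert>
#include <cstdint>

// A fortified call like __memcpy_chk(dst, src, len, objsize) can be lowered to
// the plain call when the overflow check is provably redundant: either the
// object size is unknown (objsize == (size_t)-1, so no check is possible
// anyway) or the number of bytes written is no larger than the object.
static bool fortify_check_redundant(uint64_t objSize, uint64_t len) {
  const uint64_t kUnknown = UINT64_MAX; // conventional "size unknown" value
  return objSize == kUnknown || len <= objSize;
}

int main() {
  assert(fortify_check_redundant(UINT64_MAX, 123)); // size unknown: plain call
  assert(fortify_check_redundant(64, 16));          // 16 bytes into a 64-byte object
  assert(!fortify_check_redundant(16, 64));         // would overflow: keep the check
  return 0;
}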
if (isFortifiedCallFoldable(CI, 2, 1, true)) { - Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6)); + Value *Ret = EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6)); return Ret; } else if (!OnlyLowerUnknownSize) { // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk. @@ -2258,11 +2293,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, if (Len == 0) return nullptr; - // This optimization requires DataLayout. - if (!DL) - return nullptr; - - Type *SizeTTy = DL->getIntPtrType(CI->getContext()); + Type *SizeTTy = DL.getIntPtrType(CI->getContext()); Value *LenV = ConstantInt::get(SizeTTy, Len); Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI); // If the function was an __stpcpy_chk, and we were able to fold it into @@ -2280,12 +2311,11 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, Function *Callee = CI->getCalledFunction(); StringRef Name = Callee->getName(); - if (!checkStringCopyLibFuncSignature(Callee, Func, DL)) + if (!checkStringCopyLibFuncSignature(Callee, Func)) return nullptr; if (isFortifiedCallFoldable(CI, 3, 2, false)) { - Value *Ret = - EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7)); + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TLI, Name.substr(2, 7)); return Ret; } return nullptr; @@ -2328,8 +2358,6 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { return nullptr; } -FortifiedLibCallSimplifier:: -FortifiedLibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI, - bool OnlyLowerUnknownSize) - : DL(DL), TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) { -} +FortifiedLibCallSimplifier::FortifiedLibCallSimplifier( + const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize) + : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {} diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index b343cc4..a2a54da 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -60,6 +60,7 @@ #define DEBUG_TYPE "symbol-rewriter" #include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" +#include "llvm/ADT/SmallString.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -72,15 +73,15 @@ #include "llvm/Transforms/Utils/SymbolRewriter.h" using namespace llvm; +using namespace SymbolRewriter; static cl::list<std::string> RewriteMapFiles("rewrite-map-file", cl::desc("Symbol Rewrite Map"), cl::value_desc("filename")); -namespace llvm { -namespace SymbolRewriter { -void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source, - const std::string &Target) { +static void rewriteComdat(Module &M, GlobalObject *GO, + const std::string &Source, + const std::string &Target) { if (Comdat *CD = GO->getComdat()) { auto &Comdats = M.getComdatSymbolTable(); @@ -92,6 +93,7 @@ void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source, } } +namespace { template <RewriteDescriptor::Type DT, typename ValueType, ValueType *(llvm::Module::*Get)(StringRef) const> class ExplicitRewriteDescriptor : public RewriteDescriptor { @@ -226,6 +228,7 @@ typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, &llvm::Module::getNamedAlias, &llvm::Module::aliases> PatternRewriteNamedAliasDescriptor; +} // namespace bool RewriteMapParser::parse(const std::string &MapFile, RewriteDescriptorList *DL) { @@ -489,8 +492,6 @@ 
parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, return true; } -} -} namespace { class RewriteSymbols : public ModulePass { diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 49c0902..54c7688 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -291,14 +291,18 @@ static Metadata *MapMetadataImpl(const Metadata *MD, return nullptr; } + // Note: this cast precedes the Flags check so we always get its associated + // assertion. const MDNode *Node = cast<MDNode>(MD); - assert(Node->isResolved() && "Unexpected unresolved node"); // If this is a module-level metadata and we know that nothing at the // module level is changing, then use an identity mapping. if (Flags & RF_NoModuleLevelChanges) return mapToSelf(VM, MD); + // Require resolved nodes whenever metadata might be remapped. + assert(Node->isResolved() && "Unexpected unresolved node"); + if (Node->isDistinct()) return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 525c050..29fb01f 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" @@ -206,8 +207,6 @@ namespace { AA = &P->getAnalysis<AliasAnalysis>(); DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &P->getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); @@ -222,7 +221,6 @@ namespace { AliasAnalysis *AA; DominatorTree *DT; ScalarEvolution *SE; - const DataLayout *DL; const TargetTransformInfo *TTI; // FIXME: const correct? @@ -442,8 +440,6 @@ namespace { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; TTI = IgnoreTargetInfo ? nullptr : &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( @@ -642,19 +638,19 @@ namespace { dyn_cast<SCEVConstant>(OffsetSCEV)) { ConstantInt *IntOff = ConstOffSCEV->getValue(); int64_t Offset = IntOff->getSExtValue(); - + const DataLayout &DL = I->getModule()->getDataLayout(); Type *VTy = IPtr->getType()->getPointerElementType(); - int64_t VTyTSS = (int64_t) DL->getTypeStoreSize(VTy); + int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy); Type *VTy2 = JPtr->getType()->getPointerElementType(); if (VTy != VTy2 && Offset < 0) { - int64_t VTy2TSS = (int64_t) DL->getTypeStoreSize(VTy2); + int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2); OffsetInElmts = Offset/VTy2TSS; - return (abs64(Offset) % VTy2TSS) == 0; + return (std::abs(Offset) % VTy2TSS) == 0; } OffsetInElmts = Offset/VTyTSS; - return (abs64(Offset) % VTyTSS) == 0; + return (std::abs(Offset) % VTyTSS) == 0; } return false; @@ -846,7 +842,7 @@ namespace { // It is important to cleanup here so that future iterations of this // function have less work to do. 
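The pointer-pairing arithmetic in BBVectorize's getPairPtrInfo (this hunk and the next), like the SLP vectorizer's isConsecutiveAccess later in the patch, reduces to a simple test: two accesses are candidates for fusion when their byte distance is an exact multiple of the element's store size, and adjacent when that distance is exactly one element. A standalone restatement with a hypothetical helper:

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Returns true and sets offsetInElmts when the byte offset between two
// accesses of elements of size elemStoreSize is a whole number of elements.
// Callers that pair adjacent loads/stores additionally require the result to
// be exactly +1 or -1.
static bool whole_element_offset(int64_t byteOffset, int64_t elemStoreSize,
                                 int64_t &offsetInElmts) {
  offsetInElmts = byteOffset / elemStoreSize;
  return std::abs(byteOffset) % elemStoreSize == 0;
}

int main() {
  int64_t n = 0;
  assert(whole_element_offset(8, 4, n) && n == 2);   // two elements apart
  assert(whole_element_offset(4, 4, n) && n == 1);   // adjacent: pairable
  assert(whole_element_offset(-4, 4, n) && n == -1); // adjacent, reversed order
  assert(!whole_element_offset(6, 4, n));            // straddles an element
  return 0;
}

When the pair is formed, the code above also takes the alignment of the lower-addressed access as the alignment of the combined vector access.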
- (void) SimplifyInstructionsInBlock(&BB, DL, AA->getTargetLibraryInfo()); + (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo()); return true; } @@ -900,10 +896,6 @@ namespace { return false; } - // We can't vectorize memory operations without target data - if (!DL && IsSimpleLoadStore) - return false; - Type *T1, *T2; getInstructionTypes(I, T1, T2); @@ -938,9 +930,8 @@ namespace { if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) return false; - if ((!Config.VectorizePointers || !DL) && - (T1->getScalarType()->isPointerTy() || - T2->getScalarType()->isPointerTy())) + if (!Config.VectorizePointers && (T1->getScalarType()->isPointerTy() || + T2->getScalarType()->isPointerTy())) return false; if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits || @@ -985,8 +976,8 @@ namespace { unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; int64_t OffsetInElmts = 0; if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, - IAddressSpace, JAddressSpace, - OffsetInElmts) && abs64(OffsetInElmts) == 1) { + IAddressSpace, JAddressSpace, OffsetInElmts) && + std::abs(OffsetInElmts) == 1) { FixedOrder = (int) OffsetInElmts; unsigned BottomAlignment = IAlignment; if (OffsetInElmts < 0) BottomAlignment = JAlignment; @@ -1001,8 +992,8 @@ namespace { // An aligned load or store is possible only if the instruction // with the lower offset has an alignment suitable for the // vector type. - - unsigned VecAlignment = DL->getPrefTypeAlignment(VType); + const DataLayout &DL = I->getModule()->getDataLayout(); + unsigned VecAlignment = DL.getPrefTypeAlignment(VType); if (BottomAlignment < VecAlignment) return false; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 6142306..b7d0ae4 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -218,6 +218,15 @@ public: R.getInstr()) {} }; +/// A helper function for converting Scalar types to vector types. +/// If the incoming type is void, we return void. If the VF is 1, we return +/// the scalar type. +static Type* ToVectorTy(Type *Scalar, unsigned VF) { + if (Scalar->isVoidTy() || VF == 1) + return Scalar; + return VectorType::get(Scalar, VF); +} + /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). /// This class performs the widening of scalars into vectors, or multiple @@ -235,13 +244,13 @@ public: class InnerLoopVectorizer { public: InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const DataLayout *DL, - const TargetLibraryInfo *TLI, unsigned VecWidth, + DominatorTree *DT, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, unsigned VecWidth, unsigned UnrollFactor) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI), + : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), - Legal(nullptr) {} + Legal(nullptr), AddedSafetyChecks(false) {} // Perform the actual loop widening (vectorization). void vectorize(LoopVectorizationLegality *L) { @@ -255,6 +264,11 @@ public: updateAnalysis(); } + // Return true if any runtime check is added. + bool IsSafetyChecksAdded() { + return AddedSafetyChecks; + } + virtual ~InnerLoopVectorizer() {} protected: @@ -389,10 +403,10 @@ protected: DominatorTree *DT; /// Alias Analysis. 
AliasAnalysis *AA; - /// Data Layout. - const DataLayout *DL; /// Target Library Info. const TargetLibraryInfo *TLI; + /// Target Transform Info. + const TargetTransformInfo *TTI; /// The vectorization SIMD factor to use. Each vector will have this many /// vector elements. @@ -434,14 +448,17 @@ protected: EdgeMaskCache MaskCache; LoopVectorizationLegality *Legal; + + // Record whether runtime check is added. + bool AddedSafetyChecks; }; class InnerLoopUnroller : public InnerLoopVectorizer { public: InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const DataLayout *DL, - const TargetLibraryInfo *TLI, unsigned UnrollFactor) : - InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { } + DominatorTree *DT, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, unsigned UnrollFactor) + : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor) {} private: void scalarizeInstruction(Instruction *Instr, @@ -488,7 +505,7 @@ static std::string getDebugLocString(const Loop *L) { raw_string_ostream OS(Result); const DebugLoc LoopDbgLoc = L->getStartLoc(); if (!LoopDbgLoc.isUnknown()) - LoopDbgLoc.print(L->getHeader()->getContext(), OS); + LoopDbgLoc.print(OS); else // Just print the module name. OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier(); @@ -543,14 +560,13 @@ static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *F /// induction variable and the different reduction variables. class LoopVectorizationLegality { public: - LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, - DominatorTree *DT, TargetLibraryInfo *TLI, - AliasAnalysis *AA, Function *F, - const TargetTransformInfo *TTI, + LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT, + TargetLibraryInfo *TLI, AliasAnalysis *AA, + Function *F, const TargetTransformInfo *TTI, LoopAccessAnalysis *LAA) - : NumPredStores(0), TheLoop(L), SE(SE), DL(DL), - TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), - Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} + : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F), + TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), Induction(nullptr), + WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -842,8 +858,6 @@ private: Loop *TheLoop; /// Scev analysis. ScalarEvolution *SE; - /// DataLayout analysis. - const DataLayout *DL; /// Target Library Info. TargetLibraryInfo *TLI; /// Parent function @@ -884,7 +898,7 @@ private: ValueToValueMap Strides; SmallPtrSet<Value *, 8> StrideSet; - + /// While vectorizing these instructions we have to generate a /// call to the appropriate masked intrinsic SmallPtrSet<const Instruction*, 8> MaskedOp; @@ -902,10 +916,9 @@ public: LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, - const DataLayout *DL, const TargetLibraryInfo *TLI, - AssumptionCache *AC, const Function *F, - const LoopVectorizeHints *Hints) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI), + const TargetLibraryInfo *TLI, AssumptionCache *AC, + const Function *F, const LoopVectorizeHints *Hints) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), TheFunction(F), Hints(Hints) { CodeMetrics::collectEphemeralValues(L, AC, EphValues); } @@ -958,11 +971,6 @@ private: /// width. 
Vector width of one means scalar. unsigned getInstructionCost(Instruction *I, unsigned VF); - /// A helper function for converting Scalar types to vector types. - /// If the incoming type is void, we return void. If the VF is 1, we return - /// the scalar type. - static Type* ToVectorTy(Type *Scalar, unsigned VF); - /// Returns whether the instruction is a load or store and will be a emitted /// as a vector operation. bool isConsecutiveLoadOrStore(Instruction *I); @@ -988,8 +996,6 @@ private: LoopVectorizationLegality *Legal; /// Vector target information. const TargetTransformInfo &TTI; - /// Target data layout information. - const DataLayout *DL; /// Target Library Info. const TargetLibraryInfo *TLI; const Function *TheFunction; @@ -1254,7 +1260,6 @@ struct LoopVectorize : public FunctionPass { } ScalarEvolution *SE; - const DataLayout *DL; LoopInfo *LI; TargetTransformInfo *TTI; DominatorTree *DT; @@ -1270,8 +1275,6 @@ struct LoopVectorize : public FunctionPass { bool runOnFunction(Function &F) override { SE = &getAnalysis<ScalarEvolution>(); - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : nullptr; LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -1292,12 +1295,6 @@ struct LoopVectorize : public FunctionPass { if (!TTI->getNumberOfRegisters(true)) return false; - if (!DL) { - DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName() - << ": Missing data layout\n"); - return false; - } - // Build up a worklist of inner-loops to vectorize. This is necessary as // the act of vectorizing or partially unrolling a loop creates new loops // and can invalidate iterators across the loops. @@ -1317,6 +1314,40 @@ struct LoopVectorize : public FunctionPass { return Changed; } + static void AddRuntimeUnrollDisableMetaData(Loop *L) { + SmallVector<Metadata *, 4> MDs; + // Reserve first location for self reference to the LoopID metadata node. + MDs.push_back(nullptr); + bool IsUnrollMetadata = false; + MDNode *LoopID = L->getLoopID(); + if (LoopID) { + // First find existing loop unrolling disable metadata. + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); + if (MD) { + const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); + IsUnrollMetadata = + S && S->getString().startswith("llvm.loop.unroll.disable"); + } + MDs.push_back(LoopID->getOperand(i)); + } + } + + if (!IsUnrollMetadata) { + // Add runtime unroll disable metadata. + LLVMContext &Context = L->getHeader()->getContext(); + SmallVector<Metadata *, 1> DisableOperands; + DisableOperands.push_back( + MDString::get(Context, "llvm.loop.unroll.runtime.disable")); + MDNode *DisableNode = MDNode::get(Context, DisableOperands); + MDs.push_back(DisableNode); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + L->setLoopID(NewLoopID); + } + } + bool processLoop(Loop *L) { assert(L->empty() && "Only process inner loops."); @@ -1391,7 +1422,7 @@ struct LoopVectorize : public FunctionPass { } // Check if it is legal to vectorize the loop. 
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI, LAA); + LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1399,8 +1430,7 @@ struct LoopVectorize : public FunctionPass { } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AC, F, - &Hints); + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints); // Check the function attributes to find out if this function should be // optimized for size. @@ -1464,14 +1494,20 @@ struct LoopVectorize : public FunctionPass { // We decided not to vectorize, but we may want to unroll. - InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF); + InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, UF); Unroller.vectorize(&LVL); } else { // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF); + InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, UF); LB.vectorize(&LVL); ++LoopsVectorized; + // Add metadata to disable runtime unrolling scalar loop when there's no + // runtime check about strides and memory. Because at this situation, + // scalar loop is rarely used not worthy to be unrolled. + if (!LB.IsSafetyChecksAdded()) + AddRuntimeUnrollDisableMetaData(L); + // Report the vectorization decision. emitOptimizationRemark( F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), @@ -1561,10 +1597,10 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, /// \brief Find the operand of the GEP that should be checked for consecutive /// stores. This ignores trailing indices that have no effect on the final /// pointer. -static unsigned getGEPInductionOperand(const DataLayout *DL, - const GetElementPtrInst *Gep) { +static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) { + const DataLayout &DL = Gep->getModule()->getDataLayout(); unsigned LastOperand = Gep->getNumOperands() - 1; - unsigned GEPAllocSize = DL->getTypeAllocSize( + unsigned GEPAllocSize = DL.getTypeAllocSize( cast<PointerType>(Gep->getType()->getScalarType())->getElementType()); // Walk backwards and try to peel off zeros. @@ -1575,7 +1611,7 @@ static unsigned getGEPInductionOperand(const DataLayout *DL, // If it's a type with the same allocation size as the result of the GEP we // can peel off the zero index. - if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize) + if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize) break; --LastOperand; } @@ -1621,7 +1657,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { return II.getConsecutiveDirection(); } - unsigned InductionOperand = getGEPInductionOperand(DL, Gep); + unsigned InductionOperand = getGEPInductionOperand(Gep); // Check that all of the gep indices are uniform except for our induction // operand. @@ -1714,11 +1750,12 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); // An alignment of 0 means target abi alignment. We need to use the scalar's // target abi alignment in such a case. 
+ const DataLayout &DL = Instr->getModule()->getDataLayout(); if (!Alignment) - Alignment = DL->getABITypeAlignment(ScalarDataTy); + Alignment = DL.getABITypeAlignment(ScalarDataTy); unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace(); - unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); - unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; + unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ScalarDataTy); + unsigned VectorElementSize = DL.getTypeStoreSize(DataTy) / VF; if (SI && Legal->blockNeedsPredication(SI->getParent()) && !Legal->isMaskRequired(SI)) @@ -1759,7 +1796,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); - unsigned InductionOperand = getGEPInductionOperand(DL, Gep); + unsigned InductionOperand = getGEPInductionOperand(Gep); // Create the new GEP with the new induction variable. GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); @@ -2080,9 +2117,11 @@ void InnerLoopVectorizer::createEmptyLoop() { ExitCount = SE->getAddExpr(BackedgeTakeCount, SE->getConstant(BackedgeTakeCount->getType(), 1)); + const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout(); + // Expand the trip count and place the new instructions in the preheader. // Notice that the pre-header does not change, only the loop body. - SCEVExpander Exp(*SE, "induction"); + SCEVExpander Exp(*SE, DL, "induction"); // We need to test whether the backedge-taken count is uint##_max. Adding one // to it will cause overflow and an incorrect loop trip count in the vector @@ -2218,6 +2257,7 @@ void InnerLoopVectorizer::createEmptyLoop() { std::tie(FirstCheckInst, StrideCheck) = addStrideCheck(LastBypassBlock->getTerminator()); if (StrideCheck) { + AddedSafetyChecks = true; // Create a new block containing the stride check. BasicBlock *CheckBlock = LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck"); @@ -2242,6 +2282,7 @@ void InnerLoopVectorizer::createEmptyLoop() { std::tie(FirstCheckInst, MemRuntimeCheck) = Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator()); if (MemRuntimeCheck) { + AddedSafetyChecks = true; // Create a new block containing the memory check. BasicBlock *CheckBlock = LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck"); @@ -2480,10 +2521,9 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { } } -Value *createMinMaxOp(IRBuilder<> &Builder, - LoopVectorizationLegality::MinMaxReductionKind RK, - Value *Left, - Value *Right) { +static Value *createMinMaxOp(IRBuilder<> &Builder, + LoopVectorizationLegality::MinMaxReductionKind RK, + Value *Left, Value *Right) { CmpInst::Predicate P = CmpInst::ICMP_NE; switch (RK) { default: @@ -2594,6 +2634,95 @@ static Value *addFastMathFlag(Value *V) { return V; } +/// Estimate the overhead of scalarizing a value. Insert and Extract are set if +/// the result needs to be inserted and/or extracted from vectors. 
+static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract, + const TargetTransformInfo &TTI) { + if (Ty->isVoidTy()) + return 0; + + assert(Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; +} + +// Estimate cost of a call instruction CI if it were vectorized with factor VF. +// Return the cost of the instruction, including scalarization overhead if it's +// needed. The flag NeedToScalarize shows if the call needs to be scalarized - +// i.e. either vector version isn't available, or is too expensive. +static unsigned getVectorCallCost(CallInst *CI, unsigned VF, + const TargetTransformInfo &TTI, + const TargetLibraryInfo *TLI, + bool &NeedToScalarize) { + Function *F = CI->getCalledFunction(); + StringRef FnName = CI->getCalledFunction()->getName(); + Type *ScalarRetTy = CI->getType(); + SmallVector<Type *, 4> Tys, ScalarTys; + for (auto &ArgOp : CI->arg_operands()) + ScalarTys.push_back(ArgOp->getType()); + + // Estimate cost of scalarized vector call. The source operands are assumed + // to be vectors, so we need to extract individual elements from there, + // execute VF scalar calls, and then gather the result into the vector return + // value. + unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys); + if (VF == 1) + return ScalarCallCost; + + // Compute corresponding vector type for return value and arguments. + Type *RetTy = ToVectorTy(ScalarRetTy, VF); + for (unsigned i = 0, ie = ScalarTys.size(); i != ie; ++i) + Tys.push_back(ToVectorTy(ScalarTys[i], VF)); + + // Compute costs of unpacking argument values for the scalar calls and + // packing the return values to a vector. + unsigned ScalarizationCost = + getScalarizationOverhead(RetTy, true, false, TTI); + for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) + ScalarizationCost += getScalarizationOverhead(Tys[i], false, true, TTI); + + unsigned Cost = ScalarCallCost * VF + ScalarizationCost; + + // If we can't emit a vector call for this function, then the currently found + // cost is the cost we need to return. + NeedToScalarize = true; + if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin()) + return Cost; + + // If the corresponding vector cost is cheaper, return its cost. + unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys); + if (VectorCallCost < Cost) { + NeedToScalarize = false; + return VectorCallCost; + } + return Cost; +} + +// Estimate cost of an intrinsic call instruction CI if it were vectorized with +// factor VF. Return the cost of the instruction, including scalarization +// overhead if it's needed. 
+static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF, + const TargetTransformInfo &TTI, + const TargetLibraryInfo *TLI) { + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); + assert(ID && "Expected intrinsic call!"); + + Type *RetTy = ToVectorTy(CI->getType(), VF); + SmallVector<Type *, 4> Tys; + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) + Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); + + return TTI.getIntrinsicInstrCost(ID, RetTy, Tys); +} + void InnerLoopVectorizer::vectorizeLoop() { //===------------------------------------------------===// // @@ -3181,37 +3310,71 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Module *M = BB->getParent()->getParent(); CallInst *CI = cast<CallInst>(it); + + StringRef FnName = CI->getCalledFunction()->getName(); + Function *F = CI->getCalledFunction(); + Type *RetTy = ToVectorTy(CI->getType(), VF); + SmallVector<Type *, 4> Tys; + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) + Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); - assert(ID && "Not an intrinsic call!"); - switch (ID) { - case Intrinsic::assume: - case Intrinsic::lifetime_end: - case Intrinsic::lifetime_start: + if (ID && + (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || + ID == Intrinsic::lifetime_start)) { scalarizeInstruction(it); break; - default: - bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1); - for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector<Value *, 4> Args; - for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { - if (HasScalarOpd && i == 1) { - Args.push_back(CI->getArgOperand(i)); - continue; - } - VectorParts &Arg = getVectorValue(CI->getArgOperand(i)); - Args.push_back(Arg[Part]); - } - Type *Tys[] = {CI->getType()}; - if (VF > 1) - Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF); + } + // The flag shows whether we use Intrinsic or a usual Call for vectorized + // version of the instruction. + // Is it beneficial to perform intrinsic call compared to lib call? + bool NeedToScalarize; + unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize); + bool UseVectorIntrinsic = + ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; + if (!UseVectorIntrinsic && NeedToScalarize) { + scalarizeInstruction(it); + break; + } - Function *F = Intrinsic::getDeclaration(M, ID, Tys); - Entry[Part] = Builder.CreateCall(F, Args); + for (unsigned Part = 0; Part < UF; ++Part) { + SmallVector<Value *, 4> Args; + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { + Value *Arg = CI->getArgOperand(i); + // Some intrinsics have a scalar argument - don't replace it with a + // vector. + if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) { + VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i)); + Arg = VectorArg[Part]; + } + Args.push_back(Arg); } - propagateMetadata(Entry, it); - break; + Function *VectorF; + if (UseVectorIntrinsic) { + // Use vector version of the intrinsic. + Type *TysForDecl[] = {CI->getType()}; + if (VF > 1) + TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); + VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); + } else { + // Use vector version of the library call. 
+ StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); + assert(!VFnName.empty() && "Vector function name is empty."); + VectorF = M->getFunction(VFnName); + if (!VectorF) { + // Generate a declaration + FunctionType *FTy = FunctionType::get(RetTy, Tys, false); + VectorF = + Function::Create(FTy, Function::ExternalLinkage, VFnName, M); + VectorF->copyAttributesFrom(F); + } + } + assert(VectorF && "Can't create vector function."); + Entry[Part] = Builder.CreateCall(VectorF, Args); } + + propagateMetadata(Entry, it); break; } @@ -3463,6 +3626,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Look for the attribute signaling the absence of NaNs. Function &F = *Header->getParent(); + const DataLayout &DL = F.getParent()->getDataLayout(); if (F.hasFnAttribute("no-nans-fp-math")) HasFunNoNaNAttr = F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; @@ -3518,9 +3682,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (IK_NoInduction != IK) { // Get the widest type. if (!WidestIndTy) - WidestIndTy = convertPointerToIntegerType(*DL, PhiTy); + WidestIndTy = convertPointerToIntegerType(DL, PhiTy); else - WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy); + WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); // Int inductions are special because we only allow one IV. if (IK == IK_IntInduction && StepValue->isOne()) { @@ -3591,13 +3755,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; }// end of PHI handling - // We still don't handle functions. However, we can ignore dbg intrinsic - // calls and we do handle certain intrinsic and libm functions. + // We handle calls that: + // * Are debug info intrinsics. + // * Have a mapping to an IR intrinsic. + // * Have a vector version available. CallInst *CI = dyn_cast<CallInst>(it); - if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) { + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) && + !(CI->getCalledFunction() && TLI && + TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { emitAnalysis(VectorizationReport(it) << "call instruction cannot be vectorized"); - DEBUG(dbgs() << "LV: Found a call site.\n"); + DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); return false; } @@ -3665,13 +3833,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { ///\brief Remove GEPs whose indices but the last one are loop invariant and /// return the induction operand of the gep pointer. -static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, - const DataLayout *DL, Loop *Lp) { +static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); if (!GEP) return Ptr; - unsigned InductionOperand = getGEPInductionOperand(DL, GEP); + unsigned InductionOperand = getGEPInductionOperand(GEP); // Check that all of the gep indices are uniform except for our induction // operand. @@ -3700,8 +3867,7 @@ static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { ///\brief Get the stride of a pointer access in a loop. /// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a /// pointer to the Value, or null otherwise. 
-static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, - const DataLayout *DL, Loop *Lp) { +static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); if (!PtrTy || PtrTy->isAggregateType()) return nullptr; @@ -3714,7 +3880,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, // The size of the pointer access. int64_t PtrAccessSize = 1; - Ptr = stripGetElementPtr(Ptr, SE, DL, Lp); + Ptr = stripGetElementPtr(Ptr, SE, Lp); const SCEV *V = SE->getSCEV(Ptr); if (Ptr != OrigPtr) @@ -3733,7 +3899,8 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, // Strip off the size of access multiplication if we are still analyzing the // pointer. if (OrigPtr == Ptr) { - DL->getTypeAllocSize(PtrTy->getElementType()); + const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout(); + DL.getTypeAllocSize(PtrTy->getElementType()); if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) { if (M->getOperand(0)->getSCEVType() != scConstant) return nullptr; @@ -3785,7 +3952,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) { else return; - Value *Stride = getStrideFromPointer(Ptr, SE, DL, TheLoop); + Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop); if (!Stride) return; @@ -3837,7 +4004,19 @@ bool LoopVectorizationLegality::canVectorizeMemory() { auto &OptionalReport = LAI->getReport(); if (OptionalReport) emitAnalysis(VectorizationReport(*OptionalReport)); - return LAI->canVectorizeMemory(); + if (!LAI->canVectorizeMemory()) + return false; + + if (LAI->getNumRuntimePointerChecks() > + VectorizerParams::RuntimeMemoryCheckThreshold) { + emitAnalysis(VectorizationReport() + << LAI->getNumRuntimePointerChecks() << " exceeds limit of " + << VectorizerParams::RuntimeMemoryCheckThreshold + << " dependent memory operations checked at runtime"); + DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); + return false; + } + return true; } static bool hasMultipleUsesOf(Instruction *I, @@ -4163,7 +4342,8 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi, if (!PointerElementType->isSized()) return IK_NoInduction; - int64_t Size = static_cast<int64_t>(DL->getTypeAllocSize(PointerElementType)); + const DataLayout &DL = Phi->getModule()->getDataLayout(); + int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) return IK_NoInduction; @@ -4375,6 +4555,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { unsigned LoopVectorizationCostModel::getWidestType() { unsigned MaxWidth = 8; + const DataLayout &DL = TheFunction->getParent()->getDataLayout(); // For each block. for (Loop::block_iterator bb = TheLoop->block_begin(), @@ -4409,7 +4590,7 @@ unsigned LoopVectorizationCostModel::getWidestType() { continue; MaxWidth = std::max(MaxWidth, - (unsigned)DL->getTypeSizeInBits(T->getScalarType())); + (unsigned)DL.getTypeSizeInBits(T->getScalarType())); } } @@ -4561,6 +4742,14 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, return SmallUF; } + // Unroll if this is a large loop (small loops are already dealt with by this + // point) that could benefit from interleaved unrolling. 
+ bool HasReductions = (Legal->getReductionVars()->size() > 0); + if (TTI.enableAggressiveInterleaving(HasReductions)) { + DEBUG(dbgs() << "LV: Unrolling to expose ILP.\n"); + return UF; + } + DEBUG(dbgs() << "LV: Not Unrolling.\n"); return 1; } @@ -4898,8 +5087,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // Scalarized loads/stores. int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); bool Reverse = ConsecutiveStride < 0; - unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy); - unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF; + const DataLayout &DL = I->getModule()->getDataLayout(); + unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ValTy); + unsigned VectorElementSize = DL.getTypeStoreSize(VectorTy) / VF; if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) { bool IsComplexComputation = isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop); @@ -4960,14 +5150,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } case Instruction::Call: { + bool NeedToScalarize; CallInst *CI = cast<CallInst>(I); - Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); - assert(ID && "Not an intrinsic call!"); - Type *RetTy = ToVectorTy(CI->getType(), VF); - SmallVector<Type*, 4> Tys; - for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) - Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); - return TTI.getIntrinsicInstrCost(ID, RetTy, Tys); + unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize); + if (getIntrinsicIDForCall(CI, TLI)) + return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI)); + return CallCost; } default: { // We are scalarizing the instruction. Return the cost of the scalar @@ -4994,12 +5182,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { }// end of switch. 
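Summarizing the call-cost comparison this patch introduces (getVectorCallCost and getVectorIntrinsicCost, used in the Call case above): the scalarized cost is VF scalar calls plus the cost of extracting each argument lane and inserting each result lane, and that total is compared against one call to a vector library routine (and, for recognized intrinsics, against the vector intrinsic). A toy calculation with invented unit costs, not real TTI numbers:

#include <algorithm>
#include <cassert>

struct ToyCosts {
  unsigned scalarCall;      // one scalar call
  unsigned vectorCall;      // one call to a vector library version
  unsigned insertOrExtract; // moving one lane into or out of a vector
};

// Scalarization: extract every lane of every argument, make VF scalar calls,
// insert every lane of the result.
static unsigned scalarizedCallCost(const ToyCosts &c, unsigned vf,
                                   unsigned numArgs) {
  unsigned perLaneMoves = numArgs /*argument extracts*/ + 1 /*result insert*/;
  return vf * c.scalarCall + vf * perLaneMoves * c.insertOrExtract;
}

int main() {
  ToyCosts c{/*scalarCall=*/10, /*vectorCall=*/18, /*insertOrExtract=*/1};
  unsigned vf = 4, numArgs = 1;
  unsigned scalarized = scalarizedCallCost(c, vf, numArgs); // 4*10 + 4*2*1 = 48
  bool needToScalarize = c.vectorCall >= scalarized; // vector version not cheaper
  unsigned cost = std::min(scalarized, c.vectorCall);
  assert(!needToScalarize && cost == 18); // the vector library call wins here
  return 0;
}

In the patch, NeedToScalarize also covers the case where no vector version of the function exists at all (no TLI mapping, or the call is marked nobuiltin), in which case the scalarized cost is returned unconditionally.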
 }
 
-Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
-  if (Scalar->isVoidTy() || VF == 1)
-    return Scalar;
-  return VectorType::get(Scalar, VF);
-}
-
 char LoopVectorize::ID = 0;
 static const char lv_name[] = "Loop Vectorization";
 INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index baf9741..8fc4cc1 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17,9 +17,9 @@
 //===----------------------------------------------------------------------===//
 #include "llvm/Transforms/Vectorize.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -342,11 +342,11 @@ public:
   typedef SmallPtrSet<Value *, 16> ValueSet;
   typedef SmallVector<StoreInst *, 8> StoreList;
 
-  BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
-          TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
-          LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC)
+  BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
+          TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
+          DominatorTree *Dt, AssumptionCache *AC)
       : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
-        SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+        SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
         Builder(Se->getContext()) {
     CodeMetrics::collectEphemeralValues(F, AC, EphValues);
   }
@@ -383,7 +383,7 @@ public:
   }
 
   /// \returns true if the memory operations A and B are consecutive.
-  bool isConsecutiveAccess(Value *A, Value *B);
+  bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
 
   /// \brief Perform LICM and CSE on the newly generated gather sequences.
   void optimizeGatherSequence();
@@ -877,7 +877,6 @@ private:
   // Analysis and block reference.
   Function *F;
   ScalarEvolution *SE;
-  const DataLayout *DL;
   TargetTransformInfo *TTI;
   TargetLibraryInfo *TLI;
   AliasAnalysis *AA;
@@ -1130,8 +1129,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
           DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
           return;
         }
-        if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
-          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0])) {
+        const DataLayout &DL = F->getParent()->getDataLayout();
+        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
+          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
            ++NumLoadsWantToChangeOrder;
          }
          BS.cancelScheduling(VL);
@@ -1300,9 +1300,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
       return;
     }
     case Instruction::Store: {
+      const DataLayout &DL = F->getParent()->getDataLayout();
       // Check if the stores are consecutive or of we need to swizzle them.
       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
-        if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
           BS.cancelScheduling(VL);
           newTreeEntry(VL, false);
           DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
@@ -1789,7 +1790,7 @@ unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
   return -1;
 }
 
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
   Value *PtrA = getPointerOperand(A);
   Value *PtrB = getPointerOperand(B);
   unsigned ASA = getAddressSpaceOperand(A);
@@ -1803,13 +1804,13 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
   if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
     return false;
 
-  unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
+  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
   Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
-  APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
+  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
 
   APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
-  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA);
-  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB);
+  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
 
   APInt OffsetDelta = OffsetB - OffsetA;
 
@@ -1842,6 +1843,7 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
 void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
                                         SmallVectorImpl<Value *> &Left,
                                         SmallVectorImpl<Value *> &Right) {
+  const DataLayout &DL = F->getParent()->getDataLayout();
 
   // Push left and right operands of binary operation into Left and Right
   for (unsigned i = 0, e = VL.size(); i < e; ++i) {
@@ -1856,10 +1858,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
       if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
         Instruction *VL1 = cast<Instruction>(VL[j]);
         Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (isConsecutiveAccess(L, L1) && VL1->isCommutative()) {
+        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
           std::swap(Left[j], Right[j]);
           continue;
-        } else if (isConsecutiveAccess(L, L1) && VL2->isCommutative()) {
+        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
           std::swap(Left[j + 1], Right[j + 1]);
           continue;
         }
@@ -1870,10 +1872,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
       if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
         Instruction *VL1 = cast<Instruction>(VL[j]);
         Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (isConsecutiveAccess(L, L1) && VL1->isCommutative()) {
+        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
           std::swap(Left[j], Right[j]);
           continue;
-        } else if (isConsecutiveAccess(L, L1) && VL2->isCommutative()) {
+        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
           std::swap(Left[j + 1], Right[j + 1]);
           continue;
         }
@@ -1983,6 +1985,8 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
     Right = OrigRight;
   }
 
+  const DataLayout &DL = F->getParent()->getDataLayout();
+
   // Finally check if we can get longer vectorizable chain by reordering
   // without breaking the good operand order detected above.
   // E.g. If we have something like-
@@ -2001,7 +2005,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
   for (unsigned j = 0; j < VL.size() - 1; ++j) {
     if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
       if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
-        if (isConsecutiveAccess(L, L1)) {
+        if (isConsecutiveAccess(L, L1, DL)) {
           std::swap(Left[j + 1], Right[j + 1]);
           continue;
         }
@@ -2009,7 +2013,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
     }
     if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
       if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
-        if (isConsecutiveAccess(L, L1)) {
+        if (isConsecutiveAccess(L, L1, DL)) {
           std::swap(Left[j + 1], Right[j + 1]);
           continue;
         }
@@ -2105,6 +2109,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
     return Gather(E->Scalars, VecTy);
   }
 
+  const DataLayout &DL = F->getParent()->getDataLayout();
   unsigned Opcode = getSameOpcode(E->Scalars);
 
   switch (Opcode) {
@@ -2301,8 +2306,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
 
       unsigned Alignment = LI->getAlignment();
       LI = Builder.CreateLoad(VecPtr);
-      if (!Alignment)
-        Alignment = DL->getABITypeAlignment(ScalarLoadTy);
+      if (!Alignment) {
+        Alignment = DL.getABITypeAlignment(ScalarLoadTy);
+      }
       LI->setAlignment(Alignment);
       E->VectorizedValue = LI;
       ++NumVectorInstructions;
@@ -2331,8 +2337,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         ExternalUses.push_back(
             ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
 
-      if (!Alignment)
-        Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
+      if (!Alignment) {
+        Alignment = DL.getABITypeAlignment(SI->getValueOperand()->getType());
+      }
       S->setAlignment(Alignment);
       E->VectorizedValue = S;
       ++NumVectorInstructions;
@@ -2358,7 +2365,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         OpVecs.push_back(OpVec);
       }
 
-      Value *V = Builder.CreateGEP(Op0, OpVecs);
+      Value *V = Builder.CreateGEP(
+          cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
       E->VectorizedValue = V;
       ++NumVectorInstructions;
 
@@ -3051,7 +3059,6 @@ struct SLPVectorizer : public FunctionPass {
   }
 
   ScalarEvolution *SE;
-  const DataLayout *DL;
   TargetTransformInfo *TTI;
   TargetLibraryInfo *TLI;
   AliasAnalysis *AA;
@@ -3064,8 +3071,6 @@ struct SLPVectorizer : public FunctionPass {
       return false;
 
     SE = &getAnalysis<ScalarEvolution>();
-    DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-    DL = DLP ? &DLP->getDataLayout() : nullptr;
     TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
     TLI = TLIP ? &TLIP->getTLI() : nullptr;
@@ -3082,11 +3087,6 @@ struct SLPVectorizer : public FunctionPass {
     if (!TTI->getNumberOfRegisters(true))
       return false;
 
-    // Must have DataLayout. We can't require it because some tests run w/o
-    // triple.
-    if (!DL)
-      return false;
-
     // Don't vectorize when the attribute NoImplicitFloat is used.
     if (F.hasFnAttribute(Attribute::NoImplicitFloat))
       return false;
@@ -3095,7 +3095,7 @@ struct SLPVectorizer : public FunctionPass {
 
     // Use the bottom up slp vectorizer to construct chains that start with
     // store instructions.
-    BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AC);
+    BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC);
 
     // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
    // delete instructions.
@@ -3178,15 +3178,11 @@ private:
 /// the WeakVH array.
 /// Vectorization of part of the VL array may cause later values in the VL array
 /// to become invalid. We track when this has happened in the WeakVH array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> &VL,
-                               SmallVectorImpl<WeakVH> &VH,
-                               unsigned SliceBegin,
-                               unsigned SliceSize) {
-  for (unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i)
-    if (VH[i] != VL[i])
-      return true;
-
-  return false;
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
+                               unsigned SliceBegin, unsigned SliceSize) {
+  VL = VL.slice(SliceBegin, SliceSize);
+  VH = VH.slice(SliceBegin, SliceSize);
+  return !std::equal(VL.begin(), VL.end(), VH.begin());
 }
 
 bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
@@ -3195,7 +3191,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
   DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
         << "\n");
   Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
-  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+  auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout();
+  unsigned Sz = DL.getTypeSizeInBits(StoreTy);
   unsigned VF = MinVecRegSize / Sz;
 
   if (!isPowerOf2_32(Sz) || VF < 2)
@@ -3238,8 +3235,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
 
 bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
                                     int costThreshold, BoUpSLP &R) {
-  SetVector<Value *> Heads, Tails;
-  SmallDenseMap<Value *, Value *> ConsecutiveChain;
+  SetVector<StoreInst *> Heads, Tails;
+  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
 
   // We may run into multiple chains that merge into a single chain. We mark the
   // stores that we vectorized so that we don't visit the same store twice.
@@ -3252,8 +3249,8 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
     for (unsigned j = 0; j < e; ++j) {
       if (i == j)
         continue;
-
-      if (R.isConsecutiveAccess(Stores[i], Stores[j])) {
+      const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
+      if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
         Tails.insert(Stores[j]);
         Heads.insert(Stores[i]);
         ConsecutiveChain[Stores[i]] = Stores[j];
@@ -3262,7 +3259,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
   }
 
   // For stores that start but don't end a link in the chain:
-  for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
+  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
        it != e; ++it) {
     if (Tails.count(*it))
       continue;
@@ -3270,7 +3267,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
     // We found a store instr that starts a chain. Now follow the chain and try
     // to vectorize it.
     BoUpSLP::ValueList Operands;
-    Value *I = *it;
+    StoreInst *I = *it;
     // Collect the chain into a list.
     while (Tails.count(I) || Heads.count(I)) {
       if (VectorizedStores.count(I))
@@ -3295,6 +3292,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
 unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
   unsigned count = 0;
   StoreRefs.clear();
+  const DataLayout &DL = BB->getModule()->getDataLayout();
   for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
     StoreInst *SI = dyn_cast<StoreInst>(it);
     if (!SI)
@@ -3340,9 +3338,10 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
     return false;
 
   unsigned Opcode0 = I0->getOpcode();
+  const DataLayout &DL = I0->getModule()->getDataLayout();
 
   Type *Ty0 = I0->getType();
-  unsigned Sz = DL->getTypeSizeInBits(Ty0);
+  unsigned Sz = DL.getTypeSizeInBits(Ty0);
   unsigned VF = MinVecRegSize / Sz;
 
   for (int i = 0, e = VL.size(); i < e; ++i) {
@@ -3544,8 +3543,7 @@ public:
       ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
 
   /// \brief Try to find a reduction tree.
-  bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
-                                 const DataLayout *DL) {
+  bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
     assert((!Phi ||
             std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
            "Thi phi needs to use the binary operator");
@@ -3570,9 +3568,10 @@ public:
     if (!isValidElementType(Ty))
       return false;
 
+    const DataLayout &DL = B->getModule()->getDataLayout();
     ReductionOpcode = B->getOpcode();
     ReducedValueOpcode = 0;
-    ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
+    ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
     ReductionRoot = B;
     ReductionPHI = Phi;
 
@@ -3882,8 +3881,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
 
         // Try to match and vectorize a horizontal reduction.
         HorizontalReduction HorRdx;
-        if (ShouldVectorizeHor &&
-            HorRdx.matchAssociativeReduction(P, BI, DL) &&
+        if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
            HorRdx.tryToReduce(R, TTI)) {
          Changed = true;
          it = BB->begin();
@@ -3913,7 +3911,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       if (BinaryOperator *BinOp =
               dyn_cast<BinaryOperator>(SI->getValueOperand())) {
         HorizontalReduction HorRdx;
-        if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
+        if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
              HorRdx.tryToReduce(R, TTI)) ||
            tryToVectorize(BinOp, R))) {
          Changed = true;