author     Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
committer  Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
commit     dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree       dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Transforms
parent     220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Transforms')
128 files changed, 5955 insertions, 3110 deletions
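The most common mechanical change in this patch: nearly every pass in lib/Transforms moves its #define DEBUG_TYPE from above the #include block to just after "using namespace llvm;". The rationale is inferred here from the pattern rather than stated in the commit message: macros such as STATISTIC and DEBUG expand to code that embeds DEBUG_TYPE, and headers can contain such expansions of their own, so defining the macro only after all includes keeps it scoped to the current translation unit. A minimal sketch of the post-rebase layout, modeled on the Hello.cpp hunk below:

    #include "llvm/ADT/Statistic.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Defined after every #include: STATISTIC (and DEBUG) expand to code
    // that embeds DEBUG_TYPE, so defining it earlier would leak this
    // pass's name into any header that uses those macros.
    #define DEBUG_TYPE "hello"

    STATISTIC(HelloCounter, "Counts number of functions greeted");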
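Less mechanical is the refactoring of llvm.global_ctors handling: GlobalDCE.cpp and GlobalOpt.cpp both delete their private ctor-list code (FindGlobalCtors, ParseGlobalCtors, InstallGlobalCtors, OptimizeGlobalCtorsList) and instead call a shared optimizeGlobalCtorsList helper from the new llvm/Transforms/Utils/CtorUtils.h header, passing a predicate that decides which constructors may be removed. A sketch of the call pattern; the helper's signature is inferred from the two call sites visible in the hunks below, and the wrapper function is illustrative only:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Utils/CtorUtils.h"
    using namespace llvm;

    // Predicate: return true for a constructor the helper may drop from
    // llvm.global_ctors. GlobalDCE passes an "is the body just ret void"
    // check like this one; GlobalOpt passes a lambda that tries to
    // evaluate the constructor at compile time.
    static bool isRemovableCtor(Function *F) {
      BasicBlock &Entry = F->getEntryBlock();
      return Entry.size() == 1 && isa<ReturnInst>(Entry.front()) &&
             !cast<ReturnInst>(Entry.front()).getReturnValue();
    }

    static bool runOnModuleSketch(Module &M) {
      // Returns true if it changed the ctor list; the passes fold this
      // into their overall Changed flag.
      return optimizeGlobalCtorsList(M, isRemovableCtor);
    }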
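Also new rather than mechanical: Inliner.cpp starts emitting optimization diagnostics through llvm/IR/DiagnosticInfo.h, reporting each inline decision against the call site's debug location so it can surface under flags such as -pass-remarks=inline. The emitOptimizationRemark* entry points are the ones used in the Inliner.cpp hunk below; the wrapper function and the DebugLoc.h include path are assumptions of this sketch:

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/DiagnosticInfo.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/DebugLoc.h"
    using namespace llvm;

    static void reportInlineDecision(Function &Caller, Function &Callee,
                                     const DebugLoc &DLoc, bool Inlined) {
      LLVMContext &Ctx = Caller.getContext();
      if (Inlined)
        // Printed under -pass-remarks=inline.
        emitOptimizationRemark(Ctx, "inline", Caller, DLoc,
                               Twine(Callee.getName()) + " inlined into " +
                                   Caller.getName());
      else
        // Printed under -pass-remarks-missed=inline.
        emitOptimizationRemarkMissed(Ctx, "inline", Caller, DLoc,
                                     Twine(Callee.getName()) +
                                         " will not be inlined into " +
                                         Caller.getName());
    }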
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index c514c49..29b9bb8 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hello" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "hello" + STATISTIC(HelloCounter, "Counts number of functions greeted"); namespace { diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 48d3fba..377fa15 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -29,7 +29,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "argpromotion" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" @@ -49,6 +48,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "argpromotion" + STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted"); STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted"); @@ -123,14 +124,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Function *F = CGN->getFunction(); // Make sure that it is local to this module. - if (!F || !F->hasLocalLinkage()) return 0; + if (!F || !F->hasLocalLinkage()) return nullptr; // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument*, 16> PointerArgs; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) if (I->getType()->isPointerTy()) PointerArgs.push_back(I); - if (PointerArgs.empty()) return 0; + if (PointerArgs.empty()) return nullptr; // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. Also see if the function @@ -139,7 +140,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { for (Use &U : F->uses()) { CallSite CS(U.getUser()); // Must be a direct call. - if (CS.getInstruction() == 0 || !CS.isCallee(&U)) return 0; + if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; if (CS.getInstruction()->getParent()->getParent() == F) isSelfRecursive = true; @@ -207,7 +208,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // No promotable pointer arguments. if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) - return 0; + return nullptr; return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); } @@ -660,7 +661,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, @@ -788,10 +789,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Just add all the struct element types. 
Type *AgTy = cast<PointerType>(I->getType())->getElementType(); - Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); + Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 5c3acea..23be081 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" @@ -31,6 +30,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "constmerge" + STATISTIC(NumMerged, "Number of global constants merged"); namespace { @@ -66,7 +67,7 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } /// Find values that are marked as llvm.used. static void FindUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { - if (LLVMUsed == 0) return; + if (!LLVMUsed) return; ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { @@ -103,7 +104,7 @@ unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { bool ConstantMerge::runOnModule(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; @@ -161,7 +162,7 @@ bool ConstantMerge::runOnModule(Module &M) { // If this is the first constant we find or if the old one is local, // replace with the current one. If the current is externally visible // it cannot be replace, but can be the canonical constant we merge with. - if (Slot == 0 || IsBetterCanonical(*GV, *Slot)) + if (!Slot || IsBetterCanonical(*GV, *Slot)) Slot = GV; } diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 1aba3df..284b896 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "deadargelim" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -38,8 +37,11 @@ #include "llvm/Support/raw_ostream.h" #include <map> #include <set> +#include <tuple> using namespace llvm; +#define DEBUG_TYPE "deadargelim" + STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); STATISTIC(NumArgumentsReplacedWithUndef, @@ -764,7 +766,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Find out the new return value. 
Type *RetTy = FTy->getReturnType(); - Type *NRetTy = NULL; + Type *NRetTy = nullptr; unsigned RetCount = NumRetVals(F); // -1 means unused, other numbers are the new index @@ -1050,7 +1052,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Value *RetVal; if (NFTy->getReturnType()->isVoidTy()) { - RetVal = 0; + RetVal = nullptr; } else { assert (RetTy->isStructTy()); // The original return value was a struct, insert diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 4211f12..40ec9fa 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -27,11 +27,10 @@ using namespace llvm; /// the split module remain valid. static void makeVisible(GlobalValue &GV, bool Delete) { bool Local = GV.hasLocalLinkage(); - if (Local) - GV.setVisibility(GlobalValue::HiddenVisibility); - if (Local || Delete) { GV.setLinkage(GlobalValue::ExternalLinkage); + if (Local) + GV.setVisibility(GlobalValue::HiddenVisibility); return; } @@ -95,7 +94,7 @@ namespace { makeVisible(*I, Delete); if (Delete) - I->setInitializer(0); + I->setInitializer(nullptr); } // Visit the Functions. @@ -134,7 +133,7 @@ namespace { } else { Declaration = new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage, - 0, CurI->getName()); + nullptr, CurI->getName()); } CurI->replaceAllUsesWith(Declaration); diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index b716718..fed8839 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "functionattrs" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" @@ -35,6 +34,8 @@ #include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; +#define DEBUG_TYPE "functionattrs" + STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); @@ -46,7 +47,7 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions"); namespace { struct FunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID), AA(0) { + FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) { initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); } @@ -160,7 +161,7 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) + if (!F) // External node - may write memory. Just give up. return false; @@ -319,7 +320,7 @@ namespace { ArgumentGraphNode SyntheticRoot; public: - ArgumentGraph() { SyntheticRoot.Definition = 0; } + ArgumentGraph() { SyntheticRoot.Definition = nullptr; } typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator; @@ -521,7 +522,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) + if (!F) // External node - only a problem for arguments that we pass to it. continue; @@ -600,7 +601,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // captures. 
for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) { - std::vector<ArgumentGraphNode*> &ArgumentSCC = *I; + const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; if (ArgumentSCC.size() == 1) { if (!ArgumentSCC[0]->Definition) continue; // synthetic root node @@ -616,8 +617,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { } bool SCCCaptured = false; - for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), - E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { ArgumentGraphNode *Node = *I; if (Node->Uses.empty()) { if (!Node->Definition->hasNoCaptureAttr()) @@ -629,13 +630,12 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Argument*, 8> ArgumentSCCNodes; // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for // quickly looking up whether a given Argument is in this ArgumentSCC. - for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), - E = ArgumentSCC.end(); I != E; ++I) { + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { ArgumentSCCNodes.insert((*I)->Definition); } - for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), - E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { ArgumentGraphNode *N = *I; for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(), UE = N->Uses.end(); UI != UE; ++UI) { @@ -775,7 +775,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) + if (!F) // External node - skip it; return false; @@ -1668,7 +1668,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F != 0 && F->isDeclaration()) + if (F && F->isDeclaration()) MadeChange |= inferPrototypeAttributes(*F); } diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 0c081f1..9decddc 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -15,15 +15,18 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "globaldce" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "globaldce" + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); @@ -53,6 +56,15 @@ namespace { }; } +/// Returns true if F contains only a single "ret" instruction. 
+static bool isEmptyFunction(Function *F) { + BasicBlock &Entry = F->getEntryBlock(); + if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front())) + return false; + ReturnInst &RI = cast<ReturnInst>(Entry.front()); + return RI.getReturnValue() == NULL; +} + char GlobalDCE::ID = 0; INITIALIZE_PASS(GlobalDCE, "globaldce", "Dead Global Elimination", false, false) @@ -61,7 +73,10 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } bool GlobalDCE::runOnModule(Module &M) { bool Changed = false; - + + // Remove empty functions from the global ctors list. + Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + // Loop over the module, adding globals which are obviously necessary. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); @@ -99,7 +114,7 @@ bool GlobalDCE::runOnModule(Module &M) { I != E; ++I) if (!AliveGlobals.count(I)) { DeadGlobalVars.push_back(I); // Keep track of dead globals - I->setInitializer(0); + I->setInitializer(nullptr); } // The second pass drops the bodies of functions which are dead... @@ -117,7 +132,7 @@ bool GlobalDCE::runOnModule(Module &M) { ++I) if (!AliveGlobals.count(I)) { DeadAliases.push_back(I); - I->setAliasee(0); + I->setAliasee(nullptr); } if (!DeadFunctions.empty()) { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 1a510cf..ae80c43 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "globalopt" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -39,11 +38,15 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> +#include <deque> using namespace llvm; +#define DEBUG_TYPE "globalopt" + STATISTIC(NumMarked , "Number of globals marked constant"); STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); @@ -74,11 +77,9 @@ namespace { bool runOnModule(Module &M) override; private: - GlobalVariable *FindGlobalCtors(Module &M); bool OptimizeFunctions(Module &M); bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); - bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, const GlobalStatus &GS); @@ -294,7 +295,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Changed = true; } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { if (CE->getOpcode() == Instruction::GetElementPtr) { - Constant *SubInit = 0; + Constant *SubInit = nullptr; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI); @@ -302,7 +303,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, CE->getType()->isPointerTy()) || CE->getOpcode() == Instruction::AddrSpaceCast) { // Pointer cast, delete any stores and memsets to the global. 
- Changed |= CleanupConstantGlobalUsers(CE, 0, DL, TLI); + Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI); } if (CE->use_empty()) { @@ -313,7 +314,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // Do not transform "gepinst (gep constexpr (GV))" here, because forming // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold // and will invalidate our notion of what Init is. - Constant *SubInit = 0; + Constant *SubInit = nullptr; if (!isa<ConstantExpr>(GEP->getOperand(0))) { ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI)); @@ -370,7 +371,7 @@ static bool isSafeSROAElementUse(Value *V) { // Otherwise, it must be a GEP. GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); - if (GEPI == 0) return false; + if (!GEPI) return false; if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) || !cast<Constant>(GEPI->getOperand(1))->isNullValue()) @@ -470,7 +471,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) - return 0; + return nullptr; assert(GV->hasLocalLinkage() && !GV->isConstant()); Constant *Init = GV->getInitializer(); @@ -514,7 +515,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { NumElements = cast<VectorType>(STy)->getNumElements(); if (NumElements > 16 && GV->hasNUsesOrMore(16)) - return 0; // It's not worth it. + return nullptr; // It's not worth it. NewGlobals.reserve(NumElements); uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType()); @@ -541,7 +542,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { } if (NewGlobals.empty()) - return 0; + return nullptr; DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); @@ -603,7 +604,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (FirstGlobal == i) ++FirstGlobal; } - return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; + return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; } /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified @@ -785,7 +786,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, Changed |= CleanupPointerRootUsers(GV, TLI); } else { Changed = true; - CleanupConstantGlobalUsers(GV, 0, DL, TLI); + CleanupConstantGlobalUsers(GV, nullptr, DL, TLI); } if (GV->use_empty()) { DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); @@ -847,7 +848,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update // other users to use the global as well. - BitCastInst *TheBC = 0; + BitCastInst *TheBC = nullptr; while (!CI->use_empty()) { Instruction *User = cast<Instruction>(CI->user_back()); if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { @@ -858,7 +859,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, BCI->setOperand(0, NewGV); } } else { - if (TheBC == 0) + if (!TheBC) TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); User->replaceUsesOfWith(CI, TheBC); } @@ -1169,10 +1170,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, } else if (PHINode *PN = dyn_cast<PHINode>(V)) { // PN's type is pointer to struct. 
Make a new PHI of pointer to struct // field. - StructType *ST = cast<StructType>(PN->getType()->getPointerElementType()); + PointerType *PTy = cast<PointerType>(PN->getType()); + StructType *ST = cast<StructType>(PTy->getElementType()); + + unsigned AS = PTy->getAddressSpace(); PHINode *NewPN = - PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS), PN->getNumIncomingValues(), PN->getName()+".f"+Twine(FieldNo), PN); Result = NewPN; @@ -1284,9 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, std::vector<Value*> FieldGlobals; std::vector<Value*> FieldMallocs; + unsigned AS = GV->getType()->getPointerAddressSpace(); for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ Type *FieldTy = STy->getElementType(FieldNo); - PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + PointerType *PFieldTy = PointerType::get(FieldTy, AS); GlobalVariable *NGV = new GlobalVariable(*GV->getParent(), @@ -1302,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Type *IntPtrTy = DL->getIntPtrType(CI->getType()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), - NElems, 0, + NElems, nullptr, CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); @@ -1535,7 +1540,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements, - 0, CI->getName()); + nullptr, CI->getName()); Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); @@ -1750,7 +1755,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ->getEntryBlock().begin()); Type *ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment - AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); + AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr, + GV->getName(), &FirstI); if (!isa<UndefValue>(GV->getInitializer())) new StoreInst(GV->getInitializer(), Alloca, &FirstI); @@ -1957,116 +1963,6 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { return Changed; } -/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all -/// initializers have an init priority of 65535. -GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { - GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); - if (GV == 0) return 0; - - // Verify that the initializer is simple enough for us to handle. We are - // only allowed to optimize the initializer if it is unique. - if (!GV->hasUniqueInitializer()) return 0; - - if (isa<ConstantAggregateZero>(GV->getInitializer())) - return GV; - ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); - - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { - if (isa<ConstantAggregateZero>(*i)) - continue; - ConstantStruct *CS = cast<ConstantStruct>(*i); - if (isa<ConstantPointerNull>(CS->getOperand(1))) - continue; - - // Must have a function or null ptr. - if (!isa<Function>(CS->getOperand(1))) - return 0; - - // Init priority must be standard. 
- ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); - if (CI->getZExtValue() != 65535) - return 0; - } - - return GV; -} - -/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, -/// return a list of the functions and null terminator as a vector. -static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { - if (GV->getInitializer()->isNullValue()) - return std::vector<Function*>(); - ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); - std::vector<Function*> Result; - Result.reserve(CA->getNumOperands()); - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { - ConstantStruct *CS = cast<ConstantStruct>(*i); - Result.push_back(dyn_cast<Function>(CS->getOperand(1))); - } - return Result; -} - -/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the -/// specified array, returning the new global to use. -static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, - const std::vector<Function*> &Ctors) { - // If we made a change, reassemble the initializer list. - Constant *CSVals[2]; - CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535); - CSVals[1] = 0; - - StructType *StructTy = - cast<StructType>(GCL->getType()->getElementType()->getArrayElementType()); - - // Create the new init list. - std::vector<Constant*> CAList; - for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { - if (Ctors[i]) { - CSVals[1] = Ctors[i]; - } else { - Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), - false); - PointerType *PFTy = PointerType::getUnqual(FTy); - CSVals[1] = Constant::getNullValue(PFTy); - CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), - 0x7fffffff); - } - CAList.push_back(ConstantStruct::get(StructTy, CSVals)); - } - - // Create the array initializer. - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, - CAList.size()), CAList); - - // If we didn't change the number of elements, don't create a new GV. - if (CA->getType() == GCL->getInitializer()->getType()) { - GCL->setInitializer(CA); - return GCL; - } - - // Create the new global and insert it next to the existing list. - GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), - GCL->getLinkage(), CA, "", - GCL->getThreadLocalMode()); - GCL->getParent()->getGlobalList().insert(GCL, NGV); - NGV->takeName(GCL); - - // Nuke the old list, replacing any uses with the new one. - if (!GCL->use_empty()) { - Constant *V = NGV; - if (V->getType() != GCL->getType()) - V = ConstantExpr::getBitCast(V, GCL->getType()); - GCL->replaceAllUsesWith(V); - } - GCL->eraseFromParent(); - - if (Ctors.size()) - return NGV; - else - return 0; -} - - static inline bool isSimpleEnoughValueToCommit(Constant *C, SmallPtrSet<Constant*, 8> &SimpleConstants, @@ -2271,22 +2167,16 @@ class Evaluator { public: Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI) : DL(DL), TLI(TLI) { - ValueStack.push_back(new DenseMap<Value*, Constant*>); + ValueStack.emplace_back(); } ~Evaluator() { - DeleteContainerPointers(ValueStack); - while (!AllocaTmps.empty()) { - GlobalVariable *Tmp = AllocaTmps.back(); - AllocaTmps.pop_back(); - + for (auto &Tmp : AllocaTmps) // If there are still users of the alloca, the program is doing something // silly, e.g. storing the address of the alloca somewhere and using it // later. Since this is undefined, we'll just make it be null. 
if (!Tmp->use_empty()) Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); - delete Tmp; - } } /// EvaluateFunction - Evaluate a call to function F, returning true if @@ -2302,13 +2192,13 @@ public: Constant *getVal(Value *V) { if (Constant *CV = dyn_cast<Constant>(V)) return CV; - Constant *R = ValueStack.back()->lookup(V); + Constant *R = ValueStack.back().lookup(V); assert(R && "Reference to an uncomputed value!"); return R; } void setVal(Value *V, Constant *C) { - ValueStack.back()->operator[](V) = C; + ValueStack.back()[V] = C; } const DenseMap<Constant*, Constant*> &getMutatedMemory() const { @@ -2323,9 +2213,9 @@ private: Constant *ComputeLoadResult(Constant *P); /// ValueStack - As we compute SSA register values, we store their contents - /// here. The back of the vector contains the current function and the stack + /// here. The back of the deque contains the current function and the stack /// contains the values in the calling frames. - SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack; + std::deque<DenseMap<Value*, Constant*>> ValueStack; /// CallStack - This is used to detect recursion. In pathological situations /// we could hit exponential behavior, but at least there is nothing @@ -2340,7 +2230,7 @@ private: /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable /// to represent its body. This vector is needed so we can delete the /// temporary globals when we are done. - SmallVector<GlobalVariable*, 32> AllocaTmps; + SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps; /// Invariants - These global variables have been marked invariant by the /// static constructor. @@ -2369,7 +2259,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { if (GV->hasDefinitiveInitializer()) return GV->getInitializer(); - return 0; + return nullptr; } // Handle a constantexpr getelementptr. @@ -2381,7 +2271,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); } - return 0; // don't know how to evaluate. + return nullptr; // don't know how to evaluate. } /// EvaluateBlock - Evaluate all instructions in block BB, returning true if @@ -2391,7 +2281,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. while (1) { - Constant *InstResult = 0; + Constant *InstResult = nullptr; DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); @@ -2517,7 +2407,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, "folding: " << *Ptr << "\n"); } InstResult = ComputeLoadResult(Ptr); - if (InstResult == 0) { + if (!InstResult) { DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." "\n"); return false; // Could not evaluate load. @@ -2530,11 +2420,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; // Cannot handle array allocs. } Type *Ty = AI->getType()->getElementType(); - AllocaTmps.push_back(new GlobalVariable(Ty, false, - GlobalValue::InternalLinkage, - UndefValue::get(Ty), - AI->getName())); - InstResult = AllocaTmps.back(); + AllocaTmps.push_back( + make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, + UndefValue::get(Ty), AI->getName())); + InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. 
Result: " << *InstResult << "\n"); } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { CallSite CS(CurInst); @@ -2636,17 +2525,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; } - Constant *RetVal = 0; + Constant *RetVal = nullptr; // Execute the call, if successful, use the return value. - ValueStack.push_back(new DenseMap<Value*, Constant*>); + ValueStack.emplace_back(); if (!EvaluateFunction(Callee, RetVal, Formals)) { DEBUG(dbgs() << "Failed to evaluate function.\n"); return false; } - delete ValueStack.pop_back_val(); + ValueStack.pop_back(); InstResult = RetVal; - if (InstResult != NULL) { + if (InstResult) { DEBUG(dbgs() << "Successfully evaluated function. Result: " << InstResult << "\n\n"); } else { @@ -2678,7 +2567,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, else return false; // Cannot determine. } else if (isa<ReturnInst>(CurInst)) { - NextBB = 0; + NextBB = nullptr; } else { // invoke, unwind, resume, unreachable. DEBUG(dbgs() << "Can not handle terminator."); @@ -2743,13 +2632,13 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, BasicBlock::iterator CurInst = CurBB->begin(); while (1) { - BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings. + BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); if (!EvaluateBlock(CurInst, NextBB)) return false; - if (NextBB == 0) { + if (!NextBB) { // Successfully running until there's no next block means that we found // the return. Fill it the return value and pop the call stack. ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator()); @@ -2768,7 +2657,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, // Okay, we have never been in this block before. Check to see if there // are any PHI nodes. If so, evaluate them with information about where // we came from. - PHINode *PN = 0; + PHINode *PN = nullptr; for (CurInst = NextBB->begin(); (PN = dyn_cast<PHINode>(CurInst)); ++CurInst) setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB))); @@ -2789,6 +2678,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL, SmallVector<Constant*, 0>()); if (EvalSuccess) { + ++NumCtorsEvaluated; + // We succeeded at evaluation: commit the result. DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" << F->getName() << "' to " << Eval.getMutatedMemory().size() @@ -2806,46 +2697,6 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL, return EvalSuccess; } -/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. -/// Return true if anything changed. -bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { - std::vector<Function*> Ctors = ParseGlobalCtors(GCL); - bool MadeChange = false; - if (Ctors.empty()) return false; - - // Loop over global ctors, optimizing them when we can. - for (unsigned i = 0; i != Ctors.size(); ++i) { - Function *F = Ctors[i]; - // Found a null terminator in the middle of the list, prune off the rest of - // the list. - if (F == 0) { - if (i != Ctors.size()-1) { - Ctors.resize(i+1); - MadeChange = true; - } - break; - } - DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); - - // We cannot simplify external ctor functions. - if (F->empty()) continue; - - // If we can evaluate the ctor at compile time, do. 
- if (EvaluateStaticConstructor(F, DL, TLI)) { - Ctors.erase(Ctors.begin()+i); - MadeChange = true; - --i; - ++NumCtorsEvaluated; - continue; - } - } - - if (!MadeChange) return false; - - GCL = InstallGlobalCtors(GCL, Ctors); - return true; -} - static int compareNames(Constant *const *A, Constant *const *B) { return (*A)->getName().compare((*B)->getName()); } @@ -3010,7 +2861,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { if (!hasUsesToReplace(*J, Used, RenameTarget)) continue; - J->replaceAllUsesWith(Aliasee); + J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType())); ++NumAliasesResolved; Changed = true; @@ -3042,12 +2893,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::cxa_atexit)) - return 0; + return nullptr; Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit)); if (!Fn) - return 0; + return nullptr; FunctionType *FTy = Fn->getFunctionType(); @@ -3058,7 +2909,7 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { !FTy->getParamType(0)->isPointerTy() || !FTy->getParamType(1)->isPointerTy() || !FTy->getParamType(2)->isPointerTy()) - return 0; + return nullptr; return Fn; } @@ -3160,12 +3011,9 @@ bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); - // Try to find the llvm.globalctors list. - GlobalVariable *GlobalCtors = FindGlobalCtors(M); - bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -3174,8 +3022,9 @@ bool GlobalOpt::runOnModule(Module &M) { LocalChange |= OptimizeFunctions(M); // Optimize global_ctors list. - if (GlobalCtors) - LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); + LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { + return EvaluateStaticConstructor(F, DL, TLI); + }); // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M); diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index 8684796..af541d1 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ipconstprop" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -27,6 +26,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "ipconstprop" + STATISTIC(NumArgumentsProped, "Number of args turned into constants"); STATISTIC(NumReturnValProped, "Number of return values turned into constants"); @@ -112,7 +113,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { continue; Constant *C = dyn_cast<Constant>(*AI); - if (C && ArgumentConstants[i].first == 0) { + if (C && ArgumentConstants[i].first == nullptr) { ArgumentConstants[i].first = C; // First constant seen. } else if (C && ArgumentConstants[i].first == C) { // Still the constant value we think it is. 
@@ -139,7 +140,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { continue; Value *V = ArgumentConstants[i].first; - if (V == 0) V = UndefValue::get(AI->getType()); + if (!V) V = UndefValue::get(AI->getType()); AI->replaceAllUsesWith(V); ++NumArgumentsProped; MadeChange = true; @@ -209,7 +210,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { } // Different or no known return value? Don't propagate this return // value. - RetVals[i] = 0; + RetVals[i] = nullptr; // All values non-constant? Stop looking. if (++NumNonConstant == RetVals.size()) return false; @@ -235,7 +236,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { MadeChange = true; - if (STy == 0) { + if (!STy) { Value* New = RetVals[0]; if (Argument *A = dyn_cast<Argument>(New)) // Was an argument returned? Then find the corresponding argument in diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 6cf3040..624cb90 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/CallGraph.h" @@ -28,6 +27,8 @@ using namespace llvm; +#define DEBUG_TYPE "inline" + namespace { /// \brief Inliner pass which only handles "always inline" functions. @@ -36,12 +37,13 @@ class AlwaysInliner : public Inliner { public: // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) { + AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), + ICA(nullptr) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } AlwaysInliner(bool InsertLifetime) - : Inliner(ID, -2000000000, InsertLifetime), ICA(0) { + : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } @@ -93,8 +95,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { // that are viable for inlining. FIXME: We shouldn't even get here for // declarations. if (Callee && !Callee->isDeclaration() && - Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::AlwaysInline) && + CS.hasFnAttr(Attribute::AlwaysInline) && ICA->isInlineViable(*Callee)) return InlineCost::getAlways(); diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 7141064..d189756 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline" #include "llvm/Transforms/IPO.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "inline" + namespace { /// \brief Actual inliner pass implementation. 
@@ -37,12 +38,12 @@ class SimpleInliner : public Inliner { InlineCostAnalysis *ICA; public: - SimpleInliner() : Inliner(ID), ICA(0) { + SimpleInliner() : Inliner(ID), ICA(nullptr) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } SimpleInliner(int Threshold) - : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) { + : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index e97fb83..9087ab2 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -21,6 +20,7 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" @@ -32,6 +32,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "inline" + STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); @@ -183,7 +185,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // canonicalized to be an allocation *of* an array), or allocations whose // type is not itself an array (because we're afraid of pessimizing SRoA). ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); - if (ATy == 0 || AI->isArrayAllocation()) + if (!ATy || AI->isArrayAllocation()) continue; // Get the list of all available allocas for this array type. @@ -239,7 +241,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, AI->eraseFromParent(); MergedAwayAlloca = true; ++NumMergedAllocas; - IFI.StaticAllocas[AllocaNo] = 0; + IFI.StaticAllocas[AllocaNo] = nullptr; break; } @@ -288,12 +290,24 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { bool ColdCallee = Callee && !Callee->isDeclaration() && Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::Cold); - if (ColdCallee && ColdThreshold < thres) + // Command line argument for InlineLimit will override the default + // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, + // do not use the default cold threshold even if it is smaller. + if ((InlineLimit.getNumOccurrences() == 0 || + ColdThreshold.getNumOccurrences() > 0) && ColdCallee && + ColdThreshold < thres) thres = ColdThreshold; return thres; } +static void emitAnalysis(CallSite CS, const Twine &Msg) { + Function *Caller = CS.getCaller(); + LLVMContext &Ctx = Caller->getContext(); + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); +} + /// shouldInline - Return true if the inliner should attempt to inline /// at the given CallSite. 
bool Inliner::shouldInline(CallSite CS) { @@ -302,12 +316,16 @@ bool Inliner::shouldInline(CallSite CS) { if (IC.isAlways()) { DEBUG(dbgs() << " Inlining: cost=always" << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) + + " should always be inlined (cost=always)"); return true; } if (IC.isNever()) { DEBUG(dbgs() << " NOT Inlining: cost=never" << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " should never be inlined (cost=never)")); return false; } @@ -316,6 +334,10 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " too costly to inline (cost=") + + Twine(IC.getCost()) + ", threshold=" + + Twine(IC.getCostDelta() + IC.getCost()) + ")"); return false; } @@ -383,6 +405,11 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); + emitAnalysis( + CS, Twine("Not inlining. Cost of inlining " + + CS.getCalledFunction()->getName() + + " increases the cost of inlining " + + CS.getCaller()->getName() + " in other contexts")); return false; } } @@ -390,6 +417,10 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << '\n'); + emitAnalysis( + CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") + + CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) + + " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")"); return true; } @@ -410,7 +441,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); SmallPtrSet<Function*, 8> SCCFunctions; @@ -499,7 +530,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { ++NumCallsDeleted; } else { // We can only inline direct calls to non-declarations. - if (Callee == 0 || Callee->isDeclaration()) continue; + if (!Callee || Callee->isDeclaration()) continue; // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee @@ -511,18 +542,37 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; - + LLVMContext &CallerCtx = Caller->getContext(); + + // Get DebugLoc to report. CS will be invalid after Inliner. + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + // If the policy determines that we should inline this function, // try to do so. - if (!shouldInline(CS)) + if (!shouldInline(CS)) { + emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + + " will not be inlined into " + + Caller->getName())); continue; + } // Attempt to inline the function. 
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime, DL)) + InlineHistoryID, InsertLifetime, DL)) { + emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + + " will not be inlined into " + + Caller->getName())); continue; + } ++NumInlined; - + + // Report the inline decision. + emitOptimizationRemark( + CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + " inlined into " + Caller->getName())); + // If inlining this function gave us any new call sites, throw them // onto our worklist to process. They are useful inline candidates. if (!InlineInfo.InlinedCalls.empty()) { diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index c1fe01c..c970a1a 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "internalize" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -35,6 +34,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "internalize" + STATISTIC(NumAliases , "Number of aliases internalized"); STATISTIC(NumFunctions, "Number of functions internalized"); STATISTIC(NumGlobals , "Number of global vars internalized"); @@ -131,8 +132,8 @@ static bool shouldInternalize(const GlobalValue &GV, bool InternalizePass::runOnModule(Module &M) { CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>(); - CallGraph *CG = CGPass ? &CGPass->getCallGraph() : 0; - CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; + CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; + CallGraphNode *ExternalNode = CG ? 
CG->getExternalCallingNode() : nullptr; bool Changed = false; SmallPtrSet<GlobalValue *, 8> Used; @@ -158,6 +159,7 @@ bool InternalizePass::runOnModule(Module &M) { if (!shouldInternalize(*I, ExternalNames)) continue; + I->setVisibility(GlobalValue::DefaultVisibility); I->setLinkage(GlobalValue::InternalLinkage); if (ExternalNode) @@ -194,6 +196,7 @@ bool InternalizePass::runOnModule(Module &M) { if (!shouldInternalize(*I, ExternalNames)) continue; + I->setVisibility(GlobalValue::DefaultVisibility); I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumGlobals; @@ -206,6 +209,7 @@ bool InternalizePass::runOnModule(Module &M) { if (!shouldInternalize(*I, ExternalNames)) continue; + I->setVisibility(GlobalValue::DefaultVisibility); I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumAliases; diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 464aa99..20414aa 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-extract" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopPass.h" @@ -30,6 +29,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "loop-extract" + STATISTIC(NumExtracted, "Number of loops extracted"); namespace { @@ -136,7 +137,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (NumLoops == 0) return Changed; --NumLoops; CodeExtractor Extractor(DT, *L); - if (Extractor.extractCodeRegion() != 0) { + if (Extractor.extractCodeRegion() != nullptr) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. @@ -241,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) { if (!Split) continue; SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs); + SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs); } } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 8555d2c..c3a2b12 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -43,7 +43,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" @@ -67,6 +66,8 @@ #include <vector> using namespace llvm; +#define DEBUG_TYPE "mergefunc" + STATISTIC(NumFunctionsMerged, "Number of functions merged"); STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); @@ -120,12 +121,12 @@ public: void release() { assert(Func && "Attempted to release function twice, or release empty/tombstone!"); - Func = NULL; + Func = nullptr; } private: explicit ComparableFunction(unsigned Hash) - : Func(NULL), Hash(Hash), DL(NULL) {} + : Func(nullptr), Hash(Hash), DL(nullptr) {} AssertingVH<Function> Func; unsigned Hash; @@ -175,19 +176,181 @@ private: /// Test whether two basic blocks have equivalent behaviour. bool compare(const BasicBlock *BB1, const BasicBlock *BB2); + /// Constants comparison. + /// Its analog to lexicographical comparison between hypothetical numbers + /// of next format: + /// <bitcastability-trait><raw-bit-contents> + /// + /// 1. Bitcastability. 
+ /// Check whether L's type could be losslessly bitcasted to R's type. + /// On this stage method, in case when lossless bitcast is not possible + /// method returns -1 or 1, thus also defining which type is greater in + /// context of bitcastability. + /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight + /// to the contents comparison. + /// If types differ, remember types comparison result and check + /// whether we still can bitcast types. + /// Stage 1: Types that satisfies isFirstClassType conditions are always + /// greater then others. + /// Stage 2: Vector is greater then non-vector. + /// If both types are vectors, then vector with greater bitwidth is + /// greater. + /// If both types are vectors with the same bitwidth, then types + /// are bitcastable, and we can skip other stages, and go to contents + /// comparison. + /// Stage 3: Pointer types are greater than non-pointers. If both types are + /// pointers of the same address space - go to contents comparison. + /// Different address spaces: pointer with greater address space is + /// greater. + /// Stage 4: Types are neither vectors, nor pointers. And they differ. + /// We don't know how to bitcast them. So, we better don't do it, + /// and return types comparison result (so it determines the + /// relationship among constants we don't know how to bitcast). + /// + /// Just for clearance, let's see how the set of constants could look + /// on single dimension axis: + /// + /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors] + /// Where: NFCT - Not a FirstClassType + /// FCT - FirstClassTyp: + /// + /// 2. Compare raw contents. + /// It ignores types on this stage and only compares bits from L and R. + /// Returns 0, if L and R has equivalent contents. + /// -1 or 1 if values are different. + /// Pretty trivial: + /// 2.1. If contents are numbers, compare numbers. + /// Ints with greater bitwidth are greater. Ints with same bitwidths + /// compared by their contents. + /// 2.2. "And so on". Just to avoid discrepancies with comments + /// perhaps it would be better to read the implementation itself. + /// 3. And again about overall picture. Let's look back at how the ordered set + /// of constants will look like: + /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors] + /// + /// Now look, what could be inside [FCT, "others"], for example: + /// [FCT, "others"] = + /// [ + /// [double 0.1], [double 1.23], + /// [i32 1], [i32 2], + /// { double 1.0 }, ; StructTyID, NumElements = 1 + /// { i32 1 }, ; StructTyID, NumElements = 1 + /// { double 1, i32 1 }, ; StructTyID, NumElements = 2 + /// { i32 1, double 1 } ; StructTyID, NumElements = 2 + /// ] + /// + /// Let's explain the order. Float numbers will be less than integers, just + /// because of cmpType terms: FloatTyID < IntegerTyID. + /// Floats (with same fltSemantics) are sorted according to their value. + /// Then you can see integers, and they are, like a floats, + /// could be easy sorted among each others. + /// The structures. Structures are grouped at the tail, again because of their + /// TypeID: StructTyID > IntegerTyID > FloatTyID. + /// Structures with greater number of elements are greater. Structures with + /// greater elements going first are greater. + /// The same logic with vectors, arrays and other possible complex types. + /// + /// Bitcastable constants. 
+ /// Let's assume, that some constant, belongs to some group of + /// "so-called-equal" values with different types, and at the same time + /// belongs to another group of constants with equal types + /// and "really" equal values. + /// + /// Now, prove that this is impossible: + /// + /// If constant A with type TyA is bitcastable to B with type TyB, then: + /// 1. All constants with equal types to TyA, are bitcastable to B. Since + /// those should be vectors (if TyA is vector), pointers + /// (if TyA is pointer), or else (if TyA equal to TyB), those types should + /// be equal to TyB. + /// 2. All constants with non-equal, but bitcastable types to TyA, are + /// bitcastable to B. + /// Once again, just because we allow it to vectors and pointers only. + /// This statement could be expanded as below: + /// 2.1. All vectors with equal bitwidth to vector A, has equal bitwidth to + /// vector B, and thus bitcastable to B as well. + /// 2.2. All pointers of the same address space, no matter what they point to, + /// bitcastable. So if C is pointer, it could be bitcasted to A and to B. + /// So any constant equal or bitcastable to A is equal or bitcastable to B. + /// QED. + /// + /// In another words, for pointers and vectors, we ignore top-level type and + /// look at their particular properties (bit-width for vectors, and + /// address space for pointers). + /// If these properties are equal - compare their contents. + int cmpConstants(const Constant *L, const Constant *R); + /// Assign or look up previously assigned numbers for the two values, and /// return whether the numbers are equal. Numbers are assigned in the order /// visited. - bool enumerate(const Value *V1, const Value *V2); + /// Comparison order: + /// Stage 0: Value that is function itself is always greater then others. + /// If left and right values are references to their functions, then + /// they are equal. + /// Stage 1: Constants are greater than non-constants. + /// If both left and right are constants, then the result of + /// cmpConstants is used as cmpValues result. + /// Stage 2: InlineAsm instances are greater than others. If both left and + /// right are InlineAsm instances, InlineAsm* pointers casted to + /// integers and compared as numbers. + /// Stage 3: For all other cases we compare order we meet these values in + /// their functions. If right value was met first during scanning, + /// then left value is greater. + /// In another words, we compare serial numbers, for more details + /// see comments for sn_mapL and sn_mapR. + int cmpValues(const Value *L, const Value *R); + + bool enumerate(const Value *V1, const Value *V2) { + return cmpValues(V1, V2) == 0; + } /// Compare two Instructions for equivalence, similar to /// Instruction::isSameOperationAs but with modifications to the type /// comparison. + /// Stages are listed in "most significant stage first" order: + /// On each stage below, we do comparison between some left and right + /// operation parts. If parts are non-equal, we assign parts comparison + /// result to the operation comparison result and exit from method. + /// Otherwise we proceed to the next stage. + /// Stages: + /// 1. Operations opcodes. Compared as numbers. + /// 2. Number of operands. + /// 3. Operation types. Compared with cmpType method. + /// 4. Compare operation subclass optional data as stream of bytes: + /// just convert it to integers and call cmpNumbers. + /// 5. Compare in operation operand types with cmpType in + /// most significant operand first order. + /// 6. 
   /// Compare two Instructions for equivalence, similar to
   /// Instruction::isSameOperationAs but with modifications to the type
   /// comparison.
+  /// Stages are listed in "most significant stage first" order:
+  /// At each stage below we compare some part of the left and right
+  /// operations. If the parts are non-equal, we assign the parts comparison
+  /// result to the operation comparison result and exit from the method.
+  /// Otherwise we proceed to the next stage.
+  /// Stages:
+  /// 1. Operation opcodes. Compared as numbers.
+  /// 2. Number of operands.
+  /// 3. Operation types. Compared with the cmpType method.
+  /// 4. Compare operation subclass optional data as a stream of bytes:
+  ///    just convert it to integers and call cmpNumbers.
+  /// 5. Compare the operand types with cmpType, in "most significant operand
+  ///    first" order.
+  /// 6. Last stage. Check operations for some specific attributes.
+  ///    For example, for a Load it would be:
+  ///    6.1. Load: volatile (as a boolean flag)
+  ///    6.2. Load: alignment (as integer numbers)
+  ///    6.3. Load: synch-scope (as integer numbers)
+  ///    At this stage it is better to look at the code, since it is no more
+  ///    than 10-15 lines per particular instruction and could change from
+  ///    time to time.
+  int cmpOperation(const Instruction *L, const Instruction *R) const;
+
   bool isEquivalentOperation(const Instruction *I1,
-                             const Instruction *I2) const;
+                             const Instruction *I2) const {
+    return cmpOperation(I1, I2) == 0;
+  }

   /// Compare two GEPs for equivalent pointer arithmetic.
-  bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+  /// Parts to be compared for each comparison stage,
+  /// most significant stage first:
+  /// 1. Address space. As numbers.
+  /// 2. Constant offset (if the "DataLayout *DL" field is not NULL, computed
+  ///    using the GEPOperator::accumulateConstantOffset method).
+  /// 3. Pointer operand type (using the cmpType method).
+  /// 4. Number of operands.
+  /// 5. Compare operands, using the cmpValues method.
+  int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR);
+  int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+    return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+  }
+
+  bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) {
+    return cmpGEP(GEP1, GEP2) == 0;
+  }
   bool isEquivalentGEP(const GetElementPtrInst *GEP1,
                        const GetElementPtrInst *GEP2) {
     return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
   }
@@ -241,13 +404,50 @@ private:
   int cmpNumbers(uint64_t L, uint64_t R) const;

+  int cmpAPInt(const APInt &L, const APInt &R) const;
+  int cmpAPFloat(const APFloat &L, const APFloat &R) const;
+  int cmpStrings(StringRef L, StringRef R) const;
+  int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+
   // The two functions undergoing comparison.
   const Function *F1, *F2;

   const DataLayout *DL;

-  DenseMap<const Value *, const Value *> id_map;
-  DenseSet<const Value *> seen_values;
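Several of the helpers declared here (cmpAttrs, cmpStrings, and the operand loops of cmpGEP and cmpOperation) boil down to a three-way lexicographic comparison of two sequences. A generic, self-contained sketch of that pattern (an invented helper, not one of the LLVM methods):

#include <cstddef>
#include <vector>

// Three-way lexicographic compare: shorter sequences sort first; otherwise
// the first mismatching element decides.
template <typename T>
static int cmpSequence(const std::vector<T> &L, const std::vector<T> &R) {
  if (L.size() < R.size()) return -1;
  if (L.size() > R.size()) return 1;
  for (std::size_t i = 0, e = L.size(); i != e; ++i) {
    if (L[i] < R[i]) return -1;
    if (R[i] < L[i]) return 1;
  }
  return 0; // equal length and equal elements
}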
+  /// Assign serial numbers to values from the left function, and values from
+  /// the right function.
+  /// Explanation:
+  /// While comparing functions, we need to compare the values we meet on the
+  /// left and right sides.
+  /// It is easy to sort things out for external values: the left value should
+  /// simply be the same value as the right one.
+  /// But for local values (those introduced inside the function body) we have
+  /// to ensure they were introduced at exactly the same place, and play the
+  /// same role.
+  /// So let's assign a serial number to each value when we meet it for the
+  /// first time. Values that were met at the same place will get the same
+  /// serial numbers.
+  /// In this context it is worth explaining a few points about the values
+  /// assigned to BBs and about other possible implementations (see below).
+  ///
+  /// 1. Safety of BB reordering.
+  /// It is safe to change the order of BasicBlocks in a function.
+  /// The relationship with other functions and the serial numbering will not
+  /// change in this case.
+  /// As follows from FunctionComparator::compare(), we do a CFG walk: we
+  /// start from the entry and then take each terminator. So it does not
+  /// matter how the BBs are in fact ordered in the function. And since
+  /// cmpValues is called during this walk, the numbering depends only on how
+  /// the BBs are located inside the CFG.
+  /// So the answer is - yes. We will get the same numbering.
+  ///
+  /// 2. Impossibility of using dominance properties of values.
+  /// If we compare two instruction operands, where the first is a usage of a
+  /// local variable AL from function FL, and the second is a usage of a local
+  /// variable AR from FR, we could compare their origins and check whether
+  /// they are defined at the same place.
+  /// But we are still not able to compare the operands of PHI nodes, since
+  /// those could be operands from further BBs we have not scanned yet.
+  /// So it is impossible to use dominance properties in general.
+  DenseMap<const Value*, int> sn_mapL, sn_mapR;
 };

 }

@@ -258,6 +458,206 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
   return 0;
 }

+int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const {
+  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
+    return Res;
+  if (L.ugt(R)) return 1;
+  if (R.ugt(L)) return -1;
+  return 0;
+}
+
+int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const {
+  if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
+                           (uint64_t)&R.getSemantics()))
+    return Res;
+  return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt());
+}
+
+int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
+  // Prevent a heavy comparison: compare sizes first.
+  if (int Res = cmpNumbers(L.size(), R.size()))
+    return Res;
+
+  // Compare strings lexicographically only when it is necessary: only when
+  // the strings are equal in size.
+  return L.compare(R);
+}
+
+int FunctionComparator::cmpAttrs(const AttributeSet L,
+                                 const AttributeSet R) const {
+  if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
+    return Res;
+
+  for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
+    AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
+                           RE = R.end(i);
+    for (; LI != LE && RI != RE; ++LI, ++RI) {
+      Attribute LA = *LI;
+      Attribute RA = *RI;
+      if (LA < RA)
+        return -1;
+      if (RA < LA)
+        return 1;
+    }
+    if (LI != LE)
+      return 1;
+    if (RI != RE)
+      return -1;
+  }
+  return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether the type of the L constant could be losslessly bitcasted
+///    to the R type.
+/// 2. Compare constant contents.
+/// For more details see the declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
+
+  Type *TyL = L->getType();
+  Type *TyR = R->getType();
+
+  // Check whether the types are bitcastable. This part is essentially a
+  // refactored Type::canLosslesslyBitCastTo, but instead of returning
+  // true/false, it also packs into the result which type is "less" for us.
+  int TypesRes = cmpType(TyL, TyR);
+  if (TypesRes != 0) {
+    // Types are different, but check whether we can bitcast them.
+    if (!TyL->isFirstClassType()) {
+      if (TyR->isFirstClassType())
+        return -1;
+      // Neither TyL nor TyR are values of first class type. Return the result
+      // of comparing the types.
+      return TypesRes;
+    }
+    if (!TyR->isFirstClassType()) {
+      if (TyL->isFirstClassType())
+        return 1;
+      return TypesRes;
+    }
+
+    // Vector -> Vector conversions are always lossless if the two vector types
+    // have the same size, otherwise not.
+    unsigned TyLWidth = 0;
+    unsigned TyRWidth = 0;
+
+    if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
+      TyLWidth = VecTyL->getBitWidth();
+    if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
+      TyRWidth = VecTyR->getBitWidth();
+
+    if (TyLWidth != TyRWidth)
+      return cmpNumbers(TyLWidth, TyRWidth);
+
+    // Zero bit-width means neither TyL nor TyR are vectors.
+ if (!TyLWidth) { + PointerType *PTyL = dyn_cast<PointerType>(TyL); + PointerType *PTyR = dyn_cast<PointerType>(TyR); + if (PTyL && PTyR) { + unsigned AddrSpaceL = PTyL->getAddressSpace(); + unsigned AddrSpaceR = PTyR->getAddressSpace(); + if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR)) + return Res; + } + if (PTyL) + return 1; + if (PTyR) + return -1; + + // TyL and TyR aren't vectors, nor pointers. We don't know how to + // bitcast them. + return TypesRes; + } + } + + // OK, types are bitcastable, now check constant contents. + + if (L->isNullValue() && R->isNullValue()) + return TypesRes; + if (L->isNullValue() && !R->isNullValue()) + return 1; + if (!L->isNullValue() && R->isNullValue()) + return -1; + + if (int Res = cmpNumbers(L->getValueID(), R->getValueID())) + return Res; + + switch (L->getValueID()) { + case Value::UndefValueVal: return TypesRes; + case Value::ConstantIntVal: { + const APInt &LInt = cast<ConstantInt>(L)->getValue(); + const APInt &RInt = cast<ConstantInt>(R)->getValue(); + return cmpAPInt(LInt, RInt); + } + case Value::ConstantFPVal: { + const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF(); + const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF(); + return cmpAPFloat(LAPF, RAPF); + } + case Value::ConstantArrayVal: { + const ConstantArray *LA = cast<ConstantArray>(L); + const ConstantArray *RA = cast<ConstantArray>(R); + uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements(); + uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)), + cast<Constant>(RA->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantStructVal: { + const ConstantStruct *LS = cast<ConstantStruct>(L); + const ConstantStruct *RS = cast<ConstantStruct>(R); + unsigned NumElementsL = cast<StructType>(TyL)->getNumElements(); + unsigned NumElementsR = cast<StructType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (unsigned i = 0; i != NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)), + cast<Constant>(RS->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantVectorVal: { + const ConstantVector *LV = cast<ConstantVector>(L); + const ConstantVector *RV = cast<ConstantVector>(R); + unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements(); + unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)), + cast<Constant>(RV->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantExprVal: { + const ConstantExpr *LE = cast<ConstantExpr>(L); + const ConstantExpr *RE = cast<ConstantExpr>(R); + unsigned NumOperandsL = LE->getNumOperands(); + unsigned NumOperandsR = RE->getNumOperands(); + if (int Res = cmpNumbers(NumOperandsL, NumOperandsR)) + return Res; + for (unsigned i = 0; i < NumOperandsL; ++i) { + if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)), + cast<Constant>(RE->getOperand(i)))) + return Res; + } + return 0; + } + case Value::FunctionVal: + case Value::GlobalVariableVal: + case Value::GlobalAliasVal: + default: // Unknown constant, cast L and R pointers to numbers and compare. 
+ return cmpNumbers((uint64_t)L, (uint64_t)R); + } +} + /// cmpType - compares two types, /// defines total ordering among the types set. /// See method declaration comments for more details. @@ -350,143 +750,209 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const { // Determine whether the two operations are the same except that pointer-to-A // and pointer-to-B are equivalent. This should be kept in sync with // Instruction::isSameOperationAs. -bool FunctionComparator::isEquivalentOperation(const Instruction *I1, - const Instruction *I2) const { +// Read method declaration comments for more details. +int FunctionComparator::cmpOperation(const Instruction *L, + const Instruction *R) const { // Differences from Instruction::isSameOperationAs: // * replace type comparison with calls to isEquivalentType. // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top // * because of the above, we don't test for the tail bit on calls later on - if (I1->getOpcode() != I2->getOpcode() || - I1->getNumOperands() != I2->getNumOperands() || - !isEquivalentType(I1->getType(), I2->getType()) || - !I1->hasSameSubclassOptionalData(I2)) - return false; + if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode())) + return Res; + + if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) + return Res; + + if (int Res = cmpType(L->getType(), R->getType())) + return Res; + + if (int Res = cmpNumbers(L->getRawSubclassOptionalData(), + R->getRawSubclassOptionalData())) + return Res; // We have two instructions of identical opcode and #operands. Check to see // if all operands are the same type - for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i) - if (!isEquivalentType(I1->getOperand(i)->getType(), - I2->getOperand(i)->getType())) - return false; + for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) { + if (int Res = + cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType())) + return Res; + } // Check special state that is a part of some instructions. 
- if (const LoadInst *LI = dyn_cast<LoadInst>(I1)) - return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() && - LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() && - LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() && - LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope(); - if (const StoreInst *SI = dyn_cast<StoreInst>(I1)) - return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() && - SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() && - SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() && - SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope(); - if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) - return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); - if (const CallInst *CI = dyn_cast<CallInst>(I1)) - return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && - CI->getAttributes() == cast<CallInst>(I2)->getAttributes(); - if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) - return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && - CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) - return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices(); - if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) - return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices(); - if (const FenceInst *FI = dyn_cast<FenceInst>(I1)) - return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() && - FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope(); - if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1)) - return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() && - CXI->getSuccessOrdering() == - cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() && - CXI->getFailureOrdering() == - cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() && - CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope(); - if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1)) - return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() && - RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() && - RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() && - RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope(); + if (const LoadInst *LI = dyn_cast<LoadInst>(L)) { + if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) + return Res; + return cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()); + } + if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { + if (int Res = + cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) + return Res; + return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); + } + if (const CmpInst *CI = dyn_cast<CmpInst>(L)) + return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); + if (const CallInst *CI = dyn_cast<CallInst>(L)) { + if (int Res = cmpNumbers(CI->getCallingConv(), + cast<CallInst>(R)->getCallingConv())) + return Res; + return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()); + } + if (const InvokeInst *CI = 
dyn_cast<InvokeInst>(L)) { + if (int Res = cmpNumbers(CI->getCallingConv(), + cast<InvokeInst>(R)->getCallingConv())) + return Res; + return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes()); + } + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) { + ArrayRef<unsigned> LIndices = IVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) { + ArrayRef<unsigned> LIndices = EVI->getIndices(); + ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const FenceInst *FI = dyn_cast<FenceInst>(L)) { + if (int Res = + cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) + return Res; + return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); + } - return true; + if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { + if (int Res = cmpNumbers(CXI->isVolatile(), + cast<AtomicCmpXchgInst>(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(CXI->getSuccessOrdering(), + cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) + return Res; + if (int Res = cmpNumbers(CXI->getFailureOrdering(), + cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) + return Res; + return cmpNumbers(CXI->getSynchScope(), + cast<AtomicCmpXchgInst>(R)->getSynchScope()); + } + if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { + if (int Res = cmpNumbers(RMWI->getOperation(), + cast<AtomicRMWInst>(R)->getOperation())) + return Res; + if (int Res = cmpNumbers(RMWI->isVolatile(), + cast<AtomicRMWInst>(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(RMWI->getOrdering(), + cast<AtomicRMWInst>(R)->getOrdering())) + return Res; + return cmpNumbers(RMWI->getSynchScope(), + cast<AtomicRMWInst>(R)->getSynchScope()); + } + return 0; } // Determine whether two GEP operations perform the same underlying arithmetic. -bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, - const GEPOperator *GEP2) { - unsigned AS = GEP1->getPointerAddressSpace(); - if (AS != GEP2->getPointerAddressSpace()) - return false; +// Read method declaration comments for more details. +int FunctionComparator::cmpGEP(const GEPOperator *GEPL, + const GEPOperator *GEPR) { + + unsigned int ASL = GEPL->getPointerAddressSpace(); + unsigned int ASR = GEPR->getPointerAddressSpace(); + if (int Res = cmpNumbers(ASL, ASR)) + return Res; + + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. if (DL) { - // When we have target data, we can reduce the GEP down to the value in bytes - // added to the address. - unsigned BitWidth = DL ? 
DL->getPointerSizeInBits(AS) : 1; - APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0); - if (GEP1->accumulateConstantOffset(*DL, Offset1) && - GEP2->accumulateConstantOffset(*DL, Offset2)) { - return Offset1 == Offset2; - } + unsigned BitWidth = DL->getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(*DL, OffsetL) && + GEPR->accumulateConstantOffset(*DL, OffsetR)) + return cmpAPInt(OffsetL, OffsetR); } - if (GEP1->getPointerOperand()->getType() != - GEP2->getPointerOperand()->getType()) - return false; + if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), + (uint64_t)GEPR->getPointerOperand()->getType())) + return Res; - if (GEP1->getNumOperands() != GEP2->getNumOperands()) - return false; + if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) + return Res; - for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { - if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) - return false; + for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) { + if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i))) + return Res; } - return true; + return 0; } -// Compare two values used by the two functions under pair-wise comparison. If -// this is the first time the values are seen, they're added to the mapping so -// that we will detect mismatches on next use. -bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { - // Check for function @f1 referring to itself and function @f2 referring to - // itself, or referring to each other, or both referring to either of them. - // They're all equivalent if the two functions are otherwise equivalent. - if (V1 == F1 && V2 == F2) - return true; - if (V1 == F2 && V2 == F1) - return true; +/// Compare two values used by the two functions under pair-wise comparison. If +/// this is the first time the values are seen, they're added to the mapping so +/// that we will detect mismatches on next use. +/// See comments in declaration for more details. +int FunctionComparator::cmpValues(const Value *L, const Value *R) { + // Catch self-reference case. + if (L == F1) { + if (R == F2) + return 0; + return -1; + } + if (R == F2) { + if (L == F1) + return 0; + return 1; + } - if (const Constant *C1 = dyn_cast<Constant>(V1)) { - if (V1 == V2) return true; - const Constant *C2 = dyn_cast<Constant>(V2); - if (!C2) return false; - // TODO: constant expressions with GEP or references to F1 or F2. - if (C1->isNullValue() && C2->isNullValue() && - isEquivalentType(C1->getType(), C2->getType())) - return true; - // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 - // then they must have equal bit patterns. - return C1->getType()->canLosslesslyBitCastTo(C2->getType()) && - C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType()); - } - - if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2)) - return V1 == V2; - - // Check that V1 maps to V2. If we find a value that V1 maps to then we simply - // check whether it's equal to V2. When there is no mapping then we need to - // ensure that V2 isn't already equivalent to something else. For this - // purpose, we track the V2 values in a set. 
-
-  const Value *&map_elem = id_map[V1];
-  if (map_elem)
-    return map_elem == V2;
-  if (!seen_values.insert(V2).second)
-    return false;
-  map_elem = V2;
-  return true;
-}
+  const Constant *ConstL = dyn_cast<Constant>(L);
+  const Constant *ConstR = dyn_cast<Constant>(R);
+  if (ConstL && ConstR) {
+    if (L == R)
+      return 0;
+    return cmpConstants(ConstL, ConstR);
+  }
+
+  if (ConstL)
+    return 1;
+  if (ConstR)
+    return -1;
+
+  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
+  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
+
+  if (InlineAsmL && InlineAsmR)
+    return cmpNumbers((uint64_t)L, (uint64_t)R);
+  if (InlineAsmL)
+    return 1;
+  if (InlineAsmR)
+    return -1;
+
+  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
+       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
+  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
+}

 // Test whether two basic blocks have equivalent behaviour.
 bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
   BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
@@ -535,6 +1001,9 @@ bool FunctionComparator::compare() {
   // We need to recheck everything, but check the things that weren't included
   // in the hash first.

+  sn_mapL.clear();
+  sn_mapR.clear();
+
   if (F1->getAttributes() != F2->getAttributes())
     return false;
@@ -683,7 +1152,7 @@ ModulePass *llvm::createMergeFunctionsPass() {
 bool MergeFunctions::runOnModule(Module &M) {
   bool Changed = false;
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;

   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -783,8 +1252,23 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
 // Helper for writeThunk. Selects the proper bitcast operation,
 // but is a bit simpler than CastInst::getCastOpcode.
-static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
   Type *SrcTy = V->getType();
+  if (SrcTy->isStructTy()) {
+    assert(DestTy->isStructTy());
+    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+    Value *Result = UndefValue::get(DestTy);
+    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+      Value *Element = createCast(
+          Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)),
+          DestTy->getStructElementType(I));
+
+      Result =
+          Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I));
+    }
+    return Result;
+  }
+  assert(!DestTy->isStructTy());
   if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
     return Builder.CreateIntToPtr(V, DestTy);
   else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
@@ -843,9 +1327,9 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
 // Replace G with an alias to F and delete G.
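// As an illustration of the two rewrite strategies chosen between here
// (a hypothetical example: @f, @g and the types are invented, and the alias
// line is the approximate 3.5-era textual IR):
// if @g is equivalent to @f, writeAlias reduces @g to roughly
//   @g = alias void (i32)* @f
// while writeThunk, used when an alias cannot be emitted, gives @g a
// forwarding body instead:
//   define void @g(i32 %x) {
//     tail call void @f(i32 %x)
//     ret void
//   }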
void MergeFunctions::writeAlias(Function *F, Function *G) { - Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); - GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "", - BitcastF, G->getParent()); + PointerType *PTy = G->getType(); + auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + G->getLinkage(), "", F); F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); GA->takeName(G); GA->setVisibility(G->getVisibility()); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index ac88aee..76d6dfa 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "partialinlining" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/CFG.h" @@ -24,6 +23,8 @@ #include "llvm/Transforms/Utils/CodeExtractor.h" using namespace llvm; +#define DEBUG_TYPE "partialinlining" + STATISTIC(NumPartialInlined, "Number of functions partially inlined"); namespace { @@ -52,10 +53,10 @@ Function* PartialInliner::unswitchFunction(Function* F) { BasicBlock* entryBlock = F->begin(); BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); if (!BR || BR->isUnconditional()) - return 0; + return nullptr; - BasicBlock* returnBlock = 0; - BasicBlock* nonReturnBlock = 0; + BasicBlock* returnBlock = nullptr; + BasicBlock* nonReturnBlock = nullptr; unsigned returnCount = 0; for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock); SI != SE; ++SI) @@ -66,7 +67,7 @@ Function* PartialInliner::unswitchFunction(Function* F) { nonReturnBlock = *SI; if (returnCount != 1) - return 0; + return nullptr; // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 4a28b34..38e1b8e 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -56,8 +56,9 @@ RunLoopRerolling("reroll-loops", cl::Hidden, PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; - LibraryInfo = 0; - Inliner = 0; + LibraryInfo = nullptr; + Inliner = nullptr; + DisableTailCalls = false; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; @@ -128,7 +129,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (OptLevel == 0) { if (Inliner) { MPM.add(Inliner); - Inliner = 0; + Inliner = nullptr; } // FIXME: This is a HACK! 
The inliner pass above implicitly creates a CGSCC @@ -156,6 +157,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createDeadArgEliminationPass()); // Dead argument elimination MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } @@ -164,7 +166,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createPruneEHPass()); // Remove dead EH info if (Inliner) { MPM.add(Inliner); - Inliner = 0; + Inliner = nullptr; } if (!DisableUnitAtATime) MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs @@ -182,8 +184,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Combine silly seq's + addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + if (!DisableTailCalls) + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions MPM.add(createLoopRotatePass()); // Rotate Loop @@ -206,6 +210,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); MPM.add(createDeadStoreEliminationPass()); // Delete dead stores @@ -220,6 +225,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (BBVectorize) { MPM.add(createBBVectorizePass()); MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass()); // Remove redundancies else @@ -233,6 +239,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Clean up after everything. + addExtensionsToPM(EP_Peephole, MPM); // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC // pass manager that we are specifically trying to avoid. To prevent this @@ -245,6 +252,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // as function calls, so that we can only pass them when the vectorizer // changed the code. MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); if (!DisableUnrollLoops) @@ -297,6 +305,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); // Inline small functions if (RunInliner) @@ -315,6 +324,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // The IPO passes may leave cruft around. Clean up after them. 
PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); // Break up allocas @@ -334,11 +344,17 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // Nuke dead stores. PM.add(createDeadStoreEliminationPass()); - // More loops are countable try to vectorize them. + // More loops are countable; try to optimize them. + PM.add(createIndVarSimplifyPass()); + PM.add(createLoopDeletionPass()); PM.add(createLoopVectorizePass(true, true)); + // More scalar chains could be vectorized due to more alias information + PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + // Cleanup and simplify the code after the scalar optimizations. PM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass()); diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index c61ec5e..b2c4a09 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "prune-eh" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -30,6 +29,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "prune-eh" + STATISTIC(NumRemoved, "Number of invokes removed"); STATISTIC(NumUnreach, "Number of noreturn calls optimized"); @@ -85,7 +86,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) { + if (!F) { SCCMightUnwind = true; SCCMightReturn = true; } else if (F->isDeclaration() || F->mayBeOverridden()) { diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp index 1c6532d..956991a 100644 --- a/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -14,13 +14,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "strip-dead-prototypes" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "strip-dead-prototypes" + STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed"); namespace { diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 6d0be8f..1abbccc 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -192,7 +192,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) { /// Find values that are marked as llvm.used. 
static void findUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { - if (LLVMUsed == 0) return; + if (!LLVMUsed) return; UsedValues.insert(LLVMUsed); ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 822e146..e04b1be 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -20,34 +20,38 @@ #include "llvm/Pass.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#define DEBUG_TYPE "instcombine" + namespace llvm { - class CallSite; - class DataLayout; - class TargetLibraryInfo; - class DbgDeclareInst; - class MemIntrinsic; - class MemSetInst; +class CallSite; +class DataLayout; +class TargetLibraryInfo; +class DbgDeclareInst; +class MemIntrinsic; +class MemSetInst; /// SelectPatternFlavor - We can match a variety of different patterns for /// select operations. enum SelectPatternFlavor { SPF_UNKNOWN = 0, - SPF_SMIN, SPF_UMIN, - SPF_SMAX, SPF_UMAX - //SPF_ABS - TODO. + SPF_SMIN, + SPF_UMIN, + SPF_SMAX, + SPF_UMAX + // SPF_ABS - TODO. }; /// getComplexity: Assign a complexity or rank value to LLVM Values... /// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst static inline unsigned getComplexity(Value *V) { if (isa<Instruction>(V)) { - if (BinaryOperator::isNeg(V) || - BinaryOperator::isFNeg(V) || + if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V)) return 3; return 4; } - if (isa<Argument>(V)) return 3; + if (isa<Argument>(V)) + return 3; return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; } @@ -60,18 +64,18 @@ static inline Constant *SubOne(Constant *C) { return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } - /// InstCombineIRInserter - This is an IRBuilder insertion helper that works /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter : public IRBuilderDefaultInserter<true> { InstCombineWorklist &Worklist; + public: InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} - void InsertHelper(Instruction *I, const Twine &Name, - BasicBlock *BB, BasicBlock::iterator InsertPt) const { + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, + BasicBlock::iterator InsertPt) const { IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); Worklist.Add(I); } @@ -79,13 +83,14 @@ public: /// InstCombiner - The -instcombine pass. class LLVM_LIBRARY_VISIBILITY InstCombiner - : public FunctionPass, - public InstVisitor<InstCombiner, Instruction*> { + : public FunctionPass, + public InstVisitor<InstCombiner, Instruction *> { const DataLayout *DL; TargetLibraryInfo *TLI; bool MadeIRChange; LibCallSimplifier *Simplifier; bool MinimizeSize; + public: /// Worklist - All of the instructions that need to be simplified. 
InstCombineWorklist Worklist; @@ -96,7 +101,7 @@ public: BuilderTy *Builder; static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(ID), DL(0), Builder(0) { + InstCombiner() : FunctionPass(ID), DL(nullptr), Builder(nullptr) { MinimizeSize = false; initializeInstCombinerPass(*PassRegistry::getPassRegistry()); } @@ -144,9 +149,9 @@ public: Instruction *visitAnd(BinaryOperator &I); Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS); Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); - Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C); - Instruction *visitOr (BinaryOperator &I); + Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, + Value *B, Value *C); + Instruction *visitOr(BinaryOperator &I); Instruction *visitXor(BinaryOperator &I); Instruction *visitShl(BinaryOperator &I); Instruction *visitAShr(BinaryOperator &I); @@ -156,12 +161,11 @@ public: Constant *RHSC); Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, - ConstantInt *AndCst = 0); + ConstantInt *AndCst = nullptr); Instruction *visitFCmpInst(FCmpInst &I); Instruction *visitICmpInst(ICmpInst &I); Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); - Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, - Instruction *LHS, + Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS, ConstantInt *RHS); Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); @@ -171,7 +175,7 @@ public: ICmpInst::Predicate Pred); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); - Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I); Instruction *commonCastTransforms(CastInst &CI); Instruction *commonPointerCastTransforms(CastInst &CI); @@ -188,9 +192,8 @@ public: Instruction *visitIntToPtr(IntToPtrInst &CI); Instruction *visitBitCast(BitCastInst &CI); Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI); - Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, - Instruction *FI); - Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); + Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI); + Instruction *FoldSelectIntoOp(SelectInst &SI, Value *, Value *); Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, Value *A, Value *B, Instruction &Outer, SelectPatternFlavor SPF2, Value *C); @@ -209,6 +212,7 @@ public: Instruction *visitStoreInst(StoreInst &SI); Instruction *visitBranchInst(BranchInst &BI); Instruction *visitSwitchInst(SwitchInst &SI); + Instruction *visitInsertValueInst(InsertValueInst &IV); Instruction *visitInsertElementInst(InsertElementInst &IE); Instruction *visitExtractElementInst(ExtractElementInst &EI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); @@ -216,21 +220,21 @@ public: Instruction *visitLandingPadInst(LandingPadInst &LI); // visitInstruction - Specify what to return for unhandled instructions... 
- Instruction *visitInstruction(Instruction &I) { return 0; } + Instruction *visitInstruction(Instruction &I) { return nullptr; } private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; - Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const; + Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, - SmallVectorImpl<Value*> &NewIndices); + SmallVectorImpl<Value *> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually /// results in any code being generated and is interesting to optimize out. If /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. - bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, + bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V, Type *Ty); Instruction *visitCallSite(CallSite CS); @@ -251,10 +255,10 @@ public: // in the program. Add the new instruction to the worklist. // Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { - assert(New && New->getParent() == 0 && + assert(New && !New->getParent() && "New instruction already inserted into a basic block!"); BasicBlock *BB = Old.getParent(); - BB->getInstList().insert(&Old, New); // Insert inst + BB->getInstList().insert(&Old, New); // Insert inst Worklist.Add(New); return New; } @@ -274,7 +278,7 @@ public: // modified. // Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { - Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. + Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. // If we are replacing the instruction with itself, this must be in a // segment of unreachable code, so just clobber the instruction. @@ -306,12 +310,12 @@ public: Worklist.Remove(&I); I.eraseFromParent(); MadeIRChange = true; - return 0; // Don't do anything with FI + return nullptr; // Don't do anything with FI } - void ComputeMaskedBits(Value *V, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0) const { - return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, DL, Depth); + void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + unsigned Depth = 0) const { + return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth); } bool MaskedValueIsZero(Value *V, const APInt &Mask, @@ -323,7 +327,6 @@ public: } private: - /// SimplifyAssociativeOrCommutative - This performs a few simplifications for /// operators which are associative or commutative. bool SimplifyAssociativeOrCommutative(BinaryOperator &I); @@ -337,12 +340,10 @@ private: /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth); - bool SimplifyDemandedBits(Use &U, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth=0); + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, + APInt &KnownOne, unsigned Depth); + bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero, + APInt &KnownOne, unsigned Depth = 0); /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. 
Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, @@ -355,7 +356,9 @@ private: bool SimplifyDemandedInstructionBits(Instruction &Inst); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt& UndefElts, unsigned Depth = 0); + APInt &UndefElts, unsigned Depth = 0); + + Value *SimplifyVectorOp(BinaryOperator &Inst); // FoldOpIntoPhi - Given a binary operator, cast instruction, or select // which has a PHI node as operand #0, see if we can fold the instruction @@ -372,21 +375,19 @@ private: Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); - Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, ConstantInt *AndRHS, BinaryOperator &TheAnd); Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, bool isSub, Instruction &I); - Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside); + Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, + bool Inside); Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); Instruction *SimplifyMemTransfer(MemIntrinsic *MI); Instruction *SimplifyMemSet(MemSetInst *MI); - Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); /// Descale - Return a value X such that Val = X * Scale, or null if none. If @@ -394,8 +395,8 @@ private: Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); }; - - } // end namespace llvm. +#undef DEBUG_TYPE + #endif diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 97910c7..c37a9cf 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -20,6 +20,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + namespace { /// Class representing coefficient of floating-point addend. 
@@ -112,12 +114,12 @@ namespace { /// class FAddend { public: - FAddend() { Val = 0; } + FAddend() { Val = nullptr; } Value *getSymVal (void) const { return Val; } const FAddendCoef &getCoef(void) const { return Coeff; } - bool isConstant() const { return Val == 0; } + bool isConstant() const { return Val == nullptr; } bool isZero() const { return Coeff.isZero(); } void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; } @@ -154,7 +156,7 @@ namespace { /// class FAddCombine { public: - FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} + FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {} Value *simplify(Instruction *FAdd); private: @@ -348,8 +350,8 @@ Value *FAddendCoef::getValue(Type *Ty) const { // unsigned FAddend::drillValueDownOneStep (Value *Val, FAddend &Addend0, FAddend &Addend1) { - Instruction *I = 0; - if (Val == 0 || !(I = dyn_cast<Instruction>(Val))) + Instruction *I = nullptr; + if (!Val || !(I = dyn_cast<Instruction>(Val))) return 0; unsigned Opcode = I->getOpcode(); @@ -359,16 +361,16 @@ unsigned FAddend::drillValueDownOneStep Value *Opnd0 = I->getOperand(0); Value *Opnd1 = I->getOperand(1); if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero()) - Opnd0 = 0; + Opnd0 = nullptr; if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero()) - Opnd1 = 0; + Opnd1 = nullptr; if (Opnd0) { if (!C0) Addend0.set(1, Opnd0); else - Addend0.set(C0, 0); + Addend0.set(C0, nullptr); } if (Opnd1) { @@ -376,7 +378,7 @@ unsigned FAddend::drillValueDownOneStep if (!C1) Addend.set(1, Opnd1); else - Addend.set(C1, 0); + Addend.set(C1, nullptr); if (Opcode == Instruction::FSub) Addend.negate(); } @@ -385,7 +387,7 @@ unsigned FAddend::drillValueDownOneStep return Opnd0 && Opnd1 ? 2 : 1; // Both operands are zero. Weird! - Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0); + Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr); return 1; } @@ -443,13 +445,13 @@ Value *FAddCombine::performFactorization(Instruction *I) { Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) - return 0; + return nullptr; bool isMpy = false; if (I0->getOpcode() == Instruction::FMul) isMpy = true; else if (I0->getOpcode() != Instruction::FDiv) - return 0; + return nullptr; Value *Opnd0_0 = I0->getOperand(0); Value *Opnd0_1 = I0->getOperand(1); @@ -461,8 +463,8 @@ Value *FAddCombine::performFactorization(Instruction *I) { // (x*y) +/- (x*z) x y z // (y/x) +/- (z/x) x y z // - Value *Factor = 0; - Value *AddSub0 = 0, *AddSub1 = 0; + Value *Factor = nullptr; + Value *AddSub0 = nullptr, *AddSub1 = nullptr; if (isMpy) { if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) @@ -481,7 +483,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { } if (!Factor) - return 0; + return nullptr; FastMathFlags Flags; Flags.setUnsafeAlgebra(); @@ -495,7 +497,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { const APFloat &F = CFP->getValueAPF(); if (!F.isNormal()) - return 0; + return nullptr; } else if (Instruction *II = dyn_cast<Instruction>(NewAddSub)) II->setFastMathFlags(Flags); @@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) { // Currently we are not able to handle vector type. 
if (I->getType()->isVectorTy()) - return 0; + return nullptr; assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); @@ -568,7 +570,7 @@ Value *FAddCombine::simplify(Instruction *I) { // been optimized into "I = Y - X" in the previous steps. // const FAddendCoef &CE = Opnd0.getCoef(); - return CE.isOne() ? Opnd0.getSymVal() : 0; + return CE.isOne() ? Opnd0.getSymVal() : nullptr; } // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] @@ -614,7 +616,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { // constant close to supper-expr(s) will potentially reveal some optimization // opportunities in super-expr(s). // - const FAddend *ConstAdd = 0; + const FAddend *ConstAdd = nullptr; // Simplified addends are placed <SimpVect>. AddendVect SimpVect; @@ -647,7 +649,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { if (T && T->getSymVal() == Val) { // Set null such that next iteration of the outer loop will not process // this addend again. - Addends[SameSymIdx] = 0; + Addends[SameSymIdx] = nullptr; SimpVect.push_back(T); } } @@ -661,7 +663,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { // Pop all addends being folded and push the resulting folded addend. SimpVect.resize(StartIdx); - if (Val != 0) { + if (Val) { if (!R.isZero()) { SimpVect.push_back(&R); } @@ -698,7 +700,7 @@ Value *FAddCombine::createNaryFAdd // unsigned InstrNeeded = calcInstrNumber(Opnds); if (InstrNeeded > InstrQuota) - return 0; + return nullptr; initCreateInstNum(); @@ -710,7 +712,7 @@ Value *FAddCombine::createNaryFAdd // N-ary addition has at most two instructions, and we don't need to worry // about tree-height when constructing the N-ary addition. - Value *LastVal = 0; + Value *LastVal = nullptr; bool LastValNeedNeg = false; // Iterate the addends, creating fadd/fsub using adjacent two addends. 
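The factorization path above (performFactorization) is easiest to see on a worked instance. Under the unsafe-algebra fast-math flags it sets, and with invented value names, the rewrites from the earlier comment are:

//   (x*y) + (x*z)  ==>  x * (y+z)
//   (y/x) + (z/x)  ==>  (y+z) / x
// or, as approximate textual IR for the multiply case:
//   %t0 = fmul fast double %x, %y
//   %t1 = fmul fast double %x, %z
//   %r  = fadd fast double %t0, %t1
// becomes
//   %s  = fadd fast double %y, %z
//   %r2 = fmul fast double %x, %s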
@@ -870,10 +872,10 @@ Value *FAddCombine::createAddendVal // static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy()) - return 0; + return nullptr; Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return 0; + if (!I) return nullptr; if (I->getOpcode() == Instruction::Mul) if ((CST = dyn_cast<Constant>(I->getOperand(1)))) @@ -884,7 +886,7 @@ static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST); return I->getOperand(0); } - return 0; + return nullptr; } @@ -918,6 +920,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) return ReplaceInstUsesWith(I, V); @@ -942,7 +947,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ZI->getSrcTy()->isIntegerTy(1)) return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - Value *XorLHS = 0; ConstantInt *XorRHS = 0; + Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr; if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); const APInt &RHSVal = CI->getValue(); @@ -974,7 +979,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { IntegerType *IT = cast<IntegerType>(I.getType()); APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne); if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); @@ -1042,11 +1047,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); if (LHSKnownZero != 0) { APInt RHSKnownOne(IT->getBitWidth(), 0); APInt RHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); // No bits in common -> bitwise or. if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) @@ -1174,7 +1179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // Check for (x & y) + (x ^ y) { - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && (match(LHS, m_And(m_Specific(A), m_Specific(B))) || match(LHS, m_And(m_Specific(B), m_Specific(A))))) @@ -1186,13 +1191,16 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateOr(A, B); } - return Changed ? &I : 0; + return Changed ? 
&I : nullptr; } Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); @@ -1266,7 +1274,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { if (C1 == C2) { - Constant *Z1=0, *Z2=0; + Constant *Z1=nullptr, *Z2=nullptr; Value *A, *B, *C=C1; if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { Z1 = dyn_cast<Constant>(A1); A = A2; @@ -1290,7 +1298,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } @@ -1305,7 +1313,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. bool Swapped = false; - GEPOperator *GEP1 = 0, *GEP2 = 0; + GEPOperator *GEP1 = nullptr, *GEP2 = nullptr; // For now we require one side to be the base pointer "A" or a constant // GEP derived from it. @@ -1343,9 +1351,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // Avoid duplicating the arithmetic if GEP2 has non-constant indices and // multiple users. - if (GEP1 == 0 || - (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) - return 0; + if (!GEP1 || + (GEP2 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) + return nullptr; // Emit the offset of the GEP and an intptr_t. Value *Result = EmitGEPOffset(GEP1); @@ -1368,6 +1376,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) return ReplaceInstUsesWith(I, V); @@ -1393,7 +1404,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Constant *C = dyn_cast<Constant>(Op0)) { // C - ~X == X + (1+C) - Value *X = 0; + Value *X = nullptr; if (match(Op1, m_Not(m_Value(X)))) return BinaryOperator::CreateAdd(X, AddOne(C)); @@ -1451,9 +1462,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } if (Op1->hasOneUse()) { - Value *X = 0, *Y = 0, *Z = 0; - Constant *C = 0; - Constant *CI = 0; + Value *X = nullptr, *Y = nullptr, *Z = nullptr; + Constant *C = nullptr; + Constant *CI = nullptr; // (X - (Y - Z)) --> (X + (Z - Y)). 
if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) @@ -1532,12 +1543,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, Res); } - return 0; + return nullptr; } Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); @@ -1574,5 +1588,5 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); } - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 2c1bfc7..4f5d65a 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -20,6 +20,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// isFreeToInvert - Return true if the specified value is free to invert (apply /// ~ to). This happens in cases where the ~ can be eliminated. static inline bool isFreeToInvert(Value *V) { @@ -50,7 +52,7 @@ static inline Value *dyn_castNotVal(Value *V) { // Constants can be considered to be not'ed values... if (ConstantInt *C = dyn_cast<ConstantInt>(V)) return ConstantInt::get(C->getType(), ~C->getValue()); - return 0; + return nullptr; } /// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp @@ -123,7 +125,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, ConstantInt *AndRHS, BinaryOperator &TheAnd) { Value *X = Op->getOperand(0); - Constant *Together = 0; + Constant *Together = nullptr; if (!Op->isShift()) Together = ConstantExpr::getAnd(AndRHS, OpRHS); @@ -250,7 +252,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, } break; } - return 0; + return nullptr; } /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise @@ -332,12 +334,12 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, Instruction &I) { Instruction *LHSI = dyn_cast<Instruction>(LHS); if (!LHSI || LHSI->getNumOperands() != 2 || - !isa<ConstantInt>(LHSI->getOperand(1))) return 0; + !isa<ConstantInt>(LHSI->getOperand(1))) return nullptr; ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1)); switch (LHSI->getOpcode()) { - default: return 0; + default: return nullptr; case Instruction::And: if (ConstantExpr::getAnd(N, Mask) == Mask) { // If the AndRHS is a power of two minus one (0+1+), this is simple. 
@@ -357,7 +359,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, break; } } - return 0; + return nullptr; case Instruction::Or: case Instruction::Xor: // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 @@ -365,7 +367,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() && ConstantExpr::getAnd(N, Mask)->isNullValue()) break; - return 0; + return nullptr; } if (isSub) @@ -418,12 +420,12 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, ConstantInt *BCst = dyn_cast<ConstantInt>(B); ConstantInt *CCst = dyn_cast<ConstantInt>(C); bool icmp_eq = (SCC == ICmpInst::ICMP_EQ); - bool icmp_abit = (ACst != 0 && !ACst->isZero() && + bool icmp_abit = (ACst && !ACst->isZero() && ACst->getValue().isPowerOf2()); - bool icmp_bbit = (BCst != 0 && !BCst->isZero() && + bool icmp_bbit = (BCst && !BCst->isZero() && BCst->getValue().isPowerOf2()); unsigned result = 0; - if (CCst != 0 && CCst->isZero()) { + if (CCst && CCst->isZero()) { // if C is zero, then both A and B qualify as mask result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_Mask_AllZeroes | @@ -455,7 +457,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, FoldMskICmp_AMask_NotMixed) : (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed)); - } else if (ACst != 0 && CCst != 0 && + } else if (ACst && CCst && ConstantExpr::getAnd(ACst, CCst) == CCst) { result |= (icmp_eq ? FoldMskICmp_AMask_Mixed : FoldMskICmp_AMask_NotMixed); @@ -470,7 +472,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, FoldMskICmp_BMask_NotMixed) : (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_BMask_Mixed)); - } else if (BCst != 0 && CCst != 0 && + } else if (BCst && CCst && ConstantExpr::getAnd(BCst, CCst) == CCst) { result |= (icmp_eq ? FoldMskICmp_BMask_Mixed : FoldMskICmp_BMask_NotMixed); @@ -570,12 +572,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, Value *L11,*L12,*L21,*L22; // Check whether the icmp can be decomposed into a bit test. if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) { - L21 = L22 = L1 = 0; + L21 = L22 = L1 = nullptr; } else { // Look for ANDs in the LHS icmp. if (!L1->getType()->isIntegerTy()) { // You can icmp pointers, for example. They really aren't masks. - L11 = L12 = 0; + L11 = L12 = nullptr; } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) { // Any icmp can be viewed as being trivially masked; if it allows us to // remove one, it's worth it. @@ -585,7 +587,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, if (!L2->getType()->isIntegerTy()) { // You can icmp pointers, for example. They really aren't masks. 
- L21 = L22 = 0; + L21 = L22 = nullptr; } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) { L21 = L2; L22 = Constant::getAllOnesValue(L2->getType()); @@ -608,7 +610,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, } else { return 0; } - E = R2; R1 = 0; ok = true; + E = R2; R1 = nullptr; ok = true; } else if (R1->getType()->isIntegerTy()) { if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) { // As before, model no mask as a trivial mask if it'll let us do an @@ -665,11 +667,11 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, /// into a single (icmp(A & X) ==/!= Y) static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, llvm::InstCombiner::BuilderTy* Builder) { - Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, LHSCC, RHSCC); - if (mask == 0) return 0; + if (mask == 0) return nullptr; assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); @@ -722,9 +724,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // their actual values. This isn't strictly, necessary, just a "handle the // easy cases for now" decision. ConstantInt *BCst = dyn_cast<ConstantInt>(B); - if (BCst == 0) return 0; + if (!BCst) return nullptr; ConstantInt *DCst = dyn_cast<ConstantInt>(D); - if (DCst == 0) return 0; + if (!DCst) return nullptr; if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) { // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and @@ -763,11 +765,11 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // (icmp ne (A & B), B) & (icmp eq (A & D), D) // with B and D, having a single bit set ConstantInt *CCst = dyn_cast<ConstantInt>(C); - if (CCst == 0) return 0; + if (!CCst) return nullptr; if (LHSCC != NEWCC) CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) ); ConstantInt *ECst = dyn_cast<ConstantInt>(E); - if (ECst == 0) return 0; + if (!ECst) return nullptr; if (RHSCC != NEWCC) ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) ); ConstantInt* MCst = dyn_cast<ConstantInt>( @@ -776,13 +778,13 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // if there is a conflict we should actually return a false for the // whole construct if (!MCst->isZero()) - return 0; + return nullptr; Value *newOr1 = Builder->CreateOr(B, D); Value *newOr2 = ConstantExpr::getOr(CCst, ECst); Value *newAnd = Builder->CreateAnd(A, newOr1); return Builder->CreateICmp(NEWCC, newAnd, newOr2); } - return 0; + return nullptr; } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. 
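foldLogOpOfMaskedICmps, whose null returns are converted above, merges two bit tests against the same value into one; a small sketch of the "(icmp eq (A & B), 0) & (icmp eq (A & D), 0)" case, with invented names:

    #include <cstdint>

    // Both masked tests fold to the single compare (x & 7) == 0,
    // because the masks 1 and 6 are simply or'ed together.
    bool low_three_bits_clear(uint32_t x) {
      return (x & 1) == 0 && (x & 6) == 0;
    }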
@@ -811,7 +813,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); - if (LHSCst == 0 || RHSCst == 0) return 0; + if (!LHSCst || !RHSCst) return nullptr; if (LHSCst == RHSCst && LHSCC == RHSCC) { // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) @@ -835,7 +837,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC && LHS->hasOneUse() && RHS->hasOneUse()) { Value *V; - ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; + ConstantInt *AndCst, *SmallCst = nullptr, *BigCst = nullptr; // (trunc x) == C1 & (and x, CA) == C2 // (and x, CA) == C2 & (trunc x) == C1 @@ -866,14 +868,14 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // From here on, we only handle: // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. - if (Val != Val2) return 0; + if (Val != Val2) return nullptr; // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) - return 0; + return nullptr; // Make a constant range that's the intersection of the two icmp ranges. // If the intersection is empty, we know that the result is false. @@ -887,7 +889,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // We can't fold (ugt x, C) & (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) - return 0; + return nullptr; // Ensure that the larger constant is on the RHS. bool ShouldSwap; @@ -1016,7 +1018,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { break; } - return 0; + return nullptr; } /// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of @@ -1026,7 +1028,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) - return 0; + return nullptr; // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) @@ -1043,7 +1045,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); - return 0; + return nullptr; } Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); @@ -1096,7 +1098,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { } } - return 0; + return nullptr; } @@ -1104,6 +1106,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAndInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1198,7 +1203,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // If this is an integer truncation, and if the source is an 'and' with // immediate, transform it. This frequently occurs for bitfield accesses. 
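The bitfield transform described in the comment above, "and (trunc (and X, YC) to T), C2" into "and (trunc X to T), trunc(YC) & C2", corresponds to source like the following sketch (names invented; the exact IR depends on how the frontend emits the integer promotions):

    #include <cstdint>

    // After canonicalization the two masks combine into one 'and' on the
    // narrow type: 0xF0 (the truncated 0x1F0) & 0x3C == 0x30.
    uint8_t bitfield_bits(uint32_t x) {
      uint8_t t = static_cast<uint8_t>(x & 0x1F0u); // and, then trunc
      return t & 0x3Cu;                             // and on the narrow type
    }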
{ - Value *X = 0; ConstantInt *YC = 0; + Value *X = nullptr; ConstantInt *YC = nullptr; if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) { // Change: and (trunc (and X, YC) to T), C2 // into : and (trunc X to T), trunc(YC) & C2 @@ -1231,7 +1236,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } { - Value *A = 0, *B = 0, *C = 0, *D = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; // (A|B) & ~(A&B) -> A^B if (match(Op0, m_Or(m_Value(A), m_Value(B))) && match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && @@ -1339,7 +1344,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } { - Value *X = 0; + Value *X = nullptr; bool OpsSwapped = false; // Canonicalize SExt or Not to the LHS if (match(Op1, m_SExt(m_Value())) || @@ -1366,7 +1371,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { std::swap(Op0, Op1); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } /// CollectBSwapParts - Analyze the specified subexpression and see if it is @@ -1498,7 +1503,7 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { if (!ITy || ITy->getBitWidth() % 16 || // ByteMask only allows up to 32-byte values. ITy->getBitWidth() > 32*8) - return 0; // Can only bswap pairs of bytes. Can't do vectors. + return nullptr; // Can only bswap pairs of bytes. Can't do vectors. /// ByteValues - For each byte of the result, we keep track of which value /// defines each byte. @@ -1508,16 +1513,16 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { // Try to find all the pieces corresponding to the bswap. uint32_t ByteMask = ~0U >> (32-ByteValues.size()); if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) - return 0; + return nullptr; // Check to see if all of the bytes come from the same value. Value *V = ByteValues[0]; - if (V == 0) return 0; // Didn't find a byte? Must be zero. + if (!V) return nullptr; // Didn't find a byte? Must be zero. // Check to make sure that all of the bytes come from the same value. for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) - return 0; + return nullptr; Module *M = I.getParent()->getParent()->getParent(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); return CallInst::Create(F, V); @@ -1529,10 +1534,10 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { static Instruction *MatchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D) { // If A is not a select of -1/0, this cannot match. - Value *Cond = 0; + Value *Cond = nullptr; if (!match(A, m_SExt(m_Value(Cond))) || !Cond->getType()->isIntegerTy(1)) - return 0; + return nullptr; // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. if (match(D, m_Not(m_SExt(m_Specific(Cond))))) @@ -1545,7 +1550,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, return SelectInst::Create(Cond, C, D); if (match(B, m_SExt(m_Not(m_Specific(Cond))))) return SelectInst::Create(Cond, C, D); - return 0; + return nullptr; } /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. 
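MatchBSwap, updated above to return nullptr on failure, exists to catch open-coded byte swaps; a sketch of the shape CollectBSwapParts proves correct (the function name is invented):

    #include <cstdint>

    // Every byte of the result is shown to come from the mirrored byte of
    // x, so the whole or-tree collapses to a single @llvm.bswap.i32 call.
    uint32_t open_coded_bswap(uint32_t x) {
      return (x >> 24) | ((x >> 8) & 0x0000FF00u) |
             ((x << 8) & 0x00FF0000u) | (x << 24);
    }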
@@ -1566,8 +1571,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { LAnd->getOpcode() == Instruction::And && RAnd->getOpcode() == Instruction::And) { - Value *Mask = 0; - Value *Masked = 0; + Value *Mask = nullptr; + Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && isKnownToBeAPowerOfTwo(LAnd->getOperand(1)) && isKnownToBeAPowerOfTwo(RAnd->getOperand(1))) { @@ -1608,7 +1613,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (LHS->hasOneUse() || RHS->hasOneUse()) { // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1) // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1) - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) { B = Val; if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1)) @@ -1632,7 +1637,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). - if (LHSCst == 0 || RHSCst == 0) return 0; + if (!LHSCst || !RHSCst) return nullptr; if (LHSCst == RHSCst && LHSCC == RHSCC) { // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) @@ -1653,18 +1658,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // From here on, we only handle: // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. - if (Val != Val2) return 0; + if (Val != Val2) return nullptr; // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) - return 0; + return nullptr; // We can't fold (ugt x, C) | (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) - return 0; + return nullptr; // Ensure that the larger constant is on the RHS. bool ShouldSwap; @@ -1809,7 +1814,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } break; } - return 0; + return nullptr; } /// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of @@ -1837,7 +1842,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { isa<ConstantAggregateZero>(RHS->getOperand(1))) return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); - return 0; + return nullptr; } Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); @@ -1869,7 +1874,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder); } } - return 0; + return nullptr; } /// FoldOrWithConstants - This helper function folds: @@ -1884,27 +1889,30 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C) { ConstantInt *CI1 = dyn_cast<ConstantInt>(C); - if (!CI1) return 0; + if (!CI1) return nullptr; - Value *V1 = 0; - ConstantInt *CI2 = 0; - if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; + Value *V1 = nullptr; + ConstantInt *CI2 = nullptr; + if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return nullptr; APInt Xor = CI1->getValue() ^ CI2->getValue(); - if (!Xor.isAllOnesValue()) return 0; + if (!Xor.isAllOnesValue()) return nullptr; if (V1 == A || V1 == B) { Value *NewOp = Builder->CreateAnd((V1 == A) ? 
B : A, CI1); return BinaryOperator::CreateOr(NewOp, V1); } - return 0; + return nullptr; } Instruction *InstCombiner::visitOr(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyOrInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1918,7 +1926,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return &I; if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - ConstantInt *C1 = 0; Value *X = 0; + ConstantInt *C1 = nullptr; Value *X = nullptr; // (X & C1) | C2 --> (X | C2) & (C1|C2) // iff (C1 & C2) == 0. if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && @@ -1949,8 +1957,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return NV; } - Value *A = 0, *B = 0; - ConstantInt *C1 = 0, *C2 = 0; + Value *A = nullptr, *B = nullptr; + ConstantInt *C1 = nullptr, *C2 = nullptr; // (A | B) | C and A | (B | C) -> bswap if possible. // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. @@ -1981,10 +1989,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // (A & C)|(B & D) - Value *C = 0, *D = 0; + Value *C = nullptr, *D = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { - Value *V1 = 0, *V2 = 0; + Value *V1 = nullptr, *V2 = nullptr; C1 = dyn_cast<ConstantInt>(C); C2 = dyn_cast<ConstantInt>(D); if (C1 && C2) { // (A & C1)|(B & C2) @@ -2028,7 +2036,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. - ConstantInt *C3 = 0, *C4 = 0; + ConstantInt *C3 = nullptr, *C4 = nullptr; if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && (C3->getValue() & ~C1->getValue()) == 0 && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && @@ -2220,7 +2228,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // Since this OR statement hasn't been optimized further yet, we hope // that this transformation will allow the new ORs to be optimized. { - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (Op0->hasOneUse() && Op1->hasOneUse() && match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { @@ -2230,13 +2238,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } Instruction *InstCombiner::visitXor(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyXorInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -2494,5 +2505,5 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } - return Changed ? &I : 0; + return Changed ? 
&I : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0bc3ac7..d4b583b 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -22,6 +22,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumSimplified, "Number of library calls simplified"); /// getPromotedType - Return the specified type promoted as it would be to pass @@ -70,7 +72,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with // load/store. ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2)); - if (MemOpLength == 0) return 0; + if (!MemOpLength) return nullptr; // Source and destination pointer types are always "i8*" for intrinsic. See // if the size is something we can handle with a single primitive load/store. @@ -80,7 +82,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { assert(Size && "0-sized memory transferring should be removed already."); if (Size > 8 || (Size&(Size-1))) - return 0; // If not 1/2/4/8 bytes, exit. + return nullptr; // If not 1/2/4/8 bytes, exit. // Use an integer load+store unless we can find something better. unsigned SrcAddrSp = @@ -99,7 +101,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // dest address will be promotable. See if we can find a better type than the // integer datatype. Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); - MDNode *CopyMD = 0; + MDNode *CopyMD = nullptr; if (StrippedDest != MI->getArgOperand(0)) { Type *SrcETy = cast<PointerType>(StrippedDest->getType()) ->getElementType(); @@ -163,7 +165,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) - return 0; + return nullptr; uint64_t Len = LenC->getLimitedValue(); Alignment = MI->getAlignment(); assert(Len && "0-sized memory setting should be removed already."); @@ -191,7 +193,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return MI; } - return 0; + return nullptr; } /// visitCallInst - CallInst simplification. This mostly only handles folding @@ -233,7 +235,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // No other transformations apply to volatile transfers. 
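SimplifyMemTransfer, patched above, only fires for constant lengths of 1, 2, 4, or 8 bytes (the "Size > 8 || (Size&(Size-1))" check); a minimal sketch of a call it rewrites into a single load/store pair:

    #include <cstring>
    #include <cstdint>

    // A non-volatile 4-byte memcpy with a constant length becomes one
    // i32 load followed by one i32 store.
    void copy_word(uint32_t *dst, const uint32_t *src) {
      std::memcpy(dst, src, sizeof(uint32_t));
    }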
if (MI->isVolatile()) - return 0; + return nullptr; // If we have a memmove and the source operation is a constant global, // then the source and dest pointers can't alias, so we can change this @@ -276,11 +278,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint64_t Size; if (getObjectSize(II->getArgOperand(0), Size, DL, TLI)) return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); - return 0; + return nullptr; } case Intrinsic::bswap: { Value *IIOperand = II->getArgOperand(0); - Value *X = 0; + Value *X = nullptr; // bswap(bswap(x)) -> x if (match(IIOperand, m_BSwap(m_Value(X)))) @@ -320,7 +322,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); + computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned TrailingZeros = KnownOne.countTrailingZeros(); APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); if ((Mask & KnownZero) == Mask) @@ -338,7 +340,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); + computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned LeadingZeros = KnownOne.countLeadingZeros(); APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); if ((Mask & KnownZero) == Mask) @@ -353,14 +355,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; if (LHSKnownNegative || LHSKnownPositive) { APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; if (LHSKnownNegative && RHSKnownNegative) { @@ -447,10 +449,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); // Get the largest possible values for each operand. APInt LHSMax = ~LHSKnownZero; @@ -554,6 +556,79 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + // Constant fold <A x Bi> << Ci. + // FIXME: We don't handle _dq because it's a shift of an i128, but is + // represented in the IR as <2 x i64>. A per element shift is wrong. 
+ case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: { + // Simplify if count is constant. To 0 if >= BitWidth, + // otherwise to shl/lshr. + auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1)); + auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1)); + if (!CDV && !CInt) + break; + ConstantInt *Count; + if (CDV) + Count = cast<ConstantInt>(CDV->getElementAsConstant(0)); + else + Count = CInt; + + auto Vec = II->getArgOperand(0); + auto VT = cast<VectorType>(Vec->getType()); + if (Count->getZExtValue() > + VT->getElementType()->getPrimitiveSizeInBits() - 1) + return ReplaceInstUsesWith( + CI, ConstantAggregateZero::get(Vec->getType())); + + bool isPackedShiftLeft = true; + switch (II->getIntrinsicID()) { + default : break; + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break; + } + + unsigned VWidth = VT->getNumElements(); + // Get a constant vector of the same type as the first operand. + auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); + if (isPackedShiftLeft) + return BinaryOperator::CreateShl(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); + + return BinaryOperator::CreateLShr(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); + } case Intrinsic::x86_sse41_pmovsxbw: case Intrinsic::x86_sse41_pmovsxwd: @@ -576,6 +651,153 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_sse4a_insertqi: { + // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top + // ones undef + // TODO: eventually we should lower this intrinsic to IR + if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) { + if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) { + if (CIWidth->equalsInt(64) && CIStart->isZero()) { + Value *Vec = II->getArgOperand(1); + Value *Undef = UndefValue::get(Vec->getType()); + const uint32_t Mask[] = { 0, 2 }; + return ReplaceInstUsesWith( + CI, + Builder->CreateShuffleVector( + Vec, Undef, ConstantDataVector::get( + II->getContext(), ArrayRef<uint32_t>(Mask)))); + + } else if (auto Source = + dyn_cast<IntrinsicInst>(II->getArgOperand(0))) { + if (Source->hasOneUse() && + Source->getArgOperand(1) == II->getArgOperand(1)) { + // If the source of the insert has only one use and it's another + // insert (and they're both inserting from the same vector), try to + // bundle both together. 
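The new vector-shift handling above replaces an SSE2/AVX2 shift whose count is a known constant with a plain IR shl/lshr by a splatted count, or with a zero vector once the count reaches the element width. A hedged intrinsics-level sketch of an input it matches:

    #include <emmintrin.h>

    // llvm.x86.sse2.pslli.d with count 3 becomes 'shl <4 x i32> %v' by a
    // splat of 3; a count of 32 or more would fold to zeroinitializer.
    __m128i shift_left_by_three(__m128i v) {
      return _mm_slli_epi32(v, 3);
    }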
+ auto CISourceWidth = + dyn_cast<ConstantInt>(Source->getArgOperand(2)); + auto CISourceStart = + dyn_cast<ConstantInt>(Source->getArgOperand(3)); + if (CISourceStart && CISourceWidth) { + unsigned Start = CIStart->getZExtValue(); + unsigned Width = CIWidth->getZExtValue(); + unsigned End = Start + Width; + unsigned SourceStart = CISourceStart->getZExtValue(); + unsigned SourceWidth = CISourceWidth->getZExtValue(); + unsigned SourceEnd = SourceStart + SourceWidth; + unsigned NewStart, NewWidth; + bool ShouldReplace = false; + if (Start <= SourceStart && SourceStart <= End) { + NewStart = Start; + NewWidth = std::max(End, SourceEnd) - NewStart; + ShouldReplace = true; + } else if (SourceStart <= Start && Start <= SourceEnd) { + NewStart = SourceStart; + NewWidth = std::max(SourceEnd, End) - NewStart; + ShouldReplace = true; + } + + if (ShouldReplace) { + Constant *ConstantWidth = ConstantInt::get( + II->getArgOperand(2)->getType(), NewWidth, false); + Constant *ConstantStart = ConstantInt::get( + II->getArgOperand(3)->getType(), NewStart, false); + Value *Args[4] = { Source->getArgOperand(0), + II->getArgOperand(1), ConstantWidth, + ConstantStart }; + Module *M = CI.getParent()->getParent()->getParent(); + Value *F = + Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi); + return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args)); + } + } + } + } + } + } + break; + } + + case Intrinsic::x86_sse41_pblendvb: + case Intrinsic::x86_sse41_blendvps: + case Intrinsic::x86_sse41_blendvpd: + case Intrinsic::x86_avx_blendv_ps_256: + case Intrinsic::x86_avx_blendv_pd_256: + case Intrinsic::x86_avx2_pblendvb: { + // Convert blendv* to vector selects if the mask is constant. + // This optimization is convoluted because the intrinsic is defined as + // getting a vector of floats or doubles for the ps and pd versions. + // FIXME: That should be changed. + Value *Mask = II->getArgOperand(2); + if (auto C = dyn_cast<ConstantDataVector>(Mask)) { + auto Tyi1 = Builder->getInt1Ty(); + auto SelectorType = cast<VectorType>(Mask->getType()); + auto EltTy = SelectorType->getElementType(); + unsigned Size = SelectorType->getNumElements(); + unsigned BitWidth = + EltTy->isFloatTy() + ? 32 + : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth()); + assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) && + "Wrong arguments for variable blend intrinsic"); + SmallVector<Constant *, 32> Selectors; + for (unsigned I = 0; I < Size; ++I) { + // The intrinsics only read the top bit + uint64_t Selector; + if (BitWidth == 8) + Selector = C->getElementAsInteger(I); + else + Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue(); + Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1))); + } + auto NewSelector = ConstantVector::get(Selectors); + return SelectInst::Create(NewSelector, II->getArgOperand(1), + II->getArgOperand(0), "blendv"); + } else { + break; + } + } + + case Intrinsic::x86_avx_vpermilvar_ps: + case Intrinsic::x86_avx_vpermilvar_ps_256: + case Intrinsic::x86_avx_vpermilvar_pd: + case Intrinsic::x86_avx_vpermilvar_pd_256: { + // Convert vpermil* to shufflevector if the mask is constant. + Value *V = II->getArgOperand(1); + unsigned Size = cast<VectorType>(V->getType())->getNumElements(); + assert(Size == 8 || Size == 4 || Size == 2); + uint32_t Indexes[8]; + if (auto C = dyn_cast<ConstantDataVector>(V)) { + // The intrinsics only read one or two bits, clear the rest. 
+ for (unsigned I = 0; I < Size; ++I) { + uint32_t Index = C->getElementAsInteger(I) & 0x3; + if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd || + II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) + Index >>= 1; + Indexes[I] = Index; + } + } else if (isa<ConstantAggregateZero>(V)) { + for (unsigned I = 0; I < Size; ++I) + Indexes[I] = 0; + } else { + break; + } + // The _256 variants are a bit trickier since the mask bits always index + // into the corresponding 128 half. In order to convert to a generic + // shuffle, we have to make that explicit. + if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 || + II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) { + for (unsigned I = Size / 2; I < Size; ++I) + Indexes[I] += Size / 2; + } + auto NewC = + ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size)); + auto V1 = II->getArgOperand(0); + auto V2 = UndefValue::get(V1->getType()); + auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC); + return ReplaceInstUsesWith(CI, Shuffle); + } + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) { @@ -586,8 +808,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { bool AllEltsOk = true; for (unsigned i = 0; i != 16; ++i) { Constant *Elt = Mask->getAggregateElement(i); - if (Elt == 0 || - !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) { + if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) { AllEltsOk = false; break; } @@ -612,7 +833,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. - if (ExtractedElts[Idx] == 0) { + if (!ExtractedElts[Idx]) { ExtractedElts[Idx] = Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, Builder->getInt32(Idx&15)); @@ -655,8 +876,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: { + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: { Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); @@ -667,7 +888,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Check for constant LHS & RHS - in this case we just simplify. bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu || - II->getIntrinsicID() == Intrinsic::arm64_neon_umull); + II->getIntrinsicID() == Intrinsic::aarch64_neon_umull); VectorType *NewVT = cast<VectorType>(II->getType()); if (Constant *CV0 = dyn_cast<Constant>(Arg0)) { if (Constant *CV1 = dyn_cast<Constant>(Arg1)) { @@ -776,14 +997,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, // strcat_chk and strncat_chk. Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) { - if (CI->getCalledFunction() == 0) return 0; + if (!CI->getCalledFunction()) return nullptr; if (Value *With = Simplifier->optimizeCall(CI)) { ++NumSimplified; return CI->use_empty() ? 
CI : ReplaceInstUsesWith(*CI, With); } - return 0; + return nullptr; } static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { @@ -792,35 +1013,35 @@ static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { Value *Underlying = TrampMem->stripPointerCasts(); if (Underlying != TrampMem && (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem)) - return 0; + return nullptr; if (!isa<AllocaInst>(Underlying)) - return 0; + return nullptr; - IntrinsicInst *InitTrampoline = 0; + IntrinsicInst *InitTrampoline = nullptr; for (User *U : TrampMem->users()) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); if (!II) - return 0; + return nullptr; if (II->getIntrinsicID() == Intrinsic::init_trampoline) { if (InitTrampoline) // More than one init_trampoline writes to this value. Give up. - return 0; + return nullptr; InitTrampoline = II; continue; } if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) // Allow any number of calls to adjust.trampoline. continue; - return 0; + return nullptr; } // No call to init.trampoline found. if (!InitTrampoline) - return 0; + return nullptr; // Check that the alloca is being used in the expected way. if (InitTrampoline->getOperand(0) != TrampMem) - return 0; + return nullptr; return InitTrampoline; } @@ -837,9 +1058,9 @@ static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, II->getOperand(0) == TrampMem) return II; if (Inst->mayWriteToMemory()) - return 0; + return nullptr; } - return 0; + return nullptr; } // Given a call to llvm.adjust.trampoline, find and return the corresponding @@ -851,7 +1072,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); if (!AdjustTramp || AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) - return 0; + return nullptr; Value *TrampMem = AdjustTramp->getOperand(0); @@ -859,7 +1080,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { return IT; if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) return IT; - return 0; + return nullptr; } // visitCallSite - Improvements for call and invoke instructions. @@ -874,7 +1095,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // arguments of the call/invoke. Value *Callee = CS.getCalledValue(); if (!isa<Function>(Callee) && transformConstExprCastCall(CS)) - return 0; + return nullptr; if (Function *CalleeF = dyn_cast<Function>(Callee)) // If the call and callee calling conventions don't match, this call must @@ -899,7 +1120,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // change the callee to a null pointer. cast<InvokeInst>(OldCall)->setCalledFunction( Constant::getNullValue(CalleeF->getType())); - return 0; + return nullptr; } if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { @@ -911,7 +1132,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { if (isa<InvokeInst>(CS.getInstruction())) { // Can't remove an invoke because we cannot change the CFG. - return 0; + return nullptr; } // This instruction is not reachable, just remove it. We insert a store to @@ -959,7 +1180,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { if (I) return EraseInstFromFunction(*I); } - return Changed ? CS.getInstruction() : 0; + return Changed ? 
CS.getInstruction() : nullptr; } // transformConstExprCastCall - If the callee is a constexpr cast of a function, @@ -968,7 +1189,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { bool InstCombiner::transformConstExprCastCall(CallSite CS) { Function *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); - if (Callee == 0) + if (!Callee) return false; Instruction *Caller = CS.getInstruction(); const AttributeSet &CallerPAL = CS.getAttributes(); @@ -1044,7 +1265,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1, Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); - if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || DL == 0) + if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL) return false; Type *CurElTy = ActTy->getPointerElementType(); @@ -1235,7 +1456,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // If the call already has the 'nest' attribute somewhere then give up - // otherwise 'nest' would occur twice after splicing in the chain. if (Attrs.hasAttrSomewhere(Attribute::Nest)) - return 0; + return nullptr; assert(Tramp && "transformCallThroughTrampoline called with incorrect CallSite."); @@ -1247,7 +1468,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, const AttributeSet &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; - Type *NestTy = 0; + Type *NestTy = nullptr; AttributeSet NestAttr; // Look for a parameter marked with the 'nest' attribute. diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index c2b862a..356803a 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -19,6 +19,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear /// expression. If so, decompose it, returning some value X, such that Val is /// X*Scale+Offset. @@ -79,7 +81,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { // This requires DataLayout to get the alloca alignment and size information. - if (!DL) return 0; + if (!DL) return nullptr; PointerType *PTy = cast<PointerType>(CI.getType()); @@ -89,26 +91,26 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // Get the type really allocated and the type casted to. Type *AllocElTy = AI.getAllocatedType(); Type *CastElTy = PTy->getElementType(); - if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; + if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr; unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy); unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy); - if (CastElTyAlign < AllocElTyAlign) return 0; + if (CastElTyAlign < AllocElTyAlign) return nullptr; // If the allocation has multiple uses, only promote it if we are strictly // increasing the alignment of the resultant allocation. If we keep it the // same, we open the door to infinite loops of various kinds. 
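PromoteCastOfAllocation, whose early exits now return nullptr, re-types an alloca that is immediately cast and then used only at the new type, provided the alignment and size checks above hold. Whether a given frontend emits exactly that pattern varies, so the following is only a shape sketch with invented names:

    #include <cstring>
    #include <cstdint>

    // If the i8 buffer is only ever accessed as a double, the pass can
    // allocate a double directly instead of [8 x i8] plus a cast.
    double roundtrip(double d) {
      char buf[sizeof(double)];
      std::memcpy(buf, &d, sizeof buf);
      double out;
      std::memcpy(&out, buf, sizeof out);
      return out;
    }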
- if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; + if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr; uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy); uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy); - if (CastElTySize == 0 || AllocElTySize == 0) return 0; + if (CastElTySize == 0 || AllocElTySize == 0) return nullptr; // If the allocation has multiple uses, only promote it if we're not // shrinking the amount of memory being allocated. uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy); uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy); - if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr; // See if we can satisfy the modulus by pulling a scale out of the array // size argument. @@ -120,10 +122,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || - (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; + (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr; unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; - Value *Amt = 0; + Value *Amt = nullptr; if (Scale == 1) { Amt = NumElements; } else { @@ -141,6 +143,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); + New->setUsedWithInAlloca(AI.isUsedWithInAlloca()); // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it @@ -169,7 +172,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, // Otherwise, it must be an instruction. Instruction *I = cast<Instruction>(V); - Instruction *Res = 0; + Instruction *Res = nullptr; unsigned Opc = I->getOpcode(); switch (Opc) { case Instruction::Add: @@ -245,11 +248,11 @@ isEliminableCastPair( Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opcode); Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(SrcTy) : 0; + DL->getIntPtrType(SrcTy) : nullptr; Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(MidTy) : 0; + DL->getIntPtrType(MidTy) : nullptr; Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(DstTy) : 0; + DL->getIntPtrType(DstTy) : nullptr; unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, SrcIntPtrTy, MidIntPtrTy, DstIntPtrTy); @@ -318,7 +321,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { return NV; } - return 0; + return nullptr; } /// CanEvaluateTruncated - Return true if we can evaluate the specified @@ -470,7 +473,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { } // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. 
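The "trunc(lshr (zext A), Cst)" transform named in the comment above and implemented in the lines that follow removes a widen/shift/narrow round trip; a short sketch (invented name):

    #include <cstdint>

    // The zext to i32 and the trunc back to i8 cancel, leaving a plain
    // 8-bit lshr by 3 on the original value.
    uint8_t top_bits(uint8_t a) {
      return static_cast<uint8_t>(static_cast<uint32_t>(a) >> 3);
    }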
- Value *A = 0; ConstantInt *Cst = 0; + Value *A = nullptr; ConstantInt *Cst = nullptr; if (Src->hasOneUse() && match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { // We have three types to worry about here, the type of A, the source of @@ -502,7 +505,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { ConstantExpr::getTrunc(Cst, CI.getType())); } - return 0; + return nullptr; } /// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations @@ -550,7 +553,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); + computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? @@ -598,8 +601,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS); + computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS); + computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS); if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { APInt KnownBits = KnownZeroLHS | KnownOneLHS; @@ -627,7 +630,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, } } - return 0; + return nullptr; } /// CanEvaluateZExtd - Determine if the specified value can be computed in the @@ -758,7 +761,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // If this zero extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa<TruncInst>(CI.user_back())) - return 0; + return nullptr; // If one of the common conversion will work, do it. if (Instruction *Result = commonCastTransforms(CI)) @@ -883,7 +886,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); } - return 0; + return nullptr; } /// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations @@ -918,7 +921,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ unsigned BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Op0, KnownZero, KnownOne); + computeKnownBits(Op0, KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { @@ -967,7 +970,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { } } - return 0; + return nullptr; } /// CanEvaluateSExtd - Return true if we can take the specified value @@ -1039,7 +1042,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // If this sign extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this sext. if (CI.hasOneUse() && isa<TruncInst>(CI.user_back())) - return 0; + return nullptr; if (Instruction *I = commonCastTransforms(CI)) return I; @@ -1107,9 +1110,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // into: // %a = shl i32 %i, 30 // %d = ashr i32 %a, 30 - Value *A = 0; + Value *A = nullptr; // TODO: Eventually this could be subsumed by EvaluateInDifferentType. 
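transformSExtICmp, whose failure paths now return nullptr above, reduces sign-extension idioms to shift arithmetic; a sketch of the single-bit case, with an invented name:

    #include <cstdint>

    // The select of -1/0 canonicalizes to sext(icmp ne (x & 4), 0), which
    // the transform rewrites into a shl/ashr pair that splats bit 2.
    int32_t bit2_to_mask(int32_t x) {
      return (x & 4) ? -1 : 0;
    }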
- ConstantInt *BA = 0, *CA = 0; + ConstantInt *BA = nullptr, *CA = nullptr; if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)), m_ConstantInt(CA))) && BA == CA && A->getType() == CI.getType()) { @@ -1121,7 +1124,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { return BinaryOperator::CreateAShr(A, ShAmtV); } - return 0; + return nullptr; } @@ -1133,7 +1136,7 @@ static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); if (!losesInfo) return ConstantFP::get(CFP->getContext(), F); - return 0; + return nullptr; } /// LookThroughFPExtensions - If this is an fp extension instruction, look @@ -1345,7 +1348,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitFPExt(CastInst &CI) { @@ -1354,7 +1357,7 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) { Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); - if (OpI == 0) + if (!OpI) return commonCastTransforms(FI); // fptoui(uitofp(X)) --> X @@ -1374,7 +1377,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); - if (OpI == 0) + if (!OpI) return commonCastTransforms(FI); // fptosi(sitofp(X)) --> X @@ -1421,7 +1424,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - return 0; + return nullptr; } /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) @@ -1520,7 +1523,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // there yet. if (SrcTy->getElementType()->getPrimitiveSizeInBits() != DestTy->getElementType()->getPrimitiveSizeInBits()) - return 0; + return nullptr; SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); InVal = IC.Builder->CreateBitCast(InVal, SrcTy); @@ -1598,7 +1601,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, ElementIndex = Elements.size() - ElementIndex - 1; // Fail if multiple elements are inserted into this slot. - if (Elements[ElementIndex] != 0) + if (Elements[ElementIndex]) return false; Elements[ElementIndex] = V; @@ -1638,7 +1641,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, if (!V->hasOneUse()) return false; Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return false; + if (!I) return false; switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: @@ -1659,7 +1662,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); - if (CI == 0) return false; + if (!CI) return false; Shift += CI->getZExtValue(); if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; return CollectInsertionElements(I->getOperand(0), Shift, @@ -1687,7 +1690,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { // We need to know the target byte order to perform this optimization. 
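FitsInFPType and LookThroughFPExtensions, updated above, back the visitFPTrunc folds: arithmetic done in a wider type on values that started narrow is performed in the narrow type when no precision can be lost. A hedged sketch (this particular narrowing is exact because double carries more than twice float's precision):

    // fptrunc(fadd(fpext a, fpext b)) is folded to a float fadd; the
    // double-precision detour cannot change the rounded float result.
    float narrow_add(float a, float b) {
      return static_cast<float>(static_cast<double>(a) +
                                static_cast<double>(b));
    }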
- if (!IC.getDataLayout()) return 0; + if (!IC.getDataLayout()) return nullptr; VectorType *DestVecTy = cast<VectorType>(CI.getType()); Value *IntInput = CI.getOperand(0); @@ -1695,14 +1698,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, SmallVector<Value*, 8> Elements(DestVecTy->getNumElements()); if (!CollectInsertionElements(IntInput, 0, Elements, DestVecTy->getElementType(), IC)) - return 0; + return nullptr; // If we succeeded, we know that all of the element are specified by Elements // or are zero if Elements has a null entry. Recast this as a set of // insertions. Value *Result = Constant::getNullValue(CI.getType()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) { - if (Elements[i] == 0) continue; // Unset element. + if (!Elements[i]) continue; // Unset element. Result = IC.Builder->CreateInsertElement(Result, Elements[i], IC.Builder->getInt32(i)); @@ -1716,14 +1719,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// bitcast. The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ // We need to know the target byte order to perform this optimization. - if (!IC.getDataLayout()) return 0; + if (!IC.getDataLayout()) return nullptr; Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); // If this is a bitcast from int to float, check to see if the int is an // extraction from a vector. - Value *VecInput = 0; + Value *VecInput = nullptr; // bitcast(trunc(bitcast(somevector))) if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && isa<VectorType>(VecInput->getType())) { @@ -1747,7 +1750,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } // bitcast(trunc(lshr(bitcast(somevector), cst)) - ConstantInt *ShAmt = 0; + ConstantInt *ShAmt = nullptr; if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), m_ConstantInt(ShAmt)))) && isa<VectorType>(VecInput->getType())) { @@ -1769,7 +1772,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } - return 0; + return nullptr; } Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8c0ad52..02e8bf1 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -24,6 +24,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + static ConstantInt *getOne(Constant *C) { return ConstantInt::get(cast<IntegerType>(C->getType()), 1); } @@ -218,15 +220,15 @@ Instruction *InstCombiner:: FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { // We need TD information to know the pointer size unless this is inbounds. - if (!GEP->isInBounds() && DL == 0) - return 0; + if (!GEP->isInBounds() && !DL) + return nullptr; Constant *Init = GV->getInitializer(); if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) - return 0; + return nullptr; uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); - if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. + if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays. // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. 
@@ -236,7 +238,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, !isa<ConstantInt>(GEP->getOperand(1)) || !cast<ConstantInt>(GEP->getOperand(1))->isZero() || isa<Constant>(GEP->getOperand(2))) - return 0; + return nullptr; // Check that indices after the variable are constants and in-range for the // type they index. Collect the indices. This is typically for arrays of @@ -246,18 +248,18 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Type *EltTy = Init->getType()->getArrayElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (Idx == 0) return 0; // Variable index. + if (!Idx) return nullptr; // Variable index. uint64_t IdxVal = Idx->getZExtValue(); - if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. + if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. if (StructType *STy = dyn_cast<StructType>(EltTy)) EltTy = STy->getElementType(IdxVal); else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) { - if (IdxVal >= ATy->getNumElements()) return 0; + if (IdxVal >= ATy->getNumElements()) return nullptr; EltTy = ATy->getElementType(); } else { - return 0; // Unknown type. + return nullptr; // Unknown type. } LaterIndices.push_back(IdxVal); @@ -296,7 +298,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { Constant *Elt = Init->getAggregateElement(i); - if (Elt == 0) return 0; + if (!Elt) return nullptr; // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) @@ -321,7 +323,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. - if (!isa<ConstantInt>(C)) return 0; + if (!isa<ConstantInt>(C)) return nullptr; // Otherwise, we know if the comparison is true or false for this element, // update our state machines. @@ -375,7 +377,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && FalseRangeEnd == Overdefined) - return 0; + return nullptr; } // Now that we've scanned the entire array, emit our new comparison(s). We @@ -467,7 +469,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 { - Type *Ty = 0; + Type *Ty = nullptr; // Look for an appropriate type: // - The type of Idx if the magic fits @@ -480,7 +482,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, else if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); - if (Ty != 0) { + if (Ty) { Value *V = Builder->CreateIntCast(Idx, Ty, false); V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); @@ -488,7 +490,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, } } - return 0; + return nullptr; } @@ -533,7 +535,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // If there are no variable indices, we must have a constant offset, just // evaluate it the general way. 
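FoldCmpLoadFromIndexedGlobal, updated above, evaluates the compare against every element of the constant initializer and then rebuilds the cheapest equivalent test on the index; a sketch with invented data:

    // Scanning {10, 20, 30, 40} shows the compare holds only at index 2,
    // so the load is deleted and the body becomes 'return i == 2;'.
    static const int table[4] = {10, 20, 30, 40};

    bool is_thirty(unsigned i) {
      return table[i] == 30;
    }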
- if (i == e) return 0; + if (i == e) return nullptr; Value *VariableIdx = GEP->getOperand(i); // Determine the scale factor of the variable element. For example, this is @@ -543,7 +545,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // Verify that there are no other variable indices. If so, emit the hard way. for (++i, ++GTI; i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (!CI) return 0; + if (!CI) return nullptr; // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; @@ -587,7 +589,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // multiple of the variable scale. int64_t NewOffs = Offset / (int64_t)VariableScale; if (Offset != NewOffs*(int64_t)VariableScale) - return 0; + return nullptr; // Okay, we can do this evaluation. Start by converting the index to intptr. if (VariableIdx->getType() != IntPtrTy) @@ -608,7 +610,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be // the maximum signed value for the pointer type. if (ICmpInst::isSigned(Cond)) - return 0; + return nullptr; // Look through bitcasts. if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) @@ -623,7 +625,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); // If not, synthesize the offset the hard way. - if (Offset == 0) + if (!Offset) Offset = EmitGEPOffset(GEPLHS); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, Constant::getNullValue(Offset->getType())); @@ -661,7 +663,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // Otherwise, the base pointers are different and the indices are // different, bail out. - return 0; + return nullptr; } // If one of the GEPs has all zero indices, recurse. @@ -729,7 +731,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); } } - return 0; + return nullptr; } /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". @@ -812,11 +814,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // if it finds it. bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) - return 0; + return nullptr; if (DivRHS->isZero()) - return 0; // The ProdOV computation fails on divide by zero. + return nullptr; // The ProdOV computation fails on divide by zero. if (DivIsSigned && DivRHS->isAllOnesValue()) - return 0; // The overflow computation also screws up here + return nullptr; // The overflow computation also screws up here if (DivRHS->isOne()) { // This eliminates some funny cases with INT_MIN. ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. @@ -850,7 +852,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // overflow variable is set to 0 if it's corresponding bound variable is valid // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. int LoOverflow = 0, HiOverflow = 0; - Constant *LoBound = 0, *HiBound = 0; + Constant *LoBound = nullptr, *HiBound = nullptr; if (!DivIsSigned) { // udiv // e.g. 
X/5 op 3 --> [15, 20) @@ -890,7 +892,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) - HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN + HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN } } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos // e.g. X/-5 op 3 --> [-19, -14) @@ -964,20 +966,20 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, uint32_t TypeBits = CmpRHSV.getBitWidth(); uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); if (ShAmtVal >= TypeBits || ShAmtVal == 0) - return 0; + return nullptr; if (!ICI.isEquality()) { // If we have an unsigned comparison and an ashr, we can't simplify this. // Similarly for signed comparisons with lshr. if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) - return 0; + return nullptr; // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv // by a power of 2. Since we already have logic to simplify these, // transform to div and then simplify the resultant comparison. if (Shr->getOpcode() == Instruction::AShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1)) - return 0; + return nullptr; // Revisit the shift (to delete it). Worklist.Add(Shr); @@ -994,7 +996,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, // If the builder folded the binop, just return it. BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); - if (TheDiv == 0) + if (!TheDiv) return &ICI; // Otherwise, fold this div/compare. @@ -1037,7 +1039,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, Mask, Shr->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); } - return 0; + return nullptr; } @@ -1056,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne); + computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne); // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { @@ -1181,10 +1183,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // access. BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0)); if (Shift && !Shift->isShift()) - Shift = 0; + Shift = nullptr; ConstantInt *ShAmt; - ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0; + ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr; // This seemingly simple opportunity to fold away a shift turns out to // be rather complicated. See PR17827 @@ -1777,7 +1779,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } } - return 0; + return nullptr; } /// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). @@ -1794,7 +1796,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // integer type is the same size as the pointer type. 
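// Illustrative sketch, not part of this patch: the divisor-range rewrite in
// FoldICmpDivCst for the unsigned case mentioned above ("X/5 op 3").
bool quotient_is_3(unsigned x) {
  return x / 5 == 3;        // icmp eq (udiv X, 5), 3
  // With LoBound = 15 and HiBound = 20, the divide disappears and the
  // compare becomes a single range check: x - 15u < 5u, i.e. x in [15, 20)
}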
if (DL && LHSCI->getOpcode() == Instruction::PtrToInt && DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { - Value *RHSOp = 0; + Value *RHSOp = nullptr; if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) { @@ -1812,7 +1814,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Enforce this. if (LHSCI->getOpcode() != Instruction::ZExt && LHSCI->getOpcode() != Instruction::SExt) - return 0; + return nullptr; bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; bool isSignedCmp = ICI.isSigned(); @@ -1821,12 +1823,12 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Not an extension from the same type? RHSCIOp = CI->getOperand(0); if (RHSCIOp->getType() != LHSCIOp->getType()) - return 0; + return nullptr; // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. if (CI->getOpcode() != LHSCI->getOpcode()) - return 0; + return nullptr; // Deal with equality cases early. if (ICI.isEquality()) @@ -1844,7 +1846,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // If we aren't dealing with a constant on the RHS, exit early ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1)); if (!CI) - return 0; + return nullptr; // Compute the constant that would happen if we truncated to SrcTy then // reextended to DestTy. @@ -1873,7 +1875,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // by SimplifyICmpInst, so only deal with the tricky case. if (isSignedCmp || !isSignedExt) - return 0; + return nullptr; // Evaluate the comparison for LT (we invert for GT below). LE and GE cases // should have been folded away previously and not enter in here. @@ -1909,12 +1911,12 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // In order to eliminate the add-with-constant, the compare can be its only // use. Instruction *AddWithCst = cast<Instruction>(I.getOperand(0)); - if (!AddWithCst->hasOneUse()) return 0; + if (!AddWithCst->hasOneUse()) return nullptr; // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. - if (!CI2->getValue().isPowerOf2()) return 0; + if (!CI2->getValue().isPowerOf2()) return nullptr; unsigned NewWidth = CI2->getValue().countTrailingZeros(); - if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; + if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr; // The width of the new add formed is 1 more than the bias. ++NewWidth; @@ -1922,7 +1924,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // Check to see that CI1 is an all-ones value with NewWidth bits. if (CI1->getBitWidth() == NewWidth || CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) - return 0; + return nullptr; // This is only really a signed overflow check if the inputs have been // sign-extended; check for that condition. 
For example, if CI2 is 2^31 and @@ -1930,7 +1932,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; if (IC.ComputeNumSignBits(A) < NeededSignBits || IC.ComputeNumSignBits(B) < NeededSignBits) - return 0; + return nullptr; // In order to replace the original add with a narrower // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant @@ -1946,8 +1948,8 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // original add had another add which was then immediately truncated, we // could still do the transformation. TruncInst *TI = dyn_cast<TruncInst>(U); - if (TI == 0 || - TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; + if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth) + return nullptr; } // If the pattern matches, truncate the inputs to the narrower type and @@ -1983,11 +1985,11 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, InstCombiner &IC) { // Don't bother doing this transformation for pointers, don't do it for // vectors. - if (!isa<IntegerType>(OrigAddV->getType())) return 0; + if (!isa<IntegerType>(OrigAddV->getType())) return nullptr; // If the add is a constant expr, then we don't bother transforming it. Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); - if (OrigAdd == 0) return 0; + if (!OrigAdd) return nullptr; Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); @@ -2008,6 +2010,236 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, return ExtractValueInst::Create(Call, 1, "uadd.overflow"); } +/// \brief Recognize and process idiom involving test for multiplication +/// overflow. +/// +/// The caller has matched a pattern of the form: +/// I = cmp u (mul(zext A, zext B), V +/// The function checks if this is a test for overflow and if so replaces +/// multiplication with call to 'mul.with.overflow' intrinsic. +/// +/// \param I Compare instruction. +/// \param MulVal Result of 'mult' instruction. It is one of the arguments of +/// the compare instruction. Must be of integer type. +/// \param OtherVal The other argument of compare instruction. +/// \returns Instruction which must replace the compare instruction, NULL if no +/// replacement required. +static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal, + Value *OtherVal, InstCombiner &IC) { + assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal); + assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal); + assert(isa<IntegerType>(MulVal->getType())); + Instruction *MulInstr = cast<Instruction>(MulVal); + assert(MulInstr->getOpcode() == Instruction::Mul); + + Instruction *LHS = cast<Instruction>(MulInstr->getOperand(0)), + *RHS = cast<Instruction>(MulInstr->getOperand(1)); + assert(LHS->getOpcode() == Instruction::ZExt); + assert(RHS->getOpcode() == Instruction::ZExt); + Value *A = LHS->getOperand(0), *B = RHS->getOperand(0); + + // Calculate type and width of the result produced by mul.with.overflow. + Type *TyA = A->getType(), *TyB = B->getType(); + unsigned WidthA = TyA->getPrimitiveSizeInBits(), + WidthB = TyB->getPrimitiveSizeInBits(); + unsigned MulWidth; + Type *MulType; + if (WidthB > WidthA) { + MulWidth = WidthB; + MulType = TyB; + } else { + MulWidth = WidthA; + MulType = TyA; + } + + // In order to replace the original mul with a narrower mul.with.overflow, + // all uses must ignore upper bits of the product. 
The number of used low + // bits must be not greater than the width of mul.with.overflow. + if (MulVal->hasNUsesOrMore(2)) + for (User *U : MulVal->users()) { + if (U == &I) + continue; + if (TruncInst *TI = dyn_cast<TruncInst>(U)) { + // Check if truncation ignores bits above MulWidth. + unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits(); + if (TruncWidth > MulWidth) + return nullptr; + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { + // Check if AND ignores bits above MulWidth. + if (BO->getOpcode() != Instruction::And) + return nullptr; + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { + const APInt &CVal = CI->getValue(); + if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth) + return nullptr; + } + } else { + // Other uses prohibit this transformation. + return nullptr; + } + } + + // Recognize patterns + switch (I.getPredicate()) { + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp eq/neq mulval, zext trunc mulval + if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal)) + if (Zext->hasOneUse()) { + Value *ZextArg = Zext->getOperand(0); + if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg)) + if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth) + break; //Recognized + } + + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits. + ConstantInt *CI; + Value *ValToMask; + if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) { + if (ValToMask != MulVal) + return nullptr; + const APInt &CVal = CI->getValue() + 1; + if (CVal.isPowerOf2()) { + unsigned MaskWidth = CVal.logBase2(); + if (MaskWidth == MulWidth) + break; // Recognized + } + } + return nullptr; + + case ICmpInst::ICMP_UGT: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp ugt mulval, max + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { + APInt MaxVal = APInt::getMaxValue(MulWidth); + MaxVal = MaxVal.zext(CI->getBitWidth()); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + case ICmpInst::ICMP_UGE: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp uge mulval, max+1 + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { + APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + case ICmpInst::ICMP_ULE: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp ule mulval, max + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { + APInt MaxVal = APInt::getMaxValue(MulWidth); + MaxVal = MaxVal.zext(CI->getBitWidth()); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + case ICmpInst::ICMP_ULT: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp ule mulval, max + 1 + if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) { + APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + default: + return nullptr; + } + + InstCombiner::BuilderTy *Builder = IC.Builder; + Builder->SetInsertPoint(MulInstr); + Module *M = I.getParent()->getParent()->getParent(); + + // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) + Value *MulA = A, *MulB = B; + if (WidthA < MulWidth) + MulA = Builder->CreateZExt(A, MulType); + if (WidthB < MulWidth) + MulB = Builder->CreateZExt(B, MulType); + Value *F = + 
Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType); + CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul"); + IC.Worklist.Add(MulInstr); + + // If there are uses of mul result other than the comparison, we know that + // they are truncation or binary AND. Change them to use result of + // mul.with.overflow and adjust properly mask/size. + if (MulVal->hasNUsesOrMore(2)) { + Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value"); + for (User *U : MulVal->users()) { + if (U == &I || U == OtherVal) + continue; + if (TruncInst *TI = dyn_cast<TruncInst>(U)) { + if (TI->getType()->getPrimitiveSizeInBits() == MulWidth) + IC.ReplaceInstUsesWith(*TI, Mul); + else + TI->setOperand(0, Mul); + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { + assert(BO->getOpcode() == Instruction::And); + // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) + ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); + APInt ShortMask = CI->getValue().trunc(MulWidth); + Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask); + Instruction *Zext = + cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType())); + IC.Worklist.Add(Zext); + IC.ReplaceInstUsesWith(*BO, Zext); + } else { + llvm_unreachable("Unexpected Binary operation"); + } + IC.Worklist.Add(cast<Instruction>(U)); + } + } + if (isa<Instruction>(OtherVal)) + IC.Worklist.Add(cast<Instruction>(OtherVal)); + + // The original icmp gets replaced with the overflow value, maybe inverted + // depending on predicate. + bool Inverse = false; + switch (I.getPredicate()) { + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_EQ: + Inverse = true; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + if (I.getOperand(0) == MulVal) + break; + Inverse = true; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (I.getOperand(1) == MulVal) + break; + Inverse = true; + break; + default: + llvm_unreachable("Unexpected predicate"); + } + if (Inverse) { + Value *Res = Builder->CreateExtractValue(Call, 1); + return BinaryOperator::CreateNot(Res); + } + + return ExtractValueInst::Create(Call, 1); +} + // DemandedBitsLHSMask - When performing a comparison against a constant, // it is possible that not all the bits in the LHS are demanded. This helper // method computes the mask that IS demanded. @@ -2178,7 +2410,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // See if we are doing a comparison with a constant. if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; // Match the following pattern, which is a common idiom when writing // overflow-safe integer arithmetic function. The source performs an @@ -2293,15 +2525,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { APInt Op0KnownZeroInverted = ~Op0KnownZero; if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { // If the LHS is an AND with the same constant, look through it. - Value *LHS = 0; - ConstantInt *LHSC = 0; + Value *LHS = nullptr; + ConstantInt *LHSC = nullptr; if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) == 0" into "x != 3". 
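// Illustrative sketch, not part of this patch: the source-level idiom that
// ProcessUMulZExtIdiom recognizes, here for the 32-bit ICMP_UGT case. The
// function name is hypothetical.
#include <cstdint>
bool mul_overflows(uint32_t a, uint32_t b) {
  uint64_t wide = (uint64_t)a * (uint64_t)b;  // mul(zext A, zext B)
  return wide > 0xFFFFFFFFull;                // icmp ugt mulval, UINT32_MAX
  // The widened multiply and compare collapse into
  // llvm.umul.with.overflow.i32(a, b) and its overflow bit.
}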
- Value *X = 0; + Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); return new ICmpInst(ICmpInst::ICMP_NE, X, @@ -2330,15 +2562,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { APInt Op0KnownZeroInverted = ~Op0KnownZero; if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { // If the LHS is an AND with the same constant, look through it. - Value *LHS = 0; - ConstantInt *LHSC = 0; + Value *LHS = nullptr; + ConstantInt *LHSC = nullptr; if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) != 0" into "x == 3". - Value *X = 0; + Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); return new ICmpInst(ICmpInst::ICMP_EQ, X, @@ -2470,7 +2702,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin())) if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) - return 0; + return nullptr; // See if we are doing a comparison between a constant and an instruction that // can be folded into the comparison. @@ -2506,7 +2738,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If either operand of the select is a constant, we can fold the // comparison into the select arms, which will cause one to be // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; + Value *Op1 = nullptr, *Op2 = nullptr; if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) @@ -2618,7 +2850,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Analyze the case when either Op0 or Op1 is an add instruction. // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). - Value *A = 0, *B = 0, *C = 0, *D = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; if (BO0 && BO0->getOpcode() == Instruction::Add) A = BO0->getOperand(0), B = BO0->getOperand(1); if (BO1 && BO1->getOpcode() == Instruction::Add) @@ -2713,7 +2945,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Analyze the case when either Op0 or Op1 is a sub instruction. // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). 
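// Illustrative sketch, not part of this patch: the "(1 << x) & 8" folds
// quoted above, in source form. Testing one bit of a shifted 1 pins down
// the shift amount exactly.
bool bit3_not_set(unsigned x) {   // x < 32 assumed, as in well-defined IR
  return ((1u << x) & 8u) == 0;   // icmp eq (and (shl 1, x), 8), 0
  // becomes: x != 3  (and the "!= 0" form becomes x == 3)
}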
- A = 0; B = 0; C = 0; D = 0; + A = nullptr; B = nullptr; C = nullptr; D = nullptr; if (BO0 && BO0->getOpcode() == Instruction::Sub) A = BO0->getOperand(0), B = BO0->getOperand(1); if (BO1 && BO1->getOpcode() == Instruction::Sub) @@ -2739,7 +2971,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BO0->hasOneUse() && BO1->hasOneUse()) return new ICmpInst(Pred, D, B); - BinaryOperator *SRem = NULL; + // icmp (0-X) < cst --> x > -cst + if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) { + Value *X; + if (match(BO0, m_Neg(m_Value(X)))) + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) + if (!RHSC->isMinValue(/*isSigned=*/true)) + return new ICmpInst(I.getSwappedPredicate(), X, + ConstantExpr::getNeg(RHSC)); + } + + BinaryOperator *SRem = nullptr; // icmp (srem X, Y), Y if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1)) @@ -2877,6 +3119,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { (Op0 == A || Op0 == B)) if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) return R; + + // (zext a) * (zext b) --> llvm.umul.with.overflow. + if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { + if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this)) + return R; + } + if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { + if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this)) + return R; + } } if (I.isEquality()) { @@ -2918,7 +3170,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { - Value *X = 0, *Y = 0, *Z = 0; + Value *X = nullptr, *Y = nullptr, *Z = nullptr; if (A == C) { X = B; Y = D; Z = A; @@ -3009,7 +3261,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate()); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } /// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. @@ -3017,13 +3269,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, Constant *RHSC) { - if (!isa<ConstantFP>(RHSC)) return 0; + if (!isa<ConstantFP>(RHSC)) return nullptr; const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF(); // Get the width of the mantissa. We don't want to hack on conversions that // might lose information from the integer, e.g. "i64 -> float" int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); - if (MantissaWidth == -1) return 0; // Unknown. + if (MantissaWidth == -1) return nullptr; // Unknown. // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. @@ -3037,7 +3289,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // If the conversion would lose info, don't hack on this. if ((int)InputSize > MantissaWidth) - return 0; + return nullptr; // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is @@ -3383,5 +3635,5 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), RHSExt->getOperand(0)); - return Changed ? &I : 0; + return Changed ? 
&I : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index dcc8b0f..66d0938 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -20,6 +20,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); @@ -29,10 +31,13 @@ STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); static bool pointsToConstantGlobal(Value *V) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return GV->isConstant(); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::AddrSpaceCast || CE->getOpcode() == Instruction::GetElementPtr) return pointsToConstantGlobal(CE->getOperand(0)); + } return false; } @@ -60,9 +65,9 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, continue; } - if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) { // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) + if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset)) return false; continue; } @@ -112,7 +117,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, // If this is isn't our memcpy/memmove, reject it as something we can't // handle. MemTransferInst *MI = dyn_cast<MemTransferInst>(I); - if (MI == 0) + if (!MI) return false; // If the transfer is using the alloca as a source of the transfer, then @@ -148,10 +153,10 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, static MemTransferInst * isOnlyCopiedFromConstantGlobal(AllocaInst *AI, SmallVectorImpl<Instruction *> &ToDelete) { - MemTransferInst *TheCopy = 0; + MemTransferInst *TheCopy = nullptr; if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) return TheCopy; - return 0; + return nullptr; } Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { @@ -172,7 +177,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); + AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of @@ -295,7 +300,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // If the address spaces don't match, don't eliminate the cast. if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) - return 0; + return nullptr; Type *SrcPTy = SrcTy->getElementType(); @@ -346,7 +351,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } } } - return 0; + return nullptr; } Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { @@ -373,7 +378,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. 
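// Illustrative sketch, not part of this patch: the pattern that
// isOnlyCopiedFromConstantGlobal (earlier in this hunk) looks for. When a
// local buffer is only ever initialized by a memcpy from a constant global
// and then read, the reads can be redirected at the global and the alloca
// deleted. Names here are hypothetical.
#include <cstring>
static const int kInit[4] = {1, 2, 3, 4};
int read_copy(unsigned i) {
  int buf[4];
  std::memcpy(buf, kInit, sizeof buf);  // memcpy from a constant global
  return buf[i & 3u];                   // can become a load of kInit[i & 3]
}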
- if (!LI.isSimple()) return 0; + if (!LI.isSimple()) return nullptr; // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, @@ -455,7 +460,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } } - return 0; + return nullptr; } /// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P @@ -467,12 +472,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); - if (SrcTy == 0) return 0; + if (!SrcTy) return nullptr; Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) - return 0; + return nullptr; /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" /// to its first element. This allows us to handle things like: @@ -506,20 +511,20 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { } if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) - return 0; + return nullptr; // If the pointers point into different address spaces don't do the // transformation. if (SrcTy->getAddressSpace() != cast<PointerType>(CI->getType())->getAddressSpace()) - return 0; + return nullptr; // If the pointers point to values of different sizes don't do the // transformation. if (!IC.getDataLayout() || IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != IC.getDataLayout()->getTypeSizeInBits(DestPTy)) - return 0; + return nullptr; // If the pointers point to pointers to different address spaces don't do the // transformation. It is not safe to introduce an addrspacecast instruction in @@ -527,7 +532,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // cast. if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() && SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace()) - return 0; + return nullptr; // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before @@ -607,7 +612,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Don't hack volatile/atomic stores. // FIXME: Some bits are legal for atomic stores; needs refactoring. - if (!SI.isSimple()) return 0; + if (!SI.isSimple()) return nullptr; // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. @@ -674,7 +679,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (Instruction *U = dyn_cast<Instruction>(Val)) Worklist.Add(U); // Dropped a use. } - return 0; // Do not modify these! + return nullptr; // Do not modify these! } // store undef, Ptr -> noop @@ -703,9 +708,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) - return 0; // xform done! + return nullptr; // xform done! - return 0; + return nullptr; } /// SimplifyStoreAtEndOfBlock - Turn things like: @@ -728,7 +733,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // the other predecessor. pred_iterator PI = pred_begin(DestBB); BasicBlock *P = *PI; - BasicBlock *OtherBB = 0; + BasicBlock *OtherBB = nullptr; if (P != StoreBB) OtherBB = P; @@ -758,7 +763,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // If the other block ends in an unconditional branch, check for the 'if then // else' case. 
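// Illustrative sketch, not part of this patch: the if/then/else shape that
// SimplifyStoreAtEndOfBlock merges. Two stores to the same location in both
// arms become a single store of a phi of the two values in the join block.
void store_either(bool p, int *q, int x, int y) {
  if (p)
    *q = x;
  else
    *q = y;
  // at source level this is equivalent to: *q = p ? x : y;
}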
there is an instruction before the branch. - StoreInst *OtherStore = 0; + StoreInst *OtherStore = nullptr; if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 71fbb6c..9996ebc 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -19,6 +19,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// simplifyValueKnownNonZero - The specific integer value is used in a context /// where it is known to be non-zero. If this allows us to simplify the @@ -27,13 +29,13 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // If V has multiple uses, then we would have to do more analysis to determine // if this is safe. For example, the use could be in dynamically unreached // code. - if (!V->hasOneUse()) return 0; + if (!V->hasOneUse()) return nullptr; bool MadeChange = false; // ((1 << A) >>u B) --> (1 << (A-B)) // Because V cannot be zero, we know that B is less than A. - Value *A = 0, *B = 0, *PowerOf2 = 0; + Value *A = nullptr, *B = nullptr, *PowerOf2 = nullptr; if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))), m_Value(B))) && // The "1" can be any value known to be a power of 2. @@ -68,7 +70,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // If V is a phi node, we can call this on each of its operands. // "select cond, X, 0" can simplify to "X". - return MadeChange ? V : 0; + return MadeChange ? V : nullptr; } @@ -107,7 +109,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) { for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) { Constant *Elt = CV->getElementAsConstant(I); if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2()) - return 0; + return nullptr; Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2())); } @@ -118,6 +120,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyMulInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -139,7 +144,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2)); if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) { - Constant *NewCst = 0; + Constant *NewCst = nullptr; if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2()) // Replace X*(2^C) with X << C, where C is either a scalar or a splat. NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2()); @@ -165,10 +170,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { const APInt & Val = CI->getValue(); const APInt &PosVal = Val.abs(); if (Val.isNegative() && PosVal.isPowerOf2()) { - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (Op0->hasOneUse()) { ConstantInt *C1; - Value *Sub = 0; + Value *Sub = nullptr; if (match(Op0, m_Sub(m_Value(Y), m_Value(X)))) Sub = Builder->CreateSub(X, Y, "suba"); else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1)))) @@ -268,7 +273,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // -2 is "-1 << 1" so it is all bits set except the low one. 
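// Illustrative sketch, not part of this patch: the power-of-two multiply
// strength reduction in visitMul. logBase2 of the scalar or splat constant
// supplies the shift amount.
unsigned times_eight(unsigned x) {
  return x * 8u;   // mul X, 8
  // becomes: x << 3
}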
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - Value *BoolCast = 0, *OtherOp = 0; + Value *BoolCast = nullptr, *OtherOp = nullptr; if (MaskedValueIsZero(Op0, Negative2)) BoolCast = Op0, OtherOp = Op1; else if (MaskedValueIsZero(Op1, Negative2)) @@ -281,7 +286,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } // @@ -384,7 +389,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C, Constant *C0 = dyn_cast<Constant>(Opnd0); Constant *C1 = dyn_cast<Constant>(Opnd1); - BinaryOperator *R = 0; + BinaryOperator *R = nullptr; // (X * C0) * C => X * (C0*C) if (FMulOrDiv->getOpcode() == Instruction::FMul) { @@ -426,6 +431,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (isa<Constant>(Op0)) std::swap(Op0, Op1); @@ -483,7 +491,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Value *M1 = ConstantExpr::getFMul(C1, C); Value *M0 = isNormalFp(cast<Constant>(M1)) ? foldFMulConst(cast<Instruction>(Opnd0), C, &I) : - 0; + nullptr; if (M0 && M1) { if (Swap && FAddSub->getOpcode() == Instruction::FSub) std::swap(M0, M1); @@ -503,8 +511,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // Under unsafe algebra do: // X * log2(0.5*Y) = X*log2(Y) - X if (I.hasUnsafeAlgebra()) { - Value *OpX = NULL; - Value *OpY = NULL; + Value *OpX = nullptr; + Value *OpY = nullptr; IntrinsicInst *Log2; detectLog2OfHalf(Op0, OpY, Log2); if (OpY) { @@ -567,7 +575,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Value *Opnd0_0, *Opnd0_1; if (Opnd0->hasOneUse() && match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) { - Value *Y = 0; + Value *Y = nullptr; if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1) Y = Opnd0_1; else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1) @@ -621,7 +629,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { break; } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } /// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select @@ -682,12 +690,12 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { // If we past the instruction, quit looking for it. if (&*BBI == SI) - SI = 0; + SI = nullptr; if (&*BBI == SelectCond) - SelectCond = 0; + SelectCond = nullptr; // If we ran out of things to eliminate, break out of the loop. 
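// Illustrative sketch, not part of this patch: the identity behind the
// unsafe-algebra log2 fold in visitFMul, legal only under fast math:
// log2(0.5*y) == log2(y) - 1, hence x * log2(0.5*y) == x * log2(y) - x.
#include <cmath>
double scaled_log(double x, double y) {
  return x * std::log2(0.5 * y);
  // with unsafe algebra, becomes: x * std::log2(y) - x
}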
- if (SelectCond == 0 && SI == 0) + if (!SelectCond && !SI) break; } @@ -719,7 +727,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) { if (MultiplyOverflows(RHS, LHSRHS, - I.getOpcode()==Instruction::SDiv)) + I.getOpcode() == Instruction::SDiv)) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), ConstantExpr::getMul(RHS, LHSRHS)); @@ -735,12 +743,31 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } + if (ConstantInt *One = dyn_cast<ConstantInt>(Op0)) { + if (One->isOne() && !I.getType()->isIntegerTy(1)) { + bool isSigned = I.getOpcode() == Instruction::SDiv; + if (isSigned) { + // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the + // result is one, if Op1 is -1 then the result is minus one, otherwise + // it's zero. + Value *Inc = Builder->CreateAdd(Op1, One); + Value *Cmp = Builder->CreateICmpULT( + Inc, ConstantInt::get(I.getType(), 3)); + return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); + } else { + // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the + // result is one, otherwise it's zero. + return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType()); + } + } + } + // See if we can fold away this div instruction. if (SimplifyDemandedInstructionBits(I)) return &I; // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y - Value *X = 0, *Z = 0; + Value *X = nullptr, *Z = nullptr; if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 bool isSigned = I.getOpcode() == Instruction::SDiv; if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) || @@ -748,7 +775,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return BinaryOperator::Create(I.getOpcode(), X, Op1); } - return 0; + return nullptr; } /// dyn_castZExtVal - Checks if V is a zext or constant that can @@ -761,7 +788,7 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) { if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth()) return ConstantExpr::getTrunc(C, Ty); } - return 0; + return nullptr; } namespace { @@ -786,7 +813,7 @@ struct UDivFoldAction { }; UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand) - : FoldAction(FA), OperandToFold(InputOperand), FoldResult(0) {} + : FoldAction(FA), OperandToFold(InputOperand), FoldResult(nullptr) {} UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS) : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {} }; @@ -865,7 +892,8 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) if (size_t LHSIdx = visitUDivOperand(Op0, SI->getOperand(1), I, Actions)) if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions)) { - Actions.push_back(UDivFoldAction((FoldUDivOperandCb)0, Op1, LHSIdx-1)); + Actions.push_back(UDivFoldAction((FoldUDivOperandCb)nullptr, Op1, + LHSIdx-1)); return Actions.size(); } @@ -875,6 +903,9 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyUDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -928,12 +959,15 @@ 
Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { return Inst; } - return 0; + return nullptr; } Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -983,7 +1017,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { } } - return 0; + return nullptr; } /// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special @@ -997,7 +1031,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Constant *Divisor, bool AllowReciprocal) { if (!isa<ConstantFP>(Divisor)) // TODO: handle vectors. - return 0; + return nullptr; const APFloat &FpVal = cast<ConstantFP>(Divisor)->getValueAPF(); APFloat Reciprocal(FpVal.getSemantics()); @@ -1010,7 +1044,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, } if (!Cvt) - return 0; + return nullptr; ConstantFP *R; R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal); @@ -1020,6 +1054,9 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1037,10 +1074,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return R; if (AllowReassociate) { - Constant *C1 = 0; + Constant *C1 = nullptr; Constant *C2 = Op1C; Value *X; - Instruction *Res = 0; + Instruction *Res = nullptr; if (match(Op0, m_FMul(m_Value(X), m_Constant(C1)))) { // (X*C1)/C2 => X * (C1/C2) @@ -1071,12 +1108,12 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return T; } - return 0; + return nullptr; } if (AllowReassociate && isa<Constant>(Op0)) { Constant *C1 = cast<Constant>(Op0), *C2; - Constant *Fold = 0; + Constant *Fold = nullptr; Value *X; bool CreateDiv = true; @@ -1098,13 +1135,13 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { R->setFastMathFlags(I.getFastMathFlags()); return R; } - return 0; + return nullptr; } if (AllowReassociate) { Value *X, *Y; - Value *NewInst = 0; - Instruction *SimpR = 0; + Value *NewInst = nullptr; + Instruction *SimpR = nullptr; if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) { // (X/Y) / Z => X / (Y*Z) @@ -1140,7 +1177,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { } } - return 0; + return nullptr; } /// This function implements the transforms common to both integer remainder @@ -1176,12 +1213,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyURemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1208,12 +1248,15 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { return ReplaceInstUsesWith(I, Ext); } - return 0; + return nullptr; } Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySRemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1250,7 +1293,7 @@ Instruction 
*InstCombiner::visitSRem(BinaryOperator &I) { bool hasMissing = false; for (unsigned i = 0; i != VWidth; ++i) { Constant *Elt = C->getAggregateElement(i); - if (Elt == 0) { + if (!Elt) { hasMissing = true; break; } @@ -1279,12 +1322,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitFRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFRemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1292,5 +1338,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) return &I; - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 0ab657a..46f7b8a 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -18,6 +18,8 @@ #include "llvm/IR/DataLayout.h" using namespace llvm; +#define DEBUG_TYPE "instcombine" + /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] /// and if a/b/c and the add's all have a single use, turn this into a phi /// and a single binop. @@ -48,12 +50,12 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { // types. I->getOperand(0)->getType() != LHSType || I->getOperand(1)->getType() != RHSType) - return 0; + return nullptr; // If they are CmpInst instructions, check their predicates if (CmpInst *CI = dyn_cast<CmpInst>(I)) if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate()) - return 0; + return nullptr; if (isNUW) isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); @@ -63,8 +65,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { isExact = cast<PossiblyExactOperator>(I)->isExact(); // Keep track of which operand needs a phi node. - if (I->getOperand(0) != LHSVal) LHSVal = 0; - if (I->getOperand(1) != RHSVal) RHSVal = 0; + if (I->getOperand(0) != LHSVal) LHSVal = nullptr; + if (I->getOperand(1) != RHSVal) RHSVal = nullptr; } // If both LHS and RHS would need a PHI, don't do this transformation, @@ -72,14 +74,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { // which leads to higher register pressure. This is especially // bad when the PHIs are in the header of a loop. if (!LHSVal && !RHSVal) - return 0; + return nullptr; // Otherwise, this is safe to transform! 
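// Illustrative sketch, not part of this patch: source producing the
// phi [add(a,b), add(a,c)] shape that FoldPHIArgBinOpIntoPHI rewrites.
// Only one operand differs across the incoming values, so a single new
// phi suffices and the add sinks below it.
int add_either(bool p, int a, int b, int c) {
  int r;
  if (p)
    r = a + b;
  else
    r = a + c;
  return r;  // phi(a+b, a+c) -> a + phi(b, c)
}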
Value *InLHS = FirstInst->getOperand(0); Value *InRHS = FirstInst->getOperand(1); - PHINode *NewLHS = 0, *NewRHS = 0; - if (LHSVal == 0) { + PHINode *NewLHS = nullptr, *NewRHS = nullptr; + if (!LHSVal) { NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), FirstInst->getOperand(0)->getName() + ".pn"); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); @@ -87,7 +89,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { LHSVal = NewLHS; } - if (RHSVal == 0) { + if (!RHSVal) { NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); @@ -148,7 +150,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || GEP->getNumOperands() != FirstInst->getNumOperands()) - return 0; + return nullptr; AllInBounds &= GEP->isInBounds(); @@ -170,19 +172,19 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // for struct indices, which must always be constant. if (isa<ConstantInt>(FirstInst->getOperand(op)) || isa<ConstantInt>(GEP->getOperand(op))) - return 0; + return nullptr; if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) - return 0; + return nullptr; // If we already needed a PHI for an earlier operand, and another operand // also requires a PHI, we'd be introducing more PHIs than we're // eliminating, which increases register pressure on entry to the PHI's // block. if (NeededPhi) - return 0; + return nullptr; - FixedOperands[op] = 0; // Needs a PHI. + FixedOperands[op] = nullptr; // Needs a PHI. NeededPhi = true; } } @@ -194,7 +196,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // load up into the predecessors so that we have a load of a gep of an alloca, // which can usually all be folded into the load. if (AllBasePointersAreAllocas) - return 0; + return nullptr; // Otherwise, this is safe to transform. Insert PHI nodes for each operand // that is variable. @@ -288,7 +290,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // FIXME: This is overconservative; this transform is allowed in some cases // for atomic operations. if (FirstLI->isAtomic()) - return 0; + return nullptr; // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. We currently @@ -303,20 +305,20 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // load and the PHI. if (FirstLI->getParent() != PN.getIncomingBlock(0) || !isSafeAndProfitableToSinkLoad(FirstLI)) - return 0; + return nullptr; // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. if (isVolatile && FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); if (!LI || !LI->hasOneUse()) - return 0; + return nullptr; // We can't sink the load if the loaded value could be modified between // the load and the PHI. 
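// Illustrative sketch, not part of this patch: the load-sinking shape
// FoldPHIArgLoadIntoPHI handles. Both incoming values are single-use loads,
// so the load moves below a phi of the two pointers when the checks above
// show that is safe and profitable.
int load_either(bool p, int *a, int *b) {
  int r;
  if (p)
    r = *a;
  else
    r = *b;
  return r;  // phi(load a, load b) -> load phi(a, b)
}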
@@ -324,12 +326,12 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LI->getParent() != PN.getIncomingBlock(i) || LI->getPointerAddressSpace() != LoadAddrSpace || !isSafeAndProfitableToSinkLoad(LI)) - return 0; + return nullptr; // If some of the loads have an alignment specified but not all of them, // we can't do the transformation. if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) - return 0; + return nullptr; LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); @@ -338,7 +340,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // the path through the other successor. if (isVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; } // Okay, they are all the same operation. Create a new PHI node of the @@ -354,7 +356,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); if (NewInVal != InVal) - InVal = 0; + InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } @@ -398,8 +400,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // If all input operands to the phi are the same instruction (e.g. a cast from // the same type or "+42") we can pull the operation through the PHI, reducing // code size and simplifying code. - Constant *ConstantOp = 0; - Type *CastSrcTy = 0; + Constant *ConstantOp = nullptr; + Type *CastSrcTy = nullptr; bool isNUW = false, isNSW = false, isExact = false; if (isa<CastInst>(FirstInst)) { @@ -409,13 +411,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // the code by turning an i32 into an i1293. if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) { if (!ShouldChangeType(PN.getType(), CastSrcTy)) - return 0; + return nullptr; } } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { // Can fold binop, compare or shift here if the RHS is a constant, // otherwise call FoldPHIArgBinOpIntoPHI. ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); - if (ConstantOp == 0) + if (!ConstantOp) return FoldPHIArgBinOpIntoPHI(PN); if (OverflowingBinaryOperator *BO = @@ -426,19 +428,19 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { dyn_cast<PossiblyExactOperator>(FirstInst)) isExact = PEO->isExact(); } else { - return 0; // Cannot fold this operation. + return nullptr; // Cannot fold this operation. } // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); - if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) - return 0; + if (!I || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) + return nullptr; if (CastSrcTy) { if (I->getOperand(0)->getType() != CastSrcTy) - return 0; // Cast operation must match. + return nullptr; // Cast operation must match. 
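// Illustrative sketch, not part of this patch: the all-same-cast case of
// FoldPHIArgOpIntoPHI. Every incoming value is the same cast from the same
// source type, so the cast sinks below a narrower phi.
long long widen_either(bool p, int a, int b) {
  long long r;
  if (p)
    r = (long long)a;
  else
    r = (long long)b;
  return r;  // phi(sext a, sext b) -> sext phi(a, b)
}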
} else if (I->getOperand(1) != ConstantOp) { - return 0; + return nullptr; } if (isNUW) @@ -462,7 +464,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0); if (NewInVal != InVal) - InVal = 0; + InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } @@ -587,10 +589,10 @@ namespace llvm { template<> struct DenseMapInfo<LoweredPHIRecord> { static inline LoweredPHIRecord getEmptyKey() { - return LoweredPHIRecord(0, 0); + return LoweredPHIRecord(nullptr, 0); } static inline LoweredPHIRecord getTombstoneKey() { - return LoweredPHIRecord(0, 1); + return LoweredPHIRecord(nullptr, 1); } static unsigned getHashValue(const LoweredPHIRecord &Val) { return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^ @@ -637,14 +639,14 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // bail out. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); - if (II == 0) continue; + if (!II) continue; if (II->getParent() != PN->getIncomingBlock(i)) continue; // If we have a phi, and if it's directly in the predecessor, then we have // a critical edge where we need to put the truncate. Since we can't // split the edge in instcombine, we have to bail out. - return 0; + return nullptr; } for (User *U : PN->users()) { @@ -667,7 +669,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if (UserI->getOpcode() != Instruction::LShr || !UserI->hasOneUse() || !isa<TruncInst>(UserI->user_back()) || !isa<ConstantInt>(UserI->getOperand(1))) - return 0; + return nullptr; unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue(); PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back())); @@ -705,7 +707,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // If we've already lowered a user like this, reuse the previously lowered // value. - if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { + if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == nullptr) { // Otherwise, Create the new PHI node for this user. EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), @@ -894,5 +896,5 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index e74d912..9a41e4b 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -18,16 +18,18 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, /// returning the kind and providing the out parameter results if we /// successfully match. 
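// Illustrative sketch, not part of this patch: the compare+select idiom
// that MatchSelectPattern classifies. This shape is reported as SPF_SMIN
// with LHS = a and RHS = b.
int smin(int a, int b) {
  return a < b ? a : b;  // select(icmp slt a, b), a, b
}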
static SelectPatternFlavor MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { SelectInst *SI = dyn_cast<SelectInst>(V); - if (SI == 0) return SPF_UNKNOWN; + if (!SI) return SPF_UNKNOWN; ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); - if (ICI == 0) return SPF_UNKNOWN; + if (!ICI) return SPF_UNKNOWN; LHS = ICI->getOperand(0); RHS = ICI->getOperand(1); @@ -129,15 +131,15 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, if (TI->isCast()) { Type *FIOpndTy = FI->getOperand(0)->getType(); if (TI->getOperand(0)->getType() != FIOpndTy) - return 0; + return nullptr; // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() || CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())) - return 0; + return nullptr; } else { - return 0; // unknown unary op. + return nullptr; // unknown unary op. } // Fold this by inserting a select from the input values. @@ -149,7 +151,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, // Only handle binary operators here. if (!isa<BinaryOperator>(TI)) - return 0; + return nullptr; // Figure out if the operations have any operands in common. Value *MatchOp, *OtherOpT, *OtherOpF; @@ -165,7 +167,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, OtherOpF = FI->getOperand(0); MatchIsOpZero = false; } else if (!TI->isCommutative()) { - return 0; + return nullptr; } else if (TI->getOperand(0) == FI->getOperand(1)) { MatchOp = TI->getOperand(0); OtherOpT = TI->getOperand(1); @@ -177,7 +179,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, OtherOpF = FI->getOperand(1); MatchIsOpZero = true; } else { - return 0; + return nullptr; } // If we reach here, they do have operations in common. @@ -282,7 +284,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, } } - return 0; + return nullptr; } /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is @@ -296,7 +298,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, Instruction *I = dyn_cast<Instruction>(V); if (!I) - return 0; + return nullptr; // If this is a binary operator, try to simplify it with the replaced op. if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { @@ -347,7 +349,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, } } - return 0; + return nullptr; } /// foldSelectICmpAndOr - We want to turn: @@ -368,18 +370,18 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) - return 0; + return nullptr; Value *CmpLHS = IC->getOperand(0); Value *CmpRHS = IC->getOperand(1); if (!match(CmpRHS, m_Zero())) - return 0; + return nullptr; Value *X; const APInt *C1; if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1)))) - return 0; + return nullptr; const APInt *C2; bool OrOnTrueVal = false; @@ -388,7 +390,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); if (!OrOnFalseVal && !OrOnTrueVal) - return 0; + return nullptr; Value *V = CmpLHS; Value *Y = OrOnFalseVal ? 
TrueVal : FalseVal; @@ -527,7 +529,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) { if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { - ConstantInt *C1 = NULL, *C2 = NULL; + ConstantInt *C1 = nullptr, *C2 = nullptr; if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) { C1 = dyn_cast<ConstantInt>(TrueVal); C2 = dyn_cast<ConstantInt>(FalseVal); @@ -586,7 +588,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) return ReplaceInstUsesWith(SI, V); - return Changed ? &SI : 0; + return Changed ? &SI : nullptr; } @@ -606,7 +608,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, // If the value is a non-instruction value like a constant or argument, it // can always be mapped. const Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return true; + if (!I) return true; // If V is a PHI node defined in the same block as the condition PHI, we can // map the arguments. @@ -649,11 +651,35 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, return ReplaceInstUsesWith(Outer, C); } - // TODO: MIN(MIN(A, 23), 97) - return 0; + if (SPF1 == SPF2) { + if (ConstantInt *CB = dyn_cast<ConstantInt>(B)) { + if (ConstantInt *CC = dyn_cast<ConstantInt>(C)) { + APInt ACB = CB->getValue(); + APInt ACC = CC->getValue(); + + // MIN(MIN(A, 23), 97) -> MIN(A, 23) + // MAX(MAX(A, 97), 23) -> MAX(A, 97) + if ((SPF1 == SPF_UMIN && ACB.ule(ACC)) || + (SPF1 == SPF_SMIN && ACB.sle(ACC)) || + (SPF1 == SPF_UMAX && ACB.uge(ACC)) || + (SPF1 == SPF_SMAX && ACB.sge(ACC))) + return ReplaceInstUsesWith(Outer, Inner); + + // MIN(MIN(A, 97), 23) -> MIN(A, 23) + // MAX(MAX(A, 23), 97) -> MAX(A, 97) + if ((SPF1 == SPF_UMIN && ACB.ugt(ACC)) || + (SPF1 == SPF_SMIN && ACB.sgt(ACC)) || + (SPF1 == SPF_UMAX && ACB.ult(ACC)) || + (SPF1 == SPF_SMAX && ACB.slt(ACC))) { + Outer.replaceUsesOfWith(Inner, A); + return &Outer; + } + } + } + } + return nullptr; } - /// foldSelectICmpAnd - If one of the constants is zero (we know they can't /// both be) and we have an icmp instruction with zero, and we have an 'and' /// with the non-constant value and a power of two we can turn the select @@ -663,27 +689,27 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) - return 0; + return nullptr; if (!match(IC->getOperand(1), m_Zero())) - return 0; + return nullptr; ConstantInt *AndRHS; Value *LHS = IC->getOperand(0); if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS)))) - return 0; + return nullptr; // If both select arms are non-zero see if we have a select of the form // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic // for 'x ? 2^n : 0' and fix the thing up at the end. - ConstantInt *Offset = 0; + ConstantInt *Offset = nullptr; if (!TrueVal->isZero() && !FalseVal->isZero()) { if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) Offset = FalseVal; else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) Offset = TrueVal; else - return 0; + return nullptr; // Adjust TrueVal and FalseVal to the offset. 
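// Editorial note (worked instance of the FoldSPFofSPF addition above, not
// patch content): with
//   %i = select (icmp slt i32 %a, 23), i32 %a, i32 23   ; SMIN(%a, 23)
//   %o = select (icmp slt i32 %i, 97), i32 %i, i32 97   ; SMIN(%i, 97)
// ACB = 23 sle ACC = 97 holds, so the outer select is replaced wholesale by
// the inner one. With the constants swapped, replaceUsesOfWith retargets the
// inner operand instead, so both orders converge on SMIN(%a, 23).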
TrueVal = ConstantInt::get(Builder->getContext(), @@ -696,7 +722,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, if (!AndRHS->getValue().isPowerOf2() || (!TrueVal->getValue().isPowerOf2() && !FalseVal->getValue().isPowerOf2())) - return 0; + return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. @@ -708,7 +734,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, // or a trunc of the 'and'. The trunc case requires that all of the truncated // bits are zero, we can figure that out by looking at the 'and' mask. if (AndZeros >= ValC->getBitWidth()) - return 0; + return nullptr; Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType()); if (ValZeros > AndZeros) @@ -866,7 +892,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *TI = dyn_cast<Instruction>(TrueVal)) if (Instruction *FI = dyn_cast<Instruction>(FalseVal)) if (TI->hasOneUse() && FI->hasOneUse()) { - Instruction *AddOp = 0, *SubOp = 0; + Instruction *AddOp = nullptr, *SubOp = nullptr; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) if (TI->getOpcode() == FI->getOpcode()) @@ -888,7 +914,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (AddOp) { - Value *OtherAddOp = 0; + Value *OtherAddOp = nullptr; if (SubOp->getOperand(0) == AddOp->getOperand(0)) { OtherAddOp = AddOp->getOperand(1); } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { @@ -969,7 +995,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { if (TrueSI->getCondition() == CondVal) { if (SI.getTrueValue() == TrueSI->getTrueValue()) - return 0; + return nullptr; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } @@ -977,7 +1003,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { if (FalseSI->getCondition() == CondVal) { if (SI.getFalseValue() == FalseSI->getFalseValue()) - return 0; + return nullptr; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } @@ -1005,5 +1031,5 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } } - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 8273dfd..cc6665c 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -19,6 +19,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -33,7 +35,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; - if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1)) + if (Constant *CUI = dyn_cast<Constant>(Op1)) if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; @@ -50,7 +52,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { return &I; } - return 0; + return nullptr; } /// CanEvaluateShifted - See if we can compute the specified value, but shifted @@ -78,7 +80,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // if the needed bits are already zero in the input. 
This allows us to reuse // the value which means that we don't care if the shift has multiple uses. // TODO: Handle opposite shift by exact value. - ConstantInt *CI = 0; + ConstantInt *CI = nullptr; if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) || (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) { if (CI->getZExtValue() == NumBits) { @@ -115,7 +117,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::Shl: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast<ConstantInt>(I->getOperand(1)); - if (CI == 0) return false; + if (!CI) return false; // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). if (isLeftShift) return true; @@ -139,7 +141,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::LShr: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast<ConstantInt>(I->getOperand(1)); - if (CI == 0) return false; + if (!CI) return false; // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) return true; @@ -309,37 +311,38 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, -Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, +Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; + ConstantInt *COp1 = nullptr; + if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(Op1)) + COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()); + else if (ConstantVector *CV = dyn_cast<ConstantVector>(Op1)) + COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()); + else + COp1 = dyn_cast<ConstantInt>(Op1); + + if (!COp1) + return nullptr; // See if we can propagate this shift into the input, this covers the trivial // cast of lshr(shl(x,c1),c2) as well as other more complex cases. if (I.getOpcode() != Instruction::AShr && - CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { + CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this)) { DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); return ReplaceInstUsesWith(I, - GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); + GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this)); } - // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate - // a signed shift. - // - if (Op1->uge(TypeBits)) { - if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); - // ashr i32 X, 32 --> ashr i32 X, 31 - I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); - return &I; - } + assert(!COp1->uge(TypeBits) && + "Shift over the type width should have been removed already"); // ((X*C1) << C2) == (X * (C1 << C2)) if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) @@ -367,7 +370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa<ConstantInt>(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. 
- Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + Constant *ShAmt = ConstantExpr::getZExt(COp1, TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); @@ -384,10 +387,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // shift. We know that it is a logical shift by a constant, so adjust the // mask as appropriate. if (I.getOpcode() == Instruction::Shl) - MaskV <<= Op1->getZExtValue(); + MaskV <<= COp1->getZExtValue(); else { assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); - MaskV = MaskV.lshr(Op1->getZExtValue()); + MaskV = MaskV.lshr(COp1->getZExtValue()); } // shift1 & 0x00FF @@ -421,9 +424,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X + (Y << C)) Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, Op0BO->getOperand(1)->getName()); - uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), - APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + uint32_t Op1Val = COp1->getLimitedValue(TypeBits); + + APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); + Constant *Mask = ConstantInt::get(I.getContext(), Bits); + if (VectorType *VT = dyn_cast<VectorType>(X->getType())) + Mask = ConstantVector::getSplat(VT->getNumElements(), Mask); + return BinaryOperator::CreateAnd(X, Mask); } // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) @@ -453,9 +460,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X + (Y << C)) Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, Op0BO->getOperand(0)->getName()); - uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), - APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + uint32_t Op1Val = COp1->getLimitedValue(TypeBits); + + APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); + Constant *Mask = ConstantInt::get(I.getContext(), Bits); + if (VectorType *VT = dyn_cast<VectorType>(X->getType())) + Mask = ConstantVector::getSplat(VT->getNumElements(), Mask); + return BinaryOperator::CreateAnd(X, Mask); } // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) @@ -523,7 +534,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // Find out if this is a shift of a shift by a constant. BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); if (ShiftOp && !ShiftOp->isShift()) - ShiftOp = 0; + ShiftOp = nullptr; if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { @@ -541,9 +552,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); - uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); + uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits); assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); - if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. + if (ShiftAmt1 == 0) return nullptr; // Will be simplified in the future. 
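// Editorial note (illustration of the shift-of-shift case entered here): for
//   %s1 = shl i32 %x, 3
//   %s2 = shl i32 %s1, 4
// ShiftAmt1 = 3 and ShiftAmt2 = 4, and the same-direction fold below emits
//   %s2 = shl i32 %x, 7
// Opposite-direction pairs (shl then lshr, or vice versa) instead become a
// single shift plus an explicit AND mask over the bits that survive.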
Value *X = ShiftOp->getOperand(0); IntegerType *Ty = cast<IntegerType>(I.getType()); @@ -671,10 +682,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } } } - return 0; + return nullptr; } Instruction *InstCombiner::visitShl(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) @@ -709,10 +723,13 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { match(I.getOperand(1), m_Constant(C2))) return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); - return 0; + return nullptr; } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), DL)) return ReplaceInstUsesWith(I, V); @@ -749,10 +766,13 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), DL)) return ReplaceInstUsesWith(I, V); @@ -805,6 +825,5 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return ReplaceInstUsesWith(I, Op0); - return 0; + return nullptr; } - diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index a47b709..1b42d3d 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #include "InstCombine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" @@ -21,6 +20,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "instcombine" + /// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if there /// are any bits set in the constant that are not demanded. If so, shrink the @@ -57,7 +58,7 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne, 0); - if (V == 0) return false; + if (!V) return false; if (V == &Inst) return true; ReplaceInstUsesWith(Inst, V); return true; @@ -71,7 +72,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, unsigned Depth) { Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth); - if (NewVal == 0) return false; + if (!NewVal) return false; U = NewVal; return true; } @@ -101,7 +102,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth) { - assert(V != 0 && "Null pointer of Value???"); + assert(V != nullptr && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); Type *VTy = V->getType(); @@ -118,33 +119,33 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // We know all of the bits for a constant! 
KnownOne = CI->getValue() & DemandedMask; KnownZero = ~KnownOne & DemandedMask; - return 0; + return nullptr; } if (isa<ConstantPointerNull>(V)) { // We know all of the bits for a constant! KnownOne.clearAllBits(); KnownZero = DemandedMask; - return 0; + return nullptr; } KnownZero.clearAllBits(); KnownOne.clearAllBits(); if (DemandedMask == 0) { // Not demanding any bits from V. if (isa<UndefValue>(V)) - return 0; + return nullptr; return UndefValue::get(VTy); } if (Depth == 6) // Limit search depth. - return 0; + return nullptr; APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); Instruction *I = dyn_cast<Instruction>(V); if (!I) { - ComputeMaskedBits(V, KnownZero, KnownOne, Depth); - return 0; // Only analyze instructions. + computeKnownBits(V, KnownZero, KnownOne, Depth); + return nullptr; // Only analyze instructions. } // If there are multiple uses of this value and we aren't at the root, then @@ -157,8 +158,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // this instruction has a simpler value in that context. if (I->getOpcode() == Instruction::And) { // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this @@ -179,8 +180,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // only bits from X or Y are demanded. // If either the LHS or the RHS are One, the result is One. - ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this @@ -204,8 +205,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // We can simplify (X^Y) -> X or Y in the user's context if we know that // only bits from X or Y are demanded. - ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known zero on one side, return the // other. @@ -216,8 +217,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Compute the KnownZero/KnownOne bits to simplify things downstream. 
- ComputeMaskedBits(I, KnownZero, KnownOne, Depth); - return 0; + computeKnownBits(I, KnownZero, KnownOne, Depth); + return nullptr; } // If this is the root being simplified, allow it to have multiple uses, @@ -229,7 +230,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - ComputeMaskedBits(I, KnownZero, KnownOne, Depth); + computeKnownBits(I, KnownZero, KnownOne, Depth); break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. @@ -409,20 +410,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } case Instruction::BitCast: if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) - return 0; // vector->int or fp->int? + return nullptr; // vector->int or fp->int? if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { if (VectorType *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { if (DstVTy->getNumElements() != SrcVTy->getNumElements()) // Don't touch a bitcast between vectors of different element counts. - return 0; + return nullptr; } else // Don't touch a scalar-to-vector bitcast. - return 0; + return nullptr; } else if (I->getOperand(0)->getType()->isVectorTy()) // Don't touch a vector-to-scalar bitcast. - return 0; + return nullptr; if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) @@ -578,9 +579,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; } - // Otherwise just hand the sub off to ComputeMaskedBits to fill in + // Otherwise just hand the sub off to computeKnownBits to fill in // the known zeros and ones. - ComputeMaskedBits(V, KnownZero, KnownOne, Depth); + computeKnownBits(V, KnownZero, KnownOne, Depth); // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known // zero. @@ -751,7 +752,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // remainder is zero. if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) KnownZero.setBit(KnownZero.getBitWidth() - 1); @@ -810,10 +811,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } case Intrinsic::x86_sse42_crc32_64_64: KnownZero = APInt::getHighBitsSet(64, 32); - return 0; + return nullptr; } } - ComputeMaskedBits(V, KnownZero, KnownOne, Depth); + computeKnownBits(V, KnownZero, KnownOne, Depth); break; } @@ -821,7 +822,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // constant. if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) return Constant::getIntegerValue(VTy, KnownOne); - return 0; + return nullptr; } /// Helper routine of SimplifyDemandedUseBits. It tries to simplify @@ -847,13 +848,13 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue(); const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue(); if (!ShlOp1 || !ShrOp1) - return 0; // Noop. + return nullptr; // Noop. Value *VarX = Shr->getOperand(0); Type *Ty = VarX->getType(); unsigned BitWidth = Ty->getIntegerBitWidth(); if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) - return 0; // Undef. + return nullptr; // Undef. 
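// Editorial note (illustration of what this helper simplifies, not patch
// content): given
//   %a = shl i32 %x, 5
//   %b = lshr i32 %a, 5
// the round trip only clears the top five bits of %x, so if DemandedMask is
// confined to the low 27 bits, %x is returned unchanged; unequal shift
// amounts instead leave one residual shift when the demanded bits permit.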
unsigned ShlAmt = ShlOp1.getZExtValue(); unsigned ShrAmt = ShrOp1.getZExtValue(); @@ -882,7 +883,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, return VarX; if (!Shr->hasOneUse()) - return 0; + return nullptr; BinaryOperator *New; if (ShrAmt < ShlAmt) { @@ -902,7 +903,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, return InsertNewInstWith(New, *Shl); } - return 0; + return nullptr; } /// SimplifyDemandedVectorElts - The specified value produces a vector with @@ -923,7 +924,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (isa<UndefValue>(V)) { // If the entire vector is undefined, just return this info. UndefElts = EltMask; - return 0; + return nullptr; } if (DemandedElts == 0) { // If nothing is demanded, provide undef. @@ -938,7 +939,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // Check if this is identity. If so, return 0 since we are not simplifying // anything. if (DemandedElts.isAllOnesValue()) - return 0; + return nullptr; Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); @@ -952,7 +953,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } Constant *Elt = C->getAggregateElement(i); - if (Elt == 0) return 0; + if (!Elt) return nullptr; if (isa<UndefValue>(Elt)) { // Already undef. Elts.push_back(Undef); @@ -964,12 +965,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If we changed the constant, return it. Constant *NewCV = ConstantVector::get(Elts); - return NewCV != C ? NewCV : 0; + return NewCV != C ? NewCV : nullptr; } // Limit search depth. if (Depth == 10) - return 0; + return nullptr; // If multiple users are using the root value, proceed with // simplification conservatively assuming that all elements @@ -980,14 +981,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // the main instcombine process. if (Depth != 0) // TODO: Just compute the UndefElts information recursively. - return 0; + return nullptr; // Conservatively assume that all elements are needed. DemandedElts = EltMask; } Instruction *I = dyn_cast<Instruction>(V); - if (!I) return 0; // Only analyze instructions. + if (!I) return nullptr; // Only analyze instructions. bool MadeChange = false; APInt UndefElts2(VWidth, 0); @@ -999,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If this is a variable index, we don't know which element it overwrites. // demand exactly the same input as we produce. ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2)); - if (Idx == 0) { + if (!Idx) { // Note that we can't propagate undef elt info, because we don't know // which elt is getting updated. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, @@ -1281,5 +1282,5 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, break; } } - return MadeChange ? I : 0; + return MadeChange ? 
I : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 521dc9c..8c5e202 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -17,6 +17,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// CheapToScalarize - Return true if the value is cheaper to scalarize than it /// is to leave as a vector operation. isConstant indicates whether we're /// extracting one known element. If false we're extracting a variable index. @@ -73,7 +75,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. if (!isa<ConstantInt>(III->getOperand(2))) - return 0; + return nullptr; unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); // If this is an insert to the element we are looking for, return the @@ -97,14 +99,14 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { } // Extract a value from a vector add operation with a constant zero. - Value *Val = 0; Constant *Con = 0; + Value *Val = nullptr; Constant *Con = nullptr; if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { if (Con->getAggregateElement(EltNo)->isNullValue()) return FindScalarElement(Val, EltNo); } // Otherwise, we don't know. - return 0; + return nullptr; } // If we have a PHI node with a vector type that has only 2 uses: feed @@ -113,7 +115,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. if (!PN->hasNUses(2)) - return NULL; + return nullptr; // If so, it's known at this point that one operand is PHI and the other is // an extractelement node. Find the PHI user that is not the extractelement @@ -128,7 +130,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // otherwise return NULL. if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true)) - return NULL; + return nullptr; // Create a scalar PHI node that will replace the vector PHI node // just before the current PHI node. @@ -318,7 +320,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } } } - return 0; + return nullptr; } /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns @@ -440,10 +442,10 @@ static ShuffleOps CollectShuffleElements(Value *V, // Either the extracted from or inserted into vector must be RHSVec, // otherwise we'd end up with a shuffle of three inputs. 
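// Editorial note elaborating the two-input constraint above (illustrative
// IR): while
//   %e = extractelement <4 x float> %b, i32 0
//   %i = insertelement  <4 x float> %a, float %e, i32 1
// can be expressed as one shuffle of %a and %b, a further extract that pulls
// from a third vector %c has nowhere to go: shufflevector takes exactly two
// source operands, so the collection bails out in that case.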
- if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) { + if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { Value *RHS = EI->getOperand(0); ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS); - assert(LR.second == 0 || LR.second == RHS); + assert(LR.second == nullptr || LR.second == RHS); if (LR.first->getType() != RHS->getType()) { // We tried our best, but we can't find anything compatible with RHS @@ -488,6 +490,41 @@ static ShuffleOps CollectShuffleElements(Value *V, return std::make_pair(V, nullptr); } +/// Try to find redundant insertvalue instructions, like the following ones: +/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 +/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 +/// Here the second instruction inserts values at the same indices, as the +/// first one, making the first one redundant. +/// It should be transformed to: +/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 +Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) { + bool IsRedundant = false; + ArrayRef<unsigned int> FirstIndices = I.getIndices(); + + // If there is a chain of insertvalue instructions (each of them except the + // last one has only one use and it's another insertvalue insn from this + // chain), check if any of the 'children' uses the same indices as the first + // instruction. In this case, the first one is redundant. + Value *V = &I; + unsigned Depth = 0; + while (V->hasOneUse() && Depth < 10) { + User *U = V->user_back(); + auto UserInsInst = dyn_cast<InsertValueInst>(U); + if (!UserInsInst || U->getOperand(0) != V) + break; + if (UserInsInst->getIndices() == FirstIndices) { + IsRedundant = true; + break; + } + V = UserInsInst; + Depth++; + } + + if (IsRedundant) + return ReplaceInstUsesWith(I, I.getOperand(0)); + return nullptr; +} + Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Value *VecOp = IE.getOperand(0); Value *ScalarOp = IE.getOperand(1); @@ -523,13 +560,14 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) { SmallVector<Constant*, 16> Mask; - ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0); + ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr); // The proposed shuffle may be trivial, in which case we shouldn't // perform the combine. if (LR.first != &IE && LR.second != &IE) { // We now have a shuffle of LHS, RHS, Mask. - if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType()); + if (LR.second == nullptr) + LR.second = UndefValue::get(LR.first->getType()); return new ShuffleVectorInst(LR.first, LR.second, ConstantVector::get(Mask)); } @@ -546,7 +584,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { return &IE; } - return 0; + return nullptr; } /// Return true if we can evaluate the specified expression tree if the vector @@ -801,6 +839,20 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { llvm_unreachable("failed to reorder elements of vector instruction!"); } +static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask, + bool &isLHSID, bool &isRHSID) { + isLHSID = isRHSID = true; + + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) continue; // Ignore undef values. + // Is this an identity shuffle of the LHS value? + isLHSID &= (Mask[i] == (int)i); + + // Is this an identity shuffle of the RHS value? 
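// Editorial note: with e == 4 the RHS identity mask is <4, 5, 6, 7>, i.e.
// Mask[i] - e == i on every defined lane, mirroring Mask[i] == i for the LHS.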
+ isRHSID &= (Mask[i]-e == i); + } +} + Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -864,16 +916,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (VWidth == LHSWidth) { // Analyze the shuffle, are the LHS or RHS and identity shuffles? - bool isLHSID = true, isRHSID = true; - - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] < 0) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == (int)i); - - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); - } + bool isLHSID, isRHSID; + RecognizeIdentityMask(Mask, isLHSID, isRHSID); // Eliminate identity shuffles. if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); @@ -932,16 +976,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); if (LHSShuffle) if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) - LHSShuffle = NULL; + LHSShuffle = nullptr; if (RHSShuffle) if (!isa<UndefValue>(RHSShuffle->getOperand(1))) - RHSShuffle = NULL; + RHSShuffle = nullptr; if (!LHSShuffle && !RHSShuffle) - return MadeChange ? &SVI : 0; + return MadeChange ? &SVI : nullptr; - Value* LHSOp0 = NULL; - Value* LHSOp1 = NULL; - Value* RHSOp0 = NULL; + Value* LHSOp0 = nullptr; + Value* LHSOp1 = nullptr; + Value* RHSOp0 = nullptr; unsigned LHSOp0Width = 0; unsigned RHSOp0Width = 0; if (LHSShuffle) { @@ -973,11 +1017,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // case 4 if (LHSOp0 == RHSOp0) { newLHS = LHSOp0; - newRHS = NULL; + newRHS = nullptr; } if (newLHS == LHS && newRHS == RHS) - return MadeChange ? &SVI : 0; + return MadeChange ? &SVI : nullptr; SmallVector<int, 16> LHSMask; SmallVector<int, 16> RHSMask; @@ -1037,7 +1081,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // If newRHS == newLHS, we want to remap any references from newRHS to // newLHS so that we can properly identify splats that may occur due to // obfuscation across the two vectors. - if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS) + if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) eltMask += newLHSWidth; } @@ -1063,10 +1107,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); } } - if (newRHS == NULL) + if (!newRHS) newRHS = UndefValue::get(newLHS->getType()); return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); } - return MadeChange ? &SVI : 0; + // If the result mask is an identity, replace uses of this instruction with + // corresponding argument. + bool isLHSID, isRHSID; + RecognizeIdentityMask(newMask, isLHSID, isRHSID); + if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS); + if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS); + + return MadeChange ? 
&SVI : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 8c780b5..1ab7db3 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -10,7 +10,6 @@ #ifndef INSTCOMBINE_WORKLIST_H #define INSTCOMBINE_WORKLIST_H -#define DEBUG_TYPE "instcombine" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Instruction.h" @@ -18,6 +17,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#define DEBUG_TYPE "instcombine" + namespace llvm { /// InstCombineWorklist - This is the worklist management logic for @@ -68,7 +69,7 @@ public: if (It == WorklistMap.end()) return; // Not in worklist. // Don't bother moving everything down, just null out the slot. - Worklist[It->second] = 0; + Worklist[It->second] = nullptr; WorklistMap.erase(It); } @@ -101,4 +102,6 @@ public: } // end namespace llvm. +#undef DEBUG_TYPE + #endif diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 0cab81b..4c36887 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -33,7 +33,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Scalar.h" #include "InstCombine.h" #include "llvm-c/Initialization.h" @@ -58,6 +57,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumCombined , "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); STATISTIC(NumDeadInst , "Number of dead inst eliminated"); @@ -512,7 +513,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { } } - return 0; + return nullptr; } // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction @@ -530,7 +531,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { if (C->getType()->getElementType()->isIntegerTy()) return ConstantExpr::getNeg(C); - return 0; + return nullptr; } // dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the @@ -549,7 +550,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { if (C->getType()->getElementType()->isFloatingPointTy()) return ConstantExpr::getFNeg(C); - return 0; + return nullptr; } static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, @@ -595,13 +596,13 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, // not have a second operand. Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { // Don't modify shared select instructions - if (!SI->hasOneUse()) return 0; + if (!SI->hasOneUse()) return nullptr; Value *TV = SI->getOperand(1); Value *FV = SI->getOperand(2); if (isa<Constant>(TV) || isa<Constant>(FV)) { // Bool selects with constant operands can be folded to logical ops. - if (SI->getType()->isIntegerTy(1)) return 0; + if (SI->getType()->isIntegerTy(1)) return nullptr; // If it's a bitcast involving vectors, make sure it has the same number of // elements on both sides. @@ -610,10 +611,10 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy()); // Verify that either both or neither are vectors. 
- if ((SrcTy == NULL) != (DestTy == NULL)) return 0; + if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr; // If vectors, verify that they have the same number of elements. if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements()) - return 0; + return nullptr; } Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); @@ -622,7 +623,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { return SelectInst::Create(SI->getCondition(), SelectTrueVal, SelectFalseVal); } - return 0; + return nullptr; } @@ -634,7 +635,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *PN = cast<PHINode>(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); if (NumPHIValues == 0) - return 0; + return nullptr; // We normally only transform phis with a single use. However, if a PHI has // multiple uses and they are all the same operation, we can fold *all* of the @@ -644,7 +645,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { for (User *U : PN->users()) { Instruction *UI = cast<Instruction>(U); if (UI != &I && !I.isIdenticalTo(UI)) - return 0; + return nullptr; } // Otherwise, we can replace *all* users with the new PHI we form. } @@ -654,14 +655,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { // remember the BB it is in. If there is more than one or if *it* is a PHI, // bail out. We don't do arbitrary constant expressions here because moving // their computation can be expensive without a cost model. - BasicBlock *NonConstBB = 0; + BasicBlock *NonConstBB = nullptr; for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InVal = PN->getIncomingValue(i); if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal)) continue; - if (isa<PHINode>(InVal)) return 0; // Itself a phi. - if (NonConstBB) return 0; // More than one non-const value. + if (isa<PHINode>(InVal)) return nullptr; // Itself a phi. + if (NonConstBB) return nullptr; // More than one non-const value. NonConstBB = PN->getIncomingBlock(i); @@ -669,22 +670,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { // insert a computation after it without breaking the edge. if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) if (II->getParent() == NonConstBB) - return 0; + return nullptr; // If the incoming non-constant value is in I's block, we will remove one // instruction, but insert another equivalent one, leading to infinite // instcombine. if (NonConstBB == I.getParent()) - return 0; + return nullptr; } // If there is exactly one non-constant value, we can insert a copy of the // operation in that block. However, if this is a critical edge, we would be // inserting the computation one some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. - if (NonConstBB != 0) { + if (NonConstBB != nullptr) { BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator()); - if (!BI || !BI->isUnconditional()) return 0; + if (!BI || !BI->isUnconditional()) return nullptr; } // Okay, we can do the transformation: create the new PHI node. @@ -708,7 +709,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { BasicBlock *ThisBB = PN->getIncomingBlock(i); Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); - Value *InV = 0; + Value *InV = nullptr; // Beware of ConstantExpr: it may eventually evaluate to getNullValue, // even if currently isNullValue gives false. 
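// Editorial note (sketch of the shape FoldOpIntoPhi accepts, not patch
// content): exactly one incoming value is non-constant and its block falls
// through unconditionally:
//   pred1:  br label %merge                    ; incoming constant 7
//   pred2:  %v = ... ; br label %merge         ; the single NonConstBB
//   merge:  %p = phi i32 [ 7, %pred1 ], [ %v, %pred2 ]
//           %r = add i32 %p, 1
// The add is then evaluated per edge, giving [ 8, %pred1 ] and
// [ %v+1, %pred2 ], with the %v+1 computation inserted at the end of pred2.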
Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)); @@ -722,7 +723,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) { Constant *C = cast<Constant>(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = 0; + Value *InV = nullptr; if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else if (isa<ICmpInst>(CI)) @@ -736,7 +737,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } else if (I.getNumOperands() == 2) { Constant *C = cast<Constant>(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = 0; + Value *InV = nullptr; if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::get(I.getOpcode(), InC, C); else @@ -776,11 +777,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, assert(PtrTy->isPtrOrPtrVectorTy()); if (!DL) - return 0; + return nullptr; Type *Ty = PtrTy->getPointerElementType(); if (!Ty->isSized()) - return 0; + return nullptr; // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type @@ -806,7 +807,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, while (Offset) { // Indexing into tail padding between struct/array elements. if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty)) - return 0; + return nullptr; if (StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = DL->getStructLayout(STy); @@ -827,7 +828,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, Ty = AT->getElementType(); } else { // Otherwise, we can't index into the middle of this atomic type, bail. - return 0; + return nullptr; } } @@ -859,7 +860,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // If Scale is zero then it does not divide Val. if (Scale.isMinValue()) - return 0; + return nullptr; // Look through chains of multiplications, searching for a constant that is // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4 @@ -902,7 +903,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder); if (!Remainder.isMinValue()) // Not divisible by Scale. - return 0; + return nullptr; // Replace with the quotient in the parent. Op = ConstantInt::get(CI->getType(), Quotient); NoSignedWrap = true; @@ -915,7 +916,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Multiplication. NoSignedWrap = BO->hasNoSignedWrap(); if (RequireNoSignedWrap && !NoSignedWrap) - return 0; + return nullptr; // There are three cases for multiplication: multiplication by exactly // the scale, multiplication by a constant different to the scale, and @@ -934,7 +935,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Otherwise drill down into the constant. if (!Op->hasOneUse()) - return 0; + return nullptr; Parent = std::make_pair(BO, 1); continue; @@ -943,7 +944,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Multiplication by something else. Drill down into the left-hand side // since that's where the reassociate pass puts the good stuff. 
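// Editorial note (worked instance, not patch content): descaling
// X*(Y*(Z*4)) by Scale = 4 walks down the right-hand operands to the
// constant 4, rewrites it to the quotient 1, and hands back X*(Y*(Z*1));
// the trivial multiply is cleaned up by later combines, and NoSignedWrap is
// reported only when every multiplication on the path carried nsw.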
if (!Op->hasOneUse()) - return 0; + return nullptr; Parent = std::make_pair(BO, 0); continue; @@ -954,7 +955,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Multiplication by a power of 2. NoSignedWrap = BO->hasNoSignedWrap(); if (RequireNoSignedWrap && !NoSignedWrap) - return 0; + return nullptr; Value *LHS = BO->getOperand(0); int32_t Amt = cast<ConstantInt>(BO->getOperand(1))-> @@ -968,7 +969,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { break; } if (Amt < logScale || !Op->hasOneUse()) - return 0; + return nullptr; // Multiplication by more than the scale. Reduce the multiplying amount // by the scale in the parent. @@ -979,7 +980,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } if (!Op->hasOneUse()) - return 0; + return nullptr; if (CastInst *Cast = dyn_cast<CastInst>(Op)) { if (Cast->getOpcode() == Instruction::SExt) { @@ -993,7 +994,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Scale and the multiplication Y * SmallScale should not overflow. if (SmallScale.sext(Scale.getBitWidth()) != Scale) // SmallScale does not sign-extend to Scale. - return 0; + return nullptr; assert(SmallScale.exactLogBase2() == logScale); // Require that Y * SmallScale must not overflow. RequireNoSignedWrap = true; @@ -1012,7 +1013,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // trunc (Y * sext Scale) does not, so nsw flags need to be cleared // from this point up in the expression (see later). if (RequireNoSignedWrap) - return 0; + return nullptr; // Drill down through the cast. unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); @@ -1026,7 +1027,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } // Unsupported expression, bail out. - return 0; + return nullptr; } // We know that we can successfully descale, so from here on we can safely @@ -1082,6 +1083,101 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } while (1); } +/// \brief Creates node of binary operation with the same attributes as the +/// specified one but with other operands. +static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, + InstCombiner::BuilderTy *B) { + Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); + if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) { + if (isa<OverflowingBinaryOperator>(NewBO)) { + NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap()); + NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap()); + } + if (isa<PossiblyExactOperator>(NewBO)) + NewBO->setIsExact(Inst.isExact()); + } + return BORes; +} + +/// \brief Makes transformation of binary operation specific for vector types. +/// \param Inst Binary operator to transform. +/// \return Pointer to node that must replace the original binary operator, or +/// null pointer if no transformation was made. 
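// Editorial note (illustration of the first rule below, not patch content):
//   %l = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> %m
//   %r = shufflevector <4 x i32> %v2, <4 x i32> undef, <4 x i32> %m
//   %s = add <4 x i32> %l, %r
// becomes one add followed by one shuffle:
//   %t = add <4 x i32> %v1, %v2
//   %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> %m
// Both shuffles read a single vector through the same mask, so lane i of %s
// is computed from lane %m[i] of the inputs either way.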
+Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { + if (!Inst.getType()->isVectorTy()) return nullptr; + + unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements(); + Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); + assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth); + assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth); + + // If both arguments of binary operation are shuffles, which use the same + // mask and shuffle within a single vector, it is worthwhile to move the + // shuffle after binary operation: + // Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m) + if (isa<ShuffleVectorInst>(LHS) && isa<ShuffleVectorInst>(RHS)) { + ShuffleVectorInst *LShuf = cast<ShuffleVectorInst>(LHS); + ShuffleVectorInst *RShuf = cast<ShuffleVectorInst>(RHS); + if (isa<UndefValue>(LShuf->getOperand(1)) && + isa<UndefValue>(RShuf->getOperand(1)) && + LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType() && + LShuf->getMask() == RShuf->getMask()) { + Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), + RShuf->getOperand(0), Builder); + Value *Res = Builder->CreateShuffleVector(NewBO, + UndefValue::get(NewBO->getType()), LShuf->getMask()); + return Res; + } + } + + // If one argument is a shuffle within one vector, the other is a constant, + // try moving the shuffle after the binary operation. + ShuffleVectorInst *Shuffle = nullptr; + Constant *C1 = nullptr; + if (isa<ShuffleVectorInst>(LHS)) Shuffle = cast<ShuffleVectorInst>(LHS); + if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS); + if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS); + if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS); + if (Shuffle && C1 && isa<UndefValue>(Shuffle->getOperand(1)) && + Shuffle->getType() == Shuffle->getOperand(0)->getType()) { + SmallVector<int, 16> ShMask = Shuffle->getShuffleMask(); + // Find constant C2 that has property: + // shuffle(C2, ShMask) = C1 + // If such constant does not exist (example: ShMask=<0,0> and C1=<1,2>) + // reorder is not possible. + SmallVector<Constant*, 16> C2M(VWidth, + UndefValue::get(C1->getType()->getScalarType())); + bool MayChange = true; + for (unsigned I = 0; I < VWidth; ++I) { + if (ShMask[I] >= 0) { + assert(ShMask[I] < (int)VWidth); + if (!isa<UndefValue>(C2M[ShMask[I]])) { + MayChange = false; + break; + } + C2M[ShMask[I]] = C1->getAggregateElement(I); + } + } + if (MayChange) { + Constant *C2 = ConstantVector::get(C2M); + Value *NewLHS, *NewRHS; + if (isa<Constant>(LHS)) { + NewLHS = C2; + NewRHS = Shuffle->getOperand(0); + } else { + NewLHS = Shuffle->getOperand(0); + NewRHS = C2; + } + Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); + Value *Res = Builder->CreateShuffleVector(NewBO, + UndefValue::get(Inst.getType()), Shuffle->getMask()); + return Res; + } + } + + return nullptr; +} + Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); @@ -1130,7 +1226,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src)) - return 0; + return nullptr; // Note that if our source is a gep chain itself then we wait for that // chain to be resolved before we perform this transformation. 
This @@ -1138,7 +1234,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (GEPOperator *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0))) if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) - return 0; // Wait until our source is folded to completion. + return nullptr; // Wait until our source is folded to completion. SmallVector<Value*, 8> Indices; @@ -1166,7 +1262,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // intptr_t). Just avoid transforming this until the input has been // normalized. if (SO1->getType() != GO1->getType()) - return 0; + return nullptr; Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); } @@ -1216,7 +1312,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // We do not handle pointer-vector geps here. if (!StrippedPtrTy) - return 0; + return nullptr; if (StrippedPtr != PtrOp) { bool HasZeroPointerIndex = false; @@ -1241,7 +1337,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst *Res = GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); - return Res; + if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) + return Res; + // Insert Res, and create an addrspacecast. + // e.g., + // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ... + // -> + // %0 = GEP i8 addrspace(1)* X, ... + // addrspacecast i8 addrspace(1)* %0 to i8* + return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType()); } if (ArrayType *XATy = @@ -1253,8 +1357,24 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // to an array of the same type as the destination pointer // array. Because the array type is never stepped over (there // is a leading zero) we can fold the cast into this GEP. - GEP.setOperand(0, StrippedPtr); - return &GEP; + if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) { + GEP.setOperand(0, StrippedPtr); + return &GEP; + } + // Cannot replace the base pointer directly because StrippedPtr's + // address space is different. Instead, create a new GEP followed by + // an addrspacecast. + // e.g., + // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*), + // i32 0, ... + // -> + // %0 = GEP [10 x i8] addrspace(1)* X, ... + // addrspacecast i8 addrspace(1)* %0 to i8* + SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); + Value *NewGEP = GEP.isInBounds() ? + Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : + Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); + return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } } @@ -1360,7 +1480,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } if (!DL) - return 0; + return nullptr; /// See if we can simplify: /// X = bitcast A* to B* @@ -1412,7 +1532,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } - return 0; + return nullptr; } static bool @@ -1527,7 +1647,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { } return EraseInstFromFunction(MI); } - return 0; + return nullptr; } /// \brief Move the call to free before a NULL test. @@ -1556,30 +1676,30 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) { // would duplicate the call to free in each predecessor and it may // not be profitable even for code size. if (!PredBB) - return 0; + return nullptr; // Validate constraint #2: Does this block contains only the call to // free and an unconditional branch? 
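// Editorial note (the overall shape this helper matches, illustratively):
//   if (p != nullptr)                          free(p);
//     free(p);                    -->          ; test now dead
// free(null) is a no-op, so hoisting the call above the test is safe and
// lets later passes delete the now-pointless branch.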
// FIXME: We could check if we can speculate everything in the // predecessor block if (FreeInstrBB->size() != 2) - return 0; + return nullptr; BasicBlock *SuccBB; if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB))) - return 0; + return nullptr; // Validate the rest of constraint #1 by matching on the pred branch. TerminatorInst *TI = PredBB->getTerminator(); BasicBlock *TrueBB, *FalseBB; ICmpInst::Predicate Pred; if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB))) - return 0; + return nullptr; if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) - return 0; + return nullptr; // Validate constraint #3: Ensure the null case just falls through. if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) - return 0; + return nullptr; assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && "Broken CFG: missing edge from predecessor to successor"); @@ -1614,14 +1734,14 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { if (Instruction *I = tryToMoveFreeBeforeNullTest(FI)) return I; - return 0; + return nullptr; } Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Change br (not X), label True, label False to: br X, label False, True - Value *X = 0; + Value *X = nullptr; BasicBlock *TrueDest; BasicBlock *FalseDest; if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) && @@ -1664,7 +1784,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { return &BI; } - return 0; + return nullptr; } Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { @@ -1688,7 +1808,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { return &SI; } } - return 0; + return nullptr; } Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { @@ -1705,7 +1825,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // first index return ExtractValueInst::Create(C2, EV.getIndices().slice(1)); } - return 0; // Can't handle other constants + return nullptr; // Can't handle other constants } if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { @@ -1838,7 +1958,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // and if again single-use then via load (gep (gep)) to load (gep). // However, double extracts from e.g. function arguments or return values // aren't handled yet. - return 0; + return nullptr; } enum Personality_Type { @@ -2177,7 +2297,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { return &LI; } - return 0; + return nullptr; } @@ -2270,7 +2390,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e; ++i) { ConstantExpr *CE = dyn_cast<ConstantExpr>(i); - if (CE == 0) continue; + if (CE == nullptr) continue; Constant*& FoldRes = FoldedConstants[CE]; if (!FoldRes) @@ -2374,7 +2494,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { while (!Worklist.isEmpty()) { Instruction *I = Worklist.RemoveOne(); - if (I == 0) continue; // skip null values. + if (I == nullptr) continue; // skip null values. // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I, TLI)) { @@ -2516,7 +2636,7 @@ bool InstCombiner::runOnFunction(Function &F) { return false; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); // Minimizing size? 
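At source level, the shape tryToMoveFreeBeforeNullTest matches and the rewrite it performs look like this; the hoist is safe because free(NULL) is defined to be a no-op:

#include <cstdlib>

// Before: the null test guards the call, producing a diamond in the IR.
static void release_guarded(void *P) {
  if (P != nullptr)   // br (icmp ne P, null), FreeBB, SuccBB
    std::free(P);     // FreeBB: call free(P); br SuccBB
}                     // SuccBB: fallthrough for the null case

// After: free(NULL) is a no-op per the C standard, so the call can be
// hoisted above the test and the branch erased entirely.
static void release_unguarded(void *P) {
  std::free(P);
}

int main() {
  release_guarded(nullptr);
  release_unguarded(nullptr);   // both are well-defined on null
  return 0;
}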
MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -2543,7 +2663,7 @@ bool InstCombiner::runOnFunction(Function &F) { while (DoOneIteration(F, Iteration++)) EverMadeChange = true; - Builder = 0; + Builder = nullptr; return EverMadeChange; } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index bbfa4c5..95fca75 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -13,8 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asan" - #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -53,8 +51,11 @@ using namespace llvm; +#define DEBUG_TYPE "asan" + static const uint64_t kDefaultShadowScale = 3; static const uint64_t kDefaultShadowOffset32 = 1ULL << 29; +static const uint64_t kIOSShadowOffset32 = 1ULL << 30; static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G. static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41; @@ -79,6 +80,7 @@ static const char *const kAsanUnregisterGlobalsName = static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init_v3"; +static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init"; static const char *const kAsanCovName = "__sanitizer_cov"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; static const char *const kAsanPtrSub = "__sanitizer_ptr_sub"; @@ -135,10 +137,12 @@ static cl::opt<bool> ClGlobals("asan-globals", static cl::opt<int> ClCoverage("asan-coverage", cl::desc("ASan coverage. 0: none, 1: entry block, 2: all blocks"), cl::Hidden, cl::init(false)); +static cl::opt<int> ClCoverageBlockThreshold("asan-coverage-block-threshold", + cl::desc("Add coverage instrumentation only to the entry block if there " + "are more than this number of blocks."), + cl::Hidden, cl::init(1500)); static cl::opt<bool> ClInitializers("asan-initialization-order", cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); -static cl::opt<bool> ClMemIntrin("asan-memintrin", - cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair", cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden, cl::init(false)); @@ -148,6 +152,16 @@ static cl::opt<unsigned> ClRealignStack("asan-realign-stack", static cl::opt<std::string> ClBlacklistFile("asan-blacklist", cl::desc("File containing the list of objects to ignore " "during instrumentation"), cl::Hidden); +static cl::opt<int> ClInstrumentationWithCallsThreshold( + "asan-instrumentation-with-call-threshold", + cl::desc("If the function being instrumented contains more than " + "this number of memory accesses, use callbacks instead of " + "inline checks (-1 means never use callbacks)."), + cl::Hidden, cl::init(7000)); +static cl::opt<std::string> ClMemoryAccessCallbackPrefix( + "asan-memory-access-callback-prefix", + cl::desc("Prefix for memory access callbacks"), cl::Hidden, + cl::init("__asan_")); // This is an experimental feature that will allow to choose between // instrumented and non-instrumented code at link-time. 
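The two new thresholds above are heuristics consulted later in this patch; a standalone sketch of the decisions they drive (the counts are stand-ins for what the pass gathers per function):

#include <cstdio>

// -1 disables callbacks, per the option text; the block threshold demotes
// whole-function coverage to entry-block-only coverage for very large
// functions.
static bool useCallbacks(long NumMemAccesses, int CallThreshold) {
  return CallThreshold >= 0 && NumMemAccesses > CallThreshold;
}

static bool entryBlockCoverageOnly(int CoverageLevel, unsigned NumBlocks,
                                   unsigned BlockThreshold) {
  return CoverageLevel == 1 || NumBlocks > BlockThreshold;
}

int main() {
  std::printf("%d %d\n", useCallbacks(9000, 7000), useCallbacks(9000, -1));
  std::printf("%d %d\n", entryBlockCoverageOnly(2, 2000, 1500),
              entryBlockCoverageOnly(2, 100, 1500));
  return 0;
}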
@@ -238,7 +252,7 @@ struct ShadowMapping { static ShadowMapping getShadowMapping(const Module &M, int LongSize) { llvm::Triple TargetTriple(M.getTargetTriple()); bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android; - // bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX; + bool IsIOS = TargetTriple.getOS() == llvm::Triple::IOS; bool IsFreeBSD = TargetTriple.getOS() == llvm::Triple::FreeBSD; bool IsLinux = TargetTriple.getOS() == llvm::Triple::Linux; bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 || @@ -256,6 +270,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize) { Mapping.Offset = kMIPS32_ShadowOffset32; else if (IsFreeBSD) Mapping.Offset = kFreeBSD_ShadowOffset32; + else if (IsIOS) + Mapping.Offset = kIOSShadowOffset32; else Mapping.Offset = kDefaultShadowOffset32; } else { // LongSize == 64 @@ -303,20 +319,17 @@ struct AddressSanitizer : public FunctionPass { const char *getPassName() const override { return "AddressSanitizerFunctionPass"; } - void instrumentMop(Instruction *I); + void instrumentMop(Instruction *I, bool UseCalls); void instrumentPointerComparisonOrSubtraction(Instruction *I); void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize, bool IsWrite, - Value *SizeArgument); + Value *SizeArgument, bool UseCalls); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument); - bool instrumentMemIntrinsic(MemIntrinsic *MI); - void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr, - Value *Size, - Instruction *InsertBefore, bool IsWrite); + void instrumentMemIntrinsic(MemIntrinsic *MI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F) override; bool maybeInsertAsanInitAtFunctionEntry(Function &F); @@ -349,8 +362,11 @@ struct AddressSanitizer : public FunctionPass { std::unique_ptr<SpecialCaseList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; + Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes]; // This array is indexed by AccessIsWrite. - Function *AsanErrorCallbackSized[2]; + Function *AsanErrorCallbackSized[2], + *AsanMemoryAccessCallbackSized[2]; + Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; @@ -393,6 +409,7 @@ class AddressSanitizerModule : public ModulePass { Function *AsanUnpoisonGlobals; Function *AsanRegisterGlobals; Function *AsanUnregisterGlobals; + Function *AsanCovModuleInit; }; // Stack poisoning does not play well with exception handling. @@ -443,11 +460,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { bool runOnFunction() { if (!ClStack) return false; // Collect alloca, ret, lifetime instructions etc. 
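The mapping selected above is consumed as Shadow(Mem) = (Mem >> Scale) + Offset, with Scale defaulting to kDefaultShadowScale (3). In miniature:

#include <cstdint>
#include <cstdio>

// Address-to-shadow translation used throughout the pass; Offset is one
// of the platform constants chosen in getShadowMapping().
static uint64_t memToShadow(uint64_t Mem, unsigned Scale, uint64_t Offset) {
  return (Mem >> Scale) + Offset;
}

int main() {
  const uint64_t kIOSShadowOffset32 = 1ULL << 30;  // constant from the hunk
  std::printf("shadow of 0x2000 on iOS/32: 0x%llx\n",
              (unsigned long long)memToShadow(0x2000, 3, kIOSShadowOffset32));
  return 0;
}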
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { - BasicBlock *BB = *DI; + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); - } + if (AllocaVec.empty()) return false; initializeCallbacks(*F.getParent()); @@ -590,72 +605,54 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset)); } -void AddressSanitizer::instrumentMemIntrinsicParam( - Instruction *OrigIns, - Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) { - IRBuilder<> IRB(InsertBefore); - if (Size->getType() != IntptrTy) - Size = IRB.CreateIntCast(Size, IntptrTy, false); - // Check the first byte. - instrumentAddress(OrigIns, InsertBefore, Addr, 8, IsWrite, Size); - // Check the last byte. - IRB.SetInsertPoint(InsertBefore); - Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1)); - Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); - Value *AddrLast = IRB.CreateAdd(AddrLong, SizeMinusOne); - instrumentAddress(OrigIns, InsertBefore, AddrLast, 8, IsWrite, Size); -} - // Instrument memset/memmove/memcpy -bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { - Value *Dst = MI->getDest(); - MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI); - Value *Src = MemTran ? MemTran->getSource() : 0; - Value *Length = MI->getLength(); - - Constant *ConstLength = dyn_cast<Constant>(Length); - Instruction *InsertBefore = MI; - if (ConstLength) { - if (ConstLength->isNullValue()) return false; - } else { - // The size is not a constant so it could be zero -- check at run-time. - IRBuilder<> IRB(InsertBefore); - - Value *Cmp = IRB.CreateICmpNE(Length, - Constant::getNullValue(Length->getType())); - InsertBefore = SplitBlockAndInsertIfThen(Cmp, InsertBefore, false); +void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { + IRBuilder<> IRB(MI); + if (isa<MemTransferInst>(MI)) { + IRB.CreateCall3( + isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy, + IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), + IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()), + IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)); + } else if (isa<MemSetInst>(MI)) { + IRB.CreateCall3( + AsanMemset, + IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()), + IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), + IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)); } - - instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true); - if (Src) - instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false); - return true; + MI->eraseFromParent(); } // If I is an interesting memory access, return the PointerOperand -// and set IsWrite. Otherwise return NULL. -static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) { +// and set IsWrite/Alignment. Otherwise return NULL. 
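The rewritten instrumentMemIntrinsic no longer checks the first and last byte inline; it hands the whole range to runtime wrappers. A source-level model of what such a wrapper does, with check_range as a placeholder for the real shadow lookup:

#include <cstdio>
#include <cstring>

// Placeholder for the shadow-memory validation performed by the real
// __asan_memcpy before the copy happens.
static void check_range(const void *P, size_t N, bool IsWrite) {
  std::printf("%s check: %zu bytes at %p\n", IsWrite ? "write" : "read", N,
              const_cast<void *>(P));
}

static void *asan_memcpy_model(void *Dst, const void *Src, size_t N) {
  check_range(Src, N, /*IsWrite=*/false);
  check_range(Dst, N, /*IsWrite=*/true);
  return std::memcpy(Dst, Src, N);
}

int main() {
  char A[8] = "abcdefg", B[8];
  asan_memcpy_model(B, A, sizeof(A));
  return 0;
}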
+static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, + unsigned *Alignment) { if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (!ClInstrumentReads) return NULL; + if (!ClInstrumentReads) return nullptr; *IsWrite = false; + *Alignment = LI->getAlignment(); return LI->getPointerOperand(); } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (!ClInstrumentWrites) return NULL; + if (!ClInstrumentWrites) return nullptr; *IsWrite = true; + *Alignment = SI->getAlignment(); return SI->getPointerOperand(); } if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { - if (!ClInstrumentAtomics) return NULL; + if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; + *Alignment = 0; return RMW->getPointerOperand(); } if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { - if (!ClInstrumentAtomics) return NULL; + if (!ClInstrumentAtomics) return nullptr; *IsWrite = true; + *Alignment = 0; return XCHG->getPointerOperand(); } - return NULL; + return nullptr; } static bool isPointerOperand(Value *V) { @@ -700,9 +697,10 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) { IRB.CreateCall2(F, Param[0], Param[1]); } -void AddressSanitizer::instrumentMop(Instruction *I) { +void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { bool IsWrite = false; - Value *Addr = isInterestingMemoryAccess(I, &IsWrite); + unsigned Alignment = 0; + Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment); assert(Addr); if (ClOpt && ClOptGlobals) { if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { @@ -737,22 +735,29 @@ void AddressSanitizer::instrumentMop(Instruction *I) { else NumInstrumentedReads++; - // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check. - if (TypeSize == 8 || TypeSize == 16 || - TypeSize == 32 || TypeSize == 64 || TypeSize == 128) - return instrumentAddress(I, I, Addr, TypeSize, IsWrite, 0); - // Instrument unusual size (but still multiple of 8). + unsigned Granularity = 1 << Mapping.Scale; + // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check + // if the data is properly aligned. + if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || + TypeSize == 128) && + (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8)) + return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls); + // Instrument unusual size or unusual alignment. // We can not do it with a single check, so we do 1-byte check for the first // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able // to report the actual access size. 
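The new alignment test above decides whether one shadow check can cover the access or the first and last byte must be checked separately. The predicate, lifted into a standalone sketch:

#include <cstdio>

// A power-of-two access of 1..16 bytes gets a single shadow check only
// when it cannot straddle two shadow granules; Alignment == 0 means the
// natural (ABI) alignment, which is always sufficient.
static bool singleCheckSuffices(unsigned TypeSizeBits, unsigned Alignment,
                                unsigned Granularity) {
  bool PowerOfTwoSize =
      TypeSizeBits == 8 || TypeSizeBits == 16 || TypeSizeBits == 32 ||
      TypeSizeBits == 64 || TypeSizeBits == 128;
  return PowerOfTwoSize && (Alignment >= Granularity || Alignment == 0 ||
                            Alignment >= TypeSizeBits / 8);
}

int main() {
  // 4-byte load, 4-byte aligned, 8-byte granules: one check is enough.
  std::printf("%d\n", singleCheckSuffices(32, 4, 8));
  // 8-byte load with 2-byte alignment may straddle a granule boundary.
  std::printf("%d\n", singleCheckSuffices(64, 2, 8));
  return 0;
}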
IRBuilder<> IRB(I); - Value *LastByte = IRB.CreateIntToPtr( - IRB.CreateAdd(IRB.CreatePointerCast(Addr, IntptrTy), - ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), - OrigPtrTy); Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8); - instrumentAddress(I, I, Addr, 8, IsWrite, Size); - instrumentAddress(I, I, LastByte, 8, IsWrite, Size); + Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + if (UseCalls) { + IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite], AddrLong, Size); + } else { + Value *LastByte = IRB.CreateIntToPtr( + IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), + OrigPtrTy); + instrumentAddress(I, I, Addr, 8, IsWrite, Size, false); + instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false); + } } // Validate the result of Module::getOrInsertFunction called for an interface @@ -800,11 +805,18 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, } void AddressSanitizer::instrumentAddress(Instruction *OrigIns, - Instruction *InsertBefore, - Value *Addr, uint32_t TypeSize, - bool IsWrite, Value *SizeArgument) { + Instruction *InsertBefore, Value *Addr, + uint32_t TypeSize, bool IsWrite, + Value *SizeArgument, bool UseCalls) { IRBuilder<> IRB(InsertBefore); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); + + if (UseCalls) { + IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][AccessSizeIndex], + AddrLong); + return; + } Type *ShadowTy = IntegerType::get( *C, std::max(8U, TypeSize >> Mapping.Scale)); @@ -815,9 +827,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); - size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); size_t Granularity = 1 << Mapping.Scale; - TerminatorInst *CrashTerm = 0; + TerminatorInst *CrashTerm = nullptr; if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { TerminatorInst *CheckTerm = @@ -842,8 +853,29 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, void AddressSanitizerModule::createInitializerPoisonCalls( Module &M, GlobalValue *ModuleName) { - // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. - Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); + // We do all of our poisoning and unpoisoning within a global constructor. + // These are called _GLOBAL__(sub_)?I_.*. + // TODO: Consider looking through the functions in + // M.getGlobalVariable("llvm.global_ctors") instead of using this stringly + // typed approach. + Function *GlobalInit = nullptr; + for (auto &F : M.getFunctionList()) { + StringRef FName = F.getName(); + + const char kGlobalPrefix[] = "_GLOBAL__"; + if (!FName.startswith(kGlobalPrefix)) + continue; + FName = FName.substr(strlen(kGlobalPrefix)); + + const char kOptionalSub[] = "sub_"; + if (FName.startswith(kOptionalSub)) + FName = FName.substr(strlen(kOptionalSub)); + + if (FName.startswith("I_")) { + GlobalInit = &F; + break; + } + } // If that function is not present, this TU contains no globals, or they have // all been optimized away if (!GlobalInit) @@ -858,7 +890,7 @@ void AddressSanitizerModule::createInitializerPoisonCalls( // Add calls to unpoison all globals before each return instruction. 
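The constructor lookup above replaces a hard-coded _GLOBAL__I_a with prefix matching. The same predicate as a standalone function, using std::strncmp where the pass uses StringRef::startswith:

#include <cstdio>
#include <cstring>

// Accepts "_GLOBAL__I_..." and "_GLOBAL__sub_I_...", the two spellings
// compilers use for global-initializer constructors.
static bool isGlobalInitName(const char *Name) {
  const char kGlobalPrefix[] = "_GLOBAL__";
  if (std::strncmp(Name, kGlobalPrefix, std::strlen(kGlobalPrefix)) != 0)
    return false;
  Name += std::strlen(kGlobalPrefix);
  const char kOptionalSub[] = "sub_";
  if (std::strncmp(Name, kOptionalSub, std::strlen(kOptionalSub)) == 0)
    Name += std::strlen(kOptionalSub);
  return std::strncmp(Name, "I_", 2) == 0;
}

int main() {
  std::printf("%d %d %d\n", isGlobalInitName("_GLOBAL__I_a"),
              isGlobalInitName("_GLOBAL__sub_I_foo.cpp"),
              isGlobalInitName("_GLOBAL__D_a"));   // destructor: no match
  return 0;
}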
for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); - I != E; ++I) { + I != E; ++I) { if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) { CallInst::Create(AsanUnpoisonGlobals, "", RI); } @@ -902,8 +934,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // Ignore the globals from the __OBJC section. The ObjC runtime assumes // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to // them. - if ((Section.find("__OBJC,") == 0) || - (Section.find("__DATA, __objc_") == 0)) { + if (Section.startswith("__OBJC,") || + Section.startswith("__DATA, __objc_")) { DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n"); return false; } @@ -915,16 +947,26 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // is placed into __DATA,__cfstring // Therefore there's no point in placing redzones into __DATA,__cfstring. // Moreover, it causes the linker to crash on OS X 10.7 - if (Section.find("__DATA,__cfstring") == 0) { + if (Section.startswith("__DATA,__cfstring")) { DEBUG(dbgs() << "Ignoring CFString: " << *G << "\n"); return false; } // The linker merges the contents of cstring_literals and removes the // trailing zeroes. - if (Section.find("__TEXT,__cstring,cstring_literals") == 0) { + if (Section.startswith("__TEXT,__cstring,cstring_literals")) { DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n"); return false; } + + // Callbacks put into the CRT initializer/terminator sections + // should not be instrumented. + // See https://code.google.com/p/address-sanitizer/issues/detail?id=305 + // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx + if (Section.startswith(".CRT")) { + DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n"); + return false; + } + // Globals from llvm.metadata aren't emitted, do not instrument them. if (Section == "llvm.metadata") return false; } @@ -950,6 +992,10 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); + AsanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction( + kAsanCovModuleInitName, + IRB.getVoidTy(), IntptrTy, NULL)); + AsanCovModuleInit->setLinkage(Function::ExternalLinkage); } // This function replaces all global variables with new variables that have @@ -980,6 +1026,14 @@ bool AddressSanitizerModule::runOnModule(Module &M) { GlobalsToChange.push_back(G); } + Function *CtorFunc = M.getFunction(kAsanModuleCtorName); + assert(CtorFunc); + IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); + + Function *CovFunc = M.getFunction(kAsanCovName); + int nCov = CovFunc ? 
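Condensing the section checks above into one predicate makes the skip list easier to see; .CRT is the newly excluded case (CRT initializer callbacks must keep their exact layout, so no redzones). A sketch using the standard-library prefix idiom in place of StringRef::startswith:

#include <cstdio>
#include <string>

static bool sectionForbidsRedzones(const std::string &S) {
  auto startswith = [&](const char *P) { return S.rfind(P, 0) == 0; };
  return startswith("__OBJC,") || startswith("__DATA, __objc_") ||
         startswith("__DATA,__cfstring") ||
         startswith("__TEXT,__cstring,cstring_literals") ||
         startswith(".CRT") || S == "llvm.metadata";
}

int main() {
  std::printf("%d %d\n", sectionForbidsRedzones(".CRT$XCU"),
              sectionForbidsRedzones("__TEXT,__text"));
  return 0;
}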
CovFunc->getNumUses() : 0; + IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov)); + size_t n = GlobalsToChange.size(); if (n == 0) return false; @@ -996,10 +1050,6 @@ bool AddressSanitizerModule::runOnModule(Module &M) { IntptrTy, IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n); - Function *CtorFunc = M.getFunction(kAsanModuleCtorName); - assert(CtorFunc); - IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - bool HasDynamicallyInitializedGlobals = false; // We shouldn't merge same module names, as this string serves as unique @@ -1110,12 +1160,16 @@ void AddressSanitizer::initializeCallbacks(Module &M) { for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; AccessSizeIndex++) { // IsWrite and TypeSize are encoded in the function name. - std::string FunctionName = std::string(kAsanReportErrorTemplate) + + std::string Suffix = (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); - // If we are merging crash callbacks, they have two parameters. AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = - checkInterfaceFunction(M.getOrInsertFunction( - FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); + checkInterfaceFunction( + M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix, + IRB.getVoidTy(), IntptrTy, NULL)); + AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] = + checkInterfaceFunction( + M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix, + IRB.getVoidTy(), IntptrTy, NULL)); } } AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction( @@ -1123,8 +1177,25 @@ void AddressSanitizer::initializeCallbacks(Module &M) { AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction( kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); - AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( - kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); + AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction( + M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN", + IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction( + M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN", + IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + + AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL)); + AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL)); + AsanMemset = checkInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, NULL)); + + AsanHandleNoReturnFunc = checkInterfaceFunction( + M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction( kAsanCovName, IRB.getVoidTy(), NULL)); AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( @@ -1142,7 +1213,7 @@ bool AddressSanitizer::doInitialization(Module &M) { // Initialize the private fields. No one has accessed them before. 
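initializeCallbacks above encodes the access kind and size in each symbol name, with the prefix now configurable via -asan-memory-access-callback-prefix. The naming scheme in isolation (assuming kAsanReportErrorTemplate expands to "__asan_report_"; that value is not shown in this hunk):

#include <cstdio>
#include <string>

// Builds e.g. "__asan_store4" or "__asan_report_load8" from the two
// indices used by the callback tables.
static std::string callbackName(const std::string &Prefix, bool IsWrite,
                                unsigned AccessSizeIndex) {
  return Prefix + (IsWrite ? "store" : "load") +
         std::to_string(1u << AccessSizeIndex);
}

int main() {
  std::printf("%s\n", callbackName("__asan_", true, 2).c_str());
  std::printf("%s\n", callbackName("__asan_report_", false, 3).c_str());
  return 0;
}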
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); if (!DLP) - return false; + report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); @@ -1241,7 +1312,8 @@ bool AddressSanitizer::InjectCoverage(Function &F, const ArrayRef<BasicBlock *> AllBlocks) { if (!ClCoverage) return false; - if (ClCoverage == 1) { + if (ClCoverage == 1 || + (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) { InjectCoverageAtBlock(F, F.getEntryBlock()); } else { for (size_t i = 0, n = AllBlocks.size(); i < n; i++) @@ -1275,6 +1347,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { SmallVector<Instruction*, 16> PointerComparisonsOrSubtracts; int NumAllocas = 0; bool IsWrite; + unsigned Alignment; // Fill the set of memory operations to instrument. for (Function::iterator FI = F.begin(), FE = F.end(); @@ -1285,7 +1358,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { if (LooksLikeCodeInBug11395(BI)) return false; - if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite)) { + if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite, &Alignment)) { if (ClOpt && ClOptSameTemp) { if (!TempsToInstrument.insert(Addr)) continue; // We've seen this temp in the current BB. @@ -1294,7 +1367,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { isInterestingPointerComparisonOrSubtraction(BI)) { PointerComparisonsOrSubtracts.push_back(BI); continue; - } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) { + } else if (isa<MemIntrinsic>(BI)) { // ok, take it. } else { if (isa<AllocaInst>(BI)) @@ -1315,7 +1388,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { } } - Function *UninstrumentedDuplicate = 0; + Function *UninstrumentedDuplicate = nullptr; bool LikelyToInstrument = !NoReturnCalls.empty() || !ToInstrument.empty() || (NumAllocas > 0); if (ClKeepUninstrumented && LikelyToInstrument) { @@ -1326,14 +1399,19 @@ bool AddressSanitizer::runOnFunction(Function &F) { F.getParent()->getFunctionList().push_back(UninstrumentedDuplicate); } + bool UseCalls = false; + if (ClInstrumentationWithCallsThreshold >= 0 && + ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold) + UseCalls = true; + // Instrument. 
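The TempsToInstrument logic above suppresses repeated checks of the same address within one basic block. The same dedupe with standard containers (the real pass also clears the set at calls, since a call may write memory and invalidate the assumption):

#include <cstdio>
#include <set>
#include <string>

int main() {
  const char *Block[] = {"p", "q", "p", "r", "q"};   // accesses in one BB
  std::set<std::string> TempsToInstrument;
  unsigned Checks = 0;
  for (const char *Addr : Block)
    if (TempsToInstrument.insert(Addr).second)
      ++Checks;            // first sighting in this block: instrument
  std::printf("checks for 5 accesses: %u\n", Checks);  // prints 3
  return 0;
}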
int NumInstrumented = 0; for (size_t i = 0, n = ToInstrument.size(); i != n; i++) { Instruction *Inst = ToInstrument[i]; if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { - if (isInterestingMemoryAccess(Inst, &IsWrite)) - instrumentMop(Inst); + if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment)) + instrumentMop(Inst, UseCalls); else instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); } @@ -1464,12 +1542,23 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined( } } +static DebugLoc getFunctionEntryDebugLocation(Function &F) { + BasicBlock::iterator I = F.getEntryBlock().begin(), + E = F.getEntryBlock().end(); + for (; I != E; ++I) + if (!isa<AllocaInst>(I)) + break; + return I->getDebugLoc(); +} + void FunctionStackPoisoner::poisonStack() { int StackMallocIdx = -1; + DebugLoc EntryDebugLocation = getFunctionEntryDebugLocation(F); assert(AllocaVec.size() > 0); Instruction *InsBefore = AllocaVec[0]; IRBuilder<> IRB(InsBefore); + IRB.SetCurrentDebugLocation(EntryDebugLocation); SmallVector<ASanStackVariableDescription, 16> SVD; SVD.reserve(AllocaVec.size()); @@ -1493,6 +1582,7 @@ void FunctionStackPoisoner::poisonStack() { Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize); AllocaInst *MyAlloca = new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore); + MyAlloca->setDebugLoc(EntryDebugLocation); assert((ClRealignStack & (ClRealignStack - 1)) == 0); size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack); MyAlloca->setAlignment(FrameAlignment); @@ -1513,11 +1603,13 @@ void FunctionStackPoisoner::poisonStack() { Instruction *Term = SplitBlockAndInsertIfThen(Cmp, InsBefore, false); BasicBlock *CmpBlock = cast<Instruction>(Cmp)->getParent(); IRBuilder<> IRBIf(Term); + IRBIf.SetCurrentDebugLocation(EntryDebugLocation); LocalStackBase = IRBIf.CreateCall2( AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); BasicBlock *SetBlock = cast<Instruction>(LocalStackBase)->getParent(); IRB.SetInsertPoint(InsBefore); + IRB.SetCurrentDebugLocation(EntryDebugLocation); PHINode *Phi = IRB.CreatePHI(IntptrTy, 2); Phi->addIncoming(OrigStackBase, CmpBlock); Phi->addIncoming(LocalStackBase, SetBlock); @@ -1654,7 +1746,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) // We're intested only in allocas we can handle. - return isInterestingAlloca(*AI) ? AI : 0; + return isInterestingAlloca(*AI) ? AI : nullptr; // See if we've already calculated (or started to calculate) alloca for a // given value. AllocaForValueMapTy::iterator I = AllocaForValue.find(V); @@ -1662,8 +1754,8 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { return I->second; // Store 0 while we're calculating alloca for value V to avoid // infinite recursion if the value references itself. - AllocaForValue[V] = 0; - AllocaInst *Res = 0; + AllocaForValue[V] = nullptr; + AllocaInst *Res = nullptr; if (CastInst *CI = dyn_cast<CastInst>(V)) Res = findAllocaForValue(CI->getOperand(0)); else if (PHINode *PN = dyn_cast<PHINode>(V)) { @@ -1673,12 +1765,12 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) { if (IncValue == PN) continue; AllocaInst *IncValueAI = findAllocaForValue(IncValue); // AI for incoming values should exist and should all be equal. 
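getFunctionEntryDebugLocation above anchors all of the stack poisoner's setup code to the first instruction after the static allocas, which carry no useful location of their own. Its skip-the-allocas scan, in miniature:

#include <cstdio>
#include <vector>

struct Inst { bool IsAlloca; int Line; };   // toy stand-in for Instruction

static int entryDebugLine(const std::vector<Inst> &Entry) {
  for (const Inst &I : Entry)
    if (!I.IsAlloca)
      return I.Line;   // location of the first non-alloca instruction
  return 0;            // degenerate block; the real code assumes one exists
}

int main() {
  std::vector<Inst> Entry = {{true, 0}, {true, 0}, {false, 42}, {false, 43}};
  std::printf("entry line: %d\n", entryDebugLine(Entry));  // prints 42
  return 0;
}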
- if (IncValueAI == 0 || (Res != 0 && IncValueAI != Res)) - return 0; + if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res)) + return nullptr; Res = IncValueAI; } } - if (Res != 0) + if (Res) AllocaForValue[V] = Res; return Res; } diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 505fb83..9a5cea8 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "bounds-checking" #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; +#define DEBUG_TYPE "bounds-checking" + static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap", cl::desc("Use one trap block per function")); @@ -61,7 +62,7 @@ namespace { BasicBlock *TrapBB; BasicBlock *getTrapBB(); - void emitBranchToTrap(Value *Cmp = 0); + void emitBranchToTrap(Value *Cmp = nullptr); bool instrument(Value *Ptr, Value *Val); }; } @@ -103,7 +104,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) { if (!C->getZExtValue()) return; else - Cmp = 0; // unconditional branch + Cmp = nullptr; // unconditional branch } ++ChecksAdded; @@ -167,7 +168,7 @@ bool BoundsChecking::runOnFunction(Function &F) { DL = &getAnalysis<DataLayoutPass>().getDataLayout(); TLI = &getAnalysis<TargetLibraryInfo>(); - TrapBB = 0; + TrapBB = nullptr; BuilderTy TheBuilder(F.getContext(), TargetFolder(DL)); Builder = &TheBuilder; ObjectSizeOffsetEvaluator TheObjSizeEval(DL, TLI, F.getContext(), diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index df1549d..7f468f7 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -211,7 +211,8 @@ class DataFlowSanitizer : public ModulePass { public: DataFlowSanitizer(StringRef ABIListFile = StringRef(), - void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0); + void *(*getArgTLS)() = nullptr, + void *(*getRetValTLS)() = nullptr); static char ID; bool doInitialization(Module &M) override; bool runOnModule(Module &M) override; @@ -233,8 +234,8 @@ struct DFSanFunction { DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), - IsNativeABI(IsNativeABI), ArgTLSPtr(0), RetvalTLSPtr(0), - LabelReturnAlloca(0) {} + IsNativeABI(IsNativeABI), ArgTLSPtr(nullptr), RetvalTLSPtr(nullptr), + LabelReturnAlloca(nullptr) {} Value *getArgTLSPtr(); Value *getArgTLS(unsigned Index, Instruction *Pos); Value *getRetvalTLS(); @@ -303,7 +304,7 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { ArgTypes.push_back(ShadowPtrTy); Type *RetType = T->getReturnType(); if (!RetType->isVoidTy()) - RetType = StructType::get(RetType, ShadowTy, (Type *)0); + RetType = StructType::get(RetType, ShadowTy, (Type *)nullptr); return FunctionType::get(RetType, ArgTypes, T->isVarArg()); } @@ -345,7 +346,7 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { bool DataFlowSanitizer::doInitialization(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); if (!DLP) - return false; + report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); Mod = &M; @@ -373,18 +374,20 @@ bool 
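DFSan's transformed ABI from getArgsFunctionType can be pictured at source level: every argument gains a label and a non-void return becomes a value/label pair. The 16-bit label width and the or-based combine below are simplifying assumptions of this sketch; the real runtime forms label unions through a union table:

#include <cstdio>
#include <utility>

typedef unsigned short dfsan_label;   // assumed width, for illustration

// int add(int, int) becomes, roughly:
static std::pair<int, dfsan_label> add_dfsan(int A, dfsan_label AL,
                                             int B, dfsan_label BL) {
  return {A + B, (dfsan_label)(AL | BL)};  // simplified label combine
}

int main() {
  auto R = add_dfsan(2, 1, 3, 2);
  std::printf("value=%d label=%u\n", R.first, R.second);
  return 0;
}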
DataFlowSanitizer::doInitialization(Module &M) { if (GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); - ArgTLS = 0; + ArgTLS = nullptr; GetArgTLS = ConstantExpr::getIntToPtr( ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)), PointerType::getUnqual( - FunctionType::get(PointerType::getUnqual(ArgTLSTy), (Type *)0))); + FunctionType::get(PointerType::getUnqual(ArgTLSTy), + (Type *)nullptr))); } if (GetRetvalTLSPtr) { - RetvalTLS = 0; + RetvalTLS = nullptr; GetRetvalTLS = ConstantExpr::getIntToPtr( ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)), PointerType::getUnqual( - FunctionType::get(PointerType::getUnqual(ShadowTy), (Type *)0))); + FunctionType::get(PointerType::getUnqual(ShadowTy), + (Type *)nullptr))); } ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); @@ -554,7 +557,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { ++i; // Don't stop on weak. We assume people aren't playing games with the // instrumentedness of overridden weak aliases. - if (Function *F = dyn_cast<Function>(GA->getAliasedGlobal())) { + if (Function *F = dyn_cast<Function>(GA->getAliasee())) { bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); if (GAInst && FInst) { addGlobalNamePrefix(GA); @@ -629,7 +632,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // function... yet. } else if (FT->isVarArg()) { UnwrappedFnMap[&F] = &F; - *i = 0; + *i = nullptr; } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) { // Build a wrapper function for F. The wrapper simply calls F, and is // added to FnsToInstrument so that any instrumentation according to its @@ -680,9 +683,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // DFSanVisitor may create new basic blocks, which confuses df_iterator. // Build a copy of the list before iterating over it. 
- llvm::SmallVector<BasicBlock *, 4> BBList; - std::copy(df_begin(&(*i)->getEntryBlock()), df_end(&(*i)->getEntryBlock()), - std::back_inserter(BBList)); + llvm::SmallVector<BasicBlock *, 4> BBList( + depth_first(&(*i)->getEntryBlock())); for (llvm::SmallVector<BasicBlock *, 4>::iterator i = BBList.begin(), e = BBList.end(); @@ -1313,7 +1315,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } } - Instruction *Next = 0; + Instruction *Next = nullptr; if (!CS.getType()->isVoidTy()) { if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { if (II->getNormalDest()->getSinglePredecessor()) { diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp index 069886e..18bda1a 100644 --- a/lib/Transforms/Instrumentation/DebugIR.cpp +++ b/lib/Transforms/Instrumentation/DebugIR.cpp @@ -16,8 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "debug-ir" - #include "llvm/IR/ValueMap.h" #include "DebugIR.h" #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -42,6 +40,8 @@ using namespace llvm; +#define DEBUG_TYPE "debug-ir" + namespace { /// Builds a map of Value* to line numbers on which the Value appears in a @@ -118,7 +118,7 @@ public: void visitInstruction(Instruction &I) { if (I.getMetadata(LLVMContext::MD_dbg)) - I.setMetadata(LLVMContext::MD_dbg, 0); + I.setMetadata(LLVMContext::MD_dbg, nullptr); } void run(Module *M) { @@ -168,11 +168,11 @@ class DIUpdater : public InstVisitor<DIUpdater> { public: DIUpdater(Module &M, StringRef Filename = StringRef(), - StringRef Directory = StringRef(), const Module *DisplayM = 0, - const ValueToValueMapTy *VMap = 0) + StringRef Directory = StringRef(), const Module *DisplayM = nullptr, + const ValueToValueMapTy *VMap = nullptr) : Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap), - Finder(), Filename(Filename), Directory(Directory), FileNode(0), - LexicalBlockFileNode(0), CUNode(0) { + Finder(), Filename(Filename), Directory(Directory), FileNode(nullptr), + LexicalBlockFileNode(nullptr), CUNode(nullptr) { Finder.processModule(M); visit(&M); } @@ -184,7 +184,7 @@ public: report_fatal_error("DebugIR pass supports only a signle compile unit per " "Module."); createCompileUnit(Finder.compile_unit_count() == 1 ? - (MDNode*)*Finder.compile_units().begin() : 0); + (MDNode*)*Finder.compile_units().begin() : nullptr); } void visitFunction(Function &F) { @@ -232,7 +232,7 @@ public: /// If a ValueToValueMap is provided, use it to get the real instruction as /// the line table was generated on a clone of the module on which we are /// operating. - Value *RealInst = 0; + Value *RealInst = nullptr; if (VMap) RealInst = VMap->lookup(&I); @@ -256,7 +256,7 @@ public: NewLoc = DebugLoc::get(Line, Col, Loc.getScope(RealInst->getContext()), Loc.getInlinedAt(RealInst->getContext())); else if (MDNode *scope = findScope(&I)) - NewLoc = DebugLoc::get(Line, Col, scope, 0); + NewLoc = DebugLoc::get(Line, Col, scope, nullptr); else { DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I << ". no DebugLoc will be present." @@ -334,7 +334,7 @@ private: } DEBUG(dbgs() << "unable to find DISubprogram node for function " << F->getName().str() << "\n"); - return 0; + return nullptr; } /// Sets Line to the line number on which V appears and returns true. 
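The BBList change above snapshots the depth-first order before visiting, because visiting can create blocks and invalidate df_iterator. The same pattern with standard containers:

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Blocks = {1, 2, 3};                    // the "CFG"
  std::vector<int> Snapshot(Blocks.begin(), Blocks.end());
  for (int B : Snapshot)
    if (B == 2)
      Blocks.push_back(99);   // a "split" creates a new block: not visited
  std::printf("visited %zu of %zu blocks\n", Snapshot.size(), Blocks.size());
  return 0;
}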
If a @@ -366,7 +366,7 @@ private: TypeNodeIter i = TypeDescriptors.find(T); if (i != TypeDescriptors.end()) return i->second; - return 0; + return nullptr; } /// Returns a DebugInfo type from an LLVM type T. @@ -375,12 +375,12 @@ private: if (N) return DIDerivedType(N); else if (T->isVoidTy()) - return DIDerivedType(0); + return DIDerivedType(nullptr); else if (T->isStructTy()) { N = Builder.createStructType( DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode), 0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0, - DIType(0), DIArray(0)); // filled in later + DIType(nullptr), DIArray(nullptr)); // filled in later // N is added to the map (early) so that element search below can find it, // so as to avoid infinite recursion for structs that contain pointers to @@ -535,7 +535,7 @@ void DebugIR::writeDebugBitcode(const Module *M, int *fd) { Out.reset(new raw_fd_ostream(*fd, true)); } - M->print(*Out, 0); + M->print(*Out, nullptr); Out->close(); } diff --git a/lib/Transforms/Instrumentation/DebugIR.h b/lib/Transforms/Instrumentation/DebugIR.h index 3f57da5..02831ed 100644 --- a/lib/Transforms/Instrumentation/DebugIR.h +++ b/lib/Transforms/Instrumentation/DebugIR.h @@ -90,7 +90,7 @@ private: /// Write M to disk, optionally passing in an fd to an open file which is /// closed by this function after writing. If no fd is specified, a new file /// is opened, written, and closed. - void writeDebugBitcode(const llvm::Module *M, int *fd = 0); + void writeDebugBitcode(const llvm::Module *M, int *fd = nullptr); }; } // llvm namespace diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index bd00ec8..8330a9b 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -14,8 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "insert-gcov-profiling" - #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" @@ -39,10 +37,13 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> +#include <memory> #include <string> #include <utility> using namespace llvm; +#define DEBUG_TYPE "insert-gcov-profiling" + static cl::opt<std::string> DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, cl::ValueRequired); @@ -77,9 +78,6 @@ namespace { "GCOVProfiler asked to do nothing?"); init(); } - ~GCOVProfiler() { - DeleteContainerPointers(Funcs); - } const char *getPassName() const override { return "GCOV Profiler"; } @@ -141,7 +139,7 @@ namespace { Module *M; LLVMContext *Ctx; - SmallVector<GCOVFunction *, 16> Funcs; + SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs; }; } @@ -449,6 +447,21 @@ bool GCOVProfiler::runOnModule(Module &M) { return false; } +static bool functionHasLines(Function *F) { + // Check whether this function actually has any source lines. Not only + // do these waste space, they also can crash gcov. 
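The Funcs change above swaps raw pointers plus a hand-written destructor (DeleteContainerPointers) for owning smart pointers, so the container cleans up by itself. In miniature, with the plain unique_ptr constructor standing in for llvm::make_unique:

#include <cstdio>
#include <memory>
#include <vector>

struct GCOVFunctionLike {                  // toy stand-in for GCOVFunction
  explicit GCOVFunctionLike(int Id) : Id(Id) {}
  ~GCOVFunctionLike() { std::printf("destroyed %d\n", Id); }
  int Id;
};

int main() {
  std::vector<std::unique_ptr<GCOVFunctionLike>> Funcs;
  Funcs.push_back(std::unique_ptr<GCOVFunctionLike>(new GCOVFunctionLike(0)));
  Funcs.push_back(std::unique_ptr<GCOVFunctionLike>(new GCOVFunctionLike(1)));
  return 0;   // elements freed automatically; no destructor needed
}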
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); + I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Loc.getLine() != 0) + return true; + } + } + return false; +} + void GCOVProfiler::emitProfileNotes() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -474,6 +487,7 @@ void GCOVProfiler::emitProfileNotes() { Function *F = SP.getFunction(); if (!F) continue; + if (!functionHasLines(F)) continue; // gcov expects every function to start with an entry block that has a // single successor, so split the entry block to make sure of that. @@ -483,19 +497,19 @@ void GCOVProfiler::emitProfileNotes() { ++It; EntryBlock.splitBasicBlock(It); - GCOVFunction *Func = - new GCOVFunction(SP, &out, i, Options.UseCfgChecksum); - Funcs.push_back(Func); + Funcs.push_back( + make_unique<GCOVFunction>(SP, &out, i, Options.UseCfgChecksum)); + GCOVFunction &Func = *Funcs.back(); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func->getBlock(BB); + GCOVBlock &Block = Func.getBlock(BB); TerminatorInst *TI = BB->getTerminator(); if (int successors = TI->getNumSuccessors()) { for (int i = 0; i != successors; ++i) { - Block.addEdge(Func->getBlock(TI->getSuccessor(i))); + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); } } else if (isa<ReturnInst>(TI)) { - Block.addEdge(Func->getReturnBlock()); + Block.addEdge(Func.getReturnBlock()); } uint32_t Line = 0; @@ -511,7 +525,7 @@ void GCOVProfiler::emitProfileNotes() { Lines.addLine(Loc.getLine()); } } - EdgeDestinations += Func->getEdgeDestinations(); + EdgeDestinations += Func.getEdgeDestinations(); } FileChecksums.push_back(hash_value(EdgeDestinations)); @@ -519,9 +533,7 @@ void GCOVProfiler::emitProfileNotes() { out.write(ReversedVersion, 4); out.write(reinterpret_cast<char*>(&FileChecksums.back()), 4); - for (SmallVectorImpl<GCOVFunction *>::iterator I = Funcs.begin(), - E = Funcs.end(); I != E; ++I) { - GCOVFunction *Func = *I; + for (auto &Func : Funcs) { Func->setCfgChecksum(FileChecksums.back()); Func->writeOut(); } @@ -549,6 +561,7 @@ bool GCOVProfiler::emitProfileArcs() { continue; Function *F = SP.getFunction(); if (!F) continue; + if (!functionHasLines(F)) continue; if (!Result) Result = true; unsigned Edges = 0; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index ec1a195..b8e632e 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -93,12 +93,11 @@ //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "msan" - #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -122,6 +121,8 @@ using namespace llvm; +#define DEBUG_TYPE "msan" + static const uint64_t kShadowMask32 = 1ULL << 31; static const uint64_t kShadowMask64 = 1ULL << 46; static const uint64_t kOriginOffset32 = 1ULL << 30; @@ -129,6 +130,9 @@ static const uint64_t kOriginOffset64 = 1ULL << 45; static const unsigned kMinOriginAlignment = 4; static const unsigned kShadowTLSAlignment = 8; +// Accesses sizes are 
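functionHasLines above skips functions whose instructions carry only unknown or line-0 locations, since they would emit useless (and, per the comment, gcov-crashing) records. The predicate on toy data:

#include <cstdio>
#include <vector>

struct Loc { bool Known; unsigned Line; };  // stand-in for DebugLoc

static bool functionHasLines(const std::vector<Loc> &Insts) {
  for (const Loc &L : Insts) {
    if (!L.Known) continue;    // Loc.isUnknown() in the real code
    if (L.Line != 0)
      return true;
  }
  return false;
}

int main() {
  std::printf("%d\n", functionHasLines({{false, 0}, {true, 0}}));  // 0
  std::printf("%d\n", functionHasLines({{true, 12}}));             // 1
  return 0;
}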
powers of two: 1, 2, 4, 8. +static const size_t kNumberOfAccessSizes = 4; + /// \brief Track origins of uninitialized values. /// /// Adds a section to MemorySanitizer report that points to the allocation @@ -178,6 +182,14 @@ static cl::opt<std::string> ClBlacklistFile("msan-blacklist", cl::desc("File containing the list of functions where MemorySanitizer " "should not report bugs"), cl::Hidden); +static cl::opt<int> ClInstrumentationWithCallThreshold( + "msan-instrumentation-with-call-threshold", + cl::desc( + "If the function being instrumented requires more than " + "this number of checks and origin stores, use callbacks instead of " + "inline checks (-1 means never use callbacks)."), + cl::Hidden, cl::init(3500)); + // Experimental. Wraps all indirect calls in the instrumented code with // a call to the given function. This is needed to assist the dynamic // helper tool (MSanDR) to regain control on transition between instrumented and @@ -203,8 +215,8 @@ class MemorySanitizer : public FunctionPass { StringRef BlacklistFile = StringRef()) : FunctionPass(ID), TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)), - DL(0), - WarningFn(0), + DL(nullptr), + WarningFn(nullptr), BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile), WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {} const char *getPassName() const override { return "MemorySanitizer"; } @@ -245,6 +257,10 @@ class MemorySanitizer : public FunctionPass { /// \brief The run-time callback to print a warning. Value *WarningFn; + // These arrays are indexed by log2(AccessSize). + Value *MaybeWarningFn[kNumberOfAccessSizes]; + Value *MaybeStoreOriginFn[kNumberOfAccessSizes]; + /// \brief Run-time helper that generates a new origin value for a stack /// allocation. Value *MsanSetAllocaOrigin4Fn; @@ -321,6 +337,20 @@ void MemorySanitizer::initializeCallbacks(Module &M) { : "__msan_warning_noreturn"; WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL); + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; + AccessSizeIndex++) { + unsigned AccessSize = 1 << AccessSizeIndex; + std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize); + MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction( + FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), + IRB.getInt32Ty(), NULL); + + FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); + MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( + FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), + IRB.getInt8PtrTy(), IRB.getInt32Ty(), NULL); + } + MsanSetAllocaOrigin4Fn = M.getOrInsertFunction( "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy(), IntptrTy, NULL); @@ -341,31 +371,32 @@ void MemorySanitizer::initializeCallbacks(Module &M) { // Create globals. 
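A __msan_maybe_warning_<n> callback moves the inline compare-and-branch out of line, which is what makes the call threshold pay off in heavily checked functions. A source-level model (the name and the origin plumbing are simplified assumptions):

#include <cstdint>
#include <cstdio>

// What the 4-byte variant conceptually does: warn only when the shadow
// argument is actually poisoned.
static void msan_maybe_warning_4(uint32_t Shadow, uint32_t Origin) {
  if (Shadow != 0)
    std::printf("WARNING: uninitialized value (origin %u)\n", Origin);
}

int main() {
  msan_maybe_warning_4(0, 0);           // clean shadow: silent
  msan_maybe_warning_4(0xffffffff, 7);  // poisoned: report
  return 0;
}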
RetvalTLS = new GlobalVariable( M, ArrayType::get(IRB.getInt64Ty(), 8), false, - GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0, + GlobalVariable::ExternalLinkage, nullptr, "__msan_retval_tls", nullptr, GlobalVariable::InitialExecTLSModel); RetvalOriginTLS = new GlobalVariable( - M, OriginTy, false, GlobalVariable::ExternalLinkage, 0, - "__msan_retval_origin_tls", 0, GlobalVariable::InitialExecTLSModel); + M, OriginTy, false, GlobalVariable::ExternalLinkage, nullptr, + "__msan_retval_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel); ParamTLS = new GlobalVariable( M, ArrayType::get(IRB.getInt64Ty(), 1000), false, - GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0, + GlobalVariable::ExternalLinkage, nullptr, "__msan_param_tls", nullptr, GlobalVariable::InitialExecTLSModel); ParamOriginTLS = new GlobalVariable( M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage, - 0, "__msan_param_origin_tls", 0, GlobalVariable::InitialExecTLSModel); + nullptr, "__msan_param_origin_tls", nullptr, + GlobalVariable::InitialExecTLSModel); VAArgTLS = new GlobalVariable( M, ArrayType::get(IRB.getInt64Ty(), 1000), false, - GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0, + GlobalVariable::ExternalLinkage, nullptr, "__msan_va_arg_tls", nullptr, GlobalVariable::InitialExecTLSModel); VAArgOverflowSizeTLS = new GlobalVariable( - M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0, - "__msan_va_arg_overflow_size_tls", 0, + M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, nullptr, + "__msan_va_arg_overflow_size_tls", nullptr, GlobalVariable::InitialExecTLSModel); OriginTLS = new GlobalVariable( - M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0, - "__msan_origin_tls", 0, GlobalVariable::InitialExecTLSModel); + M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, nullptr, + "__msan_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel); // We insert an empty inline asm after __msan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -379,14 +410,14 @@ void MemorySanitizer::initializeCallbacks(Module &M) { ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL); } - if (ClWrapIndirectCallsFast) { + if (WrapIndirectCalls && ClWrapIndirectCallsFast) { MsandrModuleStart = new GlobalVariable( M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage, - 0, "__executable_start"); + nullptr, "__executable_start"); MsandrModuleStart->setVisibility(GlobalVariable::HiddenVisibility); MsandrModuleEnd = new GlobalVariable( M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage, - 0, "_end"); + nullptr, "_end"); MsandrModuleEnd->setVisibility(GlobalVariable::HiddenVisibility); } } @@ -397,7 +428,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) { bool MemorySanitizer::doInitialization(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); if (!DLP) - return false; + report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); @@ -474,6 +505,11 @@ VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, MemorySanitizerVisitor &Visitor); +unsigned TypeSizeToSizeIndex(unsigned TypeSize) { + if (TypeSize <= 8) return 0; + return Log2_32_Ceil(TypeSize / 8); +} + /// This class does all the work for a given function. Store and Load /// instructions store and load corresponding shadow and origin /// values. 
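TypeSizeToSizeIndex above maps an access width in bits to an index into the new callback tables. The same mapping with Log2_32_Ceil expanded into portable code:

#include <cstdio>

static unsigned log2Ceil(unsigned V) {
  unsigned L = 0;
  while ((1u << L) < V) ++L;
  return L;
}

// 1..8 bits -> 0, 16 -> 1, 32 -> 2, 64 -> 3; an index of 4 or more is
// >= kNumberOfAccessSizes, and the callers fall back to inline checks.
static unsigned typeSizeToSizeIndex(unsigned TypeSizeBits) {
  if (TypeSizeBits <= 8) return 0;
  return log2Ceil(TypeSizeBits / 8);
}

int main() {
  const unsigned Sizes[] = {1, 8, 16, 32, 64, 128};
  for (unsigned Bits : Sizes)
    std::printf("%u bits -> index %u\n", Bits, typeSizeToSizeIndex(Bits));
  return 0;
}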
Most instructions propagate shadow from arguments to their @@ -529,9 +565,42 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return IRB.CreateCall(MS.MsanChainOriginFn, V); } - void materializeStores() { + void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin, + unsigned Alignment, bool AsCall) { + if (isa<StructType>(Shadow->getType())) { + IRB.CreateAlignedStore(updateOrigin(Origin, IRB), getOriginPtr(Addr, IRB), + Alignment); + } else { + Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); + // TODO(eugenis): handle non-zero constant shadow by inserting an + // unconditional check (can not simply fail compilation as this could + // be in the dead code). + if (isa<Constant>(ConvertedShadow)) return; + unsigned TypeSizeInBits = + MS.DL->getTypeSizeInBits(ConvertedShadow->getType()); + unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); + if (AsCall && SizeIndex < kNumberOfAccessSizes) { + Value *Fn = MS.MaybeStoreOriginFn[SizeIndex]; + Value *ConvertedShadow2 = IRB.CreateZExt( + ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); + IRB.CreateCall3(Fn, ConvertedShadow2, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + updateOrigin(Origin, IRB)); + } else { + Value *Cmp = IRB.CreateICmpNE( + ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); + Instruction *CheckTerm = SplitBlockAndInsertIfThen( + Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights); + IRBuilder<> IRBNew(CheckTerm); + IRBNew.CreateAlignedStore(updateOrigin(Origin, IRBNew), + getOriginPtr(Addr, IRBNew), Alignment); + } + } + } + + void materializeStores(bool InstrumentWithCalls) { for (size_t i = 0, n = StoreList.size(); i < n; i++) { - StoreInst& I = *dyn_cast<StoreInst>(StoreList[i]); + StoreInst &I = *dyn_cast<StoreInst>(StoreList[i]); IRBuilder<> IRB(&I); Value *Val = I.getValueOperand(); @@ -540,53 +609,41 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); StoreInst *NewSI = - IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); + IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); (void)NewSI; - if (ClCheckAccessAddress) - insertShadowCheck(Addr, &I); + if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); - if (I.isAtomic()) - I.setOrdering(addReleaseOrdering(I.getOrdering())); + if (I.isAtomic()) I.setOrdering(addReleaseOrdering(I.getOrdering())); if (MS.TrackOrigins) { unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); - if (isa<StructType>(Shadow->getType())) { - IRB.CreateAlignedStore(updateOrigin(getOrigin(Val), IRB), - getOriginPtr(Addr, IRB), Alignment); - } else { - Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); - - // TODO(eugenis): handle non-zero constant shadow by inserting an - // unconditional check (can not simply fail compilation as this could - // be in the dead code). 
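The heart of the extracted storeOrigin is that origins are written only when the stored shadow is actually poisoned, via an inline branch or, past the threshold, a __msan_maybe_store_origin_<n> call. The guard in isolation:

#include <cstdint>
#include <cstdio>

// Inline form of the origin store: skip the write for clean shadow.
static void storeOrigin(uint32_t Shadow, uint32_t Origin,
                        uint32_t *OriginSlot) {
  if (Shadow != 0)        // the "_mscmp" compare in the emitted IR
    *OriginSlot = Origin;
}

int main() {
  uint32_t Slot = 0;
  storeOrigin(0, 41, &Slot);      // clean: origin slot untouched
  storeOrigin(0xff, 42, &Slot);   // poisoned: origin recorded
  std::printf("origin slot: %u\n", Slot);   // prints 42
  return 0;
}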
- if (isa<Constant>(ConvertedShadow)) - continue; - - Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, - getCleanShadow(ConvertedShadow), "_mscmp"); - Instruction *CheckTerm = - SplitBlockAndInsertIfThen(Cmp, &I, false, MS.OriginStoreWeights); - IRBuilder<> IRBNew(CheckTerm); - IRBNew.CreateAlignedStore(updateOrigin(getOrigin(Val), IRBNew), - getOriginPtr(Addr, IRBNew), Alignment); - } + storeOrigin(IRB, Addr, Shadow, getOrigin(Val), Alignment, + InstrumentWithCalls); } } } - void materializeChecks() { - for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) { - Value *Shadow = InstrumentationList[i].Shadow; - Instruction *OrigIns = InstrumentationList[i].OrigIns; - IRBuilder<> IRB(OrigIns); - DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n"); - Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); - DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n"); - // See the comment in materializeStores(). - if (isa<Constant>(ConvertedShadow)) - continue; + void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin, + bool AsCall) { + IRBuilder<> IRB(OrigIns); + DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n"); + Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); + DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n"); + // See the comment in materializeStores(). + if (isa<Constant>(ConvertedShadow)) return; + unsigned TypeSizeInBits = + MS.DL->getTypeSizeInBits(ConvertedShadow->getType()); + unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); + if (AsCall && SizeIndex < kNumberOfAccessSizes) { + Value *Fn = MS.MaybeWarningFn[SizeIndex]; + Value *ConvertedShadow2 = + IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); + IRB.CreateCall2(Fn, ConvertedShadow2, MS.TrackOrigins && Origin + ? Origin + : (Value *)IRB.getInt32(0)); + } else { Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); Instruction *CheckTerm = SplitBlockAndInsertIfThen( @@ -595,14 +652,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRB.SetInsertPoint(CheckTerm); if (MS.TrackOrigins) { - Value *Origin = InstrumentationList[i].Origin; - IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0), + IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0), MS.OriginTLS); } IRB.CreateCall(MS.WarningFn); IRB.CreateCall(MS.EmptyAsm); DEBUG(dbgs() << " CHECK: " << *Cmp << "\n"); } + } + + void materializeChecks(bool InstrumentWithCalls) { + for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) { + Instruction *OrigIns = InstrumentationList[i].OrigIns; + Value *Shadow = InstrumentationList[i].Shadow; + Value *Origin = InstrumentationList[i].Origin; + materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls); + } DEBUG(dbgs() << "DONE:\n" << F); } @@ -662,17 +727,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Iterate all BBs in depth-first order and create shadow instructions // for all instructions (where applicable). // For PHI nodes we create dummy shadow PHIs which will be finalized later. - for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { - BasicBlock *BB = *DI; + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); - } + // Finalize PHI nodes. for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) { PHINode *PN = ShadowPHINodes[i]; PHINode *PNS = cast<PHINode>(getShadow(PN)); - PHINode *PNO = MS.TrackOrigins ? 
cast<PHINode>(getOrigin(PN)) : 0; + PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr; size_t NumValues = PN->getNumIncomingValues(); for (size_t v = 0; v < NumValues; v++) { PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v)); @@ -683,12 +746,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { VAHelper->finalizeInstrumentation(); + bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 && + InstrumentationList.size() + StoreList.size() > + (unsigned)ClInstrumentationWithCallThreshold; + // Delayed instrumentation of StoreInst. // This may add new checks to be inserted later. - materializeStores(); + materializeStores(InstrumentWithCalls); // Insert shadow value checks. - materializeChecks(); + materializeChecks(InstrumentWithCalls); // Wrap indirect calls. materializeIndirectCalls(); @@ -704,7 +771,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Compute the shadow type that corresponds to a given Type. Type *getShadowTy(Type *OrigTy) { if (!OrigTy->isSized()) { - return 0; + return nullptr; } // For integer type, shadow is the same as the original type. // This may return weird-sized types like i1. @@ -784,7 +851,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Compute the origin address for a given function argument. Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB, int ArgOffset) { - if (!MS.TrackOrigins) return 0; + if (!MS.TrackOrigins) return nullptr; Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy); Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0), @@ -825,7 +892,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Constant *getCleanShadow(Value *V) { Type *ShadowTy = getShadowTy(V); if (!ShadowTy) - return 0; + return nullptr; return Constant::getNullValue(ShadowTy); } @@ -845,7 +912,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Constant *getPoisonedShadow(Value *V) { Type *ShadowTy = getShadowTy(V); if (!ShadowTy) - return 0; + return nullptr; return getPoisonedShadow(ShadowTy); } @@ -936,7 +1003,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Get the origin for a value. Value *getOrigin(Value *V) { - if (!MS.TrackOrigins) return 0; + if (!MS.TrackOrigins) return nullptr; if (isa<Instruction>(V) || isa<Argument>(V)) { Value *Origin = OriginMap[V]; if (!Origin) { @@ -1234,7 +1301,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { public: Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) : - Shadow(0), Origin(0), IRB(IRB), MSV(MSV) {} + Shadow(nullptr), Origin(nullptr), IRB(IRB), MSV(MSV) {} /// \brief Add a pair of shadow and origin values to the mix. Combiner &Add(Value *OpShadow, Value *OpOrigin) { @@ -1265,7 +1332,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Add an application value to the mix. Combiner &Add(Value *V) { Value *OpShadow = MSV->getShadow(V); - Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : 0; + Value *OpOrigin = MSV->MS.TrackOrigins ? 
MSV->getOrigin(V) : nullptr; return Add(OpShadow, OpOrigin); } @@ -1480,7 +1547,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { void handleSignedRelationalComparison(ICmpInst &I) { Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0)); Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1)); - Value* op = NULL; + Value* op = nullptr; CmpInst::Predicate pre = I.getPredicate(); if (constOp0 && constOp0->isNullValue() && (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) { @@ -1789,7 +1856,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { break; case 1: ConvertOp = I.getArgOperand(0); - CopyOp = NULL; + CopyOp = nullptr; break; default: llvm_unreachable("Cvt intrinsic with unsupported number of arguments."); @@ -1803,7 +1870,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // FIXME: consider propagating shadow of ConvertOp, at least in the case of // int->any conversion. Value *ConvertShadow = getShadow(ConvertOp); - Value *AggShadow = 0; + Value *AggShadow = nullptr; if (ConvertOp->getType()->isVectorTy()) { AggShadow = IRB.CreateExtractElement( ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0)); @@ -2055,7 +2122,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { continue; } unsigned Size = 0; - Value *Store = 0; + Value *Store = nullptr; // Compute the Shadow for arg even if it is ByVal, because // in that case getShadow() will copy the actual arg shadow to // __msan_param_tls. @@ -2080,7 +2147,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRB.CreateStore(getOrigin(A), getOriginPtrForArgument(A, IRB, ArgOffset)); (void)Store; - assert(Size != 0 && Store != 0); + assert(Size != 0 && Store != nullptr); DEBUG(dbgs() << " Param:" << *Store << "\n"); ArgOffset += DataLayout::RoundUpAlignment(Size, 8); } @@ -2098,7 +2165,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Until we have full dynamic coverage, make sure the retval shadow is 0. Value *Base = getShadowPtrForRetval(&I, IRBBefore); IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment); - Instruction *NextInsn = 0; + Instruction *NextInsn = nullptr; if (CS.isCall()) { NextInsn = I.getNextNode(); } else { @@ -2318,7 +2385,8 @@ struct VarArgAMD64Helper : public VarArgHelper { VarArgAMD64Helper(Function &F, MemorySanitizer &MS, MemorySanitizerVisitor &MSV) - : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { } + : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), + VAArgOverflowSize(nullptr) {} enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 5ffb17c..8fe9bca 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -19,8 +19,6 @@ // The rest is handled by the run-time library. 
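Before the ThreadSanitizer changes continue below, note what the MemorySanitizer hunks above add: materializeStores() and materializeChecks() now take an InstrumentWithCalls flag, and once a function accumulates more checks and shadow stores than -msan-instrumentation-with-call-threshold, each eligible check is emitted as a call into the runtime (MaybeWarningFn / MaybeStoreOriginFn, indexed by access size) rather than as an inlined compare-and-branch. The standalone C++ sketch below models that decision; the function names and the size-index loop are mine, not LLVM's, and it assumes power-of-two shadow sizes.

    #include <cstddef>
    #include <cstdio>

    static const unsigned kNumberOfAccessSizes = 4; // callbacks for 1/2/4/8 bytes

    // Maps a shadow type size in bits to an index into the callback table:
    // 8 -> 0, 16 -> 1, 32 -> 2, 64 -> 3. Anything past the table keeps the
    // inline compare-and-branch form.
    unsigned typeSizeToSizeIndex(unsigned typeSizeInBits) {
      unsigned bytes = typeSizeInBits / 8;
      unsigned idx = 0;
      while ((1u << idx) < bytes)
        ++idx;
      return idx;
    }

    // Mirrors the threshold test added to runOnFunction: a negative
    // threshold disables call-based instrumentation entirely.
    bool shouldInstrumentWithCalls(long threshold, size_t numChecks,
                                   size_t numStores) {
      return threshold >= 0 &&
             numChecks + numStores > static_cast<size_t>(threshold);
    }

    int main() {
      bool asCall = shouldInstrumentWithCalls(3500, 2000, 1600);
      unsigned idx = typeSizeToSizeIndex(32);
      std::printf("as-call=%d size-index=%u in-table=%d\n", asCall, idx,
                  idx < kNumberOfAccessSizes);
    }

The trade-off is code size against speed: a call per check keeps heavily instrumented functions small, while the branchy form stays cheaper when checks are rare.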
//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tsan" - #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" @@ -46,6 +44,8 @@ using namespace llvm; +#define DEBUG_TYPE "tsan" + static cl::opt<std::string> ClBlacklistFile("tsan-blacklist", cl::desc("Blacklist file"), cl::Hidden); static cl::opt<bool> ClInstrumentMemoryAccesses( @@ -78,7 +78,7 @@ namespace { struct ThreadSanitizer : public FunctionPass { ThreadSanitizer(StringRef BlacklistFile = StringRef()) : FunctionPass(ID), - DL(0), + DL(nullptr), BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile) { } const char *getPassName() const override; @@ -174,8 +174,8 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { for (int op = AtomicRMWInst::FIRST_BINOP; op <= AtomicRMWInst::LAST_BINOP; ++op) { - TsanAtomicRMW[op][i] = NULL; - const char *NamePart = NULL; + TsanAtomicRMW[op][i] = nullptr; + const char *NamePart = nullptr; if (op == AtomicRMWInst::Xchg) NamePart = "_exchange"; else if (op == AtomicRMWInst::Add) @@ -226,7 +226,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { bool ThreadSanitizer::doInitialization(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); if (!DLP) - return false; + report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); @@ -518,7 +518,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { if (Idx < 0) return false; Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; - if (F == NULL) + if (!F) return false; const size_t ByteSize = 1 << Idx; const size_t BitSize = ByteSize * 8; diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index 4eac39d..4098428 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -43,34 +43,34 @@ public: EPT_RetainAutoreleaseRV }; - ARCRuntimeEntryPoints() : TheModule(0), - AutoreleaseRV(0), - Release(0), - Retain(0), - RetainBlock(0), - Autorelease(0), - StoreStrong(0), - RetainRV(0), - RetainAutorelease(0), - RetainAutoreleaseRV(0) { } + ARCRuntimeEntryPoints() : TheModule(nullptr), + AutoreleaseRV(nullptr), + Release(nullptr), + Retain(nullptr), + RetainBlock(nullptr), + Autorelease(nullptr), + StoreStrong(nullptr), + RetainRV(nullptr), + RetainAutorelease(nullptr), + RetainAutoreleaseRV(nullptr) { } ~ARCRuntimeEntryPoints() { } void Initialize(Module *M) { TheModule = M; - AutoreleaseRV = 0; - Release = 0; - Retain = 0; - RetainBlock = 0; - Autorelease = 0; - StoreStrong = 0; - RetainRV = 0; - RetainAutorelease = 0; - RetainAutoreleaseRV = 0; + AutoreleaseRV = nullptr; + Release = nullptr; + Retain = nullptr; + RetainBlock = nullptr; + Autorelease = nullptr; + StoreStrong = nullptr; + RetainRV = nullptr; + RetainAutorelease = nullptr; + RetainAutoreleaseRV = nullptr; } Constant *get(const EntryPointType entry) { - assert(TheModule != 0 && "Not initialized."); + assert(TheModule != nullptr && "Not initialized."); switch (entry) { case EPT_AutoreleaseRV: diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 8780359..08c8842 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -20,7 +20,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE 
"objc-arc-dependency" #include "ObjCARC.h" #include "DependencyAnalysis.h" #include "ProvenanceAnalysis.h" @@ -29,6 +28,8 @@ using namespace llvm; using namespace llvm::objcarc; +#define DEBUG_TYPE "objc-arc-dependency" + /// Test whether the given instruction can result in a reference count /// modification (positive or negative) for the pointer's object. bool @@ -223,7 +224,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor, pred_iterator PI(LocalStartBB), PE(LocalStartBB, false); if (PI == PE) // If we've reached the function entry, produce a null dependence. - DependingInsts.insert(0); + DependingInsts.insert(nullptr); else // Add the predecessors to the worklist. do { diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp index cb7e4da..1a25391 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp @@ -24,7 +24,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "objc-arc-ap-elim" #include "ObjCARC.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" @@ -34,6 +33,8 @@ using namespace llvm; using namespace llvm::objcarc; +#define DEBUG_TYPE "objc-arc-ap-elim" + namespace { /// \brief Autorelease pool elimination. class ObjCARCAPElim : public ModulePass { @@ -93,7 +94,7 @@ bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { bool Changed = false; - Instruction *Push = 0; + Instruction *Push = nullptr; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = I++; switch (GetBasicInstructionClass(Inst)) { @@ -112,11 +113,11 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { Inst->eraseFromParent(); Push->eraseFromParent(); } - Push = 0; + Push = nullptr; break; case IC_CallOrUser: if (MayAutorelease(ImmutableCallSite(Inst))) - Push = 0; + Push = nullptr; break; default: break; @@ -154,8 +155,8 @@ bool ObjCARCAPElim::runOnModule(Module &M) { for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end(); OI != OE; ++OI) { Value *Op = *OI; - // llvm.global_ctors is an array of pairs where the second members - // are constructor functions. + // llvm.global_ctors is an array of three-field structs where the second + // members are constructor functions. Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1)); // If the user used a constructor function with the wrong signature and // it got bitcasted or whatever, look the other way. diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp index d18667b..2c09e70 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp @@ -20,7 +20,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "objc-arc-aa" #include "ObjCARC.h" #include "ObjCARCAliasAnalysis.h" #include "llvm/IR/Instruction.h" @@ -28,6 +27,8 @@ #include "llvm/PassAnalysisSupport.h" #include "llvm/PassSupport.h" +#define DEBUG_TYPE "objc-arc-aa" + namespace llvm { class Function; class Value; diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 3da5a0e..f48d53d 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -26,7 +26,6 @@ // TODO: ObjCARCContract could insert PHI nodes when uses aren't // dominated by single calls. 
-#define DEBUG_TYPE "objc-arc-contract" #include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "DependencyAnalysis.h" @@ -40,6 +39,8 @@ using namespace llvm; using namespace llvm::objcarc; +#define DEBUG_TYPE "objc-arc-contract" + STATISTIC(NumPeeps, "Number of calls peephole-optimized"); STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); @@ -157,7 +158,7 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, // Check that there are no instructions between the retain and the autorelease // (such as an autorelease_pop) which may change the count. - CallInst *Retain = 0; + CallInst *Retain = nullptr; if (Class == IC_AutoreleaseRV) FindDependencies(RetainAutoreleaseRVDep, Arg, Autorelease->getParent(), Autorelease, @@ -218,7 +219,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release, BasicBlock::iterator I = Load, End = BB->end(); ++I; AliasAnalysis::Location Loc = AA->getLocation(Load); - StoreInst *Store = 0; + StoreInst *Store = nullptr; bool SawRelease = false; for (; !Store || !SawRelease; ++I) { if (I == End) @@ -300,7 +301,7 @@ bool ObjCARCContract::doInitialization(Module &M) { EP.Initialize(&M); // Initialize RetainRVMarker. - RetainRVMarker = 0; + RetainRVMarker = nullptr; if (NamedMDNode *NMD = M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) if (NMD->getNumOperands() == 1) { diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp index 8bec699..bf9fcbb 100644 --- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp @@ -23,8 +23,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "objc-arc-expand" - #include "ObjCARC.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" @@ -40,6 +38,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#define DEBUG_TYPE "objc-arc-expand" + namespace llvm { class Module; } diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index eed3cb2..dd4dd50 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -24,7 +24,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "objc-arc-opts" #include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "DependencyAnalysis.h" @@ -44,6 +43,8 @@ using namespace llvm; using namespace llvm::objcarc; +#define DEBUG_TYPE "objc-arc-opts" + /// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific. 
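The ObjCARCContract::ContractRelease hunks above scan forward from a load for the paired store and release so the sequence can be fused into a single objc_storeStrong call. A minimal standalone model of the equivalence being exploited, with an explicit refcount field standing in for the Objective-C runtime (the Obj type and counters are mine):

    #include <cassert>

    struct Obj { int rc = 1; };

    void retain(Obj *o)  { if (o) ++o->rc; }
    void release(Obj *o) { if (o) --o->rc; }

    // storeStrong(loc, v) == { retain(v); old = *loc; *loc = v; release(old); }
    void storeStrong(Obj **loc, Obj *v) {
      retain(v);
      Obj *old = *loc;
      *loc = v;
      release(old);
    }

    int main() {
      Obj a, b;
      Obj *slot = &a;        // slot holds a at +1
      storeStrong(&slot, &b);
      assert(slot == &b && a.rc == 0 && b.rc == 2);
      return 0;
    }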
/// @{ @@ -156,7 +157,7 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { return FindSingleUseIdentifiedObject( cast<CallInst>(Arg)->getArgOperand(0)); if (!IsObjCIdentifiedObject(Arg)) - return 0; + return nullptr; return Arg; } @@ -165,12 +166,12 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { if (IsObjCIdentifiedObject(Arg)) { for (const User *U : Arg->users()) if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) - return 0; + return nullptr; return Arg; } - return 0; + return nullptr; } /// This is a wrapper around getUnderlyingObjCPtr along the lines of @@ -373,7 +374,7 @@ namespace { bool CFGHazardAfflicted; RRInfo() : - KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0), + KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr), CFGHazardAfflicted(false) {} void clear(); @@ -388,7 +389,7 @@ namespace { void RRInfo::clear() { KnownSafe = false; IsTailCallRelease = false; - ReleaseMetadata = 0; + ReleaseMetadata = nullptr; Calls.clear(); ReverseInsertPts.clear(); CFGHazardAfflicted = false; @@ -397,7 +398,7 @@ void RRInfo::clear() { bool RRInfo::Merge(const RRInfo &Other) { // Conservatively merge the ReleaseMetadata information. if (ReleaseMetadata != Other.ReleaseMetadata) - ReleaseMetadata = 0; + ReleaseMetadata = nullptr; // Conservatively merge the boolean state. KnownSafe &= Other.KnownSafe; @@ -456,7 +457,7 @@ namespace { } bool IsTrackingImpreciseReleases() const { - return RRI.ReleaseMetadata != 0; + return RRI.ReleaseMetadata != nullptr; } const MDNode *GetReleaseMetadata() const { @@ -818,7 +819,7 @@ ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier", /// arc annotation processor tool. If the function is an static MDString *AppendMDNodeToSourcePtr(unsigned NodeId, Value *Ptr) { - MDString *Hash = 0; + MDString *Hash = nullptr; // If pointer is a result of an instruction and it does not have a source // MDNode it, attach a new MDNode onto it. 
If pointer is a result of @@ -880,7 +881,7 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId, MDString *PtrSourceMDNodeID, Sequence OldSeq, Sequence NewSeq) { - MDNode *Node = 0; + MDNode *Node = nullptr; Value *tmp[3] = {PtrSourceMDNodeID, SequenceToMDString(Inst->getContext(), OldSeq), @@ -916,7 +917,7 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, Value *PtrName; StringRef Tmp = Ptr->getName(); - if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) { Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, Tmp + "_STR"); PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, @@ -925,7 +926,7 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, Value *S; std::string SeqStr = SequenceToString(Seq); - if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) { Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, SeqStr + "_STR"); S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, @@ -959,7 +960,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, Value *PtrName; StringRef Tmp = Ptr->getName(); - if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) { Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, Tmp + "_STR"); PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, @@ -968,7 +969,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, Value *S; std::string SeqStr = SequenceToString(Seq); - if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) { Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, SeqStr + "_STR"); S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, @@ -1718,7 +1719,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, BBState &MyStates) { bool NestingDetected = false; InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; + const Value *Arg = nullptr; DEBUG(dbgs() << "Class: " << Class << "\n"); @@ -1974,7 +1975,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, BBState &MyStates) { bool NestingDetected = false; InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; + const Value *Arg = nullptr; switch (Class) { case IC_RetainBlock: @@ -2026,7 +2027,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, switch (OldSeq) { case S_Retain: case S_CanRelease: - if (OldSeq == S_Retain || ReleaseMetadata != 0) + if (OldSeq == S_Retain || ReleaseMetadata != nullptr) S.ClearReverseInsertPts(); // FALL THROUGH case S_Use: @@ -2432,7 +2433,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> } else { if (ReleasesToMove.ReleaseMetadata != NewRetainReleaseRRI.ReleaseMetadata) - ReleasesToMove.ReleaseMetadata = 0; + ReleasesToMove.ReleaseMetadata = nullptr; if (ReleasesToMove.IsTailCallRelease != NewRetainReleaseRRI.IsTailCallRelease) ReleasesToMove.IsTailCallRelease = false; @@ -2884,7 +2885,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, FindDependencies(CanChangeRetainCount, Arg, BB, Autorelease, DepInsts, Visited, PA); if (DepInsts.size() != 1) - return 0; + return nullptr; CallInst *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin()); @@ -2893,7 +2894,7 @@ FindPredecessorRetainWithSafePath(const 
Value *Arg, BasicBlock *BB, if (!Retain || !IsRetain(GetBasicInstructionClass(Retain)) || GetObjCArg(Retain) != Arg) { - return 0; + return nullptr; } return Retain; @@ -2911,17 +2912,17 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, FindDependencies(NeedsPositiveRetainCount, Arg, BB, Ret, DepInsts, V, PA); if (DepInsts.size() != 1) - return 0; + return nullptr; CallInst *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin()); if (!Autorelease) - return 0; + return nullptr; InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease); if (!IsAutorelease(AutoreleaseClass)) - return 0; + return nullptr; if (GetObjCArg(Autorelease) != Arg) - return 0; + return nullptr; return Autorelease; } diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index fa8b598..1a3a4aa 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "adce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -28,6 +27,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "adce" + STATISTIC(NumRemoved, "Number of instructions removed"); namespace { diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index 3894f93..079cc86 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -32,6 +32,7 @@ transforms_scalar_SRC_FILES := \ Scalar.cpp \ Scalarizer.cpp \ ScalarReplAggregates.cpp \ + SeparateConstOffsetFromGEP.cpp \ SimplifyCFGPass.cpp \ Sink.cpp \ StructurizeCFG.cpp \ @@ -60,11 +61,6 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES) LOCAL_MODULE:= libLLVMScalarOpts -# Override the default optimization level to work around a SIGSEGV -# on x86 target builds for SROA.cpp. 
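A large share of the churn in this patch, visible throughout the ObjCARC hunks above and the Scalar hunks below, is the mechanical replacement of 0 and NULL with nullptr. That is more than style: nullptr has its own type (std::nullptr_t), so it can never be silently taken as an integer, which matters for overload resolution and in variadic or template contexts. A self-contained illustration:

    #include <cstdio>

    void f(int)    { std::puts("f(int)"); }
    void f(void *) { std::puts("f(void*)"); }

    int main() {
      f(0);       // picks f(int): 0 is an integer literal first
      f(nullptr); // picks f(void*): unambiguously a null pointer constant
      // f(NULL); // ambiguous or f(int), depending on how the platform
                  // defines NULL; exactly the trap nullptr removes
    }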
-# Bug: 8047767 -LOCAL_CFLAGS_x86 += -O1 - LOCAL_MODULE_TAGS := optional include $(LLVM_DEVICE_BUILD_MK) diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 27434c1..3ad1488 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -5,19 +5,19 @@ add_llvm_library(LLVMScalarOpts CorrelatedValuePropagation.cpp DCE.cpp DeadStoreElimination.cpp - Scalarizer.cpp EarlyCSE.cpp - GlobalMerge.cpp + FlattenCFGPass.cpp GVN.cpp + GlobalMerge.cpp IndVarSimplify.cpp JumpThreading.cpp LICM.cpp LoopDeletion.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp + LoopRerollPass.cpp LoopRotation.cpp LoopStrengthReduce.cpp - LoopRerollPass.cpp LoopUnrollPass.cpp LoopUnswitch.cpp LowerAtomic.cpp @@ -25,13 +25,14 @@ add_llvm_library(LLVMScalarOpts PartiallyInlineLibCalls.cpp Reassociate.cpp Reg2Mem.cpp - SampleProfile.cpp SCCP.cpp SROA.cpp + SampleProfile.cpp Scalar.cpp ScalarReplAggregates.cpp + Scalarizer.cpp + SeparateConstOffsetFromGEP.cpp SimplifyCFGPass.cpp - FlattenCFGPass.cpp Sink.cpp StructurizeCFG.cpp TailRecursionElimination.cpp diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 57a1521..763d02b 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -33,7 +33,6 @@ // %0 = load i64* inttoptr (i64 big_constant to i64*) //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "consthoist" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -44,9 +43,12 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include <tuple> using namespace llvm; +#define DEBUG_TYPE "consthoist" + STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); @@ -117,7 +119,8 @@ class ConstantHoisting : public FunctionPass { SmallVector<ConstantInfo, 8> ConstantVec; public: static char ID; // Pass identification, replacement for typeid - ConstantHoisting() : FunctionPass(ID), TTI(0), DT(0), Entry(0) { + ConstantHoisting() : FunctionPass(ID), TTI(nullptr), DT(nullptr), + Entry(nullptr) { initializeConstantHoistingPass(*PassRegistry::getPassRegistry()); } @@ -206,7 +209,16 @@ bool ConstantHoisting::runOnFunction(Function &Fn) { /// \brief Find the constant materialization insertion point. Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst, unsigned Idx) const { - // The simple and common case. + // If the operand is a cast instruction, then we have to materialize the + // constant before the cast instruction. + if (Idx != ~0U) { + Value *Opnd = Inst->getOperand(Idx); + if (auto CastInst = dyn_cast<Instruction>(Opnd)) + if (CastInst->isCast()) + return CastInst; + } + + // The simple and common case. This also includes constant expressions. 
if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst)) return Inst; @@ -228,7 +240,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const { SmallPtrSet<BasicBlock *, 8> BBs; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) - BBs.insert(U.Inst->getParent()); + BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); if (BBs.count(Entry)) return &Entry->front(); @@ -487,8 +499,8 @@ void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset, ClonedCastInst->insertAfter(CastInst); // Use the same debug location as the original cast instruction. ClonedCastInst->setDebugLoc(CastInst->getDebugLoc()); - DEBUG(dbgs() << "Clone instruction: " << *ClonedCastInst << '\n' - << "To : " << *CastInst << '\n'); + DEBUG(dbgs() << "Clone instruction: " << *CastInst << '\n' + << "To : " << *ClonedCastInst << '\n'); } DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n'); diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 7045b36..dd51ce1 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "constprop" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" @@ -31,6 +30,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "constprop" + STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { @@ -68,7 +69,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { } bool Changed = false; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? 
&DLP->getDataLayout() : nullptr; TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); while (!WorkList.empty()) { diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 0490767..0829462 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -26,6 +25,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "correlated-value-propagation" + STATISTIC(NumPhis, "Number of phis propagated"); STATISTIC(NumSelects, "Number of selects propagated"); STATISTIC(NumMemAccess, "Number of memory access targets propagated"); @@ -138,7 +139,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { } bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { - Value *Pointer = 0; + Value *Pointer = nullptr; if (LoadInst *L = dyn_cast<LoadInst>(I)) Pointer = L->getPointerOperand(); else diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 8377fd9..99fac75 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/InstIterator.h" @@ -26,6 +25,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "dce" + STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); STATISTIC(DCEEliminated, "Number of insts removed"); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index f54c00d..3af8ee7 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dse" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -38,6 +37,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "dse" + STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastOther , "Number of other instrs removed"); @@ -49,7 +50,7 @@ namespace { const TargetLibraryInfo *TLI; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) { + DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) { initializeDSEPass(*PassRegistry::getPassRegistry()); } @@ -69,7 +70,7 @@ namespace { if (DT->isReachableFromEntry(I)) Changed |= runOnBasicBlock(*I); - AA = 0; MD = 0; DT = 0; + AA = nullptr; MD = nullptr; DT = nullptr; return Changed; } @@ -111,9 +112,9 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// If ValueSet is non-null, remove any deleted instructions from it as well. 
/// static void DeleteDeadInstruction(Instruction *I, - MemoryDependenceAnalysis &MD, - const TargetLibraryInfo *TLI, - SmallSetVector<Value*, 16> *ValueSet = 0) { + MemoryDependenceAnalysis &MD, + const TargetLibraryInfo *TLI, + SmallSetVector<Value*, 16> *ValueSet = nullptr) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -131,7 +132,7 @@ static void DeleteDeadInstruction(Instruction *I, for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); - DeadInst->setOperand(op, 0); + DeadInst->setOperand(op, nullptr); // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; @@ -203,13 +204,13 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { // If we don't have target data around, an unknown size in Location means // that we should use the size of the pointee type. This isn't valid for // memset/memcpy, which writes more than an i8. - if (Loc.Size == AliasAnalysis::UnknownSize && DL == 0) + if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr) return AliasAnalysis::Location(); return Loc; } IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); - if (II == 0) return AliasAnalysis::Location(); + if (!II) return AliasAnalysis::Location(); switch (II->getIntrinsicID()) { default: return AliasAnalysis::Location(); // Unhandled intrinsic. @@ -217,7 +218,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { // If we don't have target data around, an unknown size in Location means // that we should use the size of the pointee type. This isn't valid for // init.trampoline, which writes more than an i8. - if (DL == 0) return AliasAnalysis::Location(); + if (!DL) return AliasAnalysis::Location(); // FIXME: We don't know the size of the trampoline, so we can't really // handle it here. @@ -359,7 +360,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // If we have no DataLayout information around, then the size of the store // is inferrable from the pointee type. If they are the same type, then // we know that the store is safe. - if (DL == 0 && Later.Ptr->getType() == Earlier.Ptr->getType()) + if (DL == nullptr && Later.Ptr->getType() == Earlier.Ptr->getType()) return OverwriteComplete; return OverwriteUnknown; @@ -373,7 +374,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || - Earlier.Size == AliasAnalysis::UnknownSize || DL == 0) + Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr) return OverwriteUnknown; // Check to see if the later store is to the entire object (either a global, @@ -461,7 +462,7 @@ static bool isPossibleSelfRead(Instruction *Inst, // Self reads can only happen for instructions that read memory. Get the // location read. AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA); - if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction. + if (!InstReadLoc.Ptr) return false; // Not a reading instruction. // If the read and written loc obviously don't alias, it isn't a read. if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; @@ -528,7 +529,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { DeleteDeadInstruction(SI, *MD, TLI); - if (NextInst == 0) // Next instruction deleted. + if (!NextInst) // Next instruction deleted. BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. 
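DeleteDeadInstruction in the DSE hunks above deletes with a worklist rather than recursion: removing one instruction drops its uses of its operands, and any operand whose use count hits zero is queued in NowDeadInsts. A standalone model of that cascade (the Node bookkeeping is mine; the pass works on real IR and also consults the target library info for side effects):

    #include <cstdio>
    #include <vector>

    struct Node {
      std::vector<Node *> operands;
      int uses = 0;
      bool deleted = false;
    };

    void deleteDead(Node *root) {
      std::vector<Node *> worklist{root};
      while (!worklist.empty()) {
        Node *n = worklist.back();
        worklist.pop_back();
        n->deleted = true;
        for (Node *op : n->operands) {
          // This use disappears with n; queue op if it just became dead.
          if (--op->uses == 0 && !op->deleted)
            worklist.push_back(op);
        }
        n->operands.clear();
      }
    }

    int main() {
      Node a, b, store;        // store uses a; a uses b
      store.operands = {&a}; a.uses = 1;
      a.operands     = {&b}; b.uses = 1;
      deleteDead(&store);      // cascades: store -> a -> b
      std::printf("a deleted=%d, b deleted=%d\n", a.deleted, b.deleted);
    }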
--BBI; @@ -543,7 +544,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA); // If we didn't get a useful location, fail. - if (Loc.Ptr == 0) + if (!Loc.Ptr) continue; while (InstDep.isDef() || InstDep.isClobber()) { @@ -557,7 +558,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *DepWrite = InstDep.getInst(); AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); // If we didn't get a useful location, or if it isn't a size, bail out. - if (DepLoc.Ptr == 0) + if (!DepLoc.Ptr) break; // If we find a write that is a) removable (i.e., non-volatile), b) is diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index af2c3d1..735f5c1 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "early-cse" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" @@ -29,6 +28,8 @@ #include <vector> using namespace llvm; +#define DEBUG_TYPE "early-cse" + STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd"); STATISTIC(NumCSE, "Number of instructions CSE'd"); STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); @@ -207,7 +208,7 @@ namespace { return false; CallInst *CI = dyn_cast<CallInst>(Inst); - if (CI == 0 || !CI->onlyReadsMemory()) + if (!CI || !CI->onlyReadsMemory()) return false; return true; } @@ -405,14 +406,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // have invalidated the live-out memory values of our parent value. For now, // just be conservative and invalidate memory if this block has multiple // predecessors. - if (BB->getSinglePredecessor() == 0) + if (!BB->getSinglePredecessor()) ++CurrentGeneration; /// LastStore - Keep track of the last non-volatile store that we saw... for /// as long as there in no instruction that reads memory. If we see a store /// to the same location, we delete the dead store. This zaps trivial dead /// stores which can occur in bitfield code among other things. - StoreInst *LastStore = 0; + StoreInst *LastStore = nullptr; bool Changed = false; @@ -462,7 +463,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { // Ignore volatile loads. if (!LI->isSimple()) { - LastStore = 0; + LastStore = nullptr; continue; } @@ -470,7 +471,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // generation, replace this instruction. std::pair<Value*, unsigned> InVal = AvailableLoads->lookup(Inst->getOperand(0)); - if (InVal.first != 0 && InVal.second == CurrentGeneration) { + if (InVal.first != nullptr && InVal.second == CurrentGeneration) { DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: " << *InVal.first << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); @@ -483,20 +484,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Otherwise, remember that we have this instruction. AvailableLoads->insert(Inst->getOperand(0), std::pair<Value*, unsigned>(Inst, CurrentGeneration)); - LastStore = 0; + LastStore = nullptr; continue; } // If this instruction may read from memory, forget LastStore. if (Inst->mayReadFromMemory()) - LastStore = 0; + LastStore = nullptr; // If this is a read-only call, process it. if (CallValue::canHandle(Inst)) { // If we have an available version of this call, and if it is the right // generation, replace this instruction. 
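The EarlyCSE hunks above revolve around CurrentGeneration: a cached load (or read-only call) is reusable only if no instruction that may write memory has executed since it was recorded, and the pass approximates that with a counter bumped at every potential write. A standalone model, with my own map and string keys in place of LLVM's scoped hash tables over IR values:

    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <utility>

    struct AvailableLoads {
      unsigned currentGeneration = 0;
      std::unordered_map<std::string, std::pair<int, unsigned>> table;

      void noteMayWriteMemory() { ++currentGeneration; } // store, opaque call...

      void recordLoad(const std::string &ptr, int value) {
        table[ptr] = {value, currentGeneration};
      }

      // True (and sets out) only for a same-generation entry.
      bool lookup(const std::string &ptr, int &out) const {
        auto it = table.find(ptr);
        if (it == table.end() || it->second.second != currentGeneration)
          return false;
        out = it->second.first;
        return true;
      }
    };

    int main() {
      AvailableLoads cse;
      int v;
      cse.recordLoad("p", 42);
      std::printf("hit=%d\n", cse.lookup("p", v)); // hit: same generation
      cse.noteMayWriteMemory();                    // e.g. a call that may write
      std::printf("hit=%d\n", cse.lookup("p", v)); // miss: entry is stale
    }

Note the related detail in the hunk above: a block with multiple predecessors also bumps the generation, conservatively discarding memory state inherited from the parent.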
std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst); - if (InVal.first != 0 && InVal.second == CurrentGeneration) { + if (InVal.first != nullptr && InVal.second == CurrentGeneration) { DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: " << *InVal.first << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); @@ -528,7 +529,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LastStore->eraseFromParent(); Changed = true; ++NumDSE; - LastStore = 0; + LastStore = nullptr; continue; } @@ -558,7 +559,7 @@ bool EarlyCSE::runOnFunction(Function &F) { std::vector<StackNode *> nodesToProcess; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index e7f2564..0430c18 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "flattencfg" #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/CFG.h" @@ -19,6 +18,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "flattencfg" + namespace { struct FlattenCFGPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 33c387c..6d07ddd 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -15,11 +15,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "gvn" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -50,6 +50,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "gvn" + STATISTIC(NumGVNInstr, "Number of instructions deleted"); STATISTIC(NumGVNLoad, "Number of loads deleted"); STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); @@ -213,13 +215,13 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode, } Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) { - assert(EI != 0 && "Not an ExtractValueInst?"); + assert(EI && "Not an ExtractValueInst?"); Expression e; e.type = EI->getType(); e.opcode = 0; IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); - if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { + if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { // EI might be an extract from one of our recognised intrinsics. If it // is we'll synthesize a semantically equivalent expression instead on // an extract value expression. @@ -327,7 +329,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { const MemoryDependenceAnalysis::NonLocalDepInfo &deps = MD->getNonLocalCallDependency(CallSite(C)); // FIXME: Move the checking logic to MemDep! - CallInst* cdep = 0; + CallInst* cdep = nullptr; // Check to see if we have a single dominating call instruction that is // identical to C. 
@@ -338,8 +340,8 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { // We don't handle non-definitions. If we already have a call, reject // instruction dependencies. - if (!I->getResult().isDef() || cdep != 0) { - cdep = 0; + if (!I->getResult().isDef() || cdep != nullptr) { + cdep = nullptr; break; } @@ -350,7 +352,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { continue; } - cdep = 0; + cdep = nullptr; break; } @@ -551,7 +553,7 @@ namespace { static AvailableValueInBlock getUndef(BasicBlock *BB) { AvailableValueInBlock Res; Res.BB = BB; - Res.Val.setPointer(0); + Res.Val.setPointer(nullptr); Res.Val.setInt(UndefVal); Res.Offset = 0; return Res; @@ -611,7 +613,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { + : FunctionPass(ID), NoLoads(noloads), MD(nullptr) { initializeGVNPass(*PassRegistry::getPassRegistry()); } @@ -649,7 +651,7 @@ namespace { /// removeFromLeaderTable - Scan the list of values corresponding to a given /// value number, and remove the given instruction if encountered. void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) { - LeaderTableEntry* Prev = 0; + LeaderTableEntry* Prev = nullptr; LeaderTableEntry* Curr = &LeaderTable[N]; while (Curr->Val != I || Curr->BB != BB) { @@ -661,8 +663,8 @@ namespace { Prev->Next = Curr->Next; } else { if (!Curr->Next) { - Curr->Val = 0; - Curr->BB = 0; + Curr->Val = nullptr; + Curr->BB = nullptr; } else { LeaderTableEntry* Next = Curr->Next; Curr->Val = Next->Val; @@ -855,7 +857,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Instruction *InsertPt, const DataLayout &DL) { if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL)) - return 0; + return nullptr; // If this is already the right type, just return it. Type *StoredValTy = StoredVal->getType(); @@ -1060,7 +1062,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, const DataLayout &DL) { // If the mem operation is a non-constant size, we can't handle it. ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); - if (SizeCst == 0) return -1; + if (!SizeCst) return -1; uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; // If this is memset, we just need to see if the offset is valid in the size @@ -1075,10 +1077,10 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, MemTransferInst *MTI = cast<MemTransferInst>(MI); Constant *Src = dyn_cast<Constant>(MTI->getSource()); - if (Src == 0) return -1; + if (!Src) return -1; GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL)); - if (GV == 0 || !GV->isConstant()) return -1; + if (!GV || !GV->isConstant()) return -1; // See if the access is within the bounds of the transfer. int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, @@ -1420,8 +1422,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, // If this is a clobber and L is the first instruction in its block, then // we have the first instruction in the entry block. 
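The ValueTable code in the GVN hunks above is the heart of the pass: structurally identical expressions map to the same value number, so a later instruction can be replaced by a dominating leader that already computes that number (lookup_or_add_call extends this to read-only calls, with MemoryDependenceAnalysis guarding legality). A toy value-numbering table, with an expression encoding far simpler than GVN's (the tuple key is mine):

    #include <cstdio>
    #include <map>
    #include <string>
    #include <tuple>

    struct ValueTable {
      std::map<std::tuple<std::string, int, int>, unsigned> numbering;
      unsigned next = 1;

      unsigned lookupOrAdd(const std::string &opcode, int lhs, int rhs) {
        auto key = std::make_tuple(opcode, lhs, rhs);
        auto it = numbering.find(key);
        if (it != numbering.end())
          return it->second;               // same number: redundant expression
        return numbering[key] = next++;
      }
    };

    int main() {
      ValueTable vt;
      unsigned a = vt.lookupOrAdd("add", 1, 2); // %a = add %1, %2
      unsigned b = vt.lookupOrAdd("add", 1, 2); // %b = add %1, %2 (redundant)
      std::printf("a=%u b=%u same=%d\n", a, b, a == b);
    }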
if (DepLI != LI && Address && DL) { - int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), - LI->getPointerOperand(), + int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, *DL); if (Offset != -1) { @@ -1469,8 +1470,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (S->getValueOperand()->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (DL == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), - LI->getType(), *DL)) { + if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), + LI->getType(), *DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1486,7 +1487,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (LD->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (DL == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)){ + if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1539,7 +1540,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check to see how many predecessors have the loaded value fully // available. - DenseMap<BasicBlock*, Value*> PredLoads; + MapVector<BasicBlock *, Value *> PredLoads; DenseMap<BasicBlock*, char> FullyAvailableBlocks; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; @@ -1553,7 +1554,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { continue; } - PredLoads[Pred] = 0; if (Pred->getTerminator()->getNumSuccessors() != 1) { if (isa<IndirectBrInst>(Pred->getTerminator())) { @@ -1570,11 +1570,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, } CriticalEdgePred.push_back(Pred); + } else { + // Only add the predecessors that will not be split for now. + PredLoads[Pred] = nullptr; } } // Decide whether PRE is profitable for this load. - unsigned NumUnavailablePreds = PredLoads.size(); + unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size(); assert(NumUnavailablePreds != 0 && "Fully available value should already be eliminated!"); @@ -1586,12 +1589,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; // Split critical edges, and update the unavailable predecessors accordingly. - for (SmallVectorImpl<BasicBlock *>::iterator I = CriticalEdgePred.begin(), - E = CriticalEdgePred.end(); I != E; I++) { - BasicBlock *OrigPred = *I; + for (BasicBlock *OrigPred : CriticalEdgePred) { BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); - PredLoads.erase(OrigPred); - PredLoads[NewPred] = 0; + assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!"); + PredLoads[NewPred] = nullptr; DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" << LoadBB->getName() << '\n'); } @@ -1599,9 +1600,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check if the load can safely be moved to all the unavailable predecessors. 
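Note the container change in the PerformLoadPRE hunk above: PredLoads moves from DenseMap to MapVector, so unavailable predecessors are now visited in insertion order rather than hash order, and the emitted IR no longer varies with pointer values. A minimal insertion-ordered map along the same lines (this mini container is mine; LLVM's MapVector is the real one):

    #include <cstddef>
    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <vector>

    template <typename K, typename V> class OrderedMap {
      std::vector<std::pair<K, V>> entries;      // iteration = insertion order
      std::unordered_map<K, std::size_t> index;  // O(1) membership and lookup
    public:
      V &operator[](const K &k) {
        auto it = index.find(k);
        if (it == index.end()) {
          index.emplace(k, entries.size());
          entries.emplace_back(k, V{});
          return entries.back().second;
        }
        return entries[it->second].second;
      }
      typename std::vector<std::pair<K, V>>::iterator begin() {
        return entries.begin();
      }
      typename std::vector<std::pair<K, V>>::iterator end() {
        return entries.end();
      }
    };

    int main() {
      OrderedMap<std::string, int> predLoads;
      predLoads["bb3"] = 0;
      predLoads["bb1"] = 0;
      for (auto &kv : predLoads)    // deterministically bb3, then bb1
        std::printf("%s\n", kv.first.c_str());
    }

Deterministic iteration matters here because the loop inserts instructions per predecessor; with a hashed container, two runs of the compiler could produce differently ordered (if equivalent) output.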
bool CanDoPRE = true; SmallVector<Instruction*, 8> NewInsts; - for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), - E = PredLoads.end(); I != E; ++I) { - BasicBlock *UnavailablePred = I->first; + for (auto &PredLoad : PredLoads) { + BasicBlock *UnavailablePred = PredLoad.first; // Do PHI translation to get its value in the predecessor if necessary. The // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. @@ -1610,20 +1610,20 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. PHITransAddr Address(LI->getPointerOperand(), DL); - Value *LoadPtr = 0; + Value *LoadPtr = nullptr; LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, NewInsts); // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. - if (LoadPtr == 0) { + if (!LoadPtr) { DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " << *LI->getPointerOperand() << "\n"); CanDoPRE = false; break; } - I->second = LoadPtr; + PredLoad.second = LoadPtr; } if (!CanDoPRE) { @@ -1632,8 +1632,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (MD) MD->removeInstruction(I); I->eraseFromParent(); } - // HINT:Don't revert the edge-splitting as following transformation may - // also need to split these critial edges. + // HINT: Don't revert the edge-splitting as following transformation may + // also need to split these critical edges. return !CriticalEdgePred.empty(); } @@ -1654,10 +1654,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, VN.lookup_or_add(NewInsts[i]); } - for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), - E = PredLoads.end(); I != E; ++I) { - BasicBlock *UnavailablePred = I->first; - Value *LoadPtr = I->second; + for (const auto &PredLoad : PredLoads) { + BasicBlock *UnavailablePred = PredLoad.first; + Value *LoadPtr = PredLoad.second; Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, LI->getAlignment(), @@ -1776,7 +1775,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { MDNode *ReplMD = Metadata[i].second; switch(Kind) { default: - ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata + ReplInst->setMetadata(Kind, nullptr); // Remove unknown metadata break; case LLVMContext::MD_dbg: llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); @@ -1832,7 +1831,7 @@ bool GVN::processLoad(LoadInst *L) { // a common base + constant offset, and if the previous store (or memset) // completely covers this load. This sort of thing can happen in bitfield // access code. 
- Value *AvailVal = 0; + Value *AvailVal = nullptr; if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { int Offset = AnalyzeLoadFromClobberingStore(L->getType(), L->getPointerOperand(), @@ -1920,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) { if (DL) { StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, *DL); - if (StoredVal == 0) + if (!StoredVal) return false; DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal @@ -1949,7 +1948,7 @@ bool GVN::processLoad(LoadInst *L) { if (DL) { AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, *DL); - if (AvailableVal == 0) + if (!AvailableVal) return false; DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal @@ -1999,9 +1998,9 @@ bool GVN::processLoad(LoadInst *L) { // a few comparisons of DFS numbers. Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { LeaderTableEntry Vals = LeaderTable[num]; - if (!Vals.Val) return 0; + if (!Vals.Val) return nullptr; - Value *Val = 0; + Value *Val = nullptr; if (DT->dominates(Vals.BB, BB)) { Val = Vals.Val; if (isa<Constant>(Val)) return Val; @@ -2052,7 +2051,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, const BasicBlock *Src = E.getStart(); assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); (void)Src; - return Pred != 0; + return Pred != nullptr; } /// propagateEquality - The given values are known to be equal in every block @@ -2296,7 +2295,7 @@ bool GVN::processInstruction(Instruction *I) { // Perform fast-path value-number based elimination of values inherited from // dominators. Value *repl = findLeader(I->getParent(), Num); - if (repl == 0) { + if (!repl) { // Failure, just remember this instance for future use. addToLeaderTable(Num, I, I->getParent()); return false; @@ -2319,7 +2318,7 @@ bool GVN::runOnFunction(Function& F) { MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); @@ -2421,10 +2420,7 @@ bool GVN::processBlock(BasicBlock *BB) { bool GVN::performPRE(Function &F) { bool Changed = false; SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap; - for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { - BasicBlock *CurrentBlock = *DI; - + for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; @@ -2464,7 +2460,7 @@ bool GVN::performPRE(Function &F) { // more complicated to get right. 
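The performPRE loop rewritten above (and another in iterateOnFunction just below) trades an explicit df_iterator begin/end pair for the depth_first() range adaptor, which packages the two iterators into one object that range-based for can consume. The adaptor idea in miniature (this generic Range is mine; LLVM ships depth_first() and iterator_range ready-made):

    #include <cstdio>
    #include <vector>

    template <typename It> struct Range {
      It b, e;
      It begin() const { return b; }
      It end() const { return e; }
    };

    template <typename It> Range<It> makeRange(It b, It e) { return {b, e}; }

    int main() {
      std::vector<int> blocks = {1, 2, 3};
      // Old style: for (it DI = df_begin(x), DE = df_end(x); DI != DE; ++DI)
      // New style: one range expression feeding range-based for.
      for (int bb : makeRange(blocks.begin(), blocks.end()))
        std::printf("%d\n", bb);
    }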
unsigned NumWith = 0; unsigned NumWithout = 0; - BasicBlock *PREPred = 0; + BasicBlock *PREPred = nullptr; predMap.clear(); for (pred_iterator PI = pred_begin(CurrentBlock), @@ -2482,8 +2478,8 @@ bool GVN::performPRE(Function &F) { } Value* predV = findLeader(P, ValNo); - if (predV == 0) { - predMap.push_back(std::make_pair(static_cast<Value *>(0), P)); + if (!predV) { + predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); PREPred = P; ++NumWithout; } else if (predV == CurInst) { @@ -2637,9 +2633,8 @@ bool GVN::iterateOnFunction(Function &F) { // std::vector<BasicBlock *> BBVect; BBVect.reserve(256); - for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()), - DE = df_end(DT->getRootNode()); DI != DE; ++DI) - BBVect.push_back(DI->getBlock()); + for (DomTreeNode *x : depth_first(DT->getRootNode())) + BBVect.push_back(x->getBlock()); for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end(); I != E; I++) diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 8ffd64b..990d067 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -51,7 +51,6 @@ // note that we saved 2 registers here almostly "for free". // ===---------------------------------------------------------------------===// -#define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -70,6 +69,8 @@ #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +#define DEBUG_TYPE "global-merge" + cl::opt<bool> EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), @@ -107,7 +108,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. 
- explicit GlobalMerge(const TargetMachine *TM = 0) + explicit GlobalMerge(const TargetMachine *TM = nullptr) : FunctionPass(ID), TM(TM) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -173,7 +174,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, GlobalValue::InternalLinkage, MergedInit, "_MergedGlobals", - 0, GlobalVariable::NotThreadLocal, + nullptr, + GlobalVariable::NotThreadLocal, AddrSpace); for (size_t k = i; k < j; ++k) { Constant *Idx[2] = { diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 7537632..e83a5c4 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -24,7 +24,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "indvars" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -50,6 +49,8 @@ #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; +#define DEBUG_TYPE "indvars" + STATISTIC(NumWidened , "Number of indvars widened"); STATISTIC(NumReplaced , "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); @@ -79,8 +80,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), DL(0), - Changed(false) { + IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), + DL(nullptr), Changed(false) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -196,7 +197,7 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, if (!PHI) return User; - Instruction *InsertPt = 0; + Instruction *InsertPt = nullptr; for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { if (PHI->getIncomingValue(i) != Def) continue; @@ -257,13 +258,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // an add or increment value can not be represented by an integer. BinaryOperator *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); - if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; + if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return; // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); int64_t IncValue; - if (IncValueVal == 0 || Incr->getOperand(0) != PN || + if (IncValueVal == nullptr || Incr->getOperand(0) != PN || !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) return; @@ -280,7 +281,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { FCmpInst *Compare = dyn_cast<FCmpInst>(U1); if (!Compare) Compare = dyn_cast<FCmpInst>(U2); - if (Compare == 0 || !Compare->hasOneUse() || + if (!Compare || !Compare->hasOneUse() || !isa<BranchInst>(Compare->user_back())) return; @@ -301,7 +302,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // transform it. ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); int64_t ExitValue; - if (ExitValueVal == 0 || + if (ExitValueVal == nullptr || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; @@ -651,7 +652,8 @@ namespace { Type *WidestNativeType; // Widest integer type created [sz]ext bool IsSigned; // Was an sext user seen before a zext? 
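For orientation before the WidenIV machinery below: IndVarSimplify widens a narrow induction variable to the widest native integer type so that the sign/zero extensions its users need disappear from the loop. A source-level picture of the payoff, assuming a 64-bit long as on typical LP64 targets (both functions are equivalent because i cannot wrap before reaching n):

    #include <cstddef>

    long sumNarrow(const long *a, int n) {
      long s = 0;
      for (int i = 0; i < n; ++i)  // i is sign-extended at every a[i]
        s += a[i];
      return s;
    }

    long sumWide(const long *a, int n) {
      long s = 0;
      for (long i = 0; i < n; ++i) // widened IV: no per-iteration extension
        s += a[i];
      return s;
    }

    int main() {
      long a[4] = {1, 2, 3, 4};
      return sumNarrow(a, 4) == sumWide(a, 4) ? 0 : 1;
    }

This illustrates the transformation's intent, not the pass's output; the pass itself rewrites IR, guided by SCEV, as the hunks below show.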
- WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {} + WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr), + IsSigned(false) {} }; } @@ -693,7 +695,7 @@ struct NarrowIVDefUse { Instruction *NarrowUse; Instruction *WideDef; - NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {} + NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {} NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD): NarrowDef(ND), NarrowUse(NU), WideDef(WD) {} @@ -736,9 +738,9 @@ public: L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - WidePhi(0), - WideInc(0), - WideIncExpr(0), + WidePhi(nullptr), + WideInc(nullptr), + WideIncExpr(nullptr), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); } @@ -793,7 +795,7 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { unsigned Opcode = DU.NarrowUse->getOpcode(); switch (Opcode) { default: - return 0; + return nullptr; case Instruction::Add: case Instruction::Mul: case Instruction::UDiv: @@ -838,14 +840,14 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { // Handle the common case of add<nsw/nuw> if (DU.NarrowUse->getOpcode() != Instruction::Add) - return 0; + return nullptr; // One operand (NarrowDef) has already been extended to WideDef. Now determine // if extending the other will lead to a recurrence. unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); - const SCEV *ExtendOperExpr = 0; + const SCEV *ExtendOperExpr = nullptr; const OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(DU.NarrowUse); if (IsSigned && OBO->hasNoSignedWrap()) @@ -855,7 +857,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { ExtendOperExpr = SE->getZeroExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); else - return 0; + return nullptr; // When creating this AddExpr, don't apply the current operations NSW or NUW // flags. This instruction may be guarded by control flow that the no-wrap @@ -866,7 +868,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr)); if (!AddRec || AddRec->getLoop() != L) - return 0; + return nullptr; return AddRec; } @@ -877,14 +879,14 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { /// recurrence. Otherwise return NULL. const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { if (!SE->isSCEVable(NarrowUse->getType())) - return 0; + return nullptr; const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= SE->getTypeSizeInBits(WideType)) { // NarrowUse implicitly widens its operand. e.g. a gep with a narrow // index. So don't follow this use. - return 0; + return nullptr; } const SCEV *WideExpr = IsSigned ? 
@@ -892,7 +894,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { SE->getZeroExtendExpr(NarrowExpr, WideType); const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); if (!AddRec || AddRec->getLoop() != L) - return 0; + return nullptr; return AddRec; } @@ -930,7 +932,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " << *WidePhi << "\n"); } - return 0; + return nullptr; } } // Our raison d'etre! Eliminate sign and zero extension. @@ -968,7 +970,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // push the uses of WideDef here. // No further widening is needed. The deceased [sz]ext had done it for us. - return 0; + return nullptr; } // Does this user itself evaluate to a recurrence after widening? @@ -981,7 +983,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. truncateIVUse(DU, DT); - return 0; + return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't // insert a Trunc after a terminator if there happens to be a critical edge. @@ -990,14 +992,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // Reuse the IV increment that SCEVExpander created as long as it dominates // NarrowUse. - Instruction *WideUse = 0; + Instruction *WideUse = nullptr; if (WideAddRec == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) WideUse = WideInc; else { WideUse = CloneIVUser(DU); if (!WideUse) - return 0; + return nullptr; } // Evaluation of WideAddRec ensured that the narrow expression could be // extended outside the loop without overflow. This suggests that the wide use @@ -1008,7 +1010,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); DeadInsts.push_back(WideUse); - return 0; + return nullptr; } // Returning WideUse pushes it on the worklist. @@ -1043,7 +1045,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); if (!AddRec) - return NULL; + return nullptr; // Widen the induction variable expression. const SCEV *WideIVExpr = IsSigned ? @@ -1056,7 +1058,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Can the IV be extended outside the loop without overflow? AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); if (!AddRec || AddRec->getLoop() != L) - return NULL; + return nullptr; // An AddRec must have loop-invariant operands.
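WidenIV's goal, per the "raison d'etre" comment above, is to remove per-iteration sign and zero extensions by recreating each use of a narrow induction variable against a wide recurrence. A hedged source-level picture of the effect; the pass works on IR, and these functions are hypothetical:

#include <cstdint>

// Before widening: on a 64-bit target each A[i] conceptually sign-extends
// the 32-bit induction variable to a 64-bit index inside the loop.
int64_t sumNarrow(const int64_t *A, int32_t N) {
  int64_t S = 0;
  for (int32_t i = 0; i < N; ++i) // narrow (32-bit) IV
    S += A[i];                    // sext of i on every iteration
  return S;
}

// After widening, the IV itself is 64-bit and the per-iteration extension
// disappears; this is sound here because signed overflow of i is excluded,
// which is the nsw-style no-wrap reasoning WidenIV relies on.
int64_t sumWide(const int64_t *A, int32_t N) {
  int64_t S = 0;
  for (int64_t i = 0; i < N; ++i) // wide IV
    S += A[i];
  return S;
}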
Since this AddRec is // materialized by a loop header phi, the expression cannot have any post-loop @@ -1282,7 +1284,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { Instruction *IncI = dyn_cast<Instruction>(IncV); if (!IncI) - return 0; + return nullptr; switch (IncI->getOpcode()) { case Instruction::Add: @@ -1293,17 +1295,17 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { if (IncI->getNumOperands() == 2) break; default: - return 0; + return nullptr; } PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); if (Phi && Phi->getParent() == L->getHeader()) { if (isLoopInvariant(IncI->getOperand(1), L, DT)) return Phi; - return 0; + return nullptr; } if (IncI->getOpcode() == Instruction::GetElementPtr) - return 0; + return nullptr; // Allow add/sub to be commuted. Phi = dyn_cast<PHINode>(IncI->getOperand(1)); @@ -1311,7 +1313,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { if (isLoopInvariant(IncI->getOperand(0), L, DT)) return Phi; } - return 0; + return nullptr; } /// Return the compare guarding the loop latch, or NULL for unrecognized tests. @@ -1321,7 +1323,7 @@ static ICmpInst *getLoopTest(Loop *L) { BasicBlock *LatchBlock = L->getLoopLatch(); // Don't bother with LFTR if the loop is not properly simplified. if (!LatchBlock) - return 0; + return nullptr; BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); assert(BI && "expected exit branch"); @@ -1446,8 +1448,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount, cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition(); // Loop over all of the PHI nodes, looking for a simple counter. - PHINode *BestPhi = 0; - const SCEV *BestInit = 0; + PHINode *BestPhi = nullptr; + const SCEV *BestInit = nullptr; BasicBlock *LatchBlock = L->getLoopLatch(); assert(LatchBlock && "needsLFTR should guarantee a loop latch"); @@ -1571,7 +1573,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, // IVInit integer and IVCount pointer would only occur if a canonical IV // were generated on top of case #2, which is not expected. - const SCEV *IVLimit = 0; + const SCEV *IVLimit = nullptr; // For unit stride, IVCount = Start + BECount with 2's complement overflow. // For non-zero Start, compute IVCount here. if (AR->getStart()->isZero()) @@ -1813,7 +1815,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? 
&DLP->getDataLayout() : nullptr; TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); DeadInsts.clear(); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 067deb7..230a381 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jump-threading" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -38,6 +37,8 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; +#define DEBUG_TYPE "jump-threading" + STATISTIC(NumThreads, "Number of jumps threaded"); STATISTIC(NumFolds, "Number of terminators folded"); STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi"); @@ -153,7 +154,7 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); LVI = &getAnalysis<LazyValueInfo>(); @@ -308,7 +309,7 @@ void JumpThreading::FindLoopHeaders(Function &F) { /// Returns null if Val is null or not an appropriate constant. static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) { if (!Val) - return 0; + return nullptr; // Undef is "known" enough. if (UndefValue *U = dyn_cast<UndefValue>(Val)) @@ -352,7 +353,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // If V is a non-instruction value, or an instruction in a different block, // then it can't be derived from a PHI. Instruction *I = dyn_cast<Instruction>(V); - if (I == 0 || I->getParent() != BB) { + if (!I || I->getParent() != BB) { // Okay, if this is a live-in value, see if it has a known value at the end // of any of our predecessors. @@ -495,7 +496,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL); - if (Res == 0) { + if (!Res) { if (!isa<Constant>(RHS)) continue; @@ -581,7 +582,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // Either operand will do, so be sure to pick the one that's a known // constant. // FIXME: Do this more cleverly if both values are known constants? - KnownCond = (TrueVal != 0); + KnownCond = (TrueVal != nullptr); } // See if the select has a known constant value for this predecessor. @@ -737,7 +738,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { Instruction *CondInst = dyn_cast<Instruction>(Condition); // All the rest of our checks depend on the condition being an instruction. - if (CondInst == 0) { + if (!CondInst) { // FIXME: Unify this with code below. if (ProcessThreadableEdges(Condition, BB, Preference)) return true; @@ -890,7 +891,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { SmallPtrSet<BasicBlock*, 8> PredsScanned; typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy; AvailablePredsTy AvailablePreds; - BasicBlock *OneUnavailablePred = 0; + BasicBlock *OneUnavailablePred = nullptr; // If we got here, the loaded value is transparent through to the start of the // block. Check to see if it is available in any of the predecessor blocks. 
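For orientation, this is the transformation JumpThreading performs, sketched at the source level; the pass itself rewires the CFG, and both functions here are hypothetical:

// Before: the second branch re-tests a value that each predecessor
// already determines.
int before(bool P) {
  int X;
  if (P) X = 1; else X = 2;
  if (X == 1) // fully decided by which predecessor we arrived from
    return 10;
  return 20;
}

// After threading: each predecessor jumps straight to the successor it
// implies, and the X == 1 test disappears.
int after(bool P) {
  if (P) return 10;
  return 20;
}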
@@ -904,16 +905,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan the predecessor to see if the value is available in the pred. BBIt = PredBB->end(); - MDNode *ThisTBAATag = 0; + MDNode *ThisTBAATag = nullptr; Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6, - 0, &ThisTBAATag); + nullptr, &ThisTBAATag); if (!PredAvailable) { OneUnavailablePred = PredBB; continue; } // If tbaa tags disagree or are not present, forget about them. - if (TBAATag != ThisTBAATag) TBAATag = 0; + if (TBAATag != ThisTBAATag) TBAATag = nullptr; // If so, this load is partially redundant. Remember this info so that we // can create a PHI node. @@ -929,7 +930,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // predecessor, we want to insert a merge block for those common predecessors. // This ensures that we only have to insert one reload, thus not increasing // code size. - BasicBlock *UnavailablePred = 0; + BasicBlock *UnavailablePred = nullptr; // If there is exactly one predecessor where the value is unavailable, the // already computed 'OneUnavailablePred' block is it. If it ends in an @@ -996,7 +997,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), - std::make_pair(P, (Value*)0)); + std::make_pair(P, (Value*)nullptr)); assert(I != AvailablePreds.end() && I->first == P && "Didn't find entry for predecessor!"); @@ -1103,7 +1104,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, SmallPtrSet<BasicBlock*, 16> SeenPreds; SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList; - BasicBlock *OnlyDest = 0; + BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { @@ -1120,7 +1121,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *DestBB; if (isa<UndefValue>(Val)) - DestBB = 0; + DestBB = nullptr; else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero()); else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { @@ -1171,7 +1172,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // If the threadable edges are branching on an undefined value, we get to pick // the destination that these predecessors should get to. - if (MostPopularDest == 0) + if (!MostPopularDest) MostPopularDest = BB->getTerminator()-> getSuccessor(GetBestDestForJumpOnUndef(BB)); @@ -1273,7 +1274,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { } // Determine which value to split on, true, false, or undef if neither. - ConstantInt *SplitVal = 0; + ConstantInt *SplitVal = nullptr; if (NumTrue > NumFalse) SplitVal = ConstantInt::getTrue(BB->getContext()); else if (NumTrue != 0 || NumFalse != 0) @@ -1294,7 +1295,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // help us. However, we can just replace the LHS or RHS with the constant. if (BlocksToFoldInto.size() == cast<PHINode>(BB->front()).getNumIncomingValues()) { - if (SplitVal == 0) { + if (!SplitVal) { // If all preds provide undef, just nuke the xor, because it is undef too. 
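SimplifyPartiallyRedundantLoad, above, targets loads that are available in some predecessors but not others. A source-level sketch, ignoring the aliasing and TBAA checks the pass performs; both functions are hypothetical:

// Before: *P is loaded on one path, then loaded again unconditionally,
// so the second load is redundant on that path only.
int before(int *P, bool C) {
  int V = 0;
  if (C)
    V = *P;      // load available on this edge
  return V + *P; // partially redundant load
}

// After: a load is inserted into the predecessor where the value was
// unavailable, and a phi of the two loads replaces the original.
int after(int *P, bool C) {
  int V = 0, L;
  if (C) {
    L = *P;      // existing load, reused
    V = L;
  } else {
    L = *P;      // load inserted on the previously unavailable edge
  }
  return V + L;  // phi of the incoming loads
}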
BO->replaceAllUsesWith(UndefValue::get(BO->getType())); BO->eraseFromParent(); @@ -1531,7 +1532,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // can just clone the bits from BB into the end of the new PredBB. BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator()); - if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) { + if (!OldPredBranch || !OldPredBranch->isUnconditional()) { PredBB = SplitEdge(PredBB, BB, this); OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index b69f2dc..0a8d16f 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -30,7 +30,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "licm" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -60,6 +59,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "licm" + STATISTIC(NumSunk , "Number of instructions sunk out of loop"); STATISTIC(NumHoisted , "Number of instructions hoisted out of loop"); STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); @@ -223,7 +224,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -315,8 +316,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { "Parent loop not left in LCSSA form after LICM!"); // Clear out loops state information for the next iteration - CurLoop = 0; - Preheader = 0; + CurLoop = nullptr; + Preheader = nullptr; // If this loop is nested inside of another one, save the alias information // for when we process the outer loop. @@ -334,7 +335,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { /// iteration. /// void LICM::SinkRegion(DomTreeNode *N) { - assert(N != 0 && "Null dominator tree node?"); + assert(N != nullptr && "Null dominator tree node?"); BasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. @@ -381,7 +382,7 @@ void LICM::SinkRegion(DomTreeNode *N) { /// before uses, allowing us to hoist a loop body in one pass without iteration. /// void LICM::HoistRegion(DomTreeNode *N) { - assert(N != 0 && "Null dominator tree node?"); + assert(N != nullptr && "Null dominator tree node?"); BasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. @@ -774,7 +775,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. unsigned Alignment = 1; - MDNode *TBAATag = 0; + MDNode *TBAATag = nullptr; // Check that all of the pointers in the alias set have the same type. 
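The SinkRegion/HoistRegion walk above traverses the dominator tree so that definitions are processed before their uses. The classic effect of hoisting, as a hypothetical before/after pair:

// Before: X * Y is loop-invariant but recomputed on every iteration.
void before(int *A, int N, int X, int Y) {
  for (int i = 0; i < N; ++i)
    A[i] = X * Y + i;
}

// After LICM-style hoisting: the invariant product moves to the preheader
// and is computed once.
void after(int *A, int N, int X, int Y) {
  int T = X * Y;
  for (int i = 0; i < N; ++i)
    A[i] = T + i;
}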
We // cannot (yet) promote a memory location that is loaded and stored in diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 9a520c8..5ab686a 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-delete" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -23,6 +22,8 @@ #include "llvm/IR/Dominators.h" using namespace llvm; +#define DEBUG_TYPE "loop-delete" + STATISTIC(NumDeleted, "Number of loops deleted"); namespace { diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index e5e8b84..26a83df 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -41,7 +41,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-idiom" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -61,6 +60,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "loop-idiom" + STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); @@ -114,7 +115,7 @@ namespace { Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const; /// Return true iff the idiom is detected in the loop, and 1) \p CntInst - /// is set to the instruction counting the pupulation bit. 2) \p CntPhi + /// is set to the instruction counting the population bit. 2) \p CntPhi /// is set to the corresponding phi node. 3) \p Var is set to the value /// whose population bits are being counted. bool detectIdiom @@ -138,7 +139,7 @@ namespace { static char ID; explicit LoopIdiomRecognize() : LoopPass(ID) { initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - DL = 0; DT = 0; SE = 0; TLI = 0; TTI = 0; + DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr; } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -182,7 +183,7 @@ namespace { if (DL) return DL; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; return DL; } @@ -247,7 +248,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); - DeadInst->setOperand(op, 0); + DeadInst->setOperand(op, nullptr); // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; @@ -292,9 +293,9 @@ bool LIRUtil::isAlmostEmpty(BasicBlock *BB) { BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { if (BasicBlock *BB = PreHead->getSinglePredecessor()) { BranchInst *Br = getBranch(BB); - return Br && Br->isConditional() ? BB : 0; + return Br && Br->isConditional() ?
BB : nullptr; } - return 0; + return nullptr; } //===----------------------------------------------------------------------===// @@ -304,7 +305,7 @@ BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { //===----------------------------------------------------------------------===// NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): - LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) { + LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) { } bool NclPopcountRecognize::preliminaryScreen() { @@ -341,25 +342,25 @@ bool NclPopcountRecognize::preliminaryScreen() { return true; } -Value *NclPopcountRecognize::matchCondition (BranchInst *Br, - BasicBlock *LoopEntry) const { +Value *NclPopcountRecognize::matchCondition(BranchInst *Br, + BasicBlock *LoopEntry) const { if (!Br || !Br->isConditional()) - return 0; + return nullptr; ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition()); if (!Cond) - return 0; + return nullptr; ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1)); if (!CmpZero || !CmpZero->isZero()) - return 0; + return nullptr; ICmpInst::Predicate Pred = Cond->getPredicate(); if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) || (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry)) return Cond->getOperand(0); - return 0; + return nullptr; } bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, @@ -390,9 +391,9 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, Value *VarX1, *VarX0; PHINode *PhiX, *CountPhi; - DefX2 = CountInst = 0; - VarX1 = VarX0 = 0; - PhiX = CountPhi = 0; + DefX2 = CountInst = nullptr; + VarX1 = VarX0 = nullptr; + PhiX = CountPhi = nullptr; LoopEntry = *(CurLoop->block_begin()); // step 1: Check if the loop-back branch is in desirable form. @@ -439,7 +440,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, // step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1 { - CountInst = NULL; + CountInst = nullptr; for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(), IterE = LoopEntry->end(); Iter != IterE; Iter++) { Instruction *Inst = Iter; @@ -744,7 +745,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, // If processing the store invalidated our iterator, start over from the // top of the block. - if (InstPtr == 0) + if (!InstPtr) I = BB->begin(); continue; } @@ -757,7 +758,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, // If processing the memset invalidated our iterator, start over from the // top of the block. - if (InstPtr == 0) + if (!InstPtr) I = BB->begin(); continue; } @@ -784,7 +785,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { // random store we can't handle. const SCEVAddRecExpr *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); - if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) + if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) return false; // Check to see if the stride matches the size of the store.
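The loop shape detectIdiom is matching, written out as a runnable sketch; the comments map names onto the pass's variables, and the example assumes a 32-bit unsigned:

#include <cassert>

int popcountIdiom(unsigned X) {
  int Cnt = 0;       // the count: CntPhi feeding CntInst
  while (X) {        // loop-back branch tests the variable against zero
    X &= X - 1;      // DefX2: clears the lowest set bit of Var
    ++Cnt;           // CountInst: cnt2 = cnt1 + 1
  }
  return Cnt;
}

int main() {
  assert(popcountIdiom(0) == 0);
  assert(popcountIdiom(0xF0u) == 4);
  assert(popcountIdiom(~0u) == 32); // assumes 32-bit unsigned
}

On success, the recognizer replaces the whole loop with a population-count intrinsic.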
If so, then we @@ -792,7 +793,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); - if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { + if (!Stride || StoreSize != Stride->getValue()->getValue()) { // TODO: Could also handle negative stride here someday, that will require // the validity check in mayLoopAccessLocation to be updated though. // Enable this to print exact negative strides. @@ -841,7 +842,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer)); - if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine()) + if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine()) return false; // Reject memsets that are so large that they overflow an unsigned. @@ -855,7 +856,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // TODO: Could also handle negative stride here someday, that will require the // validity check in mayLoopAccessLocation to be updated though. - if (Stride == 0 || MSI->getLength() != Stride->getValue()) + if (!Stride || MSI->getLength() != Stride->getValue()) return false; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, @@ -908,23 +909,23 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) { // array. We could theoretically do a store to an alloca or something, but // that doesn't seem worthwhile. Constant *C = dyn_cast<Constant>(V); - if (C == 0) return 0; + if (!C) return nullptr; // Only handle simple values that are a power of two bytes in size. uint64_t Size = DL.getTypeSizeInBits(V->getType()); if (Size == 0 || (Size & 7) || (Size & (Size-1))) - return 0; + return nullptr; // Don't care enough about darwin/ppc to implement this. if (DL.isBigEndian()) - return 0; + return nullptr; // Convert to size in bytes. Size /= 8; // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see // if the top and bottom are the same (e.g. for vectors and large integers). - if (Size > 16) return 0; + if (Size > 16) return nullptr; // If the constant is exactly 16 bytes, just use it. if (Size == 16) return C; @@ -949,7 +950,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // are stored. A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); - Constant *PatternValue = 0; + Constant *PatternValue = nullptr; unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); @@ -960,13 +961,13 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // promote the memset. CurLoop->isLoopInvariant(SplatValue)) { // Keep and use SplatValue. - PatternValue = 0; + PatternValue = nullptr; } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) && (PatternValue = getMemSetPatternValue(StoredVal, *DL))) { // Don't create memset_pattern16s with address spaces. // It looks like we can use PatternValue! - SplatValue = 0; + SplatValue = nullptr; } else { // Otherwise, this isn't an idiom we can transform. For example, we can't // do anything with a 3-byte store. 
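What processLoopStridedStore is driving at, as a hypothetical before/after pair: a unit-stride store of a splat byte becomes a memset, and a repeating multi-byte pattern can become memset_pattern16 on targets that provide it:

#include <cstring>

// Before: a byte-wise splat store with stride equal to the store size.
void before(unsigned char *A, unsigned N) {
  for (unsigned i = 0; i != N; ++i)
    A[i] = 0;
}

// After idiom recognition: one library call outside the loop.
void after(unsigned char *A, unsigned N) {
  if (N)
    std::memset(A, 0, N);
}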
@@ -1033,7 +1034,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Int8PtrTy, Int8PtrTy, IntPtr, - (void*)0); + (void*)nullptr); // Otherwise we should form a memset_pattern16. PatternValue is known to be // a constant array of 16 bytes. Plop the value into a mergeable global. diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 263ba93..ab1a939 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" @@ -26,6 +25,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "loop-instsimplify" + STATISTIC(NumSimplified, "Number of redundant instructions simplified"); namespace { @@ -70,10 +71,10 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0; + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = &getAnalysis<LoopInfo>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallVector<BasicBlock*, 8> ExitBlocks; @@ -126,7 +127,15 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumSimplified; } } - LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + if (res) { + // RecursivelyDeleteTriviallyDeadInstructions can remove + // more than one instruction, so simply incrementing the + // iterator does not work. When instructions get deleted, + // re-iterate instead. + BI = BB->begin(); BE = BB->end(); + LocalChanged |= res; + } if (IsSubloopHeader && !isa<PHINode>(I)) break; diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 81c1e42..8b5e036 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-reroll" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -36,6 +35,8 @@ using namespace llvm; +#define DEBUG_TYPE "loop-reroll" + STATISTIC(NumRerolledLoops, "Number of rerolled loops"); static cl::opt<unsigned> @@ -945,7 +946,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, bool InReduction = Reductions.isPairInSame(J1, J2); if (!(InReduction && J1->isAssociative())) { - bool Swapped = false, SomeOpMatched = false;; + bool Swapped = false, SomeOpMatched = false; for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) { Value *Op2 = J2->getOperand(j); @@ -1133,7 +1134,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis<ScalarEvolution>(); TLI = &getAnalysis<TargetLibraryInfo>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ?
&DLP->getDataLayout() : nullptr; DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); BasicBlock *Header = L->getHeader(); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index fde6bac..2ce5831 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-rotate" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" @@ -24,6 +23,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -31,7 +31,11 @@ #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; -#define MAX_HEADER_SIZE 16 +#define DEBUG_TYPE "loop-rotate" + +static cl::opt<unsigned> +DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, + cl::desc("The default maximum header size for automatic loop rotation")); STATISTIC(NumRotated, "Number of loops rotated"); namespace { @@ -39,8 +43,12 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(ID) { + LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { initializeLoopRotatePass(*PassRegistry::getPassRegistry()); + if (SpecifiedMaxHeaderSize == -1) + MaxHeaderSize = DefaultRotationThreshold; + else + MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); } // LCSSA form makes instruction renaming easier. @@ -61,6 +69,7 @@ namespace { bool rotateLoop(Loop *L, bool SimplifiedLatch); private: + unsigned MaxHeaderSize; LoopInfo *LI; const TargetTransformInfo *TTI; }; @@ -74,7 +83,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) -Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } +Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { + return new LoopRotate(MaxHeaderSize); +} /// Rotate Loop L as many times as possible. Return true if /// the loop is rotated at least once. @@ -82,6 +93,9 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; + // Save the loop metadata. + MDNode *LoopMD = L->getLoopID(); + LI = &getAnalysis<LoopInfo>(); TTI = &getAnalysis<TargetTransformInfo>(); @@ -96,6 +110,12 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { MadeChange = true; SimplifiedLatch = false; } + + // Restore the loop metadata. + // NB! We presume LoopRotation DOESN'T ADD its own metadata. + if ((MadeChange || SimplifiedLatch) && LoopMD) + L->setLoopID(LoopMD); + return MadeChange; } @@ -281,7 +301,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); - if (BI == 0 || BI->isUnconditional()) + if (!BI || BI->isUnconditional()) return false; // If the loop header is not one of the loop exiting blocks then @@ -292,7 +312,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop latch already contains a branch that leaves the loop then the // loop is already rotated. 
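The LoopInstSimplify fix above restarts the block scan because RecursivelyDeleteTriviallyDeadInstructions may erase instructions other than the current one. The same defensive pattern over a std::list stand-in; restarting is deliberately conservative, as it is in the pass:

#include <list>

void dropNonPositive(std::list<int> &Vals) {
  for (auto BI = Vals.begin(), BE = Vals.end(); BI != BE;) {
    if (*BI <= 0) {
      Vals.erase(BI);    // with std::list only BI is invalidated, but...
      BI = Vals.begin(); // ...restart anyway, mirroring the pass's
      BE = Vals.end();   // "re-iterate after deletion" approach
    } else {
      ++BI;
    }
  }
}

int main() {
  std::list<int> Vals = {3, -1, 0, 5};
  dropNonPositive(Vals);
  return Vals.size() == 2 ? 0 : 1; // Vals is now {3, 5}
}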
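The LoopRotation change above replaces the fixed MAX_HEADER_SIZE macro with a command-line default that a constructor argument can override, with -1 meaning "use the flag". A dependency-free sketch of that plumbing pattern; LoopRotateSketch and the plain global standing in for the cl::opt are hypothetical:

#include <cstdio>

static unsigned DefaultRotationThreshold = 16; // stand-in for the cl::opt

struct LoopRotateSketch {
  unsigned MaxHeaderSize;
  explicit LoopRotateSketch(int Specified = -1)
      : MaxHeaderSize(Specified == -1 ? DefaultRotationThreshold
                                      : unsigned(Specified)) {}
};

int main() {
  LoopRotateSketch ByFlag;     // like createLoopRotatePass(): uses the flag's value
  LoopRotateSketch Forced(32); // like createLoopRotatePass(32): explicit override
  std::printf("%u %u\n", ByFlag.MaxHeaderSize, Forced.MaxHeaderSize); // 16 32
}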
- if (OrigLatch == 0) + if (!OrigLatch) return false; // Rotate if either the loop latch does *not* exit the loop, or if the loop @@ -310,7 +330,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { << " instructions: "; L->dump()); return false; } - if (Metrics.NumInsts > MAX_HEADER_SIZE) + if (Metrics.NumInsts > MaxHeaderSize) return false; } @@ -319,7 +339,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (OrigPreheader == 0) + if (!OrigPreheader) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 272a16d..914b56a 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -53,7 +53,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-reduce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" @@ -78,6 +77,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "loop-reduce" + /// MaxIVUsers is an arbitrary threshold that provides an early opportunity to /// bail out. This threshold is far beyond the number of users that LSR can /// conceivably solve, so it should not affect generated code, but catches the @@ -237,7 +238,15 @@ struct Formula { int64_t Scale; /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty, + /// non-empty. The canonical representation of a formula is /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). /// #1 enforces that the scaled register is always used when at least two /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2. /// #2 enforces that 1 * reg is reg. /// This invariant can be temporarily broken while building a formula. /// However, every formula inserted into the LSRInstance must be in canonical /// form. SmallVector<const SCEV *, 4> BaseRegs; /// ScaledReg - The 'scaled' register for this use. This should be non-null /// ... int64_t UnfoldedOffset; Formula() - : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0), - UnfoldedOffset(0) {} + : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0), + ScaledReg(nullptr), UnfoldedOffset(0) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); - unsigned getNumRegs() const; + bool isCanonical() const; + + void Canonicalize(); + + bool Unscale(); + + size_t getNumRegs() const; Type *getType() const; void DeleteBaseReg(const SCEV *&S); @@ -345,12 +360,58 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { BaseRegs.push_back(Sum); HasBaseReg = true; } + Canonicalize(); +} + +/// \brief Check whether or not this formula satisfies the canonical +/// representation. +/// \see Formula::BaseRegs. +bool Formula::isCanonical() const { + if (ScaledReg) + return Scale != 1 || !BaseRegs.empty(); + return BaseRegs.size() <= 1; +} + +/// \brief Helper method to morph a formula into its canonical representation. +/// \see Formula::BaseRegs. +/// Every formula having more than one base register must use the ScaledReg +/// field. Otherwise, we would have to do special cases everywhere in LSR +/// to treat reg1 + reg2 + ...
the same way as reg1 + 1*reg2 + ... +/// On the other hand, 1*reg should be canonicalized into reg. +void Formula::Canonicalize() { + if (isCanonical()) + return; + // So far we did not need this case. This is easy to implement but it is + // useless to maintain dead code. Besides, it could hurt compile time. + assert(!BaseRegs.empty() && "1*reg => reg, should not be needed."); + // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg. + ScaledReg = BaseRegs.back(); + BaseRegs.pop_back(); + Scale = 1; + size_t BaseRegsSize = BaseRegs.size(); + size_t Try = 0; + // If ScaledReg is an invariant, try to find a variant expression. + while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg)) + std::swap(ScaledReg, BaseRegs[Try++]); +} + +/// \brief Get rid of the scale in the formula. +/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2. +/// \return true if it was possible to get rid of the scale, false otherwise. +/// \note After this operation the formula may not be in the canonical form. +bool Formula::Unscale() { + if (Scale != 1) + return false; + Scale = 0; + BaseRegs.push_back(ScaledReg); + ScaledReg = nullptr; + return true; } /// getNumRegs - Return the total number of register operands used by this /// formula. This does not include register uses implied by non-constant /// addrec strides. -unsigned Formula::getNumRegs() const { +size_t Formula::getNumRegs() const { return !!ScaledReg + BaseRegs.size(); } @@ -360,7 +421,7 @@ Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : BaseGV ? BaseGV->getType() : - 0; + nullptr; } /// DeleteBaseReg - Delete the given base reg from the BaseRegs list. @@ -487,11 +548,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Check for a division of a constant by a constant. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) { if (!RC) - return 0; + return nullptr; const APInt &LA = C->getValue()->getValue(); const APInt &RA = RC->getValue()->getValue(); if (LA.srem(RA) != 0) - return 0; + return nullptr; return SE.getConstant(LA.sdiv(RA)); } @@ -500,16 +561,16 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, IgnoreSignificantBits); - if (!Step) return 0; + if (!Step) return nullptr; const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, IgnoreSignificantBits); - if (!Start) return 0; + if (!Start) return nullptr; // FlagNW is independent of the start value, step direction, and is // preserved with smaller magnitude steps. // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); } - return 0; + return nullptr; } // Distribute the sdiv over add operands, if the add doesn't overflow. @@ -520,12 +581,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, I != E; ++I) { const SCEV *Op = getExactSDiv(*I, RHS, SE, IgnoreSignificantBits); - if (!Op) return 0; + if (!Op) return nullptr; Ops.push_back(Op); } return SE.getAddExpr(Ops); } - return 0; + return nullptr; } // Check for a multiply operand that we can pull RHS out of. @@ -544,13 +605,13 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, } Ops.push_back(S); } - return Found ? SE.getMulExpr(Ops) : 0; + return Found ? SE.getMulExpr(Ops) : nullptr; } - return 0; + return nullptr; } // Otherwise we don't know.
- return 0; + return nullptr; } /// ExtractImmediate - If S involves the addition of a constant integer value, @@ -604,7 +665,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { SCEV::FlagAnyWrap); return Result; } - return 0; + return nullptr; } /// isAddressUse - Returns true if the specified instruction is using the @@ -755,12 +816,12 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { Value *V = DeadInsts.pop_back_val(); Instruction *I = dyn_cast_or_null<Instruction>(V); - if (I == 0 || !isInstructionTriviallyDead(I)) + if (!I || !isInstructionTriviallyDead(I)) continue; for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) if (Instruction *U = dyn_cast<Instruction>(*OI)) { - *OI = 0; + *OI = nullptr; if (U->use_empty()) DeadInsts.push_back(U); } @@ -775,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { namespace { class LSRUse; } -// Check if it is legal to fold 2 base registers. -static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, - const Formula &F); + +/// \brief Check if the addressing mode defined by \p F is completely +/// folded in \p LU at isel time. +/// This includes address-mode folding and special icmp tricks. +/// This function returns true if \p LU can accommodate what \p F +/// defines and up to 1 base + 1 scaled + offset. +/// In other words, if \p F has several base registers, this function may +/// still return true. Therefore, users still need to account for +/// additional base registers and/or unfolded offsets to derive an +/// accurate cost model. +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + const LSRUse &LU, const Formula &F); // Get the cost of the scaling factor used in F for LU. static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F); @@ -828,7 +898,7 @@ public: const SmallVectorImpl<int64_t> &Offsets, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, - SmallPtrSet<const SCEV *, 16> *LoserRegs = 0); + SmallPtrSet<const SCEV *, 16> *LoserRegs = nullptr); void print(raw_ostream &OS) const; void dump() const; @@ -921,6 +991,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, SmallPtrSet<const SCEV *, 16> *LoserRegs) { + assert(F.isCanonical() && "Cost is accurate only for canonical formula"); // Tally up the registers. if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { @@ -944,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, } // Determine how many (unfolded) adds we'll need inside the loop. - size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0); + size_t NumBaseParts = F.getNumRegs(); if (NumBaseParts > 1) // Do not count the base and a possible second register if the target // allows to fold 2 registers. - NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F)); + NumBaseAdds += + NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F))); + NumBaseAdds += (F.UnfoldedOffset != 0); // Accumulate non-free scaling amounts. ScaleCost += getScalingFactorCost(TTI, LU, F); @@ -1047,7 +1120,8 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} + : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)), + Offset(0) {} /// isUseFullyOutsideLoop - Test whether this fixup always uses its /// value outside of the given loop. 
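A toy model of the Formula canonical form introduced above: once a formula needs more than one register, one of them must occupy the scaled slot with Scale == 1, while a lone 1*reg must collapse back to a plain base register. Integer ids stand in for const SCEV *, and the real Canonicalize's preference for putting an AddRec in the scaled slot is omitted:

#include <cassert>
#include <vector>

struct FormulaSketch {
  std::vector<int> BaseRegs; // register ids
  int ScaledReg = 0;         // 0 means "no scaled register"
  int Scale = 0;

  bool isCanonical() const {
    if (ScaledReg) return Scale != 1 || !BaseRegs.empty();
    return BaseRegs.size() <= 1;
  }
  void canonicalize() {      // reg1 + reg2 ==> reg1 + 1*reg2
    if (isCanonical()) return;
    ScaledReg = BaseRegs.back();
    BaseRegs.pop_back();
    Scale = 1;
  }
  bool unscale() {           // reg1 + 1*reg2 ==> reg1 + reg2
    if (Scale != 1) return false;
    Scale = 0;
    BaseRegs.push_back(ScaledReg);
    ScaledReg = 0;
    return true;
  }
};

int main() {
  FormulaSketch F;
  F.BaseRegs = {1, 2};       // two base regs: not canonical
  assert(!F.isCanonical());
  F.canonicalize();
  assert(F.isCanonical() && F.Scale == 1 && F.ScaledReg == 2);
  F.unscale();               // temporarily non-canonical again
  assert(!F.isCanonical());
}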
@@ -1183,7 +1257,7 @@ public: MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true), RigidFormula(false), - WidestFixupType(0) {} + WidestFixupType(nullptr) {} bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); @@ -1208,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. +/// The formula must be in canonical form. bool LSRUse::InsertFormula(const Formula &F) { + assert(F.isCanonical() && "Invalid canonical representation"); + if (!Formulae.empty() && RigidFormula) return false; @@ -1234,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) { // Record registers now being used by this use. Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + if (F.ScaledReg) + Regs.insert(F.ScaledReg); return true; } @@ -1300,12 +1379,10 @@ void LSRUse::dump() const { } #endif -/// isLegalUse - Test whether the use described by AM is "legal", meaning it can -/// be completely folded into the user instruction at isel time. This includes -/// address-mode folding and special icmp tricks. -static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, - Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) { +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); @@ -1356,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, llvm_unreachable("Invalid LSRUse Kind!"); } -static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, - int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) { +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { // Check for overflow. if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != (MinOffset > 0)) @@ -1370,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, return false; MaxOffset = (uint64_t)BaseOffset + MaxOffset; - return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg, - Scale) && - isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset, + HasBaseReg, Scale) && + isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset, + HasBaseReg, Scale); +} + +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, Type *AccessTy, + const Formula &F) { + // For the purpose of isAMCompletelyFolded either having a canonical formula + // or a scale not equal to zero is correct. + // Problems may arise from non-canonical formulae having a scale == 0. + // Strictly speaking it would be best to just rely on canonical formulae. + // However, when we generate the scaled formulae, we first check that the + // scaling factor is profitable before computing the actual ScaledReg for + // compile time's sake.
+ assert((F.isCanonical() || F.Scale != 0)); + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, + F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); +} + +/// isLegalUse - Test whether we know how to expand the current formula. +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) { + // We know how to expand completely foldable formulae. + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale) || + // Or formulae that use a base register produced by a sum of base + // registers. + (Scale == 1 && + isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, + BaseGV, BaseOffset, true, 0)); } static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, @@ -1382,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, F.BaseOffset, F.HasBaseReg, F.Scale); } -static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, - const Formula &F) { - // If F is used as an Addressing Mode, it may fold one Base plus one - // scaled register. If the scaled register is nil, do as if another - // element of the base regs is a 1-scaled register. - // This is possible if BaseRegs has at least 2 registers. - - // If this is not an address calculation, this is not an addressing mode - // use. - if (LU.Kind != LSRUse::Address) - return false; - - // F is already scaled. - if (F.Scale != 0) - return false; - - // We need to keep one register for the base and one to scale. - if (F.BaseRegs.size() < 2) - return false; - - return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, - F.BaseGV, F.BaseOffset, F.HasBaseReg, 1); - } +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + const LSRUse &LU, const Formula &F) { + return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, + F.Scale); +} static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F) { if (!F.Scale) return 0; - assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, F) && "Illegal formula in use."); + + // If the use is not completely folded in that instruction, we will have to + // pay an extra cost only for scale != 1. + if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, F)) + return F.Scale != 1; switch (LU.Kind) { case LSRUse::Address: { @@ -1430,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, return std::max(ScaleCostMinOffset, ScaleCostMaxOffset); } case LSRUse::ICmpZero: - // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg. - // Therefore, return 0 in case F.Scale == -1. - return F.Scale != -1; - case LSRUse::Basic: case LSRUse::Special: + // The use is completely folded, i.e., everything is folded into the + // instruction. return 0; } @@ -1460,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, HasBaseReg = true; } - return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset, + HasBaseReg, Scale); } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, @@ -1485,8 +1581,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, // base and a scale. int64_t Scale = Kind == LSRUse::ICmpZero ? 
-1 : 1; - return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, - BaseOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale); } namespace { @@ -1515,7 +1611,7 @@ struct IVChain { SmallVector<IVInc,1> Incs; const SCEV *ExprBase; - IVChain() : ExprBase(0) {} + IVChain() : ExprBase(nullptr) {} IVChain(const IVInc &Head, const SCEV *Base) : Incs(1, Head), ExprBase(Base) {} @@ -1642,8 +1738,19 @@ class LSRInstance { void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, unsigned Depth = 0); + + void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, unsigned Depth, + size_t Idx, bool IsScaledReg = false); void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, size_t Idx, + bool IsScaledReg = false); void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, + const SmallVectorImpl<int64_t> &Worklist, + size_t Idx, bool IsScaledReg = false); void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); @@ -1721,7 +1828,7 @@ void LSRInstance::OptimizeShadowIV() { IVUsers::const_iterator CandidateUI = UI; ++UI; Instruction *ShadowUse = CandidateUI->getUser(); - Type *DestTy = 0; + Type *DestTy = nullptr; bool IsSigned = false; /* If shadow use is a int->float cast then insert a second IV @@ -1783,7 +1890,7 @@ void LSRInstance::OptimizeShadowIV() { continue; /* Initialize new IV, double d = 0.0 in above example. */ - ConstantInt *C = 0; + ConstantInt *C = nullptr; if (Incr->getOperand(0) == PH) C = dyn_cast<ConstantInt>(Incr->getOperand(1)); else if (Incr->getOperand(1) == PH) @@ -1905,7 +2012,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // for ICMP_ULE here because the comparison would be with zero, which // isn't interesting. CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - const SCEVNAryExpr *Max = 0; + const SCEVNAryExpr *Max = nullptr; if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) { Pred = ICmpInst::ICMP_SLE; Max = S; @@ -1948,7 +2055,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. - Value *NewRHS = 0; + Value *NewRHS = nullptr; if (ICmpInst::isTrueWhenEqual(Pred)) { // Look for n+1, and grab n. if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) @@ -2018,7 +2125,7 @@ LSRInstance::OptimizeLoopTermCond() { continue; // Search IVUsesByStride to find Cond's IVUse if there is one. - IVStrideUse *CondUse = 0; + IVStrideUse *CondUse = nullptr; ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); if (!FindIVUserForCond(Cond, CondUse)) continue; @@ -2071,12 +2178,12 @@ LSRInstance::OptimizeLoopTermCond() { // Check for possible scaled-address reuse. 
Type *AccessTy = getAccessType(UI->getUser()); int64_t Scale = C->getSExtValue(); - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, /*BaseOffset=*/ 0, /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; Scale = -Scale; - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, /*BaseOffset=*/ 0, /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; @@ -2146,23 +2253,25 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, // the uses will have all its uses outside the loop, for example. if (LU.Kind != Kind) return false; + + // Check for a mismatched access type, and fall back conservatively as needed. + // TODO: Be less conservative when the type is similar and can use the same + // addressing modes. + if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) + NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, LU.MaxOffset - NewOffset, HasBaseReg)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, NewOffset - LU.MinOffset, HasBaseReg)) return false; NewMaxOffset = NewOffset; } - // Check for a mismatched access type, and fall back conservatively as needed. - // TODO: Be less conservative when the type is similar and can use the same - // addressing modes. - if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) - NewAccessTy = Type::getVoidTy(AccessTy->getContext()); // Update the use. LU.MinOffset = NewMinOffset; @@ -2183,7 +2292,7 @@ LSRInstance::getUse(const SCEV *&Expr, int64_t Offset = ExtractImmediate(Expr, SE); // Basic uses can't accept any offset, for example. - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr, Offset, /*HasBaseReg=*/ true)) { Expr = Copy; Offset = 0; @@ -2267,7 +2376,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, } // Nothing looked good. - return 0; + return nullptr; } void LSRInstance::CollectInterestingTypesAndFactors() { @@ -2385,7 +2494,7 @@ static const SCEV *getExprBase(const SCEV *S) { default: // including scUnknown. return S; case scConstant: - return 0; + return nullptr; case scTruncate: return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand()); case scZeroExtend: @@ -2476,7 +2585,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users, && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) { --cost; } - const SCEV *LastIncExpr = 0; + const SCEV *LastIncExpr = nullptr; unsigned NumConstIncrements = 0; unsigned NumVarIncrements = 0; unsigned NumReusedIncrements = 0; @@ -2535,7 +2644,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, // Visit all existing chains. Check if its IVOper can be computed as a // profitable loop invariant increment from the last link in the Chain.
unsigned ChainIdx = 0, NChains = IVChainVec.size(); - const SCEV *LastIncExpr = 0; + const SCEV *LastIncExpr = nullptr; for (; ChainIdx < NChains; ++ChainIdx) { IVChain &Chain = IVChainVec[ChainIdx]; @@ -2755,7 +2864,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(TTI, LSRUse::Address, - getAccessType(UserInst), /*BaseGV=*/ 0, + getAccessType(UserInst), /*BaseGV=*/ nullptr, IncOffset, /*HasBaseReg=*/ false)) return false; @@ -2773,7 +2882,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // findIVOperand returns IVOpEnd if it can no longer find a valid IV user. User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); - Value *IVSrc = 0; + Value *IVSrc = nullptr; while (IVOpIter != IVOpEnd) { IVSrc = getWideOperand(*IVOpIter); @@ -2800,7 +2909,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n"); Type *IVTy = IVSrc->getType(); Type *IntTy = SE.getEffectiveSCEVType(IVTy); - const SCEV *LeftOverExpr = 0; + const SCEV *LeftOverExpr = nullptr; for (IVChain::const_iterator IncI = Chain.begin(), IncE = Chain.end(); IncI != IncE; ++IncI) { @@ -2831,7 +2940,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, TTI)) { assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); IVSrc = IVOper; - LeftOverExpr = 0; + LeftOverExpr = nullptr; } } Type *OperTy = IncI->IVOperand->getType(); @@ -2886,7 +2995,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; - Type *AccessTy = 0; + Type *AccessTy = nullptr; if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { Kind = LSRUse::Address; AccessTy = getAccessType(LF.UserInst); @@ -2917,7 +3026,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. - N = TransformForPostIncUse(Normalize, N, CI, 0, + N = TransformForPostIncUse(Normalize, N, CI, nullptr, LF.PostIncLoops, SE, DT); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); @@ -2992,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { + // Do not insert a formula that we will not be able to expand. + assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && + "Formula is illegal"); if (!LU.InsertFormula(F)) return false; @@ -3068,7 +3180,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LSRFixup &LF = getNewFixup(); LF.UserInst = const_cast<Instruction *>(UserInst); LF.OperandValToReplace = U; - std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0); + std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr); LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; @@ -3107,7 +3219,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, if (Remainder) Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); } - return 0; + return nullptr; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec.
if (AR->getStart()->isZero()) @@ -3119,7 +3231,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, // does not pertain to this loop. if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) { Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); - Remainder = 0; + Remainder = nullptr; } if (Remainder != AR->getStart()) { if (!Remainder) @@ -3141,90 +3253,110 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1); if (Remainder) Ops.push_back(SE.getMulExpr(C, Remainder)); - return 0; + return nullptr; } } return S; } -/// GenerateReassociations - Split out subexpressions from adds and the bases of -/// addrecs. -void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, - Formula Base, - unsigned Depth) { - // Arbitrarily cap recursion to protect compile time. - if (Depth >= 3) return; - - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *BaseReg = Base.BaseRegs[i]; +/// \brief Helper function for LSRInstance::GenerateReassociations. +void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, + unsigned Depth, size_t Idx, + bool IsScaledReg) { + const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + SmallVector<const SCEV *, 8> AddOps; + const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); + if (Remainder) + AddOps.push_back(Remainder); + + if (AddOps.size() == 1) + return; - SmallVector<const SCEV *, 8> AddOps; - const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE); - if (Remainder) - AddOps.push_back(Remainder); + for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), + JE = AddOps.end(); + J != JE; ++J) { - if (AddOps.size() == 1) continue; + // Loop-variant "unknown" values are uninteresting; we won't be able to + // do anything meaningful with them. + if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L)) + continue; - for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), - JE = AddOps.end(); J != JE; ++J) { + // Don't pull a constant into a register if the constant could be folded + // into an immediate field. + if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, *J, Base.getNumRegs() > 1)) + continue; - // Loop-variant "unknown" values are uninteresting; we won't be able to - // do anything meaningful with them. - if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L)) - continue; + // Collect all operands except *J. + SmallVector<const SCEV *, 8> InnerAddOps( + ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); + InnerAddOps.append(std::next(J), + ((const SmallVector<const SCEV *, 8> &)AddOps).end()); + + // Don't leave just a constant behind in a register if the constant could + // be folded into an immediate field. + if (InnerAddOps.size() == 1 && + isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) + continue; - // Don't pull a constant into a register if the constant could be folded - // into an immediate field. - if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, *J, Base.getNumRegs() > 1)) - continue; + const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); + if (InnerSum->isZero()) + continue; + Formula F = Base; - // Collect all operands except *J. 
- SmallVector<const SCEV *, 8> InnerAddOps( - ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J); - InnerAddOps.append(std::next(J), - ((const SmallVector<const SCEV *, 8> &)AddOps).end()); - - // Don't leave just a constant behind in a register if the constant could - // be folded into an immediate field. - if (InnerAddOps.size() == 1 && - isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) - continue; + // Add the remaining pieces of the add back into the new formula. + const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); + if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { + F.UnfoldedOffset = + (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); + if (IsScaledReg) + F.ScaledReg = nullptr; + else + F.BaseRegs.erase(F.BaseRegs.begin() + Idx); + } else if (IsScaledReg) + F.ScaledReg = InnerSum; + else + F.BaseRegs[Idx] = InnerSum; + + // Add J as its own register, or an unfolded immediate. + const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); + if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) + F.UnfoldedOffset = + (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); + else + F.BaseRegs.push_back(*J); + // We may have changed the number of registers in base regs, adjust the + // formula accordingly. + F.Canonicalize(); + + if (InsertFormula(LU, LUIdx, F)) + // If that formula hadn't been seen before, recurse to find more like + // it. + GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1); + } +} - const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); - if (InnerSum->isZero()) - continue; - Formula F = Base; +/// GenerateReassociations - Split out subexpressions from adds and the bases of +/// addrecs. +void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, + Formula Base, unsigned Depth) { + assert(Base.isCanonical() && "Input must be in the canonical form"); + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) + return; - // Add the remaining pieces of the add back into the new formula. - const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); - if (InnerSumSC && - SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && - TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue())) { - F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue(); - F.BaseRegs.erase(F.BaseRegs.begin() + i); - } else - F.BaseRegs[i] = InnerSum; - - // Add J as its own register, or an unfolded immediate. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); - if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && - TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue())) - F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue(); - else - F.BaseRegs.push_back(*J); + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i); - if (InsertFormula(LU, LUIdx, F)) - // If that formula hadn't been seen before, recurse to find more like - // it.
- GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1); - } - } + if (Base.Scale == 1) + GenerateReassociationsImpl(LU, LUIdx, Base, Depth, + /* Idx */ -1, /* IsScaledReg */ true); } /// GenerateCombinations - Generate a formula consisting of all of the @@ -3232,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base) { // This method is only interesting on a plurality of registers. - if (Base.BaseRegs.size() <= 1) return; + if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1) + return; + // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before + // processing the formula. + Base.Unscale(); Formula F = Base; F.BaseRegs.clear(); SmallVector<const SCEV *, 4> Ops; @@ -3253,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, // rather than proceed with zero in a register. if (!Sum->isZero()) { F.BaseRegs.push_back(Sum); + F.Canonicalize(); (void)InsertFormula(LU, LUIdx, F); } } } +/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets. +void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, size_t Idx, + bool IsScaledReg) { + const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + GlobalValue *GV = ExtractSymbol(G, SE); + if (G->isZero() || !GV) + return; + Formula F = Base; + F.BaseGV = GV; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + return; + if (IsScaledReg) + F.ScaledReg = G; + else + F.BaseRegs[Idx] = G; + (void)InsertFormula(LU, LUIdx, F); +} + /// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets. void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // We can't add a symbolic offset if the address already contains one. if (Base.BaseGV) return; - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *G = Base.BaseRegs[i]; - GlobalValue *GV = ExtractSymbol(G, SE); - if (G->isZero() || !GV) - continue; + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i); + if (Base.Scale == 1) + GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1, + /* IsScaledReg */ true); +} + +/// \brief Helper function for LSRInstance::GenerateConstantOffsets. +void LSRInstance::GenerateConstantOffsetsImpl( + LSRUse &LU, unsigned LUIdx, const Formula &Base, + const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) { + const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(), + E = Worklist.end(); + I != E; ++I) { Formula F = Base; - F.BaseGV = GV; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) - continue; - F.BaseRegs[i] = G; - (void)InsertFormula(LU, LUIdx, F); + F.BaseOffset = (uint64_t)Base.BaseOffset - *I; + if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, + LU.AccessTy, F)) { + // Add the offset to the base register. + const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); + // If it cancelled out, drop the base register, otherwise update it. 
+ if (NewG->isZero()) { + if (IsScaledReg) { + F.Scale = 0; + F.ScaledReg = nullptr; + } else + F.DeleteBaseReg(F.BaseRegs[Idx]); + F.Canonicalize(); + } else if (IsScaledReg) + F.ScaledReg = NewG; + else + F.BaseRegs[Idx] = NewG; + + (void)InsertFormula(LU, LUIdx, F); + } } + + int64_t Imm = ExtractImmediate(G, SE); + if (G->isZero() || Imm == 0) + return; + Formula F = Base; + F.BaseOffset = (uint64_t)F.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + return; + if (IsScaledReg) + F.ScaledReg = G; + else + F.BaseRegs[Idx] = G; + (void)InsertFormula(LU, LUIdx, F); } /// GenerateConstantOffsets - Generate reuse formulae using constant offsets. @@ -3288,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (LU.MaxOffset != LU.MinOffset) Worklist.push_back(LU.MaxOffset); - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *G = Base.BaseRegs[i]; - - for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - Formula F = Base; - F.BaseOffset = (uint64_t)Base.BaseOffset - *I; - if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, - LU.AccessTy, F)) { - // Add the offset to the base register. - const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); - // If it cancelled out, drop the base register, otherwise update it. - if (NewG->isZero()) { - std::swap(F.BaseRegs[i], F.BaseRegs.back()); - F.BaseRegs.pop_back(); - } else - F.BaseRegs[i] = NewG; - - (void)InsertFormula(LU, LUIdx, F); - } - } - - int64_t Imm = ExtractImmediate(G, SE); - if (G->isZero() || Imm == 0) - continue; - Formula F = Base; - F.BaseOffset = (uint64_t)F.BaseOffset + Imm; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) - continue; - F.BaseRegs[i] = G; - (void)InsertFormula(LU, LUIdx, F); - } + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i); + if (Base.Scale == 1) + GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1, + /* IsScaledReg */ true); } /// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up @@ -3419,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. - if (Base.Scale != 0) return; + // Try to unscale the formula to generate a better scale. + if (Base.Scale != 0 && !Base.Unscale()) + return; + + assert(Base.Scale == 0 && "Unscale did not do its job!"); // Check each interesting stride. for (SmallSetVector<int64_t, 8>::const_iterator @@ -3460,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { Formula F = Base; F.ScaledReg = Quotient; F.DeleteBaseReg(F.BaseRegs[i]); + // The canonical representation of 1*reg is reg, which is already in + // Base. In that case, do not try to insert the formula, it will be + // rejected anyway. + if (F.Scale == 1 && F.BaseRegs.empty()) + continue; (void)InsertFormula(LU, LUIdx, F); } } @@ -3624,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // TODO: Use a more targeted data structure. for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { - const Formula &F = LU.Formulae[L]; + Formula F = LU.Formulae[L]; + // FIXME: The code for the scaled and unscaled registers looks + // very similar but slightly different. Investigate if they + // could be merged.
That way, we would not have to unscale the + // Formula. + F.Unscale(); // Use the immediate in the scaled register. if (F.ScaledReg == OrigReg) { int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; @@ -3650,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { continue; // OK, looks good. + NewF.Canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); } else { // Use the immediate in a base register. @@ -3683,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { goto skip_formula; // Ok, looks good. + NewF.Canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); break; skip_formula:; @@ -3936,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; - if (F.BaseOffset == 0 || F.Scale != 0) + if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1)) continue; LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); @@ -4033,7 +4216,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // Pick the register which is used by the most LSRUses, which is likely // to be a good reuse register candidate. - const SCEV *Best = 0; + const SCEV *Best = nullptr; unsigned BestNum = 0; for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end(); I != E; ++I) { @@ -4130,19 +4313,22 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; - // Ignore formulae which do not use any of the required registers. - bool SatisfiedReqReg = true; + // Ignore formulae which may not be ideal in terms of register reuse of + // ReqRegs. The formula should use all required registers before + // introducing new ones. + int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(), JE = ReqRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; - if ((!F.ScaledReg || F.ScaledReg != Reg) && - std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) == + if ((F.ScaledReg && F.ScaledReg == Reg) || + std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) != F.BaseRegs.end()) { - SatisfiedReqReg = false; - break; + --NumReqRegsToFind; + if (NumReqRegsToFind == 0) + break; } } - if (!SatisfiedReqReg) { + if (NumReqRegsToFind != 0) { // If none of the formulae satisfied the required registers, then we could // clear ReqRegs and try again. Currently, we simply give up in this case. continue; @@ -4240,7 +4426,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, } bool AllDominate = true; - Instruction *BetterPos = 0; + Instruction *BetterPos = nullptr; Instruction *Tentative = IDom->getTerminator(); for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(), E = Inputs.end(); I != E; ++I) { @@ -4379,11 +4565,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, LF.UserInst, LF.OperandValToReplace, Loops, SE, DT); - Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP))); } // Expand the ScaledReg portion. 
- Value *ICmpScaledV = 0; + Value *ICmpScaledV = nullptr; if (F.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; @@ -4394,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Loops, SE, DT); if (LU.Kind == LSRUse::ICmpZero) { - // An interesting way of "folding" with an icmp is to use a negated - // scale, which we'll implement by inserting it into the other operand - // of the icmp. - assert(F.Scale == -1 && - "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP); + // Expand ScaleReg as if it was part of the base regs. + if (F.Scale == 1) + Ops.push_back( + SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP))); + else { + // An interesting way of "folding" with an icmp is to use a negated + // scale, which we'll implement by inserting it into the other operand + // of the icmp. + assert(F.Scale == -1 && + "The only scale supported by ICmpZero uses is -1!"); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + } } else { // Otherwise just expand the scaled register and an explicit scale, // which is expected to be matched as part of the address. // Flush the operand list to suppress SCEVExpander hoisting address modes. - if (!Ops.empty() && LU.Kind == LSRUse::Address) { + // Unless the addressing mode will not be folded. + if (!Ops.empty() && LU.Kind == LSRUse::Address && + isAMCompletelyFolded(TTI, LU, F)) { Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); - ScaledS = SE.getMulExpr(ScaledS, - SE.getConstant(ScaledS->getType(), F.Scale)); + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)); + if (F.Scale != 1) + ScaledS = + SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); Ops.push_back(ScaledS); } } @@ -4490,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } CI->setOperand(1, ICmpScaledV); } else { - assert(F.Scale == 0 && + // A scale of 1 means that the scale has been expanded as part of the + // base regs. + assert((F.Scale == 0 || F.Scale == 1) && "ICmp does not support folding a global value and " "a scale at the same time!"); Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), @@ -4531,7 +4728,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, Loop *PNLoop = LI.getLoopFor(Parent); if (!PNLoop || Parent != PNLoop->getHeader()) { // Split the critical edge. - BasicBlock *NewBB = 0; + BasicBlock *NewBB = nullptr; if (!Parent->isLandingPad()) { NewBB = SplitCriticalEdge(BB, Parent, P, /*MergeIdenticalEdges=*/true, @@ -4560,7 +4757,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, } std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair = - Inserted.insert(std::make_pair(BB, static_cast<Value *>(0))); + Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr))); if (!Pair.second) PN->setIncomingValue(i, Pair.first->second); else { @@ -4670,7 +4867,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P) DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()), LI(P->getAnalysis<LoopInfo>()), TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false), - IVIncInsertPos(0) { + IVIncInsertPos(nullptr) { // If LoopSimplify form is not available, stay out of trouble. 
if (!L->isLoopSimplifyForm()) return; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index ecd350b..fc28fd2 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -12,7 +12,6 @@ // counts of loops easily. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopPass.h" @@ -29,6 +28,8 @@ using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + static cl::opt<unsigned> UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, cl::desc("The cut-off point for automatic loop unrolling")); @@ -237,9 +238,12 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > Threshold) { - DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << Threshold << "\n"); + if (TripCount != 1 && + (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) { + if (Size > Threshold) + DEBUG(dbgs() << " Too large to fully unroll with count: " << Count + << " because size: " << Size << ">" << Threshold << "\n"); + bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; if (!AllowPartial && !(Runtime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 5954f4a..977c53a 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -26,7 +26,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unswitch" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -53,6 +52,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "loop-unswitch" + STATISTIC(NumBranches, "Number of branches unswitched"); STATISTIC(NumSwitches, "Number of switches unswitched"); STATISTIC(NumSelects , "Number of selects unswitched"); @@ -96,7 +97,7 @@ namespace { public: LUAnalysisCache() : - CurLoopInstructions(0), CurrentLoopProperties(0), + CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr), MaxSize(Threshold) {} @@ -151,8 +152,8 @@ namespace { static char ID; // Pass ID, replacement for typeid explicit LoopUnswitch(bool Os = false) : LoopPass(ID), OptimizeForSize(Os), redoLoop(false), - currentLoop(0), DT(0), loopHeader(0), - loopPreheader(0) { + currentLoop(nullptr), DT(nullptr), loopHeader(nullptr), + loopPreheader(nullptr) { initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); } @@ -180,15 +181,6 @@ namespace { BranchesInfo.forgetLoop(currentLoop); } - /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, - /// remove it. 
- void RemoveLoopFromWorklist(Loop *L) { - std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(), - LoopProcessWorklist.end(), L); - if (I != LoopProcessWorklist.end()) - LoopProcessWorklist.erase(I); - } - void initLoopData() { loopHeader = currentLoop->getHeader(); loopPreheader = currentLoop->getLoopPreheader(); @@ -212,9 +204,8 @@ Instruction *InsertPt); void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L); - void RemoveLoopFromHierarchy(Loop *L); - bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0, - BasicBlock **LoopExit = 0); + bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr, + BasicBlock **LoopExit = nullptr); }; } @@ -283,8 +274,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) { LoopsProperties.erase(LIt); } - CurrentLoopProperties = 0; - CurLoopInstructions = 0; + CurrentLoopProperties = nullptr; + CurLoopInstructions = nullptr; } // Mark case value as unswitched. @@ -355,10 +346,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { // We can never unswitch on vector conditions. if (Cond->getType()->isVectorTy()) - return 0; + return nullptr; // Constants should be folded, not unswitched on! - if (isa<Constant>(Cond)) return 0; + if (isa<Constant>(Cond)) return nullptr; // TODO: Handle: br (VARIANT|INVARIANT). @@ -378,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { return RHS; } - return 0; + return nullptr; } bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { @@ -389,7 +380,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { LPM = &LPM_Ref; DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; currentLoop = L; Function *F = currentLoop->getHeader()->getParent(); bool Changed = false; @@ -461,7 +452,7 @@ bool LoopUnswitch::processCurrentLoop() { // Find a value to unswitch on: // FIXME: this should choose the most expensive case! // FIXME: scan for a case with a non-critical edge? - Constant *UnswitchVal = 0; + Constant *UnswitchVal = nullptr; // Do not process same value again and again. // At this point we have some cases already unswitched and @@ -518,7 +509,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, if (!L->contains(BB)) { // Otherwise, this is a loop exit, this is fine so long as this is the // first exit. - if (ExitBB != 0) return false; + if (ExitBB) return false; ExitBB = BB; return true; } @@ -545,10 +536,10 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set<BasicBlock*> Visited; Visited.insert(L->getHeader()); // Branches to header make infinite loops. - BasicBlock *ExitBB = 0; + BasicBlock *ExitBB = nullptr; if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) return ExitBB; - return 0; + return nullptr; } /// IsTrivialUnswitchCondition - Check to see if this unswitch condition is @@ -569,7 +560,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, TerminatorInst *HeaderTerm = Header->getTerminator(); LLVMContext &Context = Header->getContext(); - BasicBlock *LoopExitBB = 0; + BasicBlock *LoopExitBB = nullptr; if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) { // If the header block doesn't end with a conditional branch on Cond, we // can't handle it.
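
A note on what IsTrivialUnswitchCondition, above, is looking for: a branch on a loop-invariant condition where one successor immediately leaves the loop can be hoisted without cloning the loop body at all. A source-level sketch of the effect, in hand-written C++ that is illustrative only (it is not the pass's own code or its IR):

// Before: Flag is loop-invariant, and its taken edge exits the loop on
// the very first test, so the condition qualifies as "trivial".
int sumBefore(const int *A, int N, bool Flag) {
  int S = 0;
  for (int i = 0; i < N; ++i) {
    if (Flag)
      return S;   // exits immediately; S is still 0 here
    S += A[i];
  }
  return S;
}

// After: the test is hoisted in front of the loop; no body duplication.
int sumAfter(const int *A, int N, bool Flag) {
  if (Flag)
    return 0;
  int S = 0;
  for (int i = 0; i < N; ++i)
    S += A[i];
  return S;
}
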
@@ -639,8 +630,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { Function *F = loopHeader->getParent(); - Constant *CondVal = 0; - BasicBlock *ExitBlock = 0; + Constant *CondVal = nullptr; + BasicBlock *ExitBlock = nullptr; if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { // If the condition is trivial, always unswitch. There is no code growth @@ -948,17 +939,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, ++NumSimplify; } -/// RemoveLoopFromHierarchy - We have discovered that the specified loop has -/// become unwrapped, either because the backedge was deleted, or because the -/// edge into the header was removed. If the edge into the header from the -/// latch block was removed, the loop is unwrapped but subloops are still alive, -/// so they just reparent loops. If the loops are actually dead, they will be -/// removed later. -void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) { - LPM->deleteLoopFromQueue(L); - RemoveLoopFromWorklist(L); -} - // RewriteLoopBodyWithConditionConstant - We know either that the value LIC has // the value specified by Val in the specified loop, or we know it does NOT have // that value. Rewrite any uses of LIC or of properties correlated to it. @@ -1020,7 +1000,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // If we know that LIC is not Val, use this info to simplify code. SwitchInst *SI = dyn_cast<SwitchInst>(UI); - if (SI == 0 || !isa<ConstantInt>(Val)) continue; + if (!SI || !isa<ConstantInt>(Val)) continue; SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val)); // Default case is live for multiple values. 
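
The bulk of the mechanical churn in this file, and in the files below, is the C++11 null-pointer cleanup: literal 0 and NULL become nullptr, and comparisons such as SI == 0 become !SI. A minimal standalone sketch of why nullptr is the better spelling (the function names here are illustrative, not LLVM APIs):

#include <iostream>

void take(int)   { std::cout << "int overload\n"; }
void take(int *) { std::cout << "pointer overload\n"; }

int main() {
  take(0);        // resolves to take(int): the literal 0 is an int first
  take(nullptr);  // resolves to take(int *): std::nullptr_t converts only
                  // to pointer types, never silently to an integer
}

The readability change is the same everywhere in the patch; the behavior of the compiled code is unchanged.
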
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 7c0a623..4251ac4 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loweratomic" #include "llvm/Transforms/Scalar.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -20,6 +19,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "loweratomic" + static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { IRBuilder<> Builder(CXI->getParent(), CXI); Value *Ptr = CXI->getPointerOperand(); @@ -42,7 +43,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) { Value *Val = RMWI->getValOperand(); LoadInst *Orig = Builder.CreateLoad(Ptr); - Value *Res = NULL; + Value *Res = nullptr; switch (RMWI->getOperation()) { default: llvm_unreachable("Unexpected RMW operation"); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2603c96..b6bc792 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "memcpyopt" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -33,6 +32,8 @@ #include <list> using namespace llvm; +#define DEBUG_TYPE "memcpyopt" + STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); @@ -49,7 +50,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, int64_t Offset = 0; for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (OpC == 0) + if (!OpC) return VariableIdxFound = true; if (OpC->isZero()) continue; // No offset. @@ -89,12 +90,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, // If one pointer is a GEP and the other isn't, then see if the GEP is a // constant offset from the base, as in "P" and "gep P, 1". - if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { + if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); return !VariableIdxFound; } - if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { + if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); return !VariableIdxFound; } @@ -317,9 +318,9 @@ namespace { static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); - MD = 0; - TLI = 0; - DL = 0; + MD = nullptr; + TLI = nullptr; + DL = nullptr; } bool runOnFunction(Function &F) override; @@ -373,7 +374,7 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", /// attempts to merge them together into a memcpy/memset. Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { - if (DL == 0) return 0; + if (!DL) return nullptr; // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. 
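
The tryMergingIntoMemset hunks below only modernize the null checks; what the surrounding scan does, roughly, is collapse a run of adjacent stores of one repeatable byte value into a single memset. A hand-written source-level illustration (not compiler output):

#include <cstring>

struct Header { char Tag[4]; };

// Before: four adjacent stores of the same byte value.
void clearBefore(Header &H) {
  H.Tag[0] = 0; H.Tag[1] = 0; H.Tag[2] = 0; H.Tag[3] = 0;
}

// After: the contiguous, same-valued range becomes one memset call.
void clearAfter(Header &H) {
  std::memset(H.Tag, 0, sizeof H.Tag);
}
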
@@ -426,7 +427,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (Ranges.empty()) - return 0; + return nullptr; // If we had at least one store that could be merged in, add the starting // store as well. We try to avoid this unless there is at least something @@ -440,7 +441,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Now that we have full information about ranges, loop over the ranges and // emit memset's for anything big enough to be worthwhile. - Instruction *AMemSet = 0; + Instruction *AMemSet = nullptr; for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemsetRange &Range = *I; @@ -491,7 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; - if (DL == 0) return false; + if (!DL) return false; // Detect cases where we're performing call slot forwarding, but // happen to be using a load-store pair to implement it, rather than @@ -500,7 +501,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { MemDepResult ldep = MD->getDependency(LI); - CallInst *C = 0; + CallInst *C = nullptr; if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) C = dyn_cast<CallInst>(ldep.getInst()); @@ -512,7 +513,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { for (BasicBlock::iterator I = --BasicBlock::iterator(SI), E = C; I != E; --I) { if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { - C = 0; + C = nullptr; break; } } @@ -603,7 +604,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, return false; // Check that all of src is copied to dest. - if (DL == 0) return false; + if (!DL) return false; ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) return false; @@ -846,7 +847,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { // The optimizations after this point require the memcpy size. ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); - if (CopySize == 0) return false; + if (!CopySize) return false; // There are three possible optimizations we can do for memcpy: // a) memcpy-memcpy xform which exposes redundancy for DSE. @@ -929,7 +930,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { /// processByValArgument - This is called on every byval argument in call sites. bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { - if (DL == 0) return false; + if (!DL) return false; // Find out what feeds this byval argument. Value *ByValArg = CS.getArgument(ArgNo); @@ -946,13 +947,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // a memcpy, see if we can byval from the source of the memcpy instead of the // result. MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); - if (MDep == 0 || MDep->isVolatile() || + if (!MDep || MDep->isVolatile() || ByValArg->stripPointerCasts() != MDep->getDest()) return false; // The length of the memcpy must be larger than or equal to the size of the byval. ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); - if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) + if (!C1 || C1->getValue().getZExtValue() < ByValSize) return false; // Get the alignment of the byval.
If the call doesn't specify the alignment, @@ -1043,7 +1044,7 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis<MemoryDependenceAnalysis>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis<TargetLibraryInfo>(); // If we don't have at least memset and memcpy, there is little point of doing @@ -1058,6 +1059,6 @@ bool MemCpyOpt::runOnFunction(Function &F) { MadeChange = true; } - MD = 0; + MD = nullptr; return MadeChange; } diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 2f19935..7cce89e 100644 --- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "partially-inline-libcalls" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "partially-inline-libcalls" + namespace { class PartiallyInlineLibCalls : public FunctionPass { public: diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index b6b4d97..986d6a4 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -20,7 +20,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" @@ -42,6 +41,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "reassociate" + STATISTIC(NumChanged, "Number of insts reassociated"); STATISTIC(NumAnnihil, "Number of expr tree annihilated"); STATISTIC(NumFactor , "Number of multiplies factored"); @@ -122,14 +123,14 @@ namespace { public: XorOpnd(Value *V); - bool isInvalid() const { return SymbolicPart == 0; } + bool isInvalid() const { return SymbolicPart == nullptr; } bool isOrExpr() const { return isOr; } Value *getValue() const { return OrigVal; } Value *getSymbolicPart() const { return SymbolicPart; } unsigned getSymbolicRank() const { return SymbolicRank; } const APInt &getConstPart() const { return ConstPart; } - void Invalidate() { SymbolicPart = OrigVal = 0; } + void Invalidate() { SymbolicPart = OrigVal = nullptr; } void setSymbolicRank(unsigned R) { SymbolicRank = R; } // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank. @@ -236,7 +237,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { if (V->hasOneUse() && isa<Instruction>(V) && cast<Instruction>(V)->getOpcode() == Opcode) return cast<BinaryOperator>(V); - return 0; + return nullptr; } static bool isUnmovableInstruction(Instruction *I) { @@ -284,7 +285,7 @@ void Reassociate::BuildRankMap(Function &F) { unsigned Reassociate::getRank(Value *V) { Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) { + if (!I) { if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument. return 0; // Otherwise it's a global or constant, rank 0. } @@ -705,7 +706,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // ExpressionChanged - Non-null if the rewritten expression differs from the // original in some non-trivial way, requiring the clearing of optional flags. 
// Flags are cleared from the operator in ExpressionChanged up to I inclusive. - BinaryOperator *ExpressionChanged = 0; + BinaryOperator *ExpressionChanged = nullptr; for (unsigned i = 0; ; ++i) { // The last operation (which comes earliest in the IR) is special as both // operands will come from Ops, rather than just one with the other being @@ -995,7 +996,7 @@ static Value *EmitAddTreeOfValues(Instruction *I, /// remove Factor from the tree and return the new tree. Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); - if (!BO) return 0; + if (!BO) return nullptr; SmallVector<RepeatedValue, 8> Tree; MadeChange |= LinearizeExprTree(BO, Tree); @@ -1029,7 +1030,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { if (!FoundFactor) { // Make sure to restore the operands to the expression tree. RewriteExprTree(BO, Factors); - return 0; + return nullptr; } BasicBlock::iterator InsertPt = BO; ++InsertPt; @@ -1114,7 +1115,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode, ++NumAnnihil; } } - return 0; + return nullptr; } /// Helper function of CombineXorOpnd(). It creates a bitwise-and @@ -1135,7 +1136,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, } return Opnd; } - return 0; + return nullptr; } // Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd" @@ -1261,7 +1262,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, return V; if (Ops.size() == 1) - return 0; + return nullptr; SmallVector<XorOpnd, 8> Opnds; SmallVector<XorOpnd*, 8> OpndPtrs; @@ -1294,7 +1295,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor()); // Step 3: Combine adjacent operands - XorOpnd *PrevOpnd = 0; + XorOpnd *PrevOpnd = nullptr; bool Changed = false; for (unsigned i = 0, e = Opnds.size(); i < e; i++) { XorOpnd *CurrOpnd = OpndPtrs[i]; @@ -1328,7 +1329,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, PrevOpnd = CurrOpnd; } else { CurrOpnd->Invalidate(); - PrevOpnd = 0; + PrevOpnd = nullptr; } Changed = true; } @@ -1358,7 +1359,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, } } - return 0; + return nullptr; } /// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This @@ -1445,7 +1446,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4) // where they are actually the same multiply. unsigned MaxOcc = 0; - Value *MaxOccVal = 0; + Value *MaxOccVal = nullptr; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul); if (!BOp) continue; @@ -1543,7 +1544,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2)); } - return 0; + return nullptr; } /// \brief Build up a vector of value/power pairs factoring a product. @@ -1688,14 +1689,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I, // We can only optimize the multiplies when there is a chain of more than // three, such that a balanced tree might require fewer total multiplies. if (Ops.size() < 4) - return 0; + return nullptr; // Try to turn linear trees of multiplies without other uses of the // intermediate stages into minimal multiply DAGs with perfect sub-expression // re-use. SmallVector<Factor, 4> Factors; if (!collectMultiplyFactors(Ops, Factors)) - return 0; // All distinct factors, so nothing left for us to do.
+ return nullptr; // All distinct factors, so nothing left for us to do. IRBuilder<> Builder(I); Value *V = buildMinimalMultiplyDAG(Builder, Factors); @@ -1704,14 +1705,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I, ValueEntry NewEntry = ValueEntry(getRank(V), V); Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry); - return 0; + return nullptr; } Value *Reassociate::OptimizeExpression(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops) { // Now that we have the linearized expression tree, try to optimize it. // Start by folding any constants that we found. - Constant *Cst = 0; + Constant *Cst = nullptr; unsigned Opcode = I->getOpcode(); while (!Ops.empty() && isa<Constant>(Ops.back().Op)) { Constant *C = cast<Constant>(Ops.pop_back_val().Op); @@ -1761,7 +1762,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, if (Ops.size() != NumOps) return OptimizeExpression(I, Ops); - return 0; + return nullptr; } /// EraseInst - Zap the given instruction, adding interesting operands to the diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index d9809ce..b6023e2 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reg2mem" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/BasicBlock.h" @@ -30,6 +29,8 @@ #include <list> using namespace llvm; +#define DEBUG_TYPE "reg2mem" + STATISTIC(NumRegsDemoted, "Number of registers demoted"); STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index b8f10e9..feeb231 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sccp" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -42,6 +41,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "sccp" + STATISTIC(NumInstRemoved, "Number of instructions removed"); STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable"); @@ -81,7 +82,7 @@ class LatticeVal { } public: - LatticeVal() : Val(0, undefined) {} + LatticeVal() : Val(nullptr, undefined) {} bool isUndefined() const { return getLatticeValue() == undefined; } bool isConstant() const { @@ -133,7 +134,7 @@ public: ConstantInt *getConstantInt() const { if (isConstant()) return dyn_cast<ConstantInt>(getConstant()); - return 0; + return nullptr; } void markForcedConstant(Constant *V) { @@ -403,7 +404,7 @@ private: if (Constant *C = dyn_cast<Constant>(V)) { Constant *Elt = C->getAggregateElement(i); - if (Elt == 0) + if (!Elt) LV.markOverdefined(); // Unknown sort of constant. else if (isa<UndefValue>(Elt)) ; // Undef values remain undefined. @@ -522,7 +523,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, LatticeVal BCValue = getValueState(BI->getCondition()); ConstantInt *CI = BCValue.getConstantInt(); - if (CI == 0) { + if (!CI) { // Overdefined condition variables, and branches on unfoldable constant // conditions, mean the branch could go either way. 
if (!BCValue.isUndefined()) @@ -549,7 +550,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); - if (CI == 0) { // Overdefined or undefined condition? + if (!CI) { // Overdefined or undefined condition? // All destinations are executable! if (!SCValue.isUndefined()) Succs.assign(TI.getNumSuccessors(), true); @@ -594,7 +595,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { // Overdefined condition variables mean the branch could go either way, // undef conditions mean that neither edge is feasible yet. ConstantInt *CI = BCValue.getConstantInt(); - if (CI == 0) + if (!CI) return !BCValue.isUndefined(); // Constant condition variables mean the branch can only go a single way. @@ -612,7 +613,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); - if (CI == 0) + if (!CI) return !SCValue.isUndefined(); return SI->findCaseValue(CI).getCaseSuccessor() == To; @@ -626,7 +627,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { #ifndef NDEBUG dbgs() << "Unknown terminator instruction: " << *TI << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } // visit Implementations - Something changed in this instruction, either an @@ -667,7 +668,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) { // constant. If they are constant and don't agree, the PHI is overdefined. // If there are no executable operands, the PHI remains undefined. // - Constant *OperandVal = 0; + Constant *OperandVal = nullptr; for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { LatticeVal IV = getValueState(PN.getIncomingValue(i)); if (IV.isUndefined()) continue; // Doesn't influence PHI node. @@ -678,7 +679,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) { if (IV.isOverdefined()) // PHI node becomes overdefined! return markOverdefined(&PN); - if (OperandVal == 0) { // Grab the first value. + if (!OperandVal) { // Grab the first value. OperandVal = IV.getConstant(); continue; } @@ -774,7 +775,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { StructType *STy = dyn_cast<StructType>(IVI.getType()); - if (STy == 0) + if (!STy) return markOverdefined(&IVI); // If this has more than one index, we can't handle it, drive all results to @@ -862,7 +863,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { // If this is an AND or OR with 0 or -1, it doesn't matter that the other // operand is overdefined. if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) { - LatticeVal *NonOverdefVal = 0; + LatticeVal *NonOverdefVal = nullptr; if (!V1State.isOverdefined()) NonOverdefVal = &V1State; else if (!V2State.isOverdefined()) @@ -1081,7 +1082,7 @@ void SCCPSolver::visitCallSite(CallSite CS) { // The common case is that we aren't tracking the callee, either because we // are not doing interprocedural analysis or the callee is indirect, or is // external. Handle these cases first. - if (F == 0 || F->isDeclaration()) { + if (!F || F->isDeclaration()) { CallOverdefined: // Void return and not tracking callee, just bail. 
if (I->getType()->isVoidTy()) return; @@ -1555,7 +1556,7 @@ bool SCCP::runOnFunction(Function &F) { DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SCCPSolver Solver(DL, TLI); @@ -1684,7 +1685,7 @@ static bool AddressIsTaken(const GlobalValue *GV) { bool IPSCCP::runOnModule(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SCCPSolver Solver(DL, TLI); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index ed5e618..04bf4f8 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -23,7 +23,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sroa" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -64,6 +63,8 @@ using namespace llvm; +#define DEBUG_TYPE "sroa" + STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed"); STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca"); @@ -159,8 +160,8 @@ public: Use *getUse() const { return UseAndIsSplittable.getPointer(); } - bool isDead() const { return getUse() == 0; } - void kill() { UseAndIsSplittable.setPointer(0); } + bool isDead() const { return getUse() == nullptr; } + void kill() { UseAndIsSplittable.setPointer(nullptr); } /// \brief Support for ordering ranges. /// @@ -320,7 +321,7 @@ static Value *foldSelectInst(SelectInst &SI) { if (SI.getOperand(1) == SI.getOperand(2)) return SI.getOperand(1); - return 0; + return nullptr; } /// \brief Builder for the alloca slices. @@ -642,7 +643,7 @@ private: Uses.push_back(std::make_pair(I, cast<Instruction>(U))); } while (!Uses.empty()); - return 0; + return nullptr; } void visitPHINode(PHINode &PN) { @@ -724,7 +725,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) AI(AI), #endif - PointerEscapingInstr(0) { + PointerEscapingInstr(nullptr) { SliceBuilder PB(DL, AI, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { @@ -873,7 +874,7 @@ public: for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = 0; + Value *Arg = nullptr; if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. 
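
The SROA hunks that follow are again dominated by the nullptr cleanup; the pass around them rewrites an aggregate alloca into per-slice scalars so that each piece can be promoted out of memory independently. Roughly, at the source level (illustrative C++ only, not the pass's IR):

struct Pair { int X, Y; };

// Before: one aggregate alloca; X and Y share a single stack slot.
int sumBefore(int A, int B) {
  Pair P;
  P.X = A;
  P.Y = B;
  return P.X + P.Y;
}

// After slicing: each field is an independent scalar, and once both
// slices are promoted the alloca disappears entirely.
int sumAfter(int A, int B) {
  int X = A;  // slice [0,4) of the old alloca
  int Y = B;  // slice [4,8)
  return X + Y;
}
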
@@ -969,7 +970,7 @@ class SROA : public FunctionPass { public: SROA(bool RequiresDomTree = true) : FunctionPass(ID), RequiresDomTree(RequiresDomTree), - C(0), DL(0), DT(0) { + C(nullptr), DL(nullptr), DT(nullptr) { initializeSROAPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -1011,9 +1012,9 @@ INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", static Type *findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset) { - Type *Ty = 0; + Type *Ty = nullptr; bool TyIsCommon = true; - IntegerType *ITy = 0; + IntegerType *ITy = nullptr; // Note that we need to look at *every* alloca slice's Use to ensure we // always get consistent results regardless of the order of slices. @@ -1024,7 +1025,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B, if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset) continue; - Type *UserTy = 0; + Type *UserTy = nullptr; if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { UserTy = LI->getType(); } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { @@ -1074,7 +1075,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B, /// FIXME: This should be hoisted into a generic utility, likely in /// Transforms/Util/Local.h static bool isSafePHIToSpeculate(PHINode &PN, - const DataLayout *DL = 0) { + const DataLayout *DL = nullptr) { // For now, we can only do this promotion if the load is in the same block // as the PHI, and if there are no stores between the phi and load. // TODO: Allow recursive phi users. @@ -1084,7 +1085,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, bool HaveLoad = false; for (User *U : PN.users()) { LoadInst *LI = dyn_cast<LoadInst>(U); - if (LI == 0 || !LI->isSimple()) + if (!LI || !LI->isSimple()) return false; // For now we only allow loads in the same block as the PHI. This is @@ -1191,7 +1192,8 @@ static void speculatePHINodeLoads(PHINode &PN) { /// /// We can do this to a select if its only uses are loads and if the operand /// to the select can be loaded unconditionally. -static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) { +static bool isSafeSelectToSpeculate(SelectInst &SI, + const DataLayout *DL = nullptr) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); bool TDerefable = TValue->isDereferenceablePointer(); bool FDerefable = FValue->isDereferenceablePointer(); @@ -1199,7 +1201,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) { for (User *U : SI.users()) { LoadInst *LI = dyn_cast<LoadInst>(U); - if (LI == 0 || !LI->isSimple()) + if (!LI || !LI->isSimple()) return false; // Both operands to the select need to be dereferenceable, either @@ -1332,19 +1334,21 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, // We can't recurse through pointer types. if (Ty->isPointerTy()) - return 0; + return nullptr; // We try to analyze GEPs over vectors here, but note that these GEPs are // extremely poorly defined currently. The long-term goal is to remove GEPing // over a vector from the IR completely. if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType()); - if (ElementSizeInBits % 8) - return 0; // GEPs over non-multiple of 8 size vector elements are invalid. + if (ElementSizeInBits % 8 != 0) { + // GEPs over non-multiple of 8 size vector elements are invalid.
+ return nullptr; + } APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); APInt NumSkippedElements = Offset.sdiv(ElementSize); if (NumSkippedElements.ugt(VecTy->getNumElements())) - return 0; + return nullptr; Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(), @@ -1356,7 +1360,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); APInt NumSkippedElements = Offset.sdiv(ElementSize); if (NumSkippedElements.ugt(ArrTy->getNumElements())) - return 0; + return nullptr; Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); @@ -1366,17 +1370,17 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, StructType *STy = dyn_cast<StructType>(Ty); if (!STy) - return 0; + return nullptr; const StructLayout *SL = DL.getStructLayout(STy); uint64_t StructOffset = Offset.getZExtValue(); if (StructOffset >= SL->getSizeInBytes()) - return 0; + return nullptr; unsigned Index = SL->getElementContainingOffset(StructOffset); Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); Type *ElementTy = STy->getElementType(Index); if (Offset.uge(DL.getTypeAllocSize(ElementTy))) - return 0; // The offset points into alignment padding. + return nullptr; // The offset points into alignment padding. Indices.push_back(IRB.getInt32(Index)); return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, @@ -1402,14 +1406,14 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, // Don't consider any GEPs through an i8* as natural unless the TargetTy is // an i8. if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8)) - return 0; + return nullptr; Type *ElementTy = Ty->getElementType(); if (!ElementTy->isSized()) - return 0; // We can't GEP through an unsized element. + return nullptr; // We can't GEP through an unsized element. APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); if (ElementSize == 0) - return 0; // Zero-length arrays can't help us build a natural GEP. + return nullptr; // Zero-length arrays can't help us build a natural GEP. APInt NumSkippedElements = Offset.sdiv(ElementSize); Offset -= NumSkippedElements * ElementSize; @@ -1445,11 +1449,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, // We may end up computing an offset pointer that has the wrong type. If we // never are able to compute one directly that has the correct type, we'll // fall back to it, so keep it around here. - Value *OffsetPtr = 0; + Value *OffsetPtr = nullptr; // Remember any i8 pointer we come across to re-use if we need to do a raw // byte offset. - Value *Int8Ptr = 0; + Value *Int8Ptr = nullptr; APInt Int8PtrOffset(Offset.getBitWidth(), 0); Type *TargetTy = PointerTy->getPointerElementType(); @@ -2043,14 +2047,14 @@ public: NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAI.getAllocatedType()), - VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : 0), - ElementTy(VecTy ? VecTy->getElementType() : 0), + VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : nullptr), + ElementTy(VecTy ? VecTy->getElementType() : nullptr), ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0), IntTy(IsIntegerPromotable ? 
Type::getIntNTy( NewAI.getContext(), DL.getTypeSizeInBits(NewAI.getAllocatedType())) - : 0), + : nullptr), BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(), OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers), IRB(NewAI.getContext(), ConstantFolder()) { @@ -2144,7 +2148,7 @@ private: /// /// You can optionally pass a type to this routine and if that type's ABI /// alignment is itself suitable, this will return zero. - unsigned getSliceAlign(Type *Ty = 0) { + unsigned getSliceAlign(Type *Ty = nullptr) { unsigned NewAIAlign = NewAI.getAlignment(); if (!NewAIAlign) NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType()); @@ -2594,7 +2598,7 @@ private: unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0; unsigned NumElements = EndIndex - BeginIndex; IntegerType *SubIntTy - = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; + = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : nullptr; // Reset the other pointer type to match the register type we're going to // use, but using the address space of the original other pointer. @@ -2992,22 +2996,22 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, return stripAggregateTypeWrapping(DL, Ty); if (Offset > DL.getTypeAllocSize(Ty) || (DL.getTypeAllocSize(Ty) - Offset) < Size) - return 0; + return nullptr; if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) { // We can't partition pointers... if (SeqTy->isPointerTy()) - return 0; + return nullptr; Type *ElementTy = SeqTy->getElementType(); uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); uint64_t NumSkippedElements = Offset / ElementSize; if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) { if (NumSkippedElements >= ArrTy->getNumElements()) - return 0; + return nullptr; } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) { if (NumSkippedElements >= VecTy->getNumElements()) - return 0; + return nullptr; } Offset -= NumSkippedElements * ElementSize; @@ -3015,7 +3019,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, if (Offset > 0 || Size < ElementSize) { // Bail if the partition ends in a different array element. if ((Offset + Size) > ElementSize) - return 0; + return nullptr; // Recurse through the element type trying to peel off offset bytes. return getTypePartition(DL, ElementTy, Offset, Size); } @@ -3026,20 +3030,20 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, assert(Size > ElementSize); uint64_t NumElements = Size / ElementSize; if (NumElements * ElementSize != Size) - return 0; + return nullptr; return ArrayType::get(ElementTy, NumElements); } StructType *STy = dyn_cast<StructType>(Ty); if (!STy) - return 0; + return nullptr; const StructLayout *SL = DL.getStructLayout(STy); if (Offset >= SL->getSizeInBytes()) - return 0; + return nullptr; uint64_t EndOffset = Offset + Size; if (EndOffset > SL->getSizeInBytes()) - return 0; + return nullptr; unsigned Index = SL->getElementContainingOffset(Offset); Offset -= SL->getElementOffset(Index); @@ -3047,12 +3051,12 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, Type *ElementTy = STy->getElementType(Index); uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); if (Offset >= ElementSize) - return 0; // The offset points into alignment padding. + return nullptr; // The offset points into alignment padding. // See if any partition must be contained by the element. 
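  // (A partition that starts inside this element, or stops short of its end,
  // is only representable if it fits entirely within the element: the code
  // below bails out when the range straddles the element boundary and
  // otherwise recurses into the element type.)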
if (Offset > 0 || Size < ElementSize) { if ((Offset + Size) > ElementSize) - return 0; + return nullptr; return getTypePartition(DL, ElementTy, Offset, Size); } assert(Offset == 0); @@ -3065,14 +3069,14 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, if (EndOffset < SL->getSizeInBytes()) { unsigned EndIndex = SL->getElementContainingOffset(EndOffset); if (Index == EndIndex) - return 0; // Within a single element and its padding. + return nullptr; // Within a single element and its padding. // Don't try to form "natural" types if the elements don't line up with the // expected size. // FIXME: We could potentially recurse down through the last element in the // sub-struct to find a natural end point. if (SL->getElementOffset(EndIndex) != EndOffset) - return 0; + return nullptr; assert(Index < EndIndex); EE = STy->element_begin() + EndIndex; @@ -3083,7 +3087,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, STy->isPacked()); const StructLayout *SubSL = DL.getStructLayout(SubTy); if (Size != SubSL->getSizeInBytes()) - return 0; // The sub-struct doesn't have quite the size needed. + return nullptr; // The sub-struct doesn't have quite the size needed. return SubTy; } @@ -3108,7 +3112,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S, // Try to compute a friendly type for this partition of the alloca. This // won't always succeed, in which case we fall back to a legal integer type // or an i8 array of an appropriate size. - Type *SliceTy = 0; + Type *SliceTy = nullptr; if (Type *CommonUseTy = findCommonType(B, E, EndOffset)) if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize) SliceTy = CommonUseTy; @@ -3155,7 +3159,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S, // the alloca's alignment unconstrained. if (Alignment <= DL->getABITypeAlignment(SliceTy)) Alignment = 0; - NewAI = new AllocaInst(SliceTy, 0, Alignment, + NewAI = new AllocaInst(SliceTy, nullptr, Alignment, AI.getName() + ".sroa." + Twine(B - S.begin()), &AI); ++NumNewAllocas; } @@ -3494,7 +3498,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { for (Use &Operand : I->operands()) if (Instruction *U = dyn_cast<Instruction>(Operand)) { // Zero out the operand and see if it becomes trivially dead. - Operand = 0; + Operand = nullptr; if (isInstructionTriviallyDead(U)) DeadInsts.insert(U); } @@ -3612,7 +3616,7 @@ bool SROA::runOnFunction(Function &F) { DL = &DLP->getDataLayout(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; BasicBlock &EntryBB = F.getEntryBlock(); for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index 20d6daa..8e557aa 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -22,8 +22,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sample-profile" - #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -54,6 +52,8 @@ using namespace llvm; +#define DEBUG_TYPE "sample-profile" + // Command line option to specify the file to read samples from. This is // mainly used for debugging. 
static cl::opt<std::string> SampleProfileFile( @@ -120,8 +120,8 @@ typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap; class SampleFunctionProfile { public: SampleFunctionProfile() - : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(0), PDT(0), - LI(0), Ctx(0) {} + : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(nullptr), + PDT(nullptr), LI(nullptr), Ctx(nullptr) {} unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F, DominatorTree *DomTree, @@ -315,7 +315,7 @@ protected: /// \brief Name of the profile file to load. StringRef Filename; - /// \brief Flag indicating whether the profile input loaded succesfully. + /// \brief Flag indicating whether the profile input loaded successfully. bool ProfileIsValid; }; } diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index e950eba..f8f828c 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -64,6 +64,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeStructurizeCFGPass(Registry); initializeSinkingPass(Registry); initializeTailCallElimPass(Registry); + initializeSeparateConstOffsetFromGEPPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { @@ -181,6 +182,7 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { void LLVMAddVerifierPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createVerifierPass()); + // FIXME: should this also add createDebugInfoVerifierPass()? } void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index e7b5ab2..58192fc 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scalarrepl" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -52,6 +51,8 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; +#define DEBUG_TYPE "scalarrepl" + STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); @@ -304,7 +305,7 @@ public: explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL, unsigned SLT) : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false), - ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false), + ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false), HadDynamicAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); @@ -332,8 +333,8 @@ private: AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // If we can't convert this scalar, or if mem2reg can trivially do it, bail // out. - if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial) - return 0; + if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial) + return nullptr; // If an alloca has only memset / memcpy uses, it may still have an Unknown // ScalarKind. Treat it as an Integer below. @@ -361,23 +362,24 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // Do not convert to scalar integer if the alloca size exceeds the // scalar load threshold. 
if (BitWidth > ScalarLoadThreshold) - return 0; + return nullptr; if ((ScalarKind == ImplicitVector || ScalarKind == Integer) && !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth)) - return 0; + return nullptr; // Dynamic accesses on integers aren't yet supported. They need us to shift // by a dynamic amount which could be difficult to work out as we might not // know whether to use a left or right shift. if (ScalarKind == Integer && HadDynamicAccess) - return 0; + return nullptr; DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. NewTy = IntegerType::get(AI->getContext(), BitWidth); } - AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); - ConvertUsesToScalar(AI, NewAI, 0, 0); + AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "", + AI->getParent()->begin()); + ConvertUsesToScalar(AI, NewAI, 0, nullptr); return NewAI; } @@ -508,7 +510,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset, // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - Value *GEPNonConstantIdx = 0; + Value *GEPNonConstantIdx = nullptr; if (!GEP->hasAllConstantIndices()) { if (!isa<VectorType>(PtrTy->getElementType())) return false; @@ -564,7 +566,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset, if (NonConstantIdx) return false; ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()); - if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0) + if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0) return false; IsNotTrivial = true; // Can't be mem2reg'd. @@ -608,7 +610,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { // Compute the offset that this GEP adds to the pointer. 
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - Value* GEPNonConstantIdx = 0; + Value* GEPNonConstantIdx = nullptr; if (!GEP->hasAllConstantIndices()) { assert(!NonConstantIdx && "Dynamic GEP reading from dynamic GEP unsupported"); @@ -671,7 +673,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), - Old, Offset, 0, Builder); + Old, Offset, nullptr, Builder); Builder.CreateStore(New, NewAI); // If the load we just inserted is now dead, then the memset overwrote @@ -809,7 +811,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), - 0, Builder); + nullptr, Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; @@ -822,7 +824,8 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), - Offset+i*EltSize, 0, Builder); + Offset+i*EltSize, nullptr, + Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; @@ -938,7 +941,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), - 0, Builder); + nullptr, Builder); } return Old; } @@ -949,7 +952,8 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i); - Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder); + Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr, + Builder); } return Old; } @@ -1024,7 +1028,7 @@ bool SROA::runOnFunction(Function &F) { return false; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; bool Changed = performPromotion(F); @@ -1054,7 +1058,7 @@ class AllocaPromoter : public LoadAndStorePromoter { public: AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, DIBuilder *DB) - : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {} + : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {} void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { // Remember which alloca we're promoting (for isInstInList). @@ -1100,7 +1104,7 @@ public: for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = NULL; + Value *Arg = nullptr; if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -1143,7 +1147,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { for (User *U : SI->users()) { LoadInst *LI = dyn_cast<LoadInst>(U); - if (LI == 0 || !LI->isSimple()) return false; + if (!LI || !LI->isSimple()) return false; // Both operands to the select need to be dereferencable, either absolutely // (e.g. allocas) or at this point because we can see other accesses to it. 
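The check above guards the rewrite that turns a load of a select into a select of two loads. In source-level terms, the transformation is roughly the following (a hand-written illustration, not code from this patch):

    // Before: one conditional load through whichever pointer is chosen.
    int load_of_select(bool c, int *a, int *b) {
      return *(c ? a : b);
    }

    // After: both loads execute unconditionally, so this is only safe when
    // *a and *b are both known dereferenceable (e.g. they are allocas, or
    // other accesses to them are visible), which is what the code verifies.
    int select_of_loads(bool c, int *a, int *b) {
      int va = *a;
      int vb = *b;
      return c ? va : vb;
    }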
@@ -1183,7 +1187,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { unsigned MaxAlign = 0; for (User *U : PN->users()) { LoadInst *LI = dyn_cast<LoadInst>(U); - if (LI == 0 || !LI->isSimple()) return false; + if (!LI || !LI->isSimple()) return false; // For now we only allow loads in the same block as the PHI. This is a // common case that happens when instcombine merges two loads through a PHI. @@ -1380,7 +1384,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); LoadInst *&Load = InsertedLoads[Pred]; - if (Load == 0) { + if (!Load) { Load = new LoadInst(PN->getIncomingValue(i), PN->getName() + "." + Pred->getName(), Pred->getTerminator()); @@ -1400,7 +1404,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { bool SROA::performPromotion(Function &F) { std::vector<AllocaInst*> Allocas; - DominatorTree *DT = 0; + DominatorTree *DT = nullptr; if (HasDomTree) DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -1537,7 +1541,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, + AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr, AI->getAlignment(), AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); @@ -1548,7 +1552,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, ElementAllocas.reserve(AT->getNumElements()); Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), + AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(), AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing @@ -1577,7 +1581,7 @@ void SROA::DeleteDeadInstructions() { // Zero out the operand and see if it becomes trivially dead. // (But, don't add allocas to the dead instruction list -- they are // already on the worklist and will be deleted separately.) - *OI = 0; + *OI = nullptr; if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U)) DeadInsts.push_back(U); } @@ -1604,12 +1608,10 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, isSafeForScalarRepl(GEPI, GEPOffset, Info); } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); - if (Length == 0) - return MarkUnsafe(Info, User); - if (Length->isNegative()) + if (!Length || Length->isNegative()) return MarkUnsafe(Info, User); - isSafeMemAccess(Offset, Length->getZExtValue(), 0, + isSafeMemAccess(Offset, Length->getZExtValue(), nullptr, U.getOperandNo() == 0, Info, MI, true /*AllowWholeAccess*/); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { @@ -1744,12 +1746,12 @@ static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, Type *&EltTy) { if (ArrayType *AT = dyn_cast<ArrayType>(T)) { NumElts = AT->getNumElements(); - EltTy = (NumElts == 0 ? 0 : AT->getElementType()); + EltTy = (NumElts == 0 ? nullptr : AT->getElementType()); return true; } if (StructType *ST = dyn_cast<StructType>(T)) { NumElts = ST->getNumContainedTypes(); - EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); + EltTy = (NumElts == 0 ? 
nullptr : ST->getContainedType(0)); for (unsigned n = 1; n < NumElts; ++n) { if (ST->getContainedType(n) != EltTy) return false; @@ -2038,7 +2040,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, // In this case, it must be the last GEP operand which is dynamic so keep that // aside until we've found the constant GEP offset then add it back in at the // end. - Value* NonConstantIdx = 0; + Value* NonConstantIdx = nullptr; if (!GEPI->hasAllConstantIndices()) NonConstantIdx = Indices.pop_back_val(); Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices); @@ -2108,7 +2110,8 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, if (NewOffset) { // Splice the first element and index 'NewOffset' bytes in. SROA will // split the alloca again later. - Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy()); + unsigned AS = AI->getType()->getAddressSpace(); + Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS)); V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); IdxTy = NewElts[Idx]->getAllocatedType(); @@ -2155,7 +2158,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // appropriate type. The "Other" pointer is the pointer that goes to memory // that doesn't have anything to do with the alloca that we are promoting. For // memset, this Value* stays null. - Value *OtherPtr = 0; + Value *OtherPtr = nullptr; unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy if (Inst == MTI->getRawDest()) @@ -2207,7 +2210,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. - Value *OtherElt = 0; + Value *OtherElt = nullptr; unsigned OtherEltAlign = MemAlignment; if (OtherPtr) { @@ -2449,7 +2452,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. - const StructLayout *Layout = 0; + const StructLayout *Layout = nullptr; uint64_t ArrayEltBitOffset = 0; if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { Layout = DL->getStructLayout(EltSTy); diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 006375c..7a73f11 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scalarizer" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "scalarizer" + namespace { // Used to store the scattered form of a vector. typedef SmallVector<Value *, 8> ValueVector; @@ -48,7 +49,7 @@ public: // insert them before BBI in BB. If Cache is nonnull, use it to cache // the results. Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, - ValueVector *cachePtr = 0); + ValueVector *cachePtr = nullptr); // Return component I, creating a new Value for it if necessary. Value *operator[](unsigned I); @@ -101,7 +102,7 @@ struct BinarySplitter { // Information about a load or store that we're scalarizing. 
struct VectorLayout { - VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {} + VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {} // Return the alignment of element I. uint64_t getElemAlign(unsigned I) { @@ -186,9 +187,9 @@ Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, Ty = PtrTy->getElementType(); Size = Ty->getVectorNumElements(); if (!CachePtr) - Tmp.resize(Size, 0); + Tmp.resize(Size, nullptr); else if (CachePtr->empty()) - CachePtr->resize(Size, 0); + CachePtr->resize(Size, nullptr); else assert(Size == CachePtr->size() && "Inconsistent vector sizes"); } @@ -241,7 +242,7 @@ bool Scalarizer::doInitialization(Module &M) { bool Scalarizer::runOnFunction(Function &F) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { BasicBlock *BB = BBI; for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp new file mode 100644 index 0000000..b8529e1 --- /dev/null +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -0,0 +1,623 @@ +//===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loop unrolling may create many similar GEPs for array accesses. +// e.g., a 2-level loop +// +// float a[32][32]; // global variable +// +// for (int i = 0; i < 2; ++i) { +// for (int j = 0; j < 2; ++j) { +// ... +// ... = a[x + i][y + j]; +// ... +// } +// } +// +// will probably be unrolled to: +// +// gep %a, 0, %x, %y; load +// gep %a, 0, %x, %y + 1; load +// gep %a, 0, %x + 1, %y; load +// gep %a, 0, %x + 1, %y + 1; load +// +// LLVM's GVN does not use partial redundancy elimination yet, and is thus +// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs +// significant slowdown in targets with limited addressing modes. For instance, +// because the PTX target does not support the reg+reg addressing mode, the +// NVPTX backend emits PTX code that literally computes the pointer address of +// each GEP, wasting tons of registers. It emits the following PTX for the +// first load and similar PTX for other loads. +// +// mov.u32 %r1, %x; +// mov.u32 %r2, %y; +// mul.wide.u32 %rl2, %r1, 128; +// mov.u64 %rl3, a; +// add.s64 %rl4, %rl3, %rl2; +// mul.wide.u32 %rl5, %r2, 4; +// add.s64 %rl6, %rl4, %rl5; +// ld.global.f32 %f1, [%rl6]; +// +// To reduce the register pressure, the optimization implemented in this file +// merges the common part of a group of GEPs, so we can compute each pointer +// address by adding a simple offset to the common part, saving many registers. +// +// It works by splitting each GEP into a variadic base and a constant offset. +// The variadic base can be computed once and reused by multiple GEPs, and the +// constant offsets can be nicely folded into the reg+immediate addressing mode +// (supported by most targets) without using any extra register. 
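+//
+// (In the example above, each row of a spans 32 * sizeof(float) = 128
+// bytes, so the four accesses sit at byte offsets 0, 4, 128, and 132 from
+// the common base; these are exactly the immediates that reappear in the
+// optimized PTX below.)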
+//
+// For instance, we transform the four GEPs and four loads in the above example
+// into:
+//
+// base = gep a, 0, x, y
+// load base
+// load base + 1 * sizeof(float)
+// load base + 32 * sizeof(float)
+// load base + 33 * sizeof(float)
+//
+// Given the transformed IR, a backend that supports the reg+immediate
+// addressing mode can easily fold the pointer arithmetic into the loads. For
+// example, the NVPTX backend can easily fold the pointer arithmetic into the
+// ld.global.f32 instructions, and the resultant PTX uses far fewer registers.
+//
+// mov.u32 %r1, %tid.x;
+// mov.u32 %r2, %tid.y;
+// mul.wide.u32 %rl2, %r1, 128;
+// mov.u64 %rl3, a;
+// add.s64 %rl4, %rl3, %rl2;
+// mul.wide.u32 %rl5, %r2, 4;
+// add.s64 %rl6, %rl4, %rl5;
+// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
+// ld.global.f32 %f2, [%rl6+4]; // much better
+// ld.global.f32 %f3, [%rl6+128]; // much better
+// ld.global.f32 %f4, [%rl6+132]; // much better
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
+    "disable-separate-const-offset-from-gep", cl::init(false),
+    cl::desc("Do not separate the constant offset from a GEP instruction"),
+    cl::Hidden);
+
+namespace {
+
+/// \brief A helper class for separating a constant offset from a GEP index.
+///
+/// In real programs, a GEP index may be more complicated than the simple sum
+/// of a variable and a constant integer that could be trivially split off. For
+/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
+/// constant offset, so that we can separate the index into (a << 3) + b and 5.
+///
+/// Therefore, this class looks into the expression that computes a given GEP
+/// index, and tries to find a constant integer that can be hoisted to the
+/// outermost level of the expression as an addition. Not every constant in an
+/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to
+/// (b * a + 5), nor (3 * (a + 5)) to (3 * a + 5); in the latter case, however,
+/// -instcombine has probably already optimized (3 * (a + 5)) to (3 * a + 15).
+class ConstantOffsetExtractor {
+ public:
+  /// Extracts a constant offset from the given GEP index. It outputs the
+  /// numeric value of the extracted constant offset (0 if failed), and a
+  /// new index representing the remainder (equal to the original index minus
+  /// the constant offset).
+  /// \p Idx The given GEP index
+  /// \p NewIdx Output: the new index representing the remainder
+  /// \p DL The data layout of the module
+  /// \p IP Calculating the new index requires new instructions. IP indicates
+  /// where to insert them (typically right before the GEP).
+  static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
+                         Instruction *IP);
+  /// Looks for a constant offset without extracting it. The meaning of the
+  /// arguments and the return value are the same as Extract.
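+  /// Unlike Extract, Find creates no new instructions; the pass uses it in
+  /// accumulateByteOffset to pre-compute the total byte offset and decide
+  /// whether splitting pays off before cloning any GEP.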
+  static int64_t Find(Value *Idx, const DataLayout *DL);
+
+ private:
+  ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
+      : DL(Layout), IP(InsertionPt) {}
+  /// Searches the expression that computes V for a constant offset. If the
+  /// search is successful, records in UserChain the path between V and the
+  /// constant offset.
+  int64_t find(Value *V);
+  /// A helper function to look into both operands of a binary operator U.
+  /// \p IsSub Whether U is a sub operator. If so, we need to negate the
+  /// constant offset at some point.
+  int64_t findInEitherOperand(User *U, bool IsSub);
+  /// After finding the constant offset and how it is reached from the GEP
+  /// index, we build a new index which is a clone of the old one except the
+  /// constant offset is removed. For example, given (a + (b + 5)) and knowing
+  /// the constant offset is 5, this function returns (a + b).
+  ///
+  /// We cannot simply change the constant to zero because the expression that
+  /// computes the index or its intermediate result may be used by others.
+  Value *rebuildWithoutConstantOffset();
+  // A helper function for rebuildWithoutConstantOffset that rebuilds the direct
+  // user (U) of the constant offset (C).
+  Value *rebuildLeafWithoutConstantOffset(User *U, Value *C);
+  /// Returns a clone of U with the first occurrence of From replaced by To.
+  Value *cloneAndReplace(User *U, Value *From, Value *To);
+
+  /// Returns true if LHS and RHS have no bits in common, i.e., LHS & RHS == 0.
+  bool NoCommonBits(Value *LHS, Value *RHS) const;
+  /// Computes which bits are known to be one or zero.
+  /// \p KnownOne Mask of all bits that are known to be one.
+  /// \p KnownZero Mask of all bits that are known to be zero.
+  void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
+  /// Finds the first use of Used in U. Returns -1 if not found.
+  static unsigned FindFirstUse(User *U, Value *Used);
+  /// Returns whether OPC (sext or zext) can be distributed to the operands of
+  /// BO. e.g., sext can be distributed to the operands of an "add nsw" because
+  /// sext (add nsw a, b) == add nsw (sext a), (sext b).
+  static bool Distributable(unsigned OPC, BinaryOperator *BO);
+
+  /// The path from the constant offset to the old GEP index. For example, if
+  /// the GEP index is "a * b + (c + 5)", then after running find, UserChain[0]
+  /// will be the constant 5, UserChain[1] will be the subexpression "c + 5",
+  /// and UserChain[2] will be the entire expression "a * b + (c + 5)".
+  ///
+  /// This path helps rebuildWithoutConstantOffset rebuild the new GEP index.
+  SmallVector<User *, 8> UserChain;
+  /// The data layout of the module. Used in ComputeKnownBits.
+  const DataLayout *DL;
+  Instruction *IP;  /// Insertion position of cloned instructions.
+};
+
+/// \brief A pass that tries to split every GEP in the function into a variadic
+/// base and a constant offset. It is a FunctionPass because searching for the
+/// constant offset may inspect other basic blocks.
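+/// (The expressions feeding a GEP's indices may be defined in basic blocks
+/// other than the GEP's own, so the search cannot be confined to one block.)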
+class SeparateConstOffsetFromGEP : public FunctionPass { + public: + static char ID; + SeparateConstOffsetFromGEP() : FunctionPass(ID) { + initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DataLayoutPass>(); + AU.addRequired<TargetTransformInfo>(); + } + bool runOnFunction(Function &F) override; + + private: + /// Tries to split the given GEP into a variadic base and a constant offset, + /// and returns true if the splitting succeeds. + bool splitGEP(GetElementPtrInst *GEP); + /// Finds the constant offset within each index, and accumulates them. This + /// function only inspects the GEP without changing it. The output + /// NeedsExtraction indicates whether we can extract a non-zero constant + /// offset from any index. + int64_t accumulateByteOffset(GetElementPtrInst *GEP, const DataLayout *DL, + bool &NeedsExtraction); +}; +} // anonymous namespace + +char SeparateConstOffsetFromGEP::ID = 0; +INITIALIZE_PASS_BEGIN( + SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", + "Split GEPs to a variadic base and a constant offset for better CSE", false, + false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_DEPENDENCY(DataLayoutPass) +INITIALIZE_PASS_END( + SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", + "Split GEPs to a variadic base and a constant offset for better CSE", false, + false) + +FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() { + return new SeparateConstOffsetFromGEP(); +} + +bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) { + assert(OPC == Instruction::SExt || OPC == Instruction::ZExt); + + // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B) + // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B) + if (BO->getOpcode() == Instruction::Add || + BO->getOpcode() == Instruction::Sub) { + return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) || + (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap()); + } + + // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B) + // -instcombine also leverages this invariant to do the reverse + // transformation to reduce integer casts. + return BO->getOpcode() == Instruction::And || + BO->getOpcode() == Instruction::Or || + BO->getOpcode() == Instruction::Xor; +} + +int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) { + assert(U->getNumOperands() == 2); + int64_t ConstantOffset = find(U->getOperand(0)); + // If we found a constant offset in the left operand, stop and return that. + // This shortcut might cause us to miss opportunities of combining the + // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9. + // However, such cases are probably already handled by -instcombine, + // given this pass runs after the standard optimizations. + if (ConstantOffset != 0) return ConstantOffset; + ConstantOffset = find(U->getOperand(1)); + // If U is a sub operator, negate the constant offset found in the right + // operand. + return IsSub ? -ConstantOffset : ConstantOffset; +} + +int64_t ConstantOffsetExtractor::find(Value *V) { + // TODO(jingyue): We can even trace into integer/pointer casts, such as + // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only + // integers because it gives good enough results for our benchmarks. 
+  assert(V->getType()->isIntegerTy());
+
+  User *U = dyn_cast<User>(V);
+  // We cannot do much with Values that are not a User, such as BasicBlock and
+  // MDNode.
+  if (U == nullptr) return 0;
+
+  int64_t ConstantOffset = 0;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(U)) {
+    // Hooray, we found it!
+    ConstantOffset = CI->getSExtValue();
+  } else if (Operator *O = dyn_cast<Operator>(U)) {
+    // The GEP index may be more complicated than a simple addition of a
+    // variable and a constant. Therefore, we trace into subexpressions for
+    // more hoisting opportunities.
+    switch (O->getOpcode()) {
+    case Instruction::Add: {
+      ConstantOffset = findInEitherOperand(U, false);
+      break;
+    }
+    case Instruction::Sub: {
+      ConstantOffset = findInEitherOperand(U, true);
+      break;
+    }
+    case Instruction::Or: {
+      // If LHS and RHS don't have common bits, (LHS | RHS) is equivalent to
+      // (LHS + RHS).
+      if (NoCommonBits(U->getOperand(0), U->getOperand(1)))
+        ConstantOffset = findInEitherOperand(U, false);
+      break;
+    }
+    case Instruction::SExt:
+    case Instruction::ZExt: {
+      // We trace into sext/zext if the operator can be distributed to its
+      // operand. e.g., we can trace into "sext (add nsw a, 5)" and extract
+      // constant 5, because
+      //   sext (add nsw a, 5) == add nsw (sext a), 5
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) {
+        if (Distributable(O->getOpcode(), BO))
+          ConstantOffset = find(U->getOperand(0));
+      }
+      break;
+    }
+    }
+  }
+  // If we found a non-zero constant offset, add it to the path for the future
+  // transformation (rebuildWithoutConstantOffset). Zero is a valid constant
+  // offset, but doesn't help this optimization.
+  if (ConstantOffset != 0)
+    UserChain.push_back(U);
+  return ConstantOffset;
+}
+
+unsigned ConstantOffsetExtractor::FindFirstUse(User *U, Value *Used) {
+  for (unsigned I = 0, E = U->getNumOperands(); I < E; ++I) {
+    if (U->getOperand(I) == Used)
+      return I;
+  }
+  return -1;
+}
+
+Value *ConstantOffsetExtractor::cloneAndReplace(User *U, Value *From,
+                                                Value *To) {
+  // Finds in U the first use of From. It is safe to ignore later occurrences
+  // of From, because findInEitherOperand similarly stops searching the right
+  // operand when the first operand has a non-zero constant offset.
+  unsigned OpNo = FindFirstUse(U, From);
+  assert(OpNo != (unsigned)-1 && "UserChain wasn't built correctly");
+
+  // ConstantOffsetExtractor::find only follows Operators (i.e., Instructions
+  // and ConstantExprs). Therefore, U is either an Instruction or a
+  // ConstantExpr.
+  if (Instruction *I = dyn_cast<Instruction>(U)) {
+    Instruction *Clone = I->clone();
+    Clone->setOperand(OpNo, To);
+    Clone->insertBefore(IP);
+    return Clone;
+  }
+  // cast<Constant>(To) is safe because a ConstantExpr only uses Constants.
+  return cast<ConstantExpr>(U)
+      ->getWithOperandReplaced(OpNo, cast<Constant>(To));
+}
+
+Value *ConstantOffsetExtractor::rebuildLeafWithoutConstantOffset(User *U,
+                                                                 Value *C) {
+  assert(U->getNumOperands() <= 2 &&
+         "We didn't trace into any operator with more than 2 operands");
+  // If U has only one operand which is the constant offset, removing the
+  // constant offset leaves U as a null value.
+  if (U->getNumOperands() == 1)
+    return Constant::getNullValue(U->getType());
+
+  // U->getNumOperands() == 2
+  unsigned OpNo = FindFirstUse(U, C); // U->getOperand(OpNo) == C
+  assert(OpNo < 2 && "UserChain wasn't built correctly");
+  Value *TheOther = U->getOperand(1 - OpNo); // The other operand of U
+  // If U = C - X, removing C makes U = -X; otherwise U will simply be X.
+  if (!isa<SubOperator>(U) || OpNo == 1)
+    return TheOther;
+  if (isa<ConstantExpr>(U))
+    return ConstantExpr::getNeg(cast<Constant>(TheOther));
+  return BinaryOperator::CreateNeg(TheOther, "", IP);
+}
+
+Value *ConstantOffsetExtractor::rebuildWithoutConstantOffset() {
+  assert(UserChain.size() > 0 && "you at least found a constant, right?");
+  // Start with the constant and go up through UserChain, each time building a
+  // clone of the subexpression but with the constant removed.
+  // e.g., to build a clone of (a + (b + (c + 5))) but with the 5 removed, we
+  // first build c, then (b + c), and finally (a + (b + c)).
+  //
+  // Fast path: if the GEP index is a constant, simply return a zero of its
+  // type.
+  if (UserChain.size() == 1)
+    return ConstantInt::get(UserChain[0]->getType(), 0);
+
+  Value *Remainder =
+      rebuildLeafWithoutConstantOffset(UserChain[1], UserChain[0]);
+  for (size_t I = 2; I < UserChain.size(); ++I)
+    Remainder = cloneAndReplace(UserChain[I], UserChain[I - 1], Remainder);
+  return Remainder;
+}
+
+int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
+                                         const DataLayout *DL,
+                                         Instruction *IP) {
+  ConstantOffsetExtractor Extractor(DL, IP);
+  // Find a non-zero constant offset first.
+  int64_t ConstantOffset = Extractor.find(Idx);
+  if (ConstantOffset == 0)
+    return 0;
+  // Then rebuild a new index with the constant removed.
+  NewIdx = Extractor.rebuildWithoutConstantOffset();
+  return ConstantOffset;
+}
+
+int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL) {
+  return ConstantOffsetExtractor(DL, nullptr).find(Idx);
+}
+
+void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
+                                               APInt &KnownZero) const {
+  IntegerType *IT = cast<IntegerType>(V->getType());
+  KnownOne = APInt(IT->getBitWidth(), 0);
+  KnownZero = APInt(IT->getBitWidth(), 0);
+  llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
+}
+
+bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
+  assert(LHS->getType() == RHS->getType() &&
+         "LHS and RHS should have the same type");
+  APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero;
+  ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero);
+  ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero);
+  // If every bit is known to be zero in at least one of the operands, then
+  // LHS & RHS == 0.
+  return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
+}
+
+int64_t SeparateConstOffsetFromGEP::accumulateByteOffset(
+    GetElementPtrInst *GEP, const DataLayout *DL, bool &NeedsExtraction) {
+  NeedsExtraction = false;
+  int64_t AccumulativeByteOffset = 0;
+  gep_type_iterator GTI = gep_type_begin(*GEP);
+  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+    if (isa<SequentialType>(*GTI)) {
+      // Tries to extract a constant offset from this GEP index.
+      int64_t ConstantOffset =
+          ConstantOffsetExtractor::Find(GEP->getOperand(I), DL);
+      if (ConstantOffset != 0) {
+        NeedsExtraction = true;
+        // A GEP may have multiple indices. We accumulate the extracted
+        // constant offset to a byte offset, and later offset the remainder of
+        // the original GEP with this byte offset.
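+        // For the example in the file header, extracting 1 from each index
+        // of a[x + 1][y + 1] contributes 1 * 128 bytes for the row index
+        // (a row holds 32 floats) plus 1 * 4 bytes for the column index,
+        // giving a total byte offset of 132.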
+        AccumulativeByteOffset +=
+            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+      }
+    }
+  }
+  return AccumulativeByteOffset;
+}
+
+bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
+  // Skip vector GEPs.
+  if (GEP->getType()->isVectorTy())
+    return false;
+
+  // The backend can already nicely handle the case where all indices are
+  // constant.
+  if (GEP->hasAllConstantIndices())
+    return false;
+
+  bool Changed = false;
+
+  // Shortcuts integer casts. Eliminating these explicit casts can make
+  // subsequent optimizations simpler: ConstantOffsetExtractor needn't
+  // trace into these casts.
+  if (GEP->isInBounds()) {
+    // Doing this to inbounds GEPs is safe because their indices are guaranteed
+    // to be non-negative and in bounds.
+    gep_type_iterator GTI = gep_type_begin(*GEP);
+    for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+      if (isa<SequentialType>(*GTI)) {
+        if (Operator *O = dyn_cast<Operator>(GEP->getOperand(I))) {
+          if (O->getOpcode() == Instruction::SExt ||
+              O->getOpcode() == Instruction::ZExt) {
+            GEP->setOperand(I, O->getOperand(0));
+            Changed = true;
+          }
+        }
+      }
+    }
+  }
+
+  const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+  bool NeedsExtraction;
+  int64_t AccumulativeByteOffset =
+      accumulateByteOffset(GEP, DL, NeedsExtraction);
+
+  if (!NeedsExtraction)
+    return Changed;
+  // Before really splitting the GEP, check whether the backend supports the
+  // addressing mode we are about to produce. If not, this splitting probably
+  // won't be beneficial.
+  TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+  if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
+                                 /*BaseGV=*/nullptr, AccumulativeByteOffset,
+                                 /*HasBaseReg=*/true, /*Scale=*/0)) {
+    return Changed;
+  }
+
+  // Remove the constant offset in each GEP index. The resultant GEP computes
+  // the variadic base.
+  gep_type_iterator GTI = gep_type_begin(*GEP);
+  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+    if (isa<SequentialType>(*GTI)) {
+      Value *NewIdx = nullptr;
+      // Tries to extract a constant offset from this GEP index.
+      int64_t ConstantOffset =
+          ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
+      if (ConstantOffset != 0) {
+        assert(NewIdx != nullptr &&
+               "ConstantOffset != 0 implies NewIdx is set");
+        GEP->setOperand(I, NewIdx);
+        // Clear the inbounds attribute because the new index may be off-bound.
+        // e.g.,
+        //
+        // b = add i64 a, 5
+        // addr = gep inbounds float* p, i64 b
+        //
+        // is transformed to:
+        //
+        // addr2 = gep float* p, i64 a
+        // addr = gep float* addr2, i64 5
+        //
+        // If a is -4, although the old index b is in bounds, the new index a is
+        // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
+        // inbounds keyword is not present, the offsets are added to the base
+        // address with silently-wrapping two's complement arithmetic".
+        // Therefore, the final code will be semantically equivalent.
+        //
+        // TODO(jingyue): do some range analysis to keep as many inbounds as
+        // possible. GEPs with inbounds are more friendly to alias analysis.
+        GEP->setIsInBounds(false);
+        Changed = true;
+      }
+    }
+  }
+
+  // Offsets the base with the accumulative byte offset.
+  //
+  //   %gep                        ; the base
+  //   ... %gep ...
+  //
+  // => add the offset
+  //
+  //   %gep2                       ; clone of %gep
+  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+  //   %gep                        ; will be removed
+  //   ... %gep ...
+  //
+  // => replace all uses of %gep with %new.gep and remove %gep
+  //
+  //   %gep2                       ; clone of %gep
+  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+  //   ... %new.gep ...
+  //
+  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
+  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
+  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
+  // type of %gep.
+  //
+  //   %gep2                       ; clone of %gep
+  //   %0 = bitcast %gep2 to i8*
+  //   %uglygep = gep %0, <offset>
+  //   %new.gep = bitcast %uglygep to <type of %gep>
+  //   ... %new.gep ...
+  Instruction *NewGEP = GEP->clone();
+  NewGEP->insertBefore(GEP);
+
+  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+  uint64_t ElementTypeSizeOfGEP =
+      DL->getTypeAllocSize(GEP->getType()->getElementType());
+  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
+    // Very likely. As long as %gep is naturally aligned, the byte offset we
+    // extracted should be a multiple of sizeof(*%gep).
+    // Per the ANSI C standard, signed / unsigned = unsigned. Therefore, we
+    // cast ElementTypeSizeOfGEP to signed.
+    int64_t Index =
+        AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
+    NewGEP = GetElementPtrInst::Create(
+        NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
+  } else {
+    // Unlikely but possible. For example,
+    //
+    // #pragma pack(1)
+    // struct S {
+    //   int a[3];
+    //   int64 b[8];
+    // };
+    // #pragma pack()
+    //
+    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
+    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
+    // sizeof(S) + 3 * sizeof(int64) = 76 + 24 = 100, which is not a multiple
+    // of sizeof(int64).
+    //
+    // Emit an uglygep in this case.
+    Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
+                                       GEP->getPointerAddressSpace());
+    NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
+    NewGEP = GetElementPtrInst::Create(
+        NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
+        "uglygep", GEP);
+    if (GEP->getType() != I8PtrTy)
+      NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
+  }
+
+  GEP->replaceAllUsesWith(NewGEP);
+  GEP->eraseFromParent();
+
+  return true;
+}
+
+bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
+  if (DisableSeparateConstOffsetFromGEP)
+    return false;
+
+  bool Changed = false;
+  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
+    for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
+      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
+        Changed |= splitGEP(GEP);
+      }
+      // No need to split GEP ConstantExprs because all their indices are
+      // constant already.
+ } + } + return Changed; +} diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ceae5a7..5d5606b 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -38,6 +37,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "simplifycfg" + STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { @@ -71,7 +72,7 @@ FunctionPass *llvm::createCFGSimplificationPass() { static bool mergeEmptyReturnBlocks(Function &F) { bool Changed = false; - BasicBlock *RetBlock = 0; + BasicBlock *RetBlock = nullptr; // Scan all the blocks in the function, looking for empty return blocks. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) { @@ -79,7 +80,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { // Only look at return blocks. ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); - if (Ret == 0) continue; + if (!Ret) continue; // Only look at the block if it is empty or the only other thing in it is a // single PHI node that is the operand to the return. @@ -98,7 +99,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { } // If this is the first returning block, remember it and keep going. - if (RetBlock == 0) { + if (!RetBlock) { RetBlock = &BB; continue; } @@ -119,7 +120,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { // If the canonical return block has no PHI node, create one now. PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); - if (RetBlockPHI == 0) { + if (!RetBlockPHI) { Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), @@ -173,7 +174,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); EverChanged |= iterativelySimplifyCFG(F, TTI, DL); diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 4107374..482c33a 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sink" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -25,6 +24,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "sink" + STATISTIC(NumSunk, "Number of instructions sunk"); STATISTIC(NumSinkIter, "Number of sinking iterations"); @@ -203,7 +204,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst, // Don't sink instructions into a loop. 
Loop *succ = LI->getLoopFor(SuccToSinkTo); Loop *cur = LI->getLoopFor(Inst->getParent()); - if (succ != 0 && succ != cur) + if (succ != nullptr && succ != cur) return false; } @@ -237,14 +238,14 @@ bool Sinking::SinkInstruction(Instruction *Inst, // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. - BasicBlock *SuccToSinkTo = 0; + BasicBlock *SuccToSinkTo = nullptr; // Instructions can only be sunk if all their uses are in blocks // dominated by one of the successors. // Look at all the postdominators and see if we can sink it in one. DomTreeNode *DTN = DT->getNode(Inst->getParent()); for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end(); - I != E && SuccToSinkTo == 0; ++I) { + I != E && SuccToSinkTo == nullptr; ++I) { BasicBlock *Candidate = (*I)->getBlock(); if ((*I)->getIDom()->getBlock() == Inst->getParent() && IsAcceptableTarget(Inst, Candidate)) @@ -254,13 +255,13 @@ bool Sinking::SinkInstruction(Instruction *Inst, // If no suitable postdominator was found, look at all the successors and // decide which one we should sink to, if any. for (succ_iterator I = succ_begin(Inst->getParent()), - E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) { + E = succ_end(Inst->getParent()); I != E && !SuccToSinkTo; ++I) { if (IsAcceptableTarget(Inst, *I)) SuccToSinkTo = *I; } // If we couldn't find a block to sink to, ignore this instruction. - if (SuccToSinkTo == 0) + if (!SuccToSinkTo) return false; DEBUG(dbgs() << "Sink" << *Inst << " ("; diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 8fd2268..7b77ae1 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "structurizecfg" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" @@ -21,6 +20,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "structurizecfg" + namespace { // Definition of the complex types used in this pass. @@ -64,14 +65,14 @@ public: /// \brief Start a new query NearestCommonDominator(DominatorTree *DomTree) { DT = DomTree; - Result = 0; + Result = nullptr; } /// \brief Add BB to the resulting dominator void addBlock(BasicBlock *BB, bool Remember = true) { DomTreeNode *Node = DT->getNode(BB); - if (Result == 0) { + if (!Result) { unsigned Numbering = 0; for (;Node;Node = Node->getIDom()) IndexMap[Node] = ++Numbering; @@ -279,7 +280,7 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { void StructurizeCFG::orderNodes() { scc_iterator<Region *> I = scc_begin(ParentRegion); for (Order.clear(); !I.isAtEnd(); ++I) { - std::vector<RegionNode *> &Nodes = *I; + const std::vector<RegionNode *> &Nodes = *I; Order.append(Nodes.begin(), Nodes.end()); } } @@ -453,10 +454,7 @@ void StructurizeCFG::insertConditions(bool Loops) { Value *Default = Loops ? 
BoolTrue : BoolFalse; SSAUpdater PhiInserter; - for (BranchVector::iterator I = Conds.begin(), - E = Conds.end(); I != E; ++I) { - - BranchInst *Term = *I; + for (BranchInst *Term : Conds) { assert(Term->isConditional()); BasicBlock *Parent = Term->getParent(); @@ -472,7 +470,7 @@ void StructurizeCFG::insertConditions(bool Loops) { NearestCommonDominator Dominator(DT); Dominator.addBlock(Parent, false); - Value *ParentValue = 0; + Value *ParentValue = nullptr; for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { @@ -591,7 +589,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, if (Node->isSubRegion()) { Region *SubRegion = Node->getNodeAs<Region>(); BasicBlock *OldExit = SubRegion->getExit(); - BasicBlock *Dominator = 0; + BasicBlock *Dominator = nullptr; // Find all the edges from the sub region to the exit for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit); @@ -678,7 +676,8 @@ BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, /// \brief Set the previous node void StructurizeCFG::setPrevNode(BasicBlock *BB) { - PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0; + PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) + : nullptr; } /// \brief Does BB dominate all the predicates of Node ? @@ -699,7 +698,7 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { bool Dominated = false; // Regionentry is always true - if (PrevNode == 0) + if (!PrevNode) return true; for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); @@ -806,11 +805,11 @@ void StructurizeCFG::createFlow() { Conditions.clear(); LoopConds.clear(); - PrevNode = 0; + PrevNode = nullptr; Visited.clear(); while (!Order.empty()) { - handleLoops(EntryDominatesExit, 0); + handleLoops(EntryDominatesExit, nullptr); } if (PrevNode) diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 6d02777..05b9892 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -50,12 +50,12 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -64,6 +64,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -76,6 +77,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "tailcallelim" + STATISTIC(NumEliminated, "Number of tail calls removed"); STATISTIC(NumRetDuped, "Number of return duplicated"); STATISTIC(NumAccumAdded, "Number of accumulators introduced"); @@ -94,6 +97,9 @@ namespace { bool runOnFunction(Function &F) override; private: + bool runTRE(Function &F); + bool markTails(Function &F, bool &AllCallsAreTailCalls); + CallInst *FindTRECandidate(Instruction *I, bool CannotTailCallElimCallsMarkedTail); bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, @@ -131,55 +137,255 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { 
AU.addRequired<TargetTransformInfo>(); } -/// CanTRE - Scan the specified basic block for alloca instructions. -/// If it contains any that are variable-sized or not in the entry block, -/// returns false. -static bool CanTRE(AllocaInst *AI) { - // Because of PR962, we don't TRE allocas outside the entry block. - - // If this alloca is in the body of the function, or if it is a variable - // sized allocation, we cannot tail call eliminate calls marked 'tail' - // with this mechanism. - BasicBlock *BB = AI->getParent(); - return BB == &BB->getParent()->getEntryBlock() && - isa<ConstantInt>(AI->getArraySize()); +/// \brief Scan the specified function for alloca instructions. +/// If it contains any dynamic allocas, returns false. +static bool CanTRE(Function &F) { + // Because of PR962, we don't TRE dynamic allocas. + for (auto &BB : F) { + for (auto &I : BB) { + if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { + if (!AI->isStaticAlloca()) + return false; + } + } + } + + return true; } -namespace { -struct AllocaCaptureTracker : public CaptureTracker { - AllocaCaptureTracker() : Captured(false) {} +bool TailCallElim::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; - void tooManyUses() override { Captured = true; } + bool AllCallsAreTailCalls = false; + bool Modified = markTails(F, AllCallsAreTailCalls); + if (AllCallsAreTailCalls) + Modified |= runTRE(F); + return Modified; +} - bool shouldExplore(const Use *U) override { - Value *V = U->getUser(); - if (isa<CallInst>(V) || isa<InvokeInst>(V)) - UsesAlloca.insert(V); - return true; +namespace { +struct AllocaDerivedValueTracker { + // Start at a root value and walk its use-def chain to mark calls that use the + // value or a derived value in AllocaUsers, and places where it may escape in + // EscapePoints. + void walk(Value *Root) { + SmallVector<Use *, 32> Worklist; + SmallPtrSet<Use *, 32> Visited; + + auto AddUsesToWorklist = [&](Value *V) { + for (auto &U : V->uses()) { + if (!Visited.insert(&U)) + continue; + Worklist.push_back(&U); + } + }; + + AddUsesToWorklist(Root); + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast<Instruction>(U->getUser()); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(I); + bool IsNocapture = !CS.isCallee(U) && + CS.doesNotCapture(CS.getArgumentNo(U)); + callUsesLocalStack(CS, IsNocapture); + if (IsNocapture) { + // If the alloca-derived argument is passed in as nocapture, then it + // can't propagate to the call's return. That would be capturing. + continue; + } + break; + } + case Instruction::Load: { + // The result of a load is not alloca-derived (unless an alloca has + // otherwise escaped, but this is a local analysis). + continue; + } + case Instruction::Store: { + if (U->getOperandNo() == 0) + EscapePoints.insert(I); + continue; // Stores have no users to analyze. + } + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + case Instruction::AddrSpaceCast: + break; + default: + EscapePoints.insert(I); + break; + } + + AddUsesToWorklist(I); + } } - bool captured(const Use *U) override { - if (isa<ReturnInst>(U->getUser())) - return false; - Captured = true; - return true; + void callUsesLocalStack(CallSite CS, bool IsNocapture) { + // Add it to the list of alloca users. If it's already there, skip further + // processing. 
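The new AllocaDerivedValueTracker::walk() above is a standard worklist traversal of the use-def graph with a visited set. A self-contained sketch of just that traversal skeleton, with a stand-in Node type in place of llvm::Use and Instruction:

```cpp
#include <set>
#include <vector>

// Stand-in node type; in the pass the graph is the use-def chain rooted at an
// alloca (or byval argument) and the edges are uses.
struct Node {
  std::vector<Node *> Users;
};

// Visit every transitive use of Root exactly once, in LIFO order, mirroring
// the Worklist/Visited pair in AllocaDerivedValueTracker::walk().
void walkUses(Node *Root) {
  std::vector<Node *> Worklist;
  std::set<Node *> Visited;
  for (Node *U : Root->Users)
    if (Visited.insert(U).second)
      Worklist.push_back(U);
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    // ...classify N here (call, load, store, GEP, ...) and stop following
    // edges whenever the classification says the value does not propagate.
    for (Node *U : N->Users)
      if (Visited.insert(U).second) // each use is enqueued at most once
        Worklist.push_back(U);
  }
}
```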
+ if (!AllocaUsers.insert(CS.getInstruction())) + return; + + // If it's nocapture then it can't capture the alloca. + if (IsNocapture) + return; + + // If it can write to memory, it can leak the alloca value. + if (!CS.onlyReadsMemory()) + EscapePoints.insert(CS.getInstruction()); } - bool Captured; - SmallPtrSet<const Value *, 16> UsesAlloca; + SmallPtrSet<Instruction *, 32> AllocaUsers; + SmallPtrSet<Instruction *, 32> EscapePoints; }; -} // end anonymous namespace +} -bool TailCallElim::runOnFunction(Function &F) { - if (skipOptnoneFunction(F)) +bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { + if (F.callsFunctionThatReturnsTwice()) return false; + AllCallsAreTailCalls = true; + + // The local stack holds all alloca instructions and all byval arguments. + AllocaDerivedValueTracker Tracker; + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) + Tracker.walk(&Arg); + } + for (auto &BB : F) { + for (auto &I : BB) + if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) + Tracker.walk(AI); + } + bool Modified = false; + + // Track whether a block is reachable after an alloca has escaped. Blocks that + // contain the escaping instruction will be marked as being visited without an + // escaped alloca, since that is how the block began. + enum VisitType { + UNVISITED, + UNESCAPED, + ESCAPED + }; + DenseMap<BasicBlock *, VisitType> Visited; + + // We propagate the fact that an alloca has escaped from block to successor. + // Visit the blocks that are propagating the escapedness first. To do this, we + // maintain two worklists. + SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped; + + // We may enter a block and visit it thinking that no alloca has escaped yet, + // then see an escape point and go back around a loop edge and come back to + // the same block twice. Because of this, we defer setting tail on calls when + // we first encounter them in a block. Every entry in this list does not + // statically use an alloca via use-def chain analysis, but may find an alloca + // through other means if the block turns out to be reachable after an escape + // point. + SmallVector<CallInst *, 32> DeferredTails; + + BasicBlock *BB = &F.getEntryBlock(); + VisitType Escaped = UNESCAPED; + do { + for (auto &I : *BB) { + if (Tracker.EscapePoints.count(&I)) + Escaped = ESCAPED; + + CallInst *CI = dyn_cast<CallInst>(&I); + if (!CI || CI->isTailCall()) + continue; + + if (CI->doesNotAccessMemory()) { + // A call to a readnone function whose arguments are all things computed + // outside this function can be marked tail. Even if you stored the + // alloca address into a global, a readnone function can't load the + // global anyhow. + // + // Note that this runs whether we know an alloca has escaped or not. If + // it has, then we can't trust Tracker.AllocaUsers to be accurate. 
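The three-state escape propagation set up in markTails() behaves like a small dataflow lattice: a block's recorded state may only rise from UNVISITED through UNESCAPED to ESCAPED, which is what bounds the block walk. A sketch of the successor update, with Block standing in for llvm::BasicBlock (assumed simplification, not the patch's exact code):

```cpp
#include <map>
#include <vector>

struct Block; // stand-in for llvm::BasicBlock

enum VisitType { UNVISITED, UNESCAPED, ESCAPED };

// A successor's state only ever increases, so every block enters a worklist
// at most twice and the traversal terminates in O(blocks + edges).
void propagate(std::map<Block *, VisitType> &Visited, Block *Succ,
               VisitType Incoming,
               std::vector<Block *> &WorklistEscaped,
               std::vector<Block *> &WorklistUnescaped) {
  VisitType &State = Visited[Succ];
  if (State < Incoming) { // monotone join: keep the maximum
    State = Incoming;
    if (State == ESCAPED)
      WorklistEscaped.push_back(Succ);
    else
      WorklistUnescaped.push_back(Succ);
  }
}
```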
+ bool SafeToTail = true; + for (auto &Arg : CI->arg_operands()) { + if (isa<Constant>(Arg.getUser())) + continue; + if (Argument *A = dyn_cast<Argument>(Arg.getUser())) + if (!A->hasByValAttr()) + continue; + SafeToTail = false; + break; + } + if (SafeToTail) { + emitOptimizationRemark( + F.getContext(), "tailcallelim", F, CI->getDebugLoc(), + "marked this readnone call a tail call candidate"); + CI->setTailCall(); + Modified = true; + continue; + } + } + + if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + DeferredTails.push_back(CI); + } else { + AllCallsAreTailCalls = false; + } + } + + for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { + auto &State = Visited[SuccBB]; + if (State < Escaped) { + State = Escaped; + if (State == ESCAPED) + WorklistEscaped.push_back(SuccBB); + else + WorklistUnescaped.push_back(SuccBB); + } + } + + if (!WorklistEscaped.empty()) { + BB = WorklistEscaped.pop_back_val(); + Escaped = ESCAPED; + } else { + BB = nullptr; + while (!WorklistUnescaped.empty()) { + auto *NextBB = WorklistUnescaped.pop_back_val(); + if (Visited[NextBB] == UNESCAPED) { + BB = NextBB; + Escaped = UNESCAPED; + break; + } + } + } + } while (BB); + + for (CallInst *CI : DeferredTails) { + if (Visited[CI->getParent()] != ESCAPED) { + // If the escape point was part way through the block, calls after the + // escape point wouldn't have been put into DeferredTails. + emitOptimizationRemark(F.getContext(), "tailcallelim", F, + CI->getDebugLoc(), + "marked this call a tail call candidate"); + CI->setTailCall(); + Modified = true; + } else { + AllCallsAreTailCalls = false; + } + } + + return Modified; +} + +bool TailCallElim::runTRE(Function &F) { // If this function is a varargs function, we won't be able to PHI the args // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return false; TTI = &getAnalysis<TargetTransformInfo>(); - BasicBlock *OldEntry = 0; + BasicBlock *OldEntry = nullptr; bool TailCallsAreMarkedTail = false; SmallVector<PHINode*, 8> ArgumentPHIs; bool MadeChange = false; @@ -188,39 +394,23 @@ bool TailCallElim::runOnFunction(Function &F) { // marked with the 'tail' attribute, because doing so would cause the stack // size to increase (real TRE would deallocate variable sized allocas, TRE // doesn't). - bool CanTRETailMarkedCall = true; - - // Find calls that can be marked tail. - AllocaCaptureTracker ACT; - for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - CanTRETailMarkedCall &= CanTRE(AI); - PointerMayBeCaptured(AI, &ACT); - // If any allocas are captured, exit. - if (ACT.Captured) - return false; - } - } - } + bool CanTRETailMarkedCall = CanTRE(F); - // Second pass, change any tail recursive calls to loops. + // Change any tail recursive calls to loops. // // FIXME: The code generator produces really bad code when an 'escaping // alloca' is changed from being a static alloca to being a dynamic alloca. // Until this is resolved, disable this transformation if that would ever // happen. This bug is PR962. 
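For readers new to this pass, the end-to-end effect of the tail-recursion elimination that runTRE performs is easiest to state in source terms. A sketch; both functions are illustrative:

```cpp
// Tail position: nothing runs after the recursive call, so its frame can be
// reused instead of growing the stack.
unsigned factorial(unsigned N, unsigned Acc) {
  if (N == 0)
    return Acc;
  return factorial(N - 1, Acc * N);
}

// Conceptually what TRE emits: the parameters become loop-carried values
// (the ArgumentPHIs built by EliminateRecursiveTailCall).
unsigned factorialLoop(unsigned N, unsigned Acc) {
  for (;;) {
    if (N == 0)
      return Acc;
    Acc *= N;
    N -= 1;
  }
}
```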
- if (ACT.UsesAlloca.empty()) { - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { - bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, !CanTRETailMarkedCall); - if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, - TailCallsAreMarkedTail, ArgumentPHIs, - !CanTRETailMarkedCall); - MadeChange |= Change; - } + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) { + bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, + ArgumentPHIs, !CanTRETailMarkedCall); + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, + TailCallsAreMarkedTail, ArgumentPHIs, + !CanTRETailMarkedCall); + MadeChange |= Change; } } @@ -229,34 +419,13 @@ bool TailCallElim::runOnFunction(Function &F) { // with themselves. Check to see if we did and clean up our mess if so. This // occurs when a function passes an argument straight through to its tail // call. - if (!ArgumentPHIs.empty()) { - for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { - PHINode *PN = ArgumentPHIs[i]; - - // If the PHI Node is a dynamic constant, replace it with the value it is. - if (Value *PNV = SimplifyInstruction(PN)) { - PN->replaceAllUsesWith(PNV); - PN->eraseFromParent(); - } - } - } + for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { + PHINode *PN = ArgumentPHIs[i]; - // At this point, we know that the function does not have any captured - // allocas. If additionally the function does not call setjmp, mark all calls - // in the function that do not access stack memory with the tail keyword. This - // implies ensuring that there does not exist any path from a call that takes - // in an alloca but does not capture it and the call which we wish to mark - // with "tail". - if (!F.callsFunctionThatReturnsTwice()) { - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (!ACT.UsesAlloca.count(CI)) { - CI->setTailCall(); - MadeChange = true; - } - } - } + // If the PHI Node is a dynamic constant, replace it with the value it is. + if (Value *PNV = SimplifyInstruction(PN)) { + PN->replaceAllUsesWith(PNV); + PN->eraseFromParent(); } } @@ -343,11 +512,11 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { // static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { Function *F = CI->getParent()->getParent(); - Value *ReturnedValue = 0; + Value *ReturnedValue = nullptr; for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) { ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()); - if (RI == 0 || RI == IgnoreRI) continue; + if (RI == nullptr || RI == IgnoreRI) continue; // We can only perform this transformation if the value returned is // evaluatable at the start of the initial invocation of the function, @@ -355,10 +524,10 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { // Value *RetOp = RI->getOperand(0); if (!isDynamicConstant(RetOp, CI, RI)) - return 0; + return nullptr; if (ReturnedValue && RetOp != ReturnedValue) - return 0; // Cannot transform if differing values are returned. + return nullptr; // Cannot transform if differing values are returned. 
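An aside on the change that dominates this patch file for file: replacing literal 0/NULL with nullptr is not purely cosmetic, because nullptr has its own type (std::nullptr_t) and cannot be mistaken for an integer during overload resolution. A standalone illustration, unrelated to the pass code above:

```cpp
#include <cstdio>

void show(int Value)  { std::printf("int: %d\n", Value); }
void show(char *Text) { std::printf("str: %s\n", Text ? Text : "(null)"); }

int main() {
  show(0);       // picks show(int): a literal 0 is an int first
  show(nullptr); // picks show(char*): nullptr_t converts only to pointers
}
```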
ReturnedValue = RetOp; } return ReturnedValue; @@ -370,18 +539,18 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { /// Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { - if (!I->isAssociative() || !I->isCommutative()) return 0; + if (!I->isAssociative() || !I->isCommutative()) return nullptr; assert(I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!"); // Exactly one operand should be the result of the call instruction. if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || (I->getOperand(0) != CI && I->getOperand(1) != CI)) - return 0; + return nullptr; // The only user of this instruction we allow is a single return instruction. if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back())) - return 0; + return nullptr; // Ok, now we have to check all of the other return instructions in this // function. If they return non-constants or differing values, then we cannot @@ -402,11 +571,11 @@ TailCallElim::FindTRECandidate(Instruction *TI, Function *F = BB->getParent(); if (&BB->front() == TI) // Make sure there is something before the terminator. - return 0; + return nullptr; // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. - CallInst *CI = 0; + CallInst *CI = nullptr; BasicBlock::iterator BBI = TI; while (true) { CI = dyn_cast<CallInst>(BBI); @@ -414,14 +583,14 @@ TailCallElim::FindTRECandidate(Instruction *TI, break; if (BBI == BB->begin()) - return 0; // Didn't find a potential tail call. + return nullptr; // Didn't find a potential tail call. --BBI; } // If this call is marked as a tail call, and if there are dynamic allocas in // the function, we cannot perform this optimization. if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) - return 0; + return nullptr; // As a special case, detect code like this: // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call @@ -441,7 +610,7 @@ TailCallElim::FindTRECandidate(Instruction *TI, for (; I != E && FI != FE; ++I, ++FI) if (*I != &*FI) break; if (I == E && FI == FE) - return 0; + return nullptr; } return CI; @@ -462,8 +631,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // which is different to the constant returned by other return instructions // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a // special case of accumulator recursion, the operation being "return C". - Value *AccumulatorRecursionEliminationInitVal = 0; - Instruction *AccumulatorRecursionInstr = 0; + Value *AccumulatorRecursionEliminationInitVal = nullptr; + Instruction *AccumulatorRecursionInstr = nullptr; // Ok, we found a potential tail call. We can currently only transform the // tail call if all of the instructions between the call and the return are @@ -493,8 +662,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // accumulator recursion variable eliminated. if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa<UndefValue>(Ret->getReturnValue()) && - AccumulatorRecursionEliminationInitVal == 0 && - !getCommonReturnValue(0, CI)) { + AccumulatorRecursionEliminationInitVal == nullptr && + !getCommonReturnValue(nullptr, CI)) { // One case remains that we are able to handle: the current return // instruction returns a constant, and all other return instructions // return a different constant. 
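The accumulator-recursion machinery referenced above (AccumulatorRecursionInstr and its init value) handles calls that are not in tail position but feed an associative, commutative operation. In source terms, a sketch with illustrative functions:

```cpp
// Not a tail call as written: the multiply runs after the recursive call
// returns. Because '*' is associative and commutative, the pass can introduce
// an accumulator and still build a loop.
unsigned factorial(unsigned N) {
  if (N == 0)
    return 1;
  return N * factorial(N - 1); // call feeds a commutative operation
}

// Conceptually what accumulator-recursion elimination emits:
unsigned factorialAccum(unsigned N) {
  unsigned Accum = 1; // the accumulator's init value
  for (; N != 0; --N)
    Accum *= N;       // the reassociated instruction
  return Accum;
}
```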
@@ -510,9 +679,12 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); + emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(), + "transforming tail recursion to loop"); + // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. - if (OldEntry == 0) { + if (!OldEntry) { OldEntry = &F->getEntryBlock(); BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index f42635e..196ac79 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -52,8 +52,6 @@ // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "add-discriminators" - #include "llvm/Transforms/Scalar.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -69,6 +67,8 @@ using namespace llvm; +#define DEBUG_TYPE "add-discriminators" + namespace { struct AddDiscriminators : public FunctionPass { static char ID; // Pass identification, replacement for typeid @@ -99,7 +99,7 @@ FunctionPass *llvm::createAddDiscriminatorsPass() { static bool hasDebugInfo(const Function &F) { NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu"); - return CUNodes != 0; + return CUNodes != nullptr; } /// \brief Assign DWARF discriminators. @@ -154,10 +154,15 @@ static bool hasDebugInfo(const Function &F) { /// file and line location as I2. This new lexical block will have a /// different discriminator number than I1. bool AddDiscriminators::runOnFunction(Function &F) { - // No need to do anything if there is no debug info for this function. // If the function has debug information, but the user has disabled // discriminators, do nothing. - if (!hasDebugInfo(F) || NoDiscriminators) return false; + // Similarly, if the function has no debug info, do nothing. + // Finally, if this module is built with dwarf versions earlier than 4, + // do nothing (discriminator support is a DWARF 4 feature).
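Context for the AddDiscriminators change being gated here: discriminators exist so that sample-based profilers can separate code that shares a single file/line location. A minimal illustration; the function and names are mine, not from the pass:

```cpp
// Both sides of the branch below share one source line, so a sample profile
// cannot tell how often each side ran. AddDiscriminators places them in
// distinct DWARF lexical blocks with different discriminator numbers.
void count(int *Counters, bool Hot) {
  if (Hot) Counters[0]++; else Counters[1]++; // three blocks, one line
}
```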
+ if (NoDiscriminators || + !hasDebugInfo(F) || + F.getParent()->getDwarfVersion() < 4) + return false; bool Changed = false; Module *M = F.getParent(); diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk index ab4d8a8..cbd8dd0 100644 --- a/lib/Transforms/Utils/Android.mk +++ b/lib/Transforms/Utils/Android.mk @@ -11,6 +11,7 @@ transforms_utils_SRC_FILES := \ CloneModule.cpp \ CmpInstAnalysis.cpp \ CodeExtractor.cpp \ + CtorUtils.cpp \ DemoteRegToStack.cpp \ GlobalStatus.cpp \ InlineFunction.cpp \ diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index b3cd5ce..80b7e22 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -68,8 +68,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { if (!isa<PHINode>(BB->begin())) return; - AliasAnalysis *AA = 0; - MemoryDependenceAnalysis *MemDep = 0; + AliasAnalysis *AA = nullptr; + MemoryDependenceAnalysis *MemDep = nullptr; if (P) { AA = P->getAnalysisIfAvailable<AliasAnalysis>(); MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>(); @@ -130,7 +130,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { BasicBlock *OnlySucc = BB; for (; SI != SE; ++SI) if (*SI != OnlySucc) { - OnlySucc = 0; // There are multiple distinct successors! + OnlySucc = nullptr; // There are multiple distinct successors! break; } @@ -217,7 +217,7 @@ void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL, /// void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I) { - assert(I->getParent() == 0 && + assert(I->getParent() == nullptr && "ReplaceInstWithInst: Instruction already inserted into basic block!"); // Insert the new instruction into the basic block... @@ -254,7 +254,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { // If the successor only has a single pred, split the top of the successor // block. assert(SP == BB && "CFG broken"); - SP = NULL; + SP = nullptr; return SplitBlock(Succ, Succ->begin(), P); } @@ -310,7 +310,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, if (!P) return; LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - Loop *L = LI ? LI->getLoopFor(OldBB) : 0; + Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr; // If we need to preserve loop analyses, collect some information about how // this split will affect loops. @@ -351,7 +351,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, // loop). To find this, examine each of the predecessors and determine which // loops enclose them, and select the most-nested loop which contains the // loop containing the block being split. - Loop *InnermostPredLoop = 0; + Loop *InnermostPredLoop = nullptr; for (ArrayRef<BasicBlock*>::iterator i = Preds.begin(), e = Preds.end(); i != e; ++i) { BasicBlock *Pred = *i; @@ -384,51 +384,68 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef<BasicBlock*> Preds, BranchInst *BI, Pass *P, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. - AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; + AliasAnalysis *AA = P ? 
P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr; + SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. - Value *InVal = 0; + Value *InVal = nullptr; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 1, e = Preds.size(); i != e; ++i) - if (InVal != PN->getIncomingValueForBlock(Preds[i])) { - InVal = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (!PredSet.count(PN->getIncomingBlock(i))) + continue; + if (!InVal) + InVal = PN->getIncomingValue(i); + else if (InVal != PN->getIncomingValue(i)) { + InVal = nullptr; break; } + } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - // Explicitly check the BB index here to handle duplicates in Preds. - int Idx = PN->getBasicBlockIndex(Preds[i]); - if (Idx >= 0) - PN->removeIncomingValue(Idx, false); - } - } else { - // If the values coming into the block are not the same, we need a PHI. - // Create the new PHI node, insert it into NewBB at the end of the block - PHINode *NewPHI = - PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); - if (AA) AA->copyValue(PN, NewPHI); - // Move all of the PHI values for 'Preds' to the new PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - Value *V = PN->removeIncomingValue(Preds[i], false); - NewPHI->addIncoming(V, Preds[i]); - } + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values + // aren't invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) + if (PredSet.count(PN->getIncomingBlock(i))) + PN->removeIncomingValue(i, false); + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + continue; + } - InVal = NewPHI; + // If the values coming into the block are not the same, we need a new + // PHI. + // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); + if (AA) + AA->copyValue(PN, NewPHI); + + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values aren't + // invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { + BasicBlock *IncomingBB = PN->getIncomingBlock(i); + if (PredSet.count(IncomingBB)) { + Value *V = PN->removeIncomingValue(i, false); + NewPHI->addIncoming(V, IncomingBB); + } } - // Add an incoming value to the PHI node in the loop for the preheader - // edge. - PN->addIncoming(InVal, NewBB); + PN->addIncoming(NewPHI, NewBB); } } @@ -542,7 +559,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, e = pred_end(OrigBB); } - BasicBlock *NewBB2 = 0; + BasicBlock *NewBB2 = nullptr; if (!NewBB2Preds.empty()) { // Create another basic block for the rest of OrigBB's predecessors. 
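The backwards walk that this UpdatePHINodes rewrite introduces (and comments on twice) is a general idiom worth noting. A self-contained sketch of the same pattern on a plain vector:

```cpp
#include <cstddef>
#include <vector>

// Erasing by index while walking forward shifts every later index; walking
// from the back keeps all not-yet-visited indices valid and minimizes the
// shifting cost when many entries are removed.
void removeMatching(std::vector<int> &Values, int Doomed) {
  for (std::ptrdiff_t I = static_cast<std::ptrdiff_t>(Values.size()) - 1;
       I >= 0; --I)
    if (Values[I] == Doomed)
      Values.erase(Values.begin() + I); // entries below I are untouched
}
```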
NewBB2 = BasicBlock::Create(OrigBB->getContext(), @@ -607,7 +624,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); i != e; ++i) { Value *V = *i; - Instruction *NewBC = 0; + Instruction *NewBC = nullptr; if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { // Return value might be bitcasted. Clone and insert it before the // return instruction. @@ -724,32 +741,32 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse) { PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); - BasicBlock *Pred1 = NULL; - BasicBlock *Pred2 = NULL; + BasicBlock *Pred1 = nullptr; + BasicBlock *Pred2 = nullptr; if (SomePHI) { if (SomePHI->getNumIncomingValues() != 2) - return NULL; + return nullptr; Pred1 = SomePHI->getIncomingBlock(0); Pred2 = SomePHI->getIncomingBlock(1); } else { pred_iterator PI = pred_begin(BB), PE = pred_end(BB); if (PI == PE) // No predecessor - return NULL; + return nullptr; Pred1 = *PI++; if (PI == PE) // Only one predecessor - return NULL; + return nullptr; Pred2 = *PI++; if (PI != PE) // More than two predecessors - return NULL; + return nullptr; } // We can only handle branches. Other control flow will be lowered to // branches if possible anyway. BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); - if (Pred1Br == 0 || Pred2Br == 0) - return 0; + if (!Pred1Br || !Pred2Br) + return nullptr; // Eliminate code duplication by ensuring that Pred1Br is conditional if // either are. @@ -759,7 +776,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // required anyway, we stand no chance of eliminating it, so the xform is // probably not profitable. if (Pred1Br->isConditional()) - return 0; + return nullptr; std::swap(Pred1, Pred2); std::swap(Pred1Br, Pred2Br); @@ -769,8 +786,8 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // The only thing we have to watch out for here is to make sure that Pred2 // doesn't have incoming edges from other blocks. If it does, the condition // doesn't dominate BB. - if (Pred2->getSinglePredecessor() == 0) - return 0; + if (!Pred2->getSinglePredecessor()) + return nullptr; // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". @@ -785,7 +802,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, } else { // We know that one arm of the conditional goes to BB, so the other must // go somewhere unrelated, and this must not be an "if statement". - return 0; + return nullptr; } return Pred1Br->getCondition(); @@ -795,12 +812,12 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // BB. Don't panic! If both blocks only have a single (identical) // predecessor, and THAT is a conditional branch, then we're all ok! BasicBlock *CommonPred = Pred1->getSinglePredecessor(); - if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) - return 0; + if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor()) + return nullptr; // Otherwise, if this is a conditional branch, then we can use it! 
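GetIfCondition, modernized in the hunks above, recognizes two CFG shapes. In source terms (illustrative functions):

```cpp
// The "diamond": the merge block has two predecessors, both reached from one
// conditional branch. GetIfCondition returns C.
int diamond(bool C, int A, int B) {
  int R;
  if (C)
    R = A;
  else
    R = B;
  return R; // PHI(A, B) at the merge block
}

// The "triangle": one predecessor is the conditional block itself.
int triangle(bool C, int A) {
  int R = 0;
  if (C)
    R = A;
  return R; // PHI(A, 0) at the merge block
}
```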
BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); - if (BI == 0) return 0; + if (!BI) return nullptr; assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 76ebb9f..80bd516 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "break-crit-edges" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -30,6 +29,8 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +#define DEBUG_TYPE "break-crit-edges" + STATISTIC(NumBroken, "Number of blocks inserted"); namespace { @@ -141,7 +142,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges, bool DontDeleteUselessPhis, bool SplitLandingPads) { - if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr; assert(!isa<IndirectBrInst>(TI) && "Cannot split critical edge from IndirectBrInst"); @@ -151,7 +152,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. - if (DestBB->isLandingPad()) return 0; + if (DestBB->isLandingPad()) return nullptr; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), @@ -207,15 +208,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If we don't have a pass object, we can't update anything... - if (P == 0) return NewBB; + if (!P) return NewBB; DominatorTreeWrapperPass *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0; + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); // If we have nothing to update, just return. - if (DT == 0 && LI == 0) + if (!DT && !LI) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is @@ -251,7 +252,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); - DomTreeNode *DestBBNode = 0; + DomTreeNode *DestBBNode = nullptr; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 82384a1..be00b69 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -27,7 +27,8 @@ using namespace llvm; /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. 
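The CastToCStr fix in the next hunk is small but semantic: in LLVM IR a bitcast may not change address spaces, so building the i8* type in the value's own address space is what keeps the emitted cast legal. An annotated restatement of the patched helper (same logic as the hunk below, comments mine):

```cpp
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// getInt8PtrTy(AS) yields i8 addrspace(AS)*, so the bitcast never crosses
// address spaces; crossing one would require an addrspacecast instead.
static Value *castToCStr(Value *V, IRBuilder<> &B) {
  unsigned AS = V->getType()->getPointerAddressSpace();
  return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
}
```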
Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { - return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr"); + unsigned AS = V->getType()->getPointerAddressSpace(); + return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -35,7 +36,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strlen)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -64,7 +65,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strnlen)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -94,7 +95,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strchr)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; @@ -120,7 +121,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strncmp)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -153,7 +154,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strcpy)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -177,7 +178,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strncpy)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -204,7 +205,7 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcpy_chk)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS; @@ -232,7 +233,7 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memchr)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS; @@ -260,7 +261,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcmp)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -347,7 +348,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), @@ -369,7 
+370,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -393,7 +394,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -426,7 +427,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -459,7 +460,7 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 1f517d0..f2d5e07 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "bypass-slow-division" #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Function.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "bypass-slow-division" + namespace { struct DivOpInfo { bool SignedOp; @@ -53,11 +54,11 @@ namespace llvm { } static DivOpInfo getEmptyKey() { - return DivOpInfo(false, 0, 0); + return DivOpInfo(false, nullptr, nullptr); } static DivOpInfo getTombstoneKey() { - return DivOpInfo(true, 0, 0); + return DivOpInfo(true, nullptr, nullptr); } static unsigned getHashValue(const DivOpInfo &Val) { diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index dac2090..e10ca90 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMTransformUtils BreakCriticalEdges.cpp BuildLibCalls.cpp BypassSlowDivision.cpp + CtorUtils.cpp CloneFunction.cpp CloneModule.cpp CmpInstAnalysis.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index a199086..5c8f20d 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -159,7 +159,7 @@ static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) { for (DISubprogram Subprogram : Finder.subprograms()) { if (Subprogram.describes(F)) return Subprogram; } - return NULL; + return nullptr; } // Add an operand to an existing MDNode. The new operand will be added at the @@ -359,7 +359,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If the condition was a known constant in the callee... ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); // Or is a known constant in the caller... 
- if (Cond == 0) { + if (!Cond) { Value *V = VMap[BI->getCondition()]; Cond = dyn_cast_or_null<ConstantInt>(V); } @@ -375,7 +375,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) { // If switching on a value known constant in the caller. ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); - if (Cond == 0) { // Or known constant after constant prop in the callee... + if (!Cond) { // Or known constant after constant prop in the callee... Value *V = VMap[SI->getCondition()]; Cond = dyn_cast_or_null<ConstantInt>(V); } @@ -454,7 +454,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, BI != BE; ++BI) { Value *V = VMap[BI]; BasicBlock *NewBB = cast_or_null<BasicBlock>(V); - if (NewBB == 0) continue; // Dead block. + if (!NewBB) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 64df089..eb67db1 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -47,8 +47,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { GlobalVariable *GV = new GlobalVariable(*New, I->getType()->getElementType(), I->isConstant(), I->getLinkage(), - (Constant*) 0, I->getName(), - (GlobalVariable*) 0, + (Constant*) nullptr, I->getName(), + (GlobalVariable*) nullptr, I->getThreadLocalMode(), I->getType()->getAddressSpace()); GV->copyAttributesFrom(I); @@ -67,8 +67,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(), - I->getName(), NULL, New); + auto *PTy = cast<PointerType>(I->getType()); + auto *GA = + GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + I->getLinkage(), I->getName(), New); GA->copyAttributesFrom(I); VMap[I] = GA; } @@ -105,8 +107,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); - if (const Constant *C = I->getAliasee()) - GA->setAliasee(MapValue(C, VMap)); + if (const GlobalObject *C = I->getAliasee()) + GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap))); } // And named metadata.... diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp index 8fa412a..3b15a0a 100644 --- a/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -84,7 +84,7 @@ Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, case 7: // True. return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); } - return NULL; + return nullptr; } /// PredicatesFoldable - Return true if both predicates match sign or if at diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index b814842..e70a7d6 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -38,6 +38,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "code-extractor" + // Provide a command-line option to aggregate function arguments into a struct // for functions produced by the code extractor. 
This is useful when converting // extracted functions to pthread-based code, as only one argument (void*) can @@ -118,7 +120,7 @@ buildExtractionBlockSet(const RegionNode &RN) { } CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs) - : DT(0), AggregateArgs(AggregateArgs||AggregateArgsOpt), + : DT(nullptr), AggregateArgs(AggregateArgs||AggregateArgsOpt), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, @@ -410,7 +412,7 @@ static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { return P->getIncomingBlock(U); } - return 0; + return nullptr; } /// emitCallAndSwitchStatement - This method sets up the caller side by adding @@ -438,14 +440,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, StructValues.push_back(*i); } else { AllocaInst *alloca = - new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc", + new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc", codeReplacer->getParent()->begin()->begin()); ReloadOutputs.push_back(alloca); params.push_back(alloca); } } - AllocaInst *Struct = 0; + AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector<Type*> ArgTypes; for (ValueSet::iterator v = StructValues.begin(), @@ -455,7 +457,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Allocate a struct at the beginning of this function Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); Struct = - new AllocaInst(StructArgTy, 0, "structArg", + new AllocaInst(StructArgTy, nullptr, "structArg", codeReplacer->getParent()->begin()->begin()); params.push_back(Struct); @@ -484,7 +486,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Reload the outputs passed in by reference for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = 0; + Value *Output = nullptr; if (AggregateArgs) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); @@ -537,7 +539,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, newFunction); unsigned SuccNum = switchVal++; - Value *brVal = 0; + Value *brVal = nullptr; switch (NumExitBlocks) { case 0: case 1: break; // No value needed. @@ -633,7 +635,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Check if the function should return a value if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, 0, TheSwitch); // Return void + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); @@ -685,7 +687,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) - return 0; + return nullptr; ValueSet inputs, outputs; diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp new file mode 100644 index 0000000..a359424 --- /dev/null +++ b/lib/Transforms/Utils/CtorUtils.cpp @@ -0,0 +1,183 @@ +//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines functions that are used to process llvm.global_ctors. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "ctor_utils" + +namespace llvm { + +namespace { +/// Given a specified llvm.global_ctors list, install the +/// specified array. +void installGlobalCtors(GlobalVariable *GCL, + const std::vector<Function *> &Ctors) { + // If we made a change, reassemble the initializer list. + Constant *CSVals[3]; + + StructType *StructTy = + cast<StructType>(GCL->getType()->getElementType()->getArrayElementType()); + + // Create the new init list. + std::vector<Constant *> CAList; + for (Function *F : Ctors) { + Type *Int32Ty = Type::getInt32Ty(GCL->getContext()); + if (F) { + CSVals[0] = ConstantInt::get(Int32Ty, 65535); + CSVals[1] = F; + } else { + CSVals[0] = ConstantInt::get(Int32Ty, 0x7fffffff); + CSVals[1] = Constant::getNullValue(StructTy->getElementType(1)); + } + // FIXME: Only allow the 3-field form in LLVM 4.0. + size_t NumElts = StructTy->getNumElements(); + if (NumElts > 2) + CSVals[2] = Constant::getNullValue(StructTy->getElementType(2)); + CAList.push_back( + ConstantStruct::get(StructTy, makeArrayRef(CSVals, NumElts))); + } + + // Create the array initializer. + Constant *CA = + ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList); + + // If we didn't change the number of elements, don't create a new GV. + if (CA->getType() == GCL->getInitializer()->getType()) { + GCL->setInitializer(CA); + return; + } + + // Create the new global and insert it next to the existing list. + GlobalVariable *NGV = + new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), + CA, "", GCL->getThreadLocalMode()); + GCL->getParent()->getGlobalList().insert(GCL, NGV); + NGV->takeName(GCL); + + // Nuke the old list, replacing any uses with the new one. + if (!GCL->use_empty()) { + Constant *V = NGV; + if (V->getType() != GCL->getType()) + V = ConstantExpr::getBitCast(V, GCL->getType()); + GCL->replaceAllUsesWith(V); + } + GCL->eraseFromParent(); +} + +/// Given a llvm.global_ctors list that we can understand, +/// return a list of the functions and null terminator as a vector. +std::vector<Function*> parseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector<Function *>(); + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + std::vector<Function *> Result; + Result.reserve(CA->getNumOperands()); + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + ConstantStruct *CS = cast<ConstantStruct>(*i); + Result.push_back(dyn_cast<Function>(CS->getOperand(1))); + } + return Result; +} + +/// Find the llvm.global_ctors list, verifying that all initializers have an +/// init priority of 65535. +GlobalVariable *findGlobalCtors(Module &M) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return nullptr; + + // Verify that the initializer is simple enough for us to handle. We are + // only allowed to optimize the initializer if it is unique. 
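For readers unfamiliar with the structure the new CtorUtils.cpp manipulates: every dynamic initializer in a translation unit becomes an entry in the @llvm.global_ctors array that parseGlobalCtors and findGlobalCtors read back. In C++ source terms:

```cpp
// The constructor below runs before main() and is emitted as a
// { i32 65535, void ()* } entry in @llvm.global_ctors; the newer 3-field
// form adds an i8* of associated data, as the FIXME above notes.
struct Init {
  Init() { /* set up global state */ }
};
static Init TheInit; // one 65535-priority global_ctors entry

int main() { return 0; }
```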
+ if (!GV->hasUniqueInitializer()) + return nullptr; + + if (isa<ConstantAggregateZero>(GV->getInitializer())) + return GV; + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + if (isa<ConstantAggregateZero>(*i)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(*i); + if (isa<ConstantPointerNull>(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa<Function>(CS->getOperand(1))) + return nullptr; + + // Init priority must be standard. + ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) + return nullptr; + } + + return GV; +} +} // namespace + +/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the +/// entries for which it returns true. Return true if anything changed. +bool optimizeGlobalCtorsList(Module &M, + function_ref<bool(Function *)> ShouldRemove) { + GlobalVariable *GlobalCtors = findGlobalCtors(M); + if (!GlobalCtors) + return false; + + std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors); + if (Ctors.empty()) + return false; + + bool MadeChange = false; + + // Loop over global ctors, optimizing them when we can. + for (unsigned i = 0; i != Ctors.size(); ++i) { + Function *F = Ctors[i]; + // Found a null terminator in the middle of the list, prune off the rest of + // the list. + if (!F) { + if (i != Ctors.size() - 1) { + Ctors.resize(i + 1); + MadeChange = true; + } + break; + } + DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); + + // We cannot simplify external ctor functions. + if (F->empty()) + continue; + + // If we can evaluate the ctor at compile time, do. + if (ShouldRemove(F)) { + Ctors.erase(Ctors.begin() + i); + MadeChange = true; + --i; + continue; + } + } + + if (!MadeChange) + return false; + + installGlobalCtors(GlobalCtors, Ctors); + return true; +} + +} // End llvm namespace diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index ac6926f..9972b22 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -25,17 +25,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Instruction *AllocaPoint) { if (I.use_empty()) { I.eraseFromParent(); - return 0; + return nullptr; } // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(I.getType(), 0, + Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", AllocaPoint); } else { Function *F = I.getParent()->getParent(); - Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", + Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", F->getEntryBlock().begin()); } @@ -56,7 +56,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == &I) { Value *&V = Loads[PN->getIncomingBlock(i)]; - if (V == 0) { + if (!V) { // Insert the load into the predecessor block V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, PN->getIncomingBlock(i)->getTerminator()); @@ -110,17 +110,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { if (P->use_empty()) { P->eraseFromParent(); - return 0; + return nullptr; } // Create a stack slot to hold the value. 
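Returning to CtorUtils for a moment before the DemoteRegToStack hunks: a typical caller hands optimizeGlobalCtorsList a predicate and lets the helper rewrite the list. A usage sketch; the predicate here is a hypothetical stand-in, not an API from the patch:

```cpp
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
using namespace llvm;

// Hypothetical predicate; a real caller (e.g. GlobalOpt) would try to
// evaluate the ctor's body at compile time and return true on success.
static bool ctorIsRemovable(Function *F) {
  return F->getName() == "noop_ctor";
}

// Every ctor for which the callback returns true is dropped from
// @llvm.global_ctors; the list is reassembled only if something changed.
bool pruneCtors(Module &M) {
  return optimizeGlobalCtorsList(M, ctorIsRemovable);
}
```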
AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(P->getType(), 0, + Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", + Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", F->getEntryBlock().begin()); } diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 39c80f8..51ead40 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "flattencfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -22,16 +21,19 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +#define DEBUG_TYPE "flattencfg" + namespace { class FlattenCFGOpt { AliasAnalysis *AA; /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. - bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P = nullptr); /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. - bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr); /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -126,9 +128,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, if (PHI) return false; // For simplicity, avoid cases containing PHI nodes. - BasicBlock *LastCondBlock = NULL; - BasicBlock *FirstCondBlock = NULL; - BasicBlock *UnCondBlock = NULL; + BasicBlock *LastCondBlock = nullptr; + BasicBlock *FirstCondBlock = nullptr; + BasicBlock *UnCondBlock = nullptr; int Idx = -1; // Check predecessors of \param BB. 
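Before moving on from DemoteRegToStack: the ".reg2mem" slots created above implement the classic register-to-memory demotion. In source terms (illustrative code):

```cpp
// An SSA value that is awkward to keep in a register is given a stack slot:
// each definition becomes a store, each use a reload, and the PHI disappears.
static int compute(int X) { return X * 2; }

int before(bool C, int A, int B) {
  int V = compute(A);
  if (C)
    V = compute(B);
  return V; // a PHI of the two definitions in SSA form
}
// After DemoteRegToStack (conceptually): 'V' lives in an alloca named
// "V.reg2mem"; both assignments store into it and the return reloads it.
```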
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index e9ebc45..12057e4 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -61,7 +61,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
     } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
       if (!GS.HasMultipleAccessingFunctions) {
         const Function *F = I->getParent()->getParent();
-        if (GS.AccessingFunction == 0)
+        if (!GS.AccessingFunction)
           GS.AccessingFunction = F;
         else if (GS.AccessingFunction != F)
           GS.HasMultipleAccessingFunctions = true;
@@ -176,6 +176,6 @@ bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
 
 GlobalStatus::GlobalStatus()
     : IsCompared(false), IsLoaded(false), StoredType(NotStored),
-      StoredOnceValue(0), AccessingFunction(0),
+      StoredOnceValue(nullptr), AccessingFunction(nullptr),
       HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
       Ordering(NotAtomic) {}
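GlobalStatus packages the use-list facts (AccessingFunction, HasMultipleAccessingFunctions, and so on) that GlobalOpt keys on. A small sketch of a consumer, assuming llvm/Transforms/Utils/GlobalStatus.h; note that analyzeGlobal returns true on failure, i.e. when the use-list could not be analyzed:

#include "llvm/IR/GlobalVariable.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;

// Returns the one function that accesses GV, or nullptr if there is none,
// more than one, or the global could not be analyzed.
const Function *singleAccessor(const GlobalVariable *GV) {
  GlobalStatus GS;
  if (GlobalStatus::analyzeGlobal(GV, GS))
    return nullptr;
  return GS.HasMultipleAccessingFunctions ? nullptr : GS.AccessingFunction;
}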
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 86def3e..e01d0c3 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
@@ -51,8 +52,8 @@ namespace {
 
   public:
     InvokeInliningInfo(InvokeInst *II)
-      : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
-        CallerLPad(0), InnerEHValuesPHI(0) {
+      : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
+        CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
       // If there are PHI nodes in the unwind destination block, we need to keep
       // track of which values came into them from the invoke before removing
       // the edge from this block.
@@ -289,13 +290,13 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
     ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
 
     // Only copy the edge if the call was inlined!
-    if (VMI == VMap.end() || VMI->second == 0)
+    if (VMI == VMap.end() || VMI->second == nullptr)
       continue;
 
     // If the call was inlined, but then constant folded, there is no edge to
     // add. Check for this case.
     Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
-    if (NewCall == 0) continue;
+    if (!NewCall) continue;
 
     // Remember that this call site got inlined for the client of
     // InlineFunction.
@@ -306,7 +307,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
     // happens, set the callee of the new call site to a more precise
     // destination. This can also happen if the call graph node of the caller
     // was just unnecessarily imprecise.
-    if (I->second->getFunction() == 0)
+    if (!I->second->getFunction())
       if (Function *F = CallSite(NewCall).getCalledFunction()) {
         // Indirect call site resolved to direct call.
         CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
@@ -322,13 +323,44 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   CallerNode->removeCallEdgeFor(CS);
 }
 
+static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
+                                    BasicBlock *InsertBlock,
+                                    InlineFunctionInfo &IFI) {
+  LLVMContext &Context = Src->getContext();
+  Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+  Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
+  Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) };
+  Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+  IRBuilder<> builder(InsertBlock->begin());
+  Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp");
+  Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp");
+
+  Value *Size;
+  if (IFI.DL == nullptr)
+    Size = ConstantExpr::getSizeOf(AggTy);
+  else
+    Size = ConstantInt::get(Type::getInt64Ty(Context),
+                            IFI.DL->getTypeStoreSize(AggTy));
+
+  // Always generate a memcpy of alignment 1 here because we don't know
+  // the alignment of the src pointer. Other optimizations can infer
+  // better alignment.
+  Value *CallArgs[] = {
+    DstCast, SrcCast, Size,
+    ConstantInt::get(Type::getInt32Ty(Context), 1),
+    ConstantInt::getFalse(Context) // isVolatile
+  };
+  builder.CreateCall(MemCpyFn, CallArgs);
+}
+
 /// HandleByValArgument - When inlining a call site that has a byval argument,
 /// we have to make the implicit memcpy explicit by adding it.
 static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
                                   const Function *CalledFunc,
                                   InlineFunctionInfo &IFI,
                                   unsigned ByValAlignment) {
-  Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+  PointerType *ArgTy = cast<PointerType>(Arg->getType());
+  Type *AggTy = ArgTy->getElementType();
 
   // If the called function is readonly, then it could not mutate the caller's
   // copy of the byval'd memory. In this case, it is safe to elide the copy and
@@ -349,11 +381,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
     // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
     // for code quality, but rarely happens and is required for correctness.
   }
-
-  LLVMContext &Context = Arg->getContext();
-  Type *VoidPtrTy = Type::getInt8PtrTy(Context);
-
   // Create the alloca. If we have DataLayout, use nice alignment.
   unsigned Align = 1;
   if (IFI.DL)
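HandleByValArgumentInit (added above) emits the byval copy with a hand-built llvm.memcpy intrinsic call. The same call can also be produced with the IRBuilder convenience wrapper, which performs the i8* casts itself; a sketch, with the same deliberately conservative alignment of 1:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

void emitByValCopy(Instruction *InsertPt, Value *Dst, Value *Src, Value *Size) {
  IRBuilder<> B(InsertPt);
  // Alignment 1 because the source pointer's alignment is unknown here;
  // later passes are free to infer something stronger.
  B.CreateMemCpy(Dst, Src, Size, /*Align=*/1, /*isVolatile=*/false);
}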
@@ -366,32 +394,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
 
   Function *Caller = TheCall->getParent()->getParent();
 
-  Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+  Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(),
                                     &*Caller->begin()->begin());
-  // Emit a memcpy.
-  Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
-  Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
-                                                 Intrinsic::memcpy,
-                                                 Tys);
-  Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
-  Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
-
-  Value *Size;
-  if (IFI.DL == 0)
-    Size = ConstantExpr::getSizeOf(AggTy);
-  else
-    Size = ConstantInt::get(Type::getInt64Ty(Context),
-                            IFI.DL->getTypeStoreSize(AggTy));
-
-  // Always generate a memcpy of alignment 1 here because we don't know
-  // the alignment of the src pointer. Other optimizations can infer
-  // better alignment.
-  Value *CallArgs[] = {
-    DestCast, SrcCast, Size,
-    ConstantInt::get(Type::getInt32Ty(Context), 1),
-    ConstantInt::getFalse(Context) // isVolatile
-  };
-  IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
+  IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
 
   // Uses of the argument in the function should use our new alloca
   // instead.
@@ -417,8 +422,10 @@ static bool isUsedByLifetimeMarker(Value *V) {
 // hasLifetimeMarkers - Check whether the given alloca already has
 // lifetime.start or lifetime.end intrinsics.
 static bool hasLifetimeMarkers(AllocaInst *AI) {
-  Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
-  if (AI->getType() == Int8PtrTy)
+  Type *Ty = AI->getType();
+  Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
+                                       Ty->getPointerAddressSpace());
+  if (Ty == Int8PtrTy)
     return isUsedByLifetimeMarker(AI);
 
   // Do a scan to find all the casts to i8*.
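The hasLifetimeMarkers change matters for allocas in non-zero address spaces: Type::getInt8PtrTy(Ctx) with no second argument builds an address-space-0 pointer type, so the direct i8* check could never fire for any other address space. The corrected comparison, sketched in isolation:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

bool isI8PtrAlloca(AllocaInst *AI) {
  PointerType *Ty = AI->getType(); // an alloca always produces a pointer
  return Ty == Type::getInt8PtrTy(Ty->getContext(),
                                  Ty->getPointerAddressSpace());
}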
@@ -472,6 +479,33 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
   }
 }
 
+/// Returns a musttail call instruction if one immediately precedes the given
+/// return instruction with an optional bitcast instruction between them.
+static CallInst *getPrecedingMustTailCall(ReturnInst *RI) {
+  Instruction *Prev = RI->getPrevNode();
+  if (!Prev)
+    return nullptr;
+
+  if (Value *RV = RI->getReturnValue()) {
+    if (RV != Prev)
+      return nullptr;
+
+    // Look through the optional bitcast.
+    if (auto *BI = dyn_cast<BitCastInst>(Prev)) {
+      RV = BI->getOperand(0);
+      Prev = BI->getPrevNode();
+      if (!Prev || RV != Prev)
+        return nullptr;
+    }
+  }
+
+  if (auto *CI = dyn_cast<CallInst>(Prev)) {
+    if (CI->isMustTailCall())
+      return CI;
+  }
+  return nullptr;
+}
+
 /// InlineFunction - This function inlines the called function into the basic
 /// block of the caller. This returns false if it is not possible to inline
 /// this call. The program is still in a well defined state if this occurs
@@ -491,15 +525,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   IFI.reset();
 
   const Function *CalledFunc = CS.getCalledFunction();
-  if (CalledFunc == 0 ||          // Can't inline external function or indirect
+  if (!CalledFunc ||              // Can't inline external function or indirect
       CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;
 
-  // If the call to the callee is not a tail call, we must clear the 'tail'
-  // flags on any calls that we inline.
-  bool MustClearTailCallFlags =
-    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
-
   // If the call to the callee cannot throw, set the 'nounwind' flag on any
   // calls that we inline.
   bool MarkNoUnwind = CS.doesNotThrow();
@@ -519,7 +548,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   }
 
   // Get the personality function from the callee if it contains a landing pad.
-  Value *CalleePersonality = 0;
+  Value *CalleePersonality = nullptr;
   for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
        I != E; ++I)
     if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
@@ -562,6 +591,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   { // Scope to destroy VMap after cloning.
     ValueToValueMapTy VMap;
+    // Keep a list of pair (dst, src) to emit byval initializations.
+    SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
 
     assert(CalledFunc->arg_size() == CS.arg_size() &&
            "No varargs calls can be inlined!");
@@ -581,11 +612,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       if (CS.isByValArgument(ArgNo)) {
         ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
                                         CalledFunc->getParamAlignment(ArgNo+1));
-
-        // Calls that we inline may use the new alloca, so we need to clear
-        // their 'tail' flags if HandleByValArgument introduced a new alloca and
-        // the callee has calls.
-        MustClearTailCallFlags |= ActualArg != *AI;
+        if (ActualArg != *AI)
+          ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
       }
 
       VMap[I] = ActualArg;
@@ -602,6 +630,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     // Remember the first block that is newly cloned over.
     FirstNewBlock = LastBlock; ++FirstNewBlock;
 
+    // Inject byval arguments initialization.
+    for (std::pair<Value*, Value*> &Init : ByValInit)
+      HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
+                              FirstNewBlock, IFI);
+
     // Update the callgraph if requested.
     if (IFI.CG)
       UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
@@ -619,7 +652,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     for (BasicBlock::iterator I = FirstNewBlock->begin(),
          E = FirstNewBlock->end(); I != E; ) {
       AllocaInst *AI = dyn_cast<AllocaInst>(I++);
-      if (AI == 0) continue;
+      if (!AI) continue;
 
       // If the alloca is now dead, remove it. This often occurs due to code
       // specialization.
@@ -651,6 +684,45 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     }
   }
 
+  bool InlinedMustTailCalls = false;
+  if (InlinedFunctionInfo.ContainsCalls) {
+    CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
+    if (CallInst *CI = dyn_cast<CallInst>(TheCall))
+      CallSiteTailKind = CI->getTailCallKind();
+
+    for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
+         ++BB) {
+      for (Instruction &I : *BB) {
+        CallInst *CI = dyn_cast<CallInst>(&I);
+        if (!CI)
+          continue;
+
+        // We need to reduce the strength of any inlined tail calls. For
+        // musttail, we have to avoid introducing potential unbounded stack
+        // growth. For example, if functions 'f' and 'g' are mutually recursive
+        // with musttail, we can inline 'g' into 'f' so long as we preserve
+        // musttail on the cloned call to 'f'. If either the inlined call site
+        // or the cloned call site is *not* musttail, the program already has
+        // one frame of stack growth, so it's safe to remove musttail. Here is
+        // a table of example transformations:
+        //
+        //    f -> musttail g -> musttail f  ==>  f -> musttail f
+        //    f -> musttail g ->     tail f  ==>  f ->     tail f
+        //    f ->          g -> musttail f  ==>  f ->          f
+        //    f ->          g ->     tail f  ==>  f ->          f
+        CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
+        ChildTCK = std::min(CallSiteTailKind, ChildTCK);
+        CI->setTailCallKind(ChildTCK);
+        InlinedMustTailCalls |= CI->isMustTailCall();
+
+        // Calls inlined through a 'nounwind' call site should be marked
+        // 'nounwind'.
+        if (MarkNoUnwind)
+          CI->setDoesNotThrow();
+      }
+    }
+  }
+
   // Leave lifetime markers for the static alloca's, scoping them to the
   // function we just inlined.
   if (InsertLifetime && !IFI.StaticAllocas.empty()) {
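The std::min in the loop above works because the TailCallKind enumerators are ordered weakest to strongest (TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2, assuming the 3.5 numbering in llvm/IR/Instructions.h). A sketch of the merge rule in isolation:

#include "llvm/IR/Instructions.h"
#include <algorithm>
using namespace llvm;

// The merged marker can never be stronger than either input: inlining
// through a plain call site must strip musttail/tail, and a plain inner
// call stays plain even under a musttail call site.
CallInst::TailCallKind mergeTailCallKinds(CallInst::TailCallKind CallSiteKind,
                                          CallInst::TailCallKind InnerKind) {
  return std::min(CallSiteKind, InnerKind);
}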
@@ -664,7 +736,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
         continue;
 
       // Try to determine the size of the allocation.
-      ConstantInt *AllocaSize = 0;
+      ConstantInt *AllocaSize = nullptr;
       if (ConstantInt *AIArraySize =
           dyn_cast<ConstantInt>(AI->getArraySize())) {
         if (IFI.DL) {
@@ -683,9 +755,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       }
 
       builder.CreateLifetimeStart(AI, AllocaSize);
-      for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
-        IRBuilder<> builder(Returns[ri]);
-        builder.CreateLifetimeEnd(AI, AllocaSize);
+      for (ReturnInst *RI : Returns) {
+        // Don't insert llvm.lifetime.end calls between a musttail call and a
+        // return. The return kills all local allocas.
+        if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+          continue;
+        IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
       }
     }
   }
@@ -704,33 +779,56 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
 
     // Insert a call to llvm.stackrestore before any return instructions in the
     // inlined function.
-    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
-      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
+    for (ReturnInst *RI : Returns) {
+      // Don't insert llvm.stackrestore calls between a musttail call and a
+      // return. The return will restore the stack pointer.
+      if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+        continue;
+      IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
     }
   }
 
-  // If we are inlining tail call instruction through a call site that isn't
-  // marked 'tail', we must remove the tail marker for any calls in the inlined
-  // code. Also, calls inlined through a 'nounwind' call site should be marked
-  // 'nounwind'.
-  if (InlinedFunctionInfo.ContainsCalls &&
-      (MustClearTailCallFlags || MarkNoUnwind)) {
-    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
-         BB != E; ++BB)
-      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-        if (CallInst *CI = dyn_cast<CallInst>(I)) {
-          if (MustClearTailCallFlags)
-            CI->setTailCall(false);
-          if (MarkNoUnwind)
-            CI->setDoesNotThrow();
-        }
-  }
-
   // If we are inlining for an invoke instruction, we must make sure to rewrite
   // any call instructions into invoke instructions.
   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
     HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
 
+  // Handle any inlined musttail call sites. In order for a new call site to be
+  // musttail, the source of the clone and the inlined call site must have been
+  // musttail. Therefore it's safe to return without merging control into the
+  // phi below.
+  if (InlinedMustTailCalls) {
+    // Check if we need to bitcast the result of any musttail calls.
+    Type *NewRetTy = Caller->getReturnType();
+    bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy;
+
+    // Handle the returns preceded by musttail calls separately.
+    SmallVector<ReturnInst *, 8> NormalReturns;
+    for (ReturnInst *RI : Returns) {
+      CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI);
+      if (!ReturnedMustTail) {
+        NormalReturns.push_back(RI);
+        continue;
+      }
+      if (!NeedBitCast)
+        continue;
+
+      // Delete the old return and any preceding bitcast.
+      BasicBlock *CurBB = RI->getParent();
+      auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
+      RI->eraseFromParent();
+      if (OldCast)
+        OldCast->eraseFromParent();
+
+      // Insert a new bitcast and return with the right type.
+      IRBuilder<> Builder(CurBB);
+      Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
+    }
+
+    // Leave behind the normal returns so we can merge control flow.
+    std::swap(Returns, NormalReturns);
+  }
+
   // If we cloned in _exactly one_ basic block, and if that block ends in a
   // return instruction, we splice the body of the inlined callee directly into
   // the calling basic block.
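The AllocaSize computed above is the usual static-alloca byte size. Factored out as a sketch, assuming DataLayout is available, and returning nullptr for variable-length allocas so the lifetime marker gets no size operand:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

ConstantInt *staticAllocaSizeInBytes(AllocaInst *AI, const DataLayout &DL) {
  ConstantInt *ArraySize = dyn_cast<ConstantInt>(AI->getArraySize());
  if (!ArraySize)
    return nullptr; // variable-length alloca: size unknown at compile time
  uint64_t Bytes =
      DL.getTypeAllocSize(AI->getAllocatedType()) * ArraySize->getZExtValue();
  return ConstantInt::get(Type::getInt64Ty(AI->getContext()), Bytes);
}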
@@ -774,7 +872,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   // "starter" and "ender" blocks. How we accomplish this depends on whether
   // this is an invoke instruction or a call instruction.
   BasicBlock *AfterCallBB;
-  BranchInst *CreatedBranchToNormalDest = NULL;
+  BranchInst *CreatedBranchToNormalDest = nullptr;
   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
 
     // Add an unconditional branch to make this look like the CallInst case...
@@ -813,7 +911,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   // any users of the original call/invoke instruction.
   Type *RTy = CalledFunc->getReturnType();
 
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
   if (Returns.size() > 1) {
     // The PHI node should go at the front of the new basic block to merge all
     // possible incoming values.
@@ -886,6 +984,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   // Since we are now done with the Call/Invoke, we can delete it.
   TheCall->eraseFromParent();
 
+  // If we inlined any musttail calls and the original return is now
+  // unreachable, delete it. It can only contain a bitcast and ret.
+  if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
+    AfterCallBB->eraseFromParent();
+
   // We should always be able to fold the entry block of the function into the
   // single predecessor of the block...
   assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index e73a543..9f91eeb 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "integer-division"
 #include "llvm/Transforms/Utils/IntegerDivision.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -24,6 +23,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "integer-division"
+
 /// Generate code to compute the remainder of two signed integers. Returns the
 /// remainder, which will have the sign of the dividend. Builder's insert point
 /// should be pointing where the caller wants code generated, e.g. at the srem
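IntegerDivision generates shift-and-subtract expansions of div/rem for targets without hardware divide. A sketch of driving it, assuming the entry points declared in llvm/Transforms/Utils/IntegerDivision.h (expandDivision is documented as replacing the given divide with the generated sequence):

#include "llvm/IR/InstrTypes.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
using namespace llvm;

// Div must be an sdiv or udiv instruction; returns true on success.
bool lowerDivide(BinaryOperator *Div) {
  return expandDivision(Div);
}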
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index d538175..51a3d9c 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -27,7 +27,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "lcssa"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
@@ -44,6 +43,8 @@
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "lcssa"
+
 STATISTIC(NumLCSSA, "Number of live out of a loop variables");
 
 /// Return true if the specified block is in the list.
@@ -267,8 +268,6 @@ struct LCSSA : public FunctionPass {
   }
 
 private:
-  bool processLoop(Loop &L);
-
   void verifyAnalysis() const override;
 };
 }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 9d0be8b..aedd787 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -43,6 +43,8 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "local"
+
 STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
 
 //===----------------------------------------------------------------------===//
@@ -159,7 +161,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
       // Otherwise, check to see if the switch only branches to one destination.
       // We do this by reseting "TheOnlyDest" to null when we find two non-equal
       // destinations.
-      if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0;
+      if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr;
     }
 
     if (CI && !TheOnlyDest) {
@@ -180,7 +182,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
         // Found case matching a constant operand?
         BasicBlock *Succ = SI->getSuccessor(i);
         if (Succ == TheOnlyDest)
-          TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+          TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
         else
           Succ->removePredecessor(BB);
       }
@@ -233,7 +235,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
 
     for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
       if (IBI->getDestination(i) == TheOnlyDest)
-        TheOnlyDest = 0;
+        TheOnlyDest = nullptr;
       else
         IBI->getDestination(i)->removePredecessor(IBI->getParent());
     }
@@ -331,7 +333,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
     // dead as we go.
     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
       Value *OpV = I->getOperand(i);
-      I->setOperand(i, 0);
+      I->setOperand(i, nullptr);
 
       if (!OpV->use_empty()) continue;
@@ -894,24 +896,26 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
     return PrefAlign;
   }
 
-  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+  if (auto *GO = dyn_cast<GlobalObject>(V)) {
     // If there is a large requested alignment and we can, bump up the alignment
     // of the global.
-    if (GV->isDeclaration()) return Align;
+    if (GO->isDeclaration())
+      return Align;
     // If the memory we set aside for the global may not be the memory used by
     // the final program then it is impossible for us to reliably enforce the
     // preferred alignment.
-    if (GV->isWeakForLinker()) return Align;
+    if (GO->isWeakForLinker())
+      return Align;
 
-    if (GV->getAlignment() >= PrefAlign)
-      return GV->getAlignment();
+    if (GO->getAlignment() >= PrefAlign)
+      return GO->getAlignment();
     // We can only increase the alignment of the global if it has no alignment
     // specified or if it is not assigned a section. If it is assigned a
     // section, the global could be densely packed with other objects in the
     // section, increasing the alignment could cause padding issues.
-    if (!GV->hasSection() || GV->getAlignment() == 0)
-      GV->setAlignment(PrefAlign);
-    return GV->getAlignment();
+    if (!GO->hasSection() || GO->getAlignment() == 0)
+      GO->setAlignment(PrefAlign);
+    return GO->getAlignment();
   }
 
   return Align;
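Switching the dyn_cast from GlobalValue to GlobalObject in the hunk above is a type-hierarchy fix: only GlobalObject (functions and global variables) carries alignment and section attributes, while the other GlobalValue subclass, GlobalAlias, has no storage of its own. A condensed sketch of the guard conditions, with the alignment-zero escape hatch elided:

#include "llvm/IR/GlobalObject.h"
using namespace llvm;

bool mayBumpAlignment(const GlobalValue *GV) {
  const GlobalObject *GO = dyn_cast<GlobalObject>(GV);
  return GO && !GO->isDeclaration() && !GO->isWeakForLinker() &&
         !GO->hasSection();
}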
@@ -928,7 +932,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
   unsigned BitWidth = DL ?
                       DL->getPointerTypeSizeInBits(V->getType()) : 64;
   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-  ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+  computeKnownBits(V, KnownZero, KnownOne, DL);
   unsigned TrailZ = KnownZero.countTrailingOnes();
 
   // Avoid trouble with ridiculously large TrailZ values, such as
@@ -981,10 +985,10 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   if (LdStHasDebugValue(DIVar, SI))
     return true;
 
-  Instruction *DbgVal = NULL;
+  Instruction *DbgVal = nullptr;
   // If an argument is zero extended then use argument directly. The ZExt
   // may be zapped by an optimization pass in future.
-  Argument *ExtendedArg = NULL;
+  Argument *ExtendedArg = nullptr;
   if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
     ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
   if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
@@ -993,14 +997,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
     DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI);
   else
     DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI);
-
-  // Propagate any debug metadata from the store onto the dbg.value.
-  DebugLoc SIDL = SI->getDebugLoc();
-  if (!SIDL.isUnknown())
-    DbgVal->setDebugLoc(SIDL);
-  // Otherwise propagate debug metadata from dbg.declare.
-  else
-    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  DbgVal->setDebugLoc(DDI->getDebugLoc());
   return true;
 }
 
@@ -1020,17 +1017,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
 
   Instruction *DbgVal =
     Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, LI);
-
-  // Propagate any debug metadata from the store onto the dbg.value.
-  DebugLoc LIDL = LI->getDebugLoc();
-  if (!LIDL.isUnknown())
-    DbgVal->setDebugLoc(LIDL);
-  // Otherwise propagate debug metadata from dbg.declare.
-  else
-    DbgVal->setDebugLoc(DDI->getDebugLoc());
+  DbgVal->setDebugLoc(DDI->getDebugLoc());
   return true;
 }
 
+/// Determine whether this alloca is either a VLA or an array.
+static bool isArray(AllocaInst *AI) {
+  return AI->isArrayAllocation() ||
+         AI->getType()->getElementType()->isArrayTy();
+}
+
 /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
 /// of llvm.dbg.value intrinsics.
 bool llvm::LowerDbgDeclare(Function &F) {
@@ -1049,20 +1045,26 @@ bool llvm::LowerDbgDeclare(Function &F) {
         AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
         // If this is an alloca for a scalar variable, insert a dbg.value
         // at each load and store to the alloca and erase the dbg.declare.
-        if (AI && !AI->isArrayAllocation()) {
-
-          // We only remove the dbg.declare intrinsic if all uses are
-          // converted to dbg.value intrinsics.
-          bool RemoveDDI = true;
+        // The dbg.values allow tracking a variable even if it is not
+        // stored on the stack, while the dbg.declare can only describe
+        // the stack slot (and at a lexical-scope granularity). Later
+        // passes will attempt to elide the stack slot.
+        if (AI && !isArray(AI)) {
           for (User *U : AI->users())
            if (StoreInst *SI = dyn_cast<StoreInst>(U))
              ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
            else if (LoadInst *LI = dyn_cast<LoadInst>(U))
              ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
-            else
-              RemoveDDI = false;
-          if (RemoveDDI)
-            DDI->eraseFromParent();
+            else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+              // This is a call by-value or some other instruction that
+              // takes a pointer to the variable. Insert a *value*
+              // intrinsic that describes the alloca.
+              auto DbgVal =
+                  DIB.insertDbgValueIntrinsic(AI, 0,
+                                              DIVariable(DDI->getVariable()),
+                                              CI);
+              DbgVal->setDebugLoc(DDI->getDebugLoc());
+            }
+          DDI->eraseFromParent();
         }
       }
   return true;
@@ -1076,7 +1078,7 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
     if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
       return DDI;
 
-  return 0;
+  return nullptr;
 }
 
 bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 47083ea..f7787da 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,7 +37,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "loop-simplify"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetOperations.h"
@@ -63,6 +62,8 @@
 #include "llvm/Transforms/Utils/LoopUtils.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "loop-simplify"
+
 STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
 STATISTIC(NumNested  , "Number of nested loops split out");
 
@@ -85,7 +86,7 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
 
   // Figure out *which* outside block to put this after. Prefer an outside
   // block that neighbors a BB actually in the loop.
-  BasicBlock *FoundBB = 0;
+  BasicBlock *FoundBB = nullptr;
   for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
     Function::iterator BBI = SplitPreds[i];
     if (++BBI != NewBB->getParent()->end() &&
@@ -119,7 +120,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
     // If the loop is branched to from an indirect branch, we won't
     // be able to fully transform the loop, because it prohibits
     // edge splitting.
-    if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+    if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
 
     // Keep track of it.
     OutsideBlocks.push_back(P);
@@ -160,14 +161,14 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
     BasicBlock *P = *I;
     if (L->contains(P)) {
       // Don't do this if the loop is exited via an indirect branch.
-      if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+      if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
 
       LoopBlocks.push_back(P);
     }
   }
 
   assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
-  BasicBlock *NewExitBB = 0;
+  BasicBlock *NewExitBB = nullptr;
 
   if (Exit->isLandingPad()) {
     SmallVector<BasicBlock*, 2> NewBBs;
@@ -211,7 +212,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
     PHINode *PN = cast<PHINode>(I);
     ++I;
-    if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+    if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) {
      // This is a degenerate PHI already, don't modify it!
      PN->replaceAllUsesWith(V);
      if (AA) AA->deleteValue(PN);
@@ -226,7 +227,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
     // We found something tasty to remove.
     return PN;
   }
-  return 0;
+  return nullptr;
 }
 
 /// \brief If this loop has multiple backedges, try to pull one of them out into
@@ -253,14 +254,14 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
                                 LoopInfo *LI, ScalarEvolution *SE, Pass *PP) {
   // Don't try to separate loops without a preheader.
   if (!Preheader)
-    return 0;
+    return nullptr;
 
   // The header is not a landing pad; preheader insertion should ensure this.
   assert(!L->getHeader()->isLandingPad() &&
          "Can't insert backedge to landing pad");
 
   PHINode *PN = findPHIToPartitionLoops(L, AA, DT);
-  if (PN == 0) return 0;  // No known way to partition.
+  if (!PN) return nullptr;  // No known way to partition.
 
   // Pull out all predecessors that have varying values in the loop. This
   // handles the case when a PHI node has multiple instances of itself as
@@ -271,7 +272,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
         !L->contains(PN->getIncomingBlock(i))) {
       // We can't split indirectbr edges.
       if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
-        return 0;
+        return nullptr;
       OuterLoopPreds.push_back(PN->getIncomingBlock(i));
     }
   }
@@ -362,7 +363,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
 
   // Unique backedge insertion currently depends on having a preheader.
   if (!Preheader)
-    return 0;
+    return nullptr;
 
   // The header is not a landing pad; preheader insertion should ensure this.
   assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
@@ -374,7 +375,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
 
     // Indirectbr edges cannot be split, so we must fail if we find one.
     if (isa<IndirectBrInst>(P->getTerminator()))
-      return 0;
+      return nullptr;
 
     if (P != Preheader) BackedgeBlocks.push_back(P);
   }
@@ -403,7 +404,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
     // preheader over to the new PHI node.
     unsigned PreheaderIdx = ~0U;
     bool HasUniqueIncomingValue = true;
-    Value *UniqueValue = 0;
+    Value *UniqueValue = nullptr;
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
       BasicBlock *IBB = PN->getIncomingBlock(i);
       Value *IV = PN->getIncomingValue(i);
@@ -412,7 +413,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
       } else {
         NewPN->addIncoming(IV, IBB);
         if (HasUniqueIncomingValue) {
-          if (UniqueValue == 0)
+          if (!UniqueValue)
             UniqueValue = IV;
           else if (UniqueValue != IV)
             HasUniqueIncomingValue = false;
@@ -609,7 +610,7 @@ ReprocessLoop:
     PHINode *PN;
     for (BasicBlock::iterator I = L->getHeader()->begin();
          (PN = dyn_cast<PHINode>(I++)); )
-      if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+      if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) {
        if (AA) AA->deleteValue(PN);
        if (SE) SE->forgetValue(PN);
        PN->replaceAllUsesWith(V);
@@ -653,7 +654,8 @@ ReprocessLoop:
        if (Inst == CI)
          continue;
        if (!L->makeLoopInvariant(Inst, AnyInvariant,
-                                  Preheader ? Preheader->getTerminator() : 0)) {
+                                  Preheader ? Preheader->getTerminator()
+                                            : nullptr)) {
          AllInvariant = false;
          break;
        }
@@ -761,12 +763,6 @@ namespace {
 
     /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
     void verifyAnalysis() const override;
-
-  private:
-    bool ProcessLoop(Loop *L);
-    BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
-    Loop *SeparateNestedLoop(Loop *L, BasicBlock *Preheader);
-    BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
   };
 }
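Several hunks above share one idiom: SimplifyInstruction(PN, nullptr, nullptr, DT) detects a degenerate PHI, whose uses are then rewired before the node is deleted. The idiom in isolation, with the analysis bookkeeping on AA/SE elided:

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Fold a PHI whose incoming values are all identical (or undef); returns
// true if the node was removed.
bool foldDegeneratePHI(PHINode *PN, DominatorTree *DT) {
  if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) {
    PN->replaceAllUsesWith(V);
    PN->eraseFromParent();
    return true;
  }
  return false;
}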
@@ -782,7 +778,7 @@ INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
 
 char &llvm::LoopSimplifyID = LoopSimplify::ID;
 Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
 
-/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
 /// it in any convenient order) inserting preheaders...
 ///
 bool LoopSimplify::runOnFunction(Function &F) {
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index d2dfc20..d953e30 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -16,7 +16,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "loop-unroll"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -25,6 +24,8 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -34,6 +35,8 @@
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "loop-unroll"
+
 // TODO: Should these be here or in LoopUnroll?
 STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
 STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
@@ -68,10 +71,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
   // pred, and if there is only one distinct successor of the predecessor, and
   // if there are no PHI nodes.
   BasicBlock *OnlyPred = BB->getSinglePredecessor();
-  if (!OnlyPred) return 0;
+  if (!OnlyPred) return nullptr;
 
   if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
-    return 0;
+    return nullptr;
 
   DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
 
@@ -227,20 +230,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
       (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
   }
 
+  // Report the unrolling decision.
+  DebugLoc LoopLoc = L->getStartLoc();
+  Function *F = Header->getParent();
+  LLVMContext &Ctx = F->getContext();
+
   if (CompletelyUnroll) {
     DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
           << " with trip count " << TripCount << "!\n");
+    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
+                           Twine("completely unrolled loop with ") +
+                               Twine(TripCount) + " iterations");
   } else {
     DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
+    Twine DiagMsg("unrolled loop by a factor of " + Twine(Count));
     if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
       DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+      DiagMsg.concat(" with a breakout at trip " + Twine(BreakoutTrip));
     } else if (TripMultiple != 1) {
       DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+      DiagMsg.concat(" with " + Twine(TripMultiple) + " trips per branch");
     } else if (RuntimeTripCount) {
       DEBUG(dbgs() << " with run-time trip count");
+      DiagMsg.concat(" with run-time trip count");
     }
     DEBUG(dbgs() << "!\n");
+    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg);
   }
 
   bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
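emitOptimizationRemark is the new user-facing diagnostic channel surfaced by -pass-remarks; a minimal client, assuming the 3.5 signature in llvm/IR/DiagnosticInfo.h. One caveat worth flagging in the hunk above: Twine does not own its operands, and Twine::concat returns a new Twine rather than mutating, so building DiagMsg incrementally is fragile; forming the message in a single expression, as below, sidesteps both issues:

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void remarkUnrolled(Function &F, const DebugLoc &Loc, unsigned Count) {
  emitOptimizationRemark(F.getContext(), "loop-unroll", F, Loc,
                         "unrolled loop by a factor of " + Twine(Count));
}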
@@ -411,7 +427,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
     }
   }
 
-  DominatorTree *DT = 0;
+  DominatorTree *DT = nullptr;
   if (PP) {
     // FIXME: Reconstruct dom info, because it is not preserved properly.
     // Incrementally updating domtree after loop unrolling would be easy.
@@ -458,7 +474,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
 
   Loop *OuterL = L->getParentLoop();
   // Remove the loop from the LoopPassManager if it's completely removed.
-  if (CompletelyUnroll && LPM != NULL)
+  if (CompletelyUnroll && LPM != nullptr)
     LPM->deleteLoopFromQueue(L);
 
   // If we have a pass and a DominatorTree we should re-simplify impacted loops
@@ -470,7 +486,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
       OuterL = L;
     if (OuterL) {
       ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
-      simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ 0, SE);
+      simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE);
       formLCSSARecursively(*OuterL, *DT, SE);
     }
   }
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d801d5f..5bef091 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -21,7 +21,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "loop-unroll"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopIterator.h"
@@ -37,6 +36,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "loop-unroll"
+
 STATISTIC(NumRuntimeUnrolled,
           "Number of loops unrolled with run-time trip counts");
 
@@ -58,7 +59,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
                           BasicBlock *OrigPH, BasicBlock *NewPH,
                           ValueToValueMapTy &LVMap, Pass *P) {
   BasicBlock *Latch = L->getLoopLatch();
-  assert(Latch != 0 && "Loop must have a latch");
+  assert(Latch && "Loop must have a latch");
 
   // Create a PHI node for each outgoing value from the original loop
   // (which means it is an outgoing value from the prolog code too).
@@ -110,7 +111,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
     new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
                  ConstantInt::get(TripCount->getType(), Count));
   BasicBlock *Exit = L->getUniqueExitBlock();
-  assert(Exit != 0 && "Loop must have a single exit block only");
+  assert(Exit && "Loop must have a single exit block only");
   // Split the exit to maintain loop canonicalization guarantees
   SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
   if (!Exit->isLandingPad()) {
@@ -232,7 +233,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
 
   // Make sure the loop is in canonical form, and there is a single
   // exit block only.
-  if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+  if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
     return false;
 
   // Use Scalar Evolution to compute the trip count. This allows more
@@ -240,7 +241,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
   if (!LPM)
     return false;
   ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
-  if (SE == 0)
+  if (!SE)
     return false;
 
   // Only unroll loops with a computable trip count and the trip count needs
@@ -301,7 +302,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
   ValueToValueMapTy LVMap;
   Function *F = Header->getParent();
   // These variables are used to update the CFG links in each iteration
-  BasicBlock *CompareBB = 0;
+  BasicBlock *CompareBB = nullptr;
   BasicBlock *LastLoopBB = PH;
   // Get an ordered list of blocks in the loop to help with the ordering of the
   // cloned blocks in the prolog code
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 3e61289..ff89e74 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "lower-expect-intrinsic"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/BasicBlock.h"
@@ -29,6 +28,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "lower-expect-intrinsic"
+
 STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
 
 static cl::opt<uint32_t>
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index b1f758e..66d57b0 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "lowerinvoke"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -25,6 +24,8 @@
 #include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "lowerinvoke"
+
 STATISTIC(NumInvokes, "Number of invokes replaced");
 
 namespace {
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 6fb7410..9ef694c 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -27,6 +27,8 @@
 #include <algorithm>
 using namespace llvm;
 
+#define DEBUG_TYPE "lower-switch"
+
 namespace {
   /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
   /// instructions.
@@ -51,7 +53,8 @@ namespace {
       Constant* High;
       BasicBlock* BB;
 
-      CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
+      CaseRange(Constant *low = nullptr, Constant *high = nullptr,
+                BasicBlock *bb = nullptr) :
        Low(low), High(high), BB(bb) { }
     };
 
@@ -182,7 +185,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
   F->getBasicBlockList().insert(++FI, NewLeaf);
 
   // Emit comparison
-  ICmpInst* Comp = NULL;
+  ICmpInst* Comp = nullptr;
   if (Leaf.Low == Leaf.High) {
     // Make the seteq instruction...
     Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index a188ac5..189caa7 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "mem2reg"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Dominators.h"
@@ -22,6 +21,8 @@
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "mem2reg"
+
 STATISTIC(NumPromoted, "Number of alloca's promoted");
 
 namespace {
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index ff6e6f9..d9dbbca 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -24,16 +24,16 @@ static void appendToGlobalArray(const char *Array,
                                 Module &M, Function *F, int Priority) {
   IRBuilder<> IRB(M.getContext());
   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
-  StructType *Ty = StructType::get(
-      IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL);
-
-  Constant *RuntimeCtorInit = ConstantStruct::get(
-      Ty, IRB.getInt32(Priority), F, NULL);
 
   // Get the current set of static global constructors and add the new ctor
   // to the list.
   SmallVector<Constant *, 16> CurrentCtors;
-  if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
+  StructType *EltTy;
+  if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
+    // If there is a global_ctors array, use the existing struct type, which can
+    // have 2 or 3 fields.
+    ArrayType *ATy = cast<ArrayType>(GVCtor->getType()->getElementType());
+    EltTy = cast<StructType>(ATy->getElementType());
     if (Constant *Init = GVCtor->getInitializer()) {
       unsigned n = Init->getNumOperands();
       CurrentCtors.reserve(n + 1);
@@ -41,13 +41,26 @@ static void appendToGlobalArray(const char *Array,
         CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
     }
     GVCtor->eraseFromParent();
+  } else {
+    // Use a simple two-field struct if there isn't one already.
+    EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
+                            nullptr);
   }
 
+  // Build a 2 or 3 field global_ctor entry. We don't take a comdat key.
+  Constant *CSVals[3];
+  CSVals[0] = IRB.getInt32(Priority);
+  CSVals[1] = F;
+  // FIXME: Drop support for the two element form in LLVM 4.0.
+  if (EltTy->getNumElements() >= 3)
+    CSVals[2] = llvm::Constant::getNullValue(IRB.getInt8PtrTy());
+  Constant *RuntimeCtorInit =
+      ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
+
+  CurrentCtors.push_back(RuntimeCtorInit);
+
   // Create a new initializer.
-  ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(),
-                                 CurrentCtors.size());
+  ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
 
   // Create the new global variable and replace all uses of
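The 2-versus-3-field handling exists because global_ctors entries were historically { i32 priority, void ()* ctor } and later grew an optional third i8* field. Clients such as the sanitizers go through the public helpers rather than this static function; a sketch, assuming llvm/Transforms/Utils/ModuleUtils.h:

#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;

// Register Init to run at program startup with the default priority
// (65535), the value the CtorUtils code earlier treats as "standard".
void registerModuleCtor(Module &M, Function *Init) {
  appendToGlobalCtors(M, Init, 65535);
}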
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 25fab89..06d73fe 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -25,7 +25,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "mem2reg"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -51,6 +50,8 @@
 #include <queue>
 using namespace llvm;
 
+#define DEBUG_TYPE "mem2reg"
+
 STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
 STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");
 STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");
@@ -59,6 +60,7 @@ STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");
 bool llvm::isAllocaPromotable(const AllocaInst *AI) {
   // FIXME: If the memory unit is of pointer or integer type, we can permit
   // assignments to subsections of the memory unit.
+  unsigned AS = AI->getType()->getAddressSpace();
 
   // Only allow direct and non-volatile loads and stores...
   for (const User *U : AI->users()) {
@@ -79,12 +81,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
           II->getIntrinsicID() != Intrinsic::lifetime_end)
         return false;
     } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+      if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
         return false;
       if (!onlyUsedByLifetimeMarkers(BCI))
         return false;
     } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
-      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+      if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
         return false;
       if (!GEPI->hasAllZeroIndices())
         return false;
@@ -114,11 +116,11 @@ struct AllocaInfo {
   void clear() {
     DefiningBlocks.clear();
     UsingBlocks.clear();
-    OnlyStore = 0;
-    OnlyBlock = 0;
+    OnlyStore = nullptr;
+    OnlyBlock = nullptr;
     OnlyUsedInOneBlock = true;
-    AllocaPointerVal = 0;
-    DbgDeclare = 0;
+    AllocaPointerVal = nullptr;
+    DbgDeclare = nullptr;
   }
 
   /// Scan the uses of the specified alloca, filling in the AllocaInfo used
@@ -146,7 +148,7 @@ struct AllocaInfo {
       }
 
       if (OnlyUsedInOneBlock) {
-        if (OnlyBlock == 0)
+        if (!OnlyBlock)
           OnlyBlock = User->getParent();
         else if (OnlyBlock != User->getParent())
           OnlyUsedInOneBlock = false;
@@ -162,7 +164,7 @@ class RenamePassData {
 public:
   typedef std::vector<Value *> ValVector;
 
-  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+  RenamePassData() : BB(nullptr), Pred(nullptr), Values() {}
   RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
       : BB(B), Pred(P), Values(V) {}
   BasicBlock *BB;
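isAllocaPromotable and PromoteMemToReg are the public mem2reg surface; the usual driver scans the entry block, promotes what it can, and repeats. A sketch, assuming a DominatorTree is available:

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <vector>
using namespace llvm;

void promoteEntryAllocas(Function &F, DominatorTree &DT) {
  BasicBlock &Entry = F.getEntryBlock();
  while (true) {
    std::vector<AllocaInst *> Allocas;
    // Static allocas conventionally sit in the entry block.
    for (BasicBlock::iterator I = Entry.begin(), E = Entry.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
        if (isAllocaPromotable(AI))
          Allocas.push_back(AI);
    if (Allocas.empty())
      break;
    PromoteMemToReg(Allocas, DT);
  }
}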
@@ -471,7 +473,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
 
     // Find the nearest store that has a lower index than this load.
     StoresByIndexTy::iterator I =
         std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
-                         std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
+                         std::make_pair(LoadIdx,
+                                        static_cast<StoreInst *>(nullptr)),
                          less_first());
 
     if (I == StoresByIndex.begin())
@@ -632,7 +635,7 @@ void PromoteMem2Reg::run() {
   // and inserting the phi nodes we marked as necessary
   //
   std::vector<RenamePassData> RenamePassWorkList;
-  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+  RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values));
   do {
     RenamePassData RPD;
     RPD.swap(RenamePassWorkList.back());
@@ -682,7 +685,7 @@ void PromoteMem2Reg::run() {
     PHINode *PN = I->second;
 
     // If this PHI node merges one value and/or undefs, get the value.
-    if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
+    if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT)) {
       if (AST && PN->getType()->isPointerTy())
         AST->deleteValue(PN);
       PN->replaceAllUsesWith(V);
@@ -990,7 +993,7 @@ NextIteration:
     // Get the next phi node.
     ++PNI;
     APN = dyn_cast<PHINode>(PNI);
-    if (APN == 0)
+    if (!APN)
       break;
 
     // Verify that it is missing entries. If not, it is not being inserted
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 28f5c44..3fcb789 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "ssaupdater"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/TinyPtrVector.h"
@@ -28,20 +27,22 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "ssaupdater"
+
 typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
 
 static AvailableValsTy &getAvailableVals(void *AV) {
   return *static_cast<AvailableValsTy*>(AV);
 }
 
 SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
-  : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
+  : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {}
 
 SSAUpdater::~SSAUpdater() {
   delete static_cast<AvailableValsTy*>(AV);
 }
 
 void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
-  if (AV == 0)
+  if (!AV)
     AV = new AvailableValsTy();
   else
     getAvailableVals(AV).clear();
@@ -54,7 +55,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
 }
 
 void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
-  assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+  assert(ProtoType && "Need to initialize SSAUpdater");
   assert(ProtoType == V->getType() &&
         "All rewritten values must have the same type");
   getAvailableVals(AV)[BB] = V;
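SSAUpdater's lifecycle is Initialize, seed one value per defining block, then query; GetValueInMiddleOfBlock (next hunk) inserts any PHIs required. A sketch with illustrative names:

#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

Value *joinTwoDefs(Type *Ty, BasicBlock *DefBB1, Value *V1,
                   BasicBlock *DefBB2, Value *V2, BasicBlock *UseBB) {
  SSAUpdater SSA;
  SSA.Initialize(Ty, "merged");
  SSA.AddAvailableValue(DefBB1, V1);
  SSA.AddAvailableValue(DefBB2, V2);
  // Creates PHI nodes as needed and returns the value live into UseBB.
  return SSA.GetValueInMiddleOfBlock(UseBB);
}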
@@ -90,7 +91,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
   // Otherwise, we have the hard case. Get the live-in values for each
   // predecessor.
   SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
-  Value *SingularValue = 0;
+  Value *SingularValue = nullptr;
 
   // We can get our predecessor info by walking the pred_iterator list, but it
   // is relatively slow. If we already have PHI nodes in this block, walk one
@@ -105,7 +106,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
       if (i == 0)
         SingularValue = PredVal;
       else if (PredVal != SingularValue)
-        SingularValue = 0;
+        SingularValue = nullptr;
     }
   } else {
     bool isFirstPred = true;
@@ -119,7 +120,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
         SingularValue = PredVal;
         isFirstPred = false;
       } else if (PredVal != SingularValue)
-        SingularValue = 0;
+        SingularValue = nullptr;
     }
   }
 
@@ -128,7 +129,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
     return UndefValue::get(ProtoType);
 
   // Otherwise, if all the merged values are the same, just use it.
-  if (SingularValue != 0)
+  if (SingularValue)
     return SingularValue;
 
   // Otherwise, we do need a PHI: check to see if we already have one available
@@ -291,7 +292,7 @@ public:
     PHINode *PHI = ValueIsPHI(Val, Updater);
     if (PHI && PHI->getNumIncomingValues() == 0)
       return PHI;
-    return 0;
+    return nullptr;
   }
 
   /// GetPHIValue - For the specified PHI instruction, return the value
@@ -401,7 +402,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
     // the order of these instructions in the block. If the first use in the
     // block is a load, then it uses the live in value. The last store defines
     // the live out value. We handle this by doing a linear scan of the block.
-    Value *StoredValue = 0;
+    Value *StoredValue = nullptr;
     for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
       if (LoadInst *L = dyn_cast<LoadInst>(II)) {
         // If this is a load from an unrelated pointer, ignore it.
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 1e88587..150dbdd 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "simplifycfg"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -50,6 +49,8 @@
 using namespace llvm;
 using namespace PatternMatch;
 
+#define DEBUG_TYPE "simplifycfg"
+
 static cl::opt<unsigned>
 PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
    cl::desc("Control the amount of phi node folding to perform (default = 1)"));
@@ -212,6 +213,7 @@ static unsigned ComputeSpeculationCost(const User *I) {
     if (!cast<GEPOperator>(I)->hasAllConstantIndices())
       return UINT_MAX;
     return 1;
+  case Instruction::ExtractValue:
   case Instruction::Load:
   case Instruction::Add:
   case Instruction::Sub:
@@ -272,12 +274,12 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
     // branch to BB, then it must be in the 'conditional' part of the "if
     // statement". If not, it definitely dominates the region.
     BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
-    if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB)
+    if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
       return true;
 
   // If we aren't allowing aggressive promotion anymore, then don't consider
   // instructions in the 'if region'.
-  if (AggressiveInsts == 0) return false;
+  if (!AggressiveInsts) return false;
 
   // If we have seen this instruction before, don't count it again.
   if (AggressiveInsts->count(I)) return true;
@@ -332,7 +334,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) {
       return cast<ConstantInt>
         (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
   }
-  return 0;
+  return nullptr;
 }
 
 /// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
@@ -343,7 +345,7 @@ static Value *
 GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
                        const DataLayout *DL, bool isEQ, unsigned &UsedICmps) {
   Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0) return 0;
+  if (!I) return nullptr;
 
   // If this is an icmp against a constant, handle this as one of the cases.
   if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
@@ -390,19 +392,19 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
 
       // If there are a ton of values, we don't want to make a ginormous switch.
       if (Span.getSetSize().ugt(8) || Span.isEmptySet())
-        return 0;
+        return nullptr;
 
       for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
         Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
       UsedICmps++;
       return hasAdd ? RHSVal : I->getOperand(0);
     }
-    return 0;
+    return nullptr;
   }
 
   // Otherwise, we can only handle an | or &, depending on isEQ.
   if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
-    return 0;
+    return nullptr;
 
   unsigned NumValsBeforeLHS = Vals.size();
   unsigned UsedICmpsBeforeLHS = UsedICmps;
@@ -420,19 +422,19 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
 
     // The RHS of the or/and can't be folded in and we haven't used "Extra" yet,
     // set it and return success.
-    if (Extra == 0 || Extra == I->getOperand(1)) {
+    if (Extra == nullptr || Extra == I->getOperand(1)) {
       Extra = I->getOperand(1);
       return LHS;
     }
 
     Vals.resize(NumValsBeforeLHS);
     UsedICmps = UsedICmpsBeforeLHS;
-    return 0;
+    return nullptr;
   }
 
   // If the LHS can't be folded in, but Extra is available and RHS can, try to
   // use LHS as Extra.
-  if (Extra == 0 || Extra == I->getOperand(0)) {
+  if (Extra == nullptr || Extra == I->getOperand(0)) {
     Value *OldExtra = Extra;
     Extra = I->getOperand(0);
     if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL,
@@ -442,11 +444,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
     Extra = OldExtra;
   }
 
-  return 0;
+  return nullptr;
 }
 
 static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
-  Instruction *Cond = 0;
+  Instruction *Cond = nullptr;
   if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
     Cond = dyn_cast<Instruction>(SI->getCondition());
   } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -463,7 +465,7 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
 /// isValueEqualityComparison - Return true if the specified terminator checks
 /// to see if a value is equal to constant integer value.
 Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
-  Value *CV = 0;
+  Value *CV = nullptr;
   if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
     // Do not permit merging of large switch instructions into their
     // predecessors unless there is only one predecessor.
@@ -653,11 +655,11 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
 
   // Otherwise, TI's block must correspond to some matched value. Find out
   // which value (or set of values) this is.
- ConstantInt *TIV = 0; + ConstantInt *TIV = nullptr; BasicBlock *TIBB = TI->getParent(); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == TIBB) { - if (TIV != 0) + if (TIV) return false; // Cannot handle multiple values coming to this block. TIV = PredCases[i].Value; } @@ -665,7 +667,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Okay, we found the one constant that our value can be if we get into TI's // BB. Find out which successor will unconditionally be branched to. - BasicBlock *TheRealDest = 0; + BasicBlock *TheRealDest = nullptr; for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) if (ThisCases[i].Value == TIV) { TheRealDest = ThisCases[i].Dest; @@ -673,7 +675,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, } // If not handled by any explicit cases, it is handled by the default case. - if (TheRealDest == 0) TheRealDest = ThisDef; + if (!TheRealDest) TheRealDest = ThisDef; // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; @@ -681,7 +683,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, if (*SI != CheckEdge) (*SI)->removePredecessor(TIBB); else - CheckEdge = 0; + CheckEdge = nullptr; // Insert the new branch. Instruction *NI = Builder.CreateBr(TheRealDest); @@ -950,10 +952,10 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Okay, last check. If BB is still a successor of PSI, then we must // have an infinite loop case. If so, add an infinitely looping block // to handle the case to preserve the behavior of the code. - BasicBlock *InfLoopBlock = 0; + BasicBlock *InfLoopBlock = nullptr; for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) if (NewSI->getSuccessor(i) == BB) { - if (InfLoopBlock == 0) { + if (!InfLoopBlock) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) InfLoopBlock = BasicBlock::Create(BB->getContext(), @@ -1099,7 +1101,7 @@ HoistTerminator: // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (!SI) SI = cast<SelectInst> (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1144,7 +1146,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // Gather the PHI nodes in BBEnd. std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2; - Instruction *FirstNonPhiInBBEnd = 0; + Instruction *FirstNonPhiInBBEnd = nullptr; for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) { if (PHINode *PN = dyn_cast<PHINode>(I)) { @@ -1222,7 +1224,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // The operands should be either the same or they need to be generated // with a PHI node after sinking. We only handle the case where there is // a single pair of different operands. - Value *DifferentOp1 = 0, *DifferentOp2 = 0; + Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr; unsigned Op1Idx = 0; for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { if (I1->getOperand(I) == I2->getOperand(I)) @@ -1318,11 +1320,11 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB) { StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); if (!StoreToHoist) - return 0; + return nullptr; // Volatile or atomic. 
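isSafeToSpeculateStore, continued below, scans backwards from the branch for an earlier store to the same location; if one is found, the conditional store can later be executed unconditionally as a store of a select. Sketched at the source level (the pass works on IR, so this is only the shape of the rewrite):

//   *p = old;
//   if (c)
//     *p = val;            // store in ThenBB, candidate for speculation
// becomes
//   *p = old;
//   *p = c ? val : old;    // SpeculatedStoreValue feeds the select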
if (!StoreToHoist->isSimple()) - return 0; + return nullptr; Value *StorePtr = StoreToHoist->getPointerOperand(); @@ -1334,7 +1336,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Could be calling an instruction that effects memory like free(). if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI)) - return 0; + return nullptr; StoreInst *SI = dyn_cast<StoreInst>(CurI); // Found the previous store make sure it stores to the same location. @@ -1342,10 +1344,10 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Found the previous store, return its value operand. return SI->getValueOperand(); else if (SI) - return 0; // Unknown store. + return nullptr; // Unknown store. } - return 0; + return nullptr; } /// \brief Speculate a conditional basic block flattening the CFG. @@ -1411,8 +1413,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; unsigned SpeculationCost = 0; - Value *SpeculatedStoreValue = 0; - StoreInst *SpeculatedStore = 0; + Value *SpeculatedStoreValue = nullptr; + StoreInst *SpeculatedStore = nullptr; for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = std::prev(ThenBB->end()); BBI != BBE; ++BBI) { @@ -1620,7 +1622,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); - if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; + if (!CB || !CB->getType()->isIntegerTy(1)) continue; // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. @@ -1745,7 +1747,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast<PHINode>(BB->begin()); - if (PN == 0) return true; + if (!PN) return true; // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. @@ -1759,11 +1761,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not // worth promoting to select instructions. 
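The "promoting to select instructions" that FoldTwoEntryPHINode's comment refers to is classic if-conversion: when the two arms of a diamond only disagree on the values flowing into a PHI, the branch collapses to a select. As a source-level picture:

//   if (c) x = a; else x = b;   // two-entry PHI at the merge point
// becomes
//   x = c ? a : b;              // one SelectInst, no control flow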
- BasicBlock *DomBlock = 0; + BasicBlock *DomBlock = nullptr; BasicBlock *IfBlock1 = PN->getIncomingBlock(0); BasicBlock *IfBlock2 = PN->getIncomingBlock(1); if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) { - IfBlock1 = 0; + IfBlock1 = nullptr; } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I) @@ -1776,7 +1778,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { } if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { - IfBlock2 = 0; + IfBlock2 = nullptr; } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I) @@ -1959,7 +1961,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); - Instruction *Cond = 0; + Instruction *Cond = nullptr; if (BI->isConditional()) Cond = dyn_cast<Instruction>(BI->getCondition()); else { @@ -1985,12 +1987,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } } - if (Cond == 0) + if (!Cond) return false; } - if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || - Cond->getParent() != BB || !Cond->hasOneUse()) + if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || + Cond->getParent() != BB || !Cond->hasOneUse()) return false; // Only allow this if the condition is a simple instruction that can be @@ -2005,7 +2007,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // that feeds the branch. We later ensure that any values that _it_ uses // were also live in the predecessor, so that we don't unnecessarily create // register pressure or inhibit out-of-order execution. - Instruction *BonusInst = 0; + Instruction *BonusInst = nullptr; if (&*FrontIt != Cond && FrontIt->hasOneUse() && FrontIt->user_back() == Cond && isSafeToSpeculativelyExecute(FrontIt)) { @@ -2040,7 +2042,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; + BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr; if (TrueDest == BB || FalseDest == BB) return false; @@ -2052,7 +2054,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // the common successor, verify that the same value flows in from both // blocks. SmallVector<PHINode*, 4> PHIs; - if (PBI == 0 || PBI->isUnconditional() || + if (!PBI || PBI->isUnconditional() || (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && @@ -2142,7 +2144,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } // If we have a bonus inst, clone it into the predecessor block. - Instruction *NewBonus = 0; + Instruction *NewBonus = nullptr; if (BonusInst) { NewBonus = BonusInst->clone(); @@ -2218,14 +2220,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { MDBuilder(BI->getContext()). createBranchWeights(MDWeights)); } else - PBI->setMetadata(LLVMContext::MD_prof, NULL); + PBI->setMetadata(LLVMContext::MD_prof, nullptr); } else { // Update PHI nodes in the common successors. 
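FoldBranchToCommonDest, whose nullptr-hygiene changes appear above, merges a block whose payload is just a branch condition (plus at most one "bonus" instruction, cloned into the predecessor) into the predecessor's branch. Roughly, for the case where both branches target the same destination on true:

//   if (a) goto T;       // PBI in the predecessor
//   if (b) goto T;       // BI in BB, same destination T
// becomes
//   if (a | b) goto T;   // one combined branch; as the hunk above shows,
//                        // profile metadata is merged or dropped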
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { ConstantInt *PBI_C = cast<ConstantInt>( PHIs[i]->getIncomingValueForBlock(PBI->getParent())); assert(PBI_C->getType()->isIntegerTy(1)); - Instruction *MergedCond = 0; + Instruction *MergedCond = nullptr; if (PBI->getSuccessor(0) == TrueDest) { // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) @@ -2498,16 +2500,16 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // If TrueBB and FalseBB are equal, only try to preserve one copy of that // successor. BasicBlock *KeepEdge1 = TrueBB; - BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; // Then remove the rest. for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { BasicBlock *Succ = OldTerm->getSuccessor(I); // Make sure only to keep exactly one copy of each edge. if (Succ == KeepEdge1) - KeepEdge1 = 0; + KeepEdge1 = nullptr; else if (Succ == KeepEdge2) - KeepEdge2 = 0; + KeepEdge2 = nullptr; else Succ->removePredecessor(OldTerm->getParent()); } @@ -2516,7 +2518,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); // Insert an appropriate new terminator. - if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) { + if (!KeepEdge1 && !KeepEdge2) { if (TrueBB == FalseBB) // We were only looking for one successor, and it was present. // Create an unconditional branch to it. @@ -2538,7 +2540,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // One of the selected values was a successor, but the other wasn't. // Insert an unconditional branch to the one that was found; // the edge to the one that wasn't must be unreachable. - if (KeepEdge1 == 0) + if (!KeepEdge1) // Only TrueBB was found. Builder.CreateBr(TrueBB); else @@ -2639,7 +2641,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); - if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; + if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) return false; SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); if (SI->getCondition() != V) @@ -2681,7 +2683,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back()); - if (PHIUse == 0 || PHIUse != &SuccBlock->front() || + if (PHIUse == nullptr || PHIUse != &SuccBlock->front() || isa<PHINode>(++BasicBlock::iterator(PHIUse))) return false; @@ -2733,16 +2735,16 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); - if (Cond == 0) return false; + if (!Cond) return false; // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. 
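SimplifyBranchOnICmpChain, beginning above, is the consumer of GatherConstantCompares from earlier in this file: an or'd chain of equality tests against constants becomes a switch. Conceptually:

//   if (X == 0 || X == 1 || X == 7) goto T; else goto F;
// becomes, once the constants {0, 1, 7} are collected:
//   switch (X) { case 0: case 1: case 7: goto T; default: goto F; }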
- Value *CompVal = 0; + Value *CompVal = nullptr; std::vector<ConstantInt*> Values; bool TrueWhenEqual = true; - Value *ExtraCase = 0; + Value *ExtraCase = nullptr; unsigned UsedICmps = 0; if (Cond->getOpcode() == Instruction::Or) { @@ -2755,7 +2757,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, } // If we didn't have a multiply compared value, fail. - if (CompVal == 0) return false; + if (!CompVal) return false; // Avoid turning single icmps into a switch. if (UsedICmps <= 1) @@ -3050,7 +3052,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // Find the most popular block. unsigned MaxPop = 0; unsigned MaxIndex = 0; - BasicBlock *MaxBlock = 0; + BasicBlock *MaxBlock = nullptr; for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { if (I->second.first > MaxPop || @@ -3188,7 +3190,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - ComputeMaskedBits(Cond, KnownZero, KnownOne); + computeKnownBits(Cond, KnownZero, KnownOne); // Gather dead cases. SmallVector<ConstantInt*, 8> DeadCases; @@ -3241,13 +3243,13 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex) { if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) - return NULL; // BB must be empty to be a candidate for simplification. + return nullptr; // BB must be empty to be a candidate for simplification. if (!BB->getSinglePredecessor()) - return NULL; // BB must be dominated by the switch. + return nullptr; // BB must be dominated by the switch. BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator()); if (!Branch || !Branch->isUnconditional()) - return NULL; // Terminator must be unconditional branch. + return nullptr; // Terminator must be unconditional branch. 
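EliminateDeadSwitchCases above also picks up this rebase's rename of ComputeMaskedBits to computeKnownBits. The idea, stated conceptually: a case constant that contradicts bits already known about the condition can never be selected.

//   dead if (CaseVal & KnownZero) != 0    // a known-zero bit would be set
//   dead if (~CaseVal & KnownOne) != 0    // a known-one bit would be clear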
BasicBlock *Succ = Branch->getSuccessor(0); @@ -3263,7 +3265,7 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, return PHI; } - return NULL; + return nullptr; } /// ForwardSwitchConditionToPHI - Try to forward the condition of a switch @@ -3336,12 +3338,12 @@ ConstantFold(Instruction *I, if (SelectInst *Select = dyn_cast<SelectInst>(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) - return 0; + return nullptr; if (A->isAllOnesValue()) return LookupConstant(Select->getTrueValue(), ConstantPool); if (A->isNullValue()) return LookupConstant(Select->getFalseValue(), ConstantPool); - return 0; + return nullptr; } SmallVector<Constant *, 4> COps; @@ -3349,7 +3351,7 @@ ConstantFold(Instruction *I, if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) COps.push_back(A); else - return 0; + return nullptr; } if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) @@ -3492,7 +3494,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M, const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values, Constant *DefaultValue, const DataLayout *DL) - : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { + : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), + Array(nullptr) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); @@ -3513,7 +3516,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M, TableContents[Idx] = CaseRes; if (CaseRes != SingleValue) - SingleValue = 0; + SingleValue = nullptr; } // Fill in any holes in the table with the default result. @@ -3526,7 +3529,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M, } if (DefaultValue != SingleValue) - SingleValue = 0; + SingleValue = nullptr; } // If each element in the table contains the same value, we only need to store @@ -3696,7 +3699,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, ConstantInt *MinCaseVal = CI.getCaseValue(); ConstantInt *MaxCaseVal = CI.getCaseValue(); - BasicBlock *CommonDest = 0; + BasicBlock *CommonDest = nullptr; typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; SmallDenseMap<PHINode*, ResultListTy> ResultLists; SmallDenseMap<PHINode*, Constant*> DefaultResults; @@ -3741,8 +3744,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; bool HasDefaultResults = false; if (TableHasHoles) { - HasDefaultResults = GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, - DefaultResultsList, DL); + HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), + &CommonDest, DefaultResultsList, DL); } bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { @@ -4038,8 +4041,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we // can hoist it up to the branching block. 
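The SwitchLookupTable machinery whose constructor appears below degrades gracefully: a single shared value, then a bitmap for tiny results, then a real constant array. The end state of SwitchToLookupTable, sketched as source with illustrative names:

//   switch (i) { case 0: r = 3; break; case 1: r = 5; break;
//                case 2: r = 9; break; default: r = 1; }
// becomes, once every case result is a constant:
//   static const int table[3] = {3, 5, 9};
//   r = (unsigned)i < 3 ? table[i] : 1;   // guarded load from the table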
- if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { - if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + if (BI->getSuccessor(0)->getSinglePredecessor()) { + if (BI->getSuccessor(1)->getSinglePredecessor()) { if (HoistThenElseCodeToIf(BI)) return SimplifyCFG(BB, TTI, DL) | true; } else { @@ -4051,7 +4054,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) return SimplifyCFG(BB, TTI, DL) | true; } - } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally // execute Successor #1 if it branches to successor #0. TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 30f56be..b284e6f 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -13,8 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "indvars" - #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -34,6 +32,8 @@ using namespace llvm; +#define DEBUG_TYPE "indvars" + STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); @@ -56,14 +56,14 @@ namespace { public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) : + SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) : L(Loop), LI(LPM->getAnalysisIfAvailable<LoopInfo>()), SE(SE), DeadInsts(Dead), Changed(false) { DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; assert(LI && "IV simplification requires LoopInfo"); } @@ -72,7 +72,7 @@ namespace { /// Iteratively perform simplification on a worklist of users of the /// specified induction variable. This is the top-level driver that applies /// all simplicitions to users of an IV. - void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL); + void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); @@ -95,25 +95,25 @@ namespace { /// be folded (in case more folding opportunities have been exposed). /// Otherwise return null. Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { - Value *IVSrc = 0; + Value *IVSrc = nullptr; unsigned OperIdx = 0; - const SCEV *FoldedExpr = 0; + const SCEV *FoldedExpr = nullptr; switch (UseInst->getOpcode()) { default: - return 0; + return nullptr; case Instruction::UDiv: case Instruction::LShr: // We're only interested in the case where we know something about // the numerator and have a constant denominator. if (IVOperand != UseInst->getOperand(OperIdx) || !isa<ConstantInt>(UseInst->getOperand(1))) - return 0; + return nullptr; // Attempt to fold a binary operator with constant operand. // e.g. ((I + 1) >> 2) => I >> 2 if (!isa<BinaryOperator>(IVOperand) || !isa<ConstantInt>(IVOperand->getOperand(1))) - return 0; + return nullptr; IVSrc = IVOperand->getOperand(0); // IVSrc must be the (SCEVable) IV, since the other operand is const. 
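foldIVUser, shown above, only rewrites when ScalarEvolution proves equivalence: the candidate operand is folded symbolically into FoldedExpr, and the use is bypassed only if SE->getSCEV(UseInst) equals that folded expression. The hunk's own example, spelled out:

//   %add = add i64 %i, 1
//   %shr = lshr i64 %add, 2
// If SCEV shows lshr(%i + 1, 2) and lshr(%i, 2) are the same expression on
// this IV, %shr is rewritten to use %i directly and the add may become dead.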
@@ -124,7 +124,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) // Get a constant for the divisor. See createSCEV. uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth(); if (D->getValue().uge(BitWidth)) - return 0; + return nullptr; D = ConstantInt::get(UseInst->getContext(), APInt::getOneBitSet(BitWidth, D->getZExtValue())); @@ -133,11 +133,11 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) } // We have something that might fold it's operand. Compare SCEVs. if (!SE->isSCEVable(UseInst->getType())) - return 0; + return nullptr; // Bypass the operand if SCEV can prove it has no effect. if (SE->getSCEV(UseInst) != FoldedExpr) - return 0; + return nullptr; DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand << " -> " << *UseInst << '\n'); @@ -283,8 +283,8 @@ Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser, return IVUser; // Find a branch guarded by the overflow check. - BranchInst *Branch = 0; - Instruction *AddVal = 0; + BranchInst *Branch = nullptr; + Instruction *AddVal = nullptr; for (User *U : II->users()) { if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(U)) { if (ExtractInst->getNumIndices() != 1) diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index bbd65f1..33b3637 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -29,6 +28,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "instsimplify" + STATISTIC(NumSimplified, "Number of redundant instructions removed"); namespace { @@ -47,17 +48,18 @@ namespace { bool runOnFunction(Function &F) override { const DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); - const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0; + const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; do { - for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) - for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) { + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) + // Here be subtlety: the iterator must be incremented before the loop + // body (not sure why), so a range-for loop won't work here. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. 
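The depth_first rewrite in SimplifyInstructions above keeps the manual iterator on purpose, as its new comment notes: the current instruction may be erased mid-loop, so the iterator is advanced before the body runs. The idiom in isolation:

for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
  Instruction *I = BI++;   // step first; erasing I cannot invalidate BI
  // ... simplify I, possibly deleting it and its dead operands ...
}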
On later iterations ToSimplify is not @@ -74,7 +76,15 @@ namespace { ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + if (res) { + // RecursivelyDeleteTriviallyDeadInstruction can remove + // more than one instruction, so simply incrementing the + // iterator does not work. When instructions get deleted + // re-iterate instead. + BI = BB->begin(); BE = BB->end(); + Changed |= res; + } } // Place the list of instructions to simplify on the next loop iteration diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index b5bc391..3b61bb5 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -75,7 +76,7 @@ public: // We never change the calling convention. if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C) - return NULL; + return nullptr; return callOptimizer(CI->getCalledFunction(), CI, B); } @@ -186,14 +187,14 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != DL->getIntPtrType(Context) || FT->getParamType(3) != DL->getIntPtrType(Context)) - return 0; + return nullptr; if (isFoldable(3, 2, false)) { B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; @@ -210,14 +211,14 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != DL->getIntPtrType(Context) || FT->getParamType(3) != DL->getIntPtrType(Context)) - return 0; + return nullptr; if (isFoldable(3, 2, false)) { B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; @@ -234,7 +235,7 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != DL->getIntPtrType(Context) || FT->getParamType(3) != DL->getIntPtrType(Context)) - return 0; + return nullptr; if (isFoldable(3, 2, false)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), @@ -242,7 +243,7 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; @@ -260,7 +261,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || FT->getParamType(2) != DL->getIntPtrType(Context)) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // __strcpy_chk(x,x) -> x @@ -277,10 +278,10 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { } else { // Maybe we can stil fold __strcpy_chk to __memcpy_chk. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // This optimization require DataLayout. 
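The fortified-call optimizers above all share one shape: isFoldable checks that the known object size provably covers the access, and only then does the _chk call degrade to the plain operation. For __strcpy_chk with a constant source, roughly:

//   __strcpy_chk(dst, "hi", dstsize)   // dstsize provably >= 3
// becomes
//   memcpy(dst, "hi", 3);              // strlen("hi") + 1 bytes
// otherwise, as the hunk shows, it may still become
//   __memcpy_chk(dst, "hi", 3, dstsize);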
- if (!DL) return 0; + if (!DL) return nullptr; Value *Ret = EmitMemCpyChk(Dst, Src, @@ -288,7 +289,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { CI->getArgOperand(2), B, DL, TLI); return Ret; } - return 0; + return nullptr; } }; @@ -306,12 +307,12 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0))) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) Value *StrLen = EmitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; } // If a) we don't have any length information, or b) we know this will @@ -325,10 +326,10 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { } else { // Maybe we can stil fold __stpcpy_chk to __memcpy_chk. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // This optimization require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; Type *PT = FT->getParamType(0); Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); @@ -336,10 +337,10 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { ConstantInt::get(DL->getIntPtrType(PT), Len - 1)); if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI)) - return 0; + return nullptr; return DstEnd; } - return 0; + return nullptr; } }; @@ -357,7 +358,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != Type::getInt8PtrTy(Context) || !FT->getParamType(2)->isIntegerTy() || FT->getParamType(3) != DL->getIntPtrType(Context)) - return 0; + return nullptr; if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), @@ -365,7 +366,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { Name.substr(2, 7)); return Ret; } - return 0; + return nullptr; } }; @@ -382,7 +383,7 @@ struct StrCatOpt : public LibCallOptimization { FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType()) - return 0; + return nullptr; // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); @@ -390,7 +391,7 @@ struct StrCatOpt : public LibCallOptimization { // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; --Len; // Unbias length. // Handle the simple, do-nothing case: strcat(x, "") -> x @@ -398,7 +399,7 @@ struct StrCatOpt : public LibCallOptimization { return Dst; // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; return emitStrLenMemCpy(Src, Dst, Len, B); } @@ -409,7 +410,7 @@ struct StrCatOpt : public LibCallOptimization { // memory is to be moved to. We just generate a call to strlen. 
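emitStrLenMemCpy, referenced just below, realizes the strcat fold the comment describes: the only unknown is where the destination string ends, so the pass asks strlen and copies the constant tail, including the nul, to that point. In effect:

//   strcat(dst, "abc")
// becomes
//   memcpy(dst + strlen(dst), "abc", 4);   // 3 chars plus the trailing nul
//   // the result is still dst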
Value *DstLen = EmitStrLen(Dst, B, DL, TLI); if (!DstLen) - return 0; + return nullptr; // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -434,7 +435,7 @@ struct StrNCatOpt : public StrCatOpt { FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); @@ -445,11 +446,11 @@ struct StrNCatOpt : public StrCatOpt { if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Len = LengthArg->getZExtValue(); else - return 0; + return nullptr; // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; + if (SrcLen == 0) return nullptr; --SrcLen; // Unbias length. // Handle the simple, do-nothing cases: @@ -458,10 +459,10 @@ struct StrNCatOpt : public StrCatOpt { if (SrcLen == 0 || Len == 0) return Dst; // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // We don't optimize this case - if (Len < SrcLen) return 0; + if (Len < SrcLen) return nullptr; // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further @@ -478,20 +479,20 @@ struct StrChrOpt : public LibCallOptimization { FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || !FT->getParamType(1)->isIntegerTy(32)) - return 0; + return nullptr; Value *SrcStr = CI->getArgOperand(0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); - if (CharC == 0) { + if (!CharC) { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; uint64_t Len = GetStringLength(SrcStr); if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. - return 0; + return nullptr; return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(DL->getIntPtrType(*Context), Len), @@ -504,7 +505,7 @@ struct StrChrOpt : public LibCallOptimization { if (!getConstantStringInfo(SrcStr, Str)) { if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p) return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); - return 0; + return nullptr; } // Compute the offset, make sure to handle the case when we're searching for @@ -528,21 +529,21 @@ struct StrRChrOpt : public LibCallOptimization { FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || !FT->getParamType(1)->isIntegerTy(32)) - return 0; + return nullptr; Value *SrcStr = CI->getArgOperand(0); ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); // Cannot fold anything if we're not looking for a constant. if (!CharC) - return 0; + return nullptr; StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) if (DL && CharC->isZero()) return EmitStrChr(SrcStr, '\0', B, DL, TLI); - return 0; + return nullptr; } // Compute the offset. 
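With a constant haystack, StrChrOpt and StrRChrOpt above reduce the search to pointer arithmetic on the string's known contents; with an unknown haystack, a constant needle can still become memchr (given DataLayout), and searching for '\0' becomes a strlen. For example:

//   strchr("hello", 'l')  -->  "hello" + 2    // offset of the first 'l'
//   strchr(p, 0)          -->  p + strlen(p)  // search for the terminator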
@@ -565,7 +566,7 @@ struct StrCmpOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strcmp(x,x) -> 0 @@ -591,14 +592,14 @@ struct StrCmpOpt : public LibCallOptimization { uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; return EmitMemCmp(Str1P, Str2P, ConstantInt::get(DL->getIntPtrType(*Context), std::min(Len1, Len2)), B, DL, TLI); } - return 0; + return nullptr; } }; @@ -612,7 +613,7 @@ struct StrNCmpOpt : public LibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 @@ -623,7 +624,7 @@ struct StrNCmpOpt : public LibCallOptimization { if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) Length = LengthArg->getZExtValue(); else - return 0; + return nullptr; if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -649,7 +650,7 @@ struct StrNCmpOpt : public LibCallOptimization { if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - return 0; + return nullptr; } }; @@ -662,18 +663,18 @@ struct StrCpyOpt : public LibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. @@ -692,20 +693,20 @@ struct StpCpyOpt: public LibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) Value *StrLen = EmitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; } // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; Type *PT = FT->getParamType(0); Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); @@ -728,7 +729,7 @@ struct StrNCpyOpt : public LibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); @@ -736,7 +737,7 @@ struct StrNCpyOpt : public LibCallOptimization { // See if we can get the length of the input string. 
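The string-comparison folds above, restated as source-level identities (each guarded by the type and length checks shown in the hunks):

//   strcmp(x, x)                 --> 0
//   strncmp(x, y, 0)             --> 0
//   strncmp(x, "", n)            --> *x   (zero-extended first byte)
//   both string lengths known    --> memcmp(x, y, min(len1, len2))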
uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; + if (SrcLen == 0) return nullptr; --SrcLen; if (SrcLen == 0) { @@ -749,15 +750,15 @@ struct StrNCpyOpt : public LibCallOptimization { if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) Len = LengthArg->getZExtValue(); else - return 0; + return nullptr; if (Len == 0) return Dst; // strncpy(x, y, 0) -> x // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // Let strncpy handle the zero padding - if (Len > SrcLen+1) return 0; + if (Len > SrcLen+1) return nullptr; Type *PT = FT->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] @@ -776,7 +777,7 @@ struct StrLenOpt : public LibCallOptimization { if (FT->getNumParams() != 1 || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; Value *Src = CI->getArgOperand(0); @@ -784,11 +785,26 @@ struct StrLenOpt : public LibCallOptimization { if (uint64_t Len = GetStringLength(Src)) return ConstantInt::get(CI->getType(), Len-1); + // strlen(x?"foo":"bars") --> x ? 3 : 4 + if (SelectInst *SI = dyn_cast<SelectInst>(Src)) { + uint64_t LenTrue = GetStringLength(SI->getTrueValue()); + uint64_t LenFalse = GetStringLength(SI->getFalseValue()); + if (LenTrue && LenFalse) { + emitOptimizationRemark(*Context, "simplify-libcalls", *Caller, + SI->getDebugLoc(), + "folded strlen(select) to select of constants"); + return B.CreateSelect(SI->getCondition(), + ConstantInt::get(CI->getType(), LenTrue-1), + ConstantInt::get(CI->getType(), LenFalse-1)); + } + } + // strlen(x) != 0 --> *x != 0 // strlen(x) == 0 --> *x == 0 if (isOnlyUsedInZeroEqualityComparison(CI)) return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); - return 0; + + return nullptr; } }; @@ -800,7 +816,7 @@ struct StrPBrkOpt : public LibCallOptimization { FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || FT->getReturnType() != FT->getParamType(0)) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -824,7 +840,7 @@ struct StrPBrkOpt : public LibCallOptimization { if (DL && HasS2 && S2.size() == 1) return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI); - return 0; + return nullptr; } }; @@ -835,7 +851,7 @@ struct StrToOpt : public LibCallOptimization { if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy()) - return 0; + return nullptr; Value *EndPtr = CI->getArgOperand(1); if (isa<ConstantPointerNull>(EndPtr)) { @@ -844,7 +860,7 @@ struct StrToOpt : public LibCallOptimization { CI->addAttribute(1, Attribute::NoCapture); } - return 0; + return nullptr; } }; @@ -856,7 +872,7 @@ struct StrSpnOpt : public LibCallOptimization { FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -874,7 +890,7 @@ struct StrSpnOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), Pos); } - return 0; + return nullptr; } }; @@ -886,7 +902,7 @@ struct StrCSpnOpt : public LibCallOptimization { FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ 
-907,7 +923,7 @@ struct StrCSpnOpt : public LibCallOptimization { if (DL && HasS2 && S2.empty()) return EmitStrLen(CI->getArgOperand(0), B, DL, TLI); - return 0; + return nullptr; } }; @@ -919,7 +935,7 @@ struct StrStrOpt : public LibCallOptimization { !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isPointerTy()) - return 0; + return nullptr; // fold strstr(x, x) -> x. if (CI->getArgOperand(0) == CI->getArgOperand(1)) @@ -929,11 +945,11 @@ struct StrStrOpt : public LibCallOptimization { if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI); if (!StrLen) - return 0; + return nullptr; Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), StrLen, B, DL, TLI); if (!StrNCmp) - return 0; + return nullptr; for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) { ICmpInst *Old = cast<ICmpInst>(*UI++); Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, @@ -969,9 +985,9 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) { Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI); - return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; } - return 0; + return nullptr; } }; @@ -982,7 +998,7 @@ struct MemCmpOpt : public LibCallOptimization { if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy(32)) - return 0; + return nullptr; Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); @@ -991,7 +1007,7 @@ struct MemCmpOpt : public LibCallOptimization { // Make sure we have a constant length. ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!LenC) return 0; + if (!LenC) return nullptr; uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 @@ -1012,7 +1028,7 @@ struct MemCmpOpt : public LibCallOptimization { getConstantStringInfo(RHS, RHSStr)) { // Make sure we're not reading out-of-bounds memory. if (Len > LHSStr.size() || Len > RHSStr.size()) - return 0; + return nullptr; // Fold the memcmp and normalize the result. This way we get consistent // results across multiple platforms. uint64_t Ret = 0; @@ -1024,7 +1040,7 @@ struct MemCmpOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), Ret); } - return 0; + return nullptr; } }; @@ -1032,14 +1048,14 @@ struct MemCpyOpt : public LibCallOptimization { Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != DL->getIntPtrType(*Context)) - return 0; + return nullptr; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1052,14 +1068,14 @@ struct MemMoveOpt : public LibCallOptimization { Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { // These optimizations require DataLayout. 
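StrStrOpt's folds above follow the same playbook: the needle-equals-haystack case is free, a one-character needle becomes strchr, and a result used only for equality against the haystack becomes a bounded comparison:

//   strstr(x, x)        --> x
//   strstr(x, "y")      --> strchr(x, 'y')   // bitcast to match the type
//   strstr(x, y) == x   --> strncmp(x, y, strlen(y)) == 0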
- if (!DL) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != DL->getIntPtrType(*Context)) - return 0; + return nullptr; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1072,14 +1088,14 @@ struct MemSetOpt : public LibCallOptimization { Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0))) - return 0; + return nullptr; // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); @@ -1103,21 +1119,21 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || !FT->getParamType(0)->isDoubleTy()) - return 0; + return nullptr; if (CheckRetType) { // Check if all the uses for function like 'sin' are converted to float. for (User *U : CI->users()) { FPTruncInst *Cast = dyn_cast<FPTruncInst>(U); - if (Cast == 0 || !Cast->getType()->isFloatTy()) - return 0; + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; } } // If this is something like 'floor((double)floatval)', convert to floorf. FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; + if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy()) + return nullptr; // floor((double)floatval) -> (double)floorf(floatval) Value *V = Cast->getOperand(0); @@ -1138,15 +1154,15 @@ struct BinaryDoubleFPOpt : public LibCallOptimization { if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return nullptr; if (CheckRetType) { // Check if all the uses for function like 'fmin/fmax' are converted to // float. for (User *U : CI->users()) { FPTruncInst *Cast = dyn_cast<FPTruncInst>(U); - if (Cast == 0 || !Cast->getType()->isFloatTy()) - return 0; + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; } } @@ -1154,13 +1170,13 @@ struct BinaryDoubleFPOpt : public LibCallOptimization { // we convert it to fminf. 
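UnaryDoubleFPOpt above (and the fmin/fmax binary variant that follows) fires only when the call is a float value round-tripped through double: the argument is an fpext from float, and when CheckRetType is set, every use is an fptrunc back to float. At the source level:

//   float f = ...;
//   float r = (float)floor((double)f);
// becomes
//   float r = floorf(f);   // same value, no widening round trip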
FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0)); FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1)); - if (Cast1 == 0 || !Cast1->getOperand(0)->getType()->isFloatTy() || - Cast2 == 0 || !Cast2->getOperand(0)->getType()->isFloatTy()) - return 0; + if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() || + !Cast2 || !Cast2->getOperand(0)->getType()->isFloatTy()) + return nullptr; // fmin((double)floatval1, (double)floatval2) // -> (double)fmin(floatval1, floatval2) - Value *V = NULL; + Value *V = nullptr; Value *V1 = Cast1->getOperand(0); Value *V2 = Cast2->getOperand(0); V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B, @@ -1180,7 +1196,7 @@ struct CosOpt : public UnsafeFPLibCallOptimization { CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { - Value *Ret = NULL; + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1208,7 +1224,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization { PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { - Value *Ret = NULL; + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1242,7 +1258,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization { } ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (Op2C == 0) return Ret; + if (!Op2C) return Ret; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); @@ -1275,7 +1291,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization { if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); - return 0; + return nullptr; } }; @@ -1283,7 +1299,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { - Value *Ret = NULL; + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "exp2" && TLI->has(LibFunc::exp2f)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1307,7 +1323,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { LdExp = LibFunc::ldexp; if (TLI->has(LdExp)) { - Value *LdExpArg = 0; + Value *LdExpArg = nullptr; if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); @@ -1344,7 +1360,7 @@ struct SinCosPiOpt : public LibCallOptimization { // Make sure the prototype is as expected, otherwise the rest of the // function is probably invalid and likely to abort. if (!isTrigLibCall(CI)) - return 0; + return nullptr; Value *Arg = CI->getArgOperand(0); SmallVector<CallInst *, 1> SinCalls; @@ -1362,7 +1378,7 @@ struct SinCosPiOpt : public LibCallOptimization { // It's only worthwhile if both sinpi and cospi are actually used. 
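The constant-exponent folds visible in PowOpt above, plus Exp2Opt's integer-operand case:

//   pow(x, 0.0)   --> 1.0
//   pow(x, -1.0)  --> 1.0 / x
//   exp2(n), n converted from a small integer  --> ldexp(1.0, n)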
if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) - return 0; + return nullptr; Value *Sin, *Cos, *SinCos; insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, @@ -1372,7 +1388,7 @@ struct SinCosPiOpt : public LibCallOptimization { replaceTrigInsts(CosCalls, Cos); replaceTrigInsts(SinCosCalls, SinCos); - return 0; + return nullptr; } bool isTrigLibCall(CallInst *CI) { @@ -1498,7 +1514,7 @@ struct FFSOpt : public LibCallOptimization { if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) || !FT->getParamType(0)->isIntegerTy()) - return 0; + return nullptr; Value *Op = CI->getArgOperand(0); @@ -1531,7 +1547,7 @@ struct AbsOpt : public LibCallOptimization { // We require integer(integer) where the types agree. if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || FT->getParamType(0) != FT->getReturnType()) - return 0; + return nullptr; // abs(x) -> x >s -1 ? x : -x Value *Op = CI->getArgOperand(0); @@ -1549,7 +1565,7 @@ struct IsDigitOpt : public LibCallOptimization { // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // isdigit(c) -> (c-'0') <u 10 Value *Op = CI->getArgOperand(0); @@ -1566,7 +1582,7 @@ struct IsAsciiOpt : public LibCallOptimization { // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // isascii(c) -> c <u 128 Value *Op = CI->getArgOperand(0); @@ -1582,7 +1598,7 @@ struct ToAsciiOpt : public LibCallOptimization { // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // toascii(c) -> c & 0x7f return B.CreateAnd(CI->getArgOperand(0), @@ -1612,7 +1628,7 @@ struct ErrorReportingOpt : public LibCallOptimization { CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); } - return 0; + return nullptr; } protected: @@ -1649,7 +1665,7 @@ struct PrintFOpt : public LibCallOptimization { // Check for a fixed format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) - return 0; + return nullptr; // Empty format string -> noop. if (FormatStr.empty()) // Tolerate printf's declared void. @@ -1660,7 +1676,7 @@ struct PrintFOpt : public LibCallOptimization { // is used, in general the printf return value is not compatible with either // putchar() or puts(). if (!CI->use_empty()) - return 0; + return nullptr; // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { @@ -1697,7 +1713,7 @@ struct PrintFOpt : public LibCallOptimization { CI->getArgOperand(1)->getType()->isPointerTy()) { return EmitPutS(CI->getArgOperand(1), B, DL, TLI); } - return 0; + return nullptr; } Value *callOptimizer(Function *Callee, CallInst *CI, @@ -1707,7 +1723,7 @@ struct PrintFOpt : public LibCallOptimization { if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return 0; + return nullptr; if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1724,7 +1740,7 @@ struct PrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1734,7 +1750,7 @@ struct SPrintFOpt : public LibCallOptimization { // Check for a fixed format string. 
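The ctype simplifications above are branch-free bit tricks; each replaces a libcall with one or two instructions:

//   isdigit(c)  -->  (unsigned)(c - '0') < 10
//   isascii(c)  -->  (unsigned)c < 128
//   toascii(c)  -->  c & 0x7f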
StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; + return nullptr; // If we just have a format string (nothing else crazy) transform it. if (CI->getNumArgOperands() == 2) { @@ -1742,10 +1758,10 @@ struct SPrintFOpt : public LibCallOptimization { // %% -> % in the future if we cared. for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') - return 0; // we found a format specifier, bail out. + return nullptr; // we found a format specifier, bail out. // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1758,12 +1774,12 @@ struct SPrintFOpt : public LibCallOptimization { // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumArgOperands() < 3) - return 0; + return nullptr; // Decode the second character of the format string. if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); @@ -1775,14 +1791,14 @@ struct SPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 's') { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI); if (!Len) - return 0; + return nullptr; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); @@ -1791,7 +1807,7 @@ struct SPrintFOpt : public LibCallOptimization { // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); } - return 0; + return nullptr; } Value *callOptimizer(Function *Callee, CallInst *CI, @@ -1801,7 +1817,7 @@ struct SPrintFOpt : public LibCallOptimization { if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1818,7 +1834,7 @@ struct SPrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1831,22 +1847,22 @@ struct FPrintFOpt : public LibCallOptimization { // All the optimizations depend on the format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; + return nullptr; // Do not do any of the following transformations if the fprintf return // value is used, in general the fprintf return value is not compatible // with fwrite(), fputc() or fputs(). if (!CI->use_empty()) - return 0; + return nullptr; // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') // Could handle %% -> % if we cared. - return 0; // We found a format specifier. + return nullptr; // We found a format specifier. // These optimizations require DataLayout. 
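SPrintFOpt handles exactly the three shapes visible above; any format string containing a real specifier other than a lone %c or %s is left alone:

//   sprintf(dst, "abc")      --> memcpy(dst, "abc", 4)        // result 3
//   sprintf(dst, "%c", ch)   --> dst[0] = ch; dst[1] = '\0';  // result 1
//   sprintf(dst, "%s", s)    --> memcpy(dst, s, strlen(s)+1)  // result strlen(s)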
- if (!DL) return 0; + if (!DL) return nullptr; return EmitFWrite(CI->getArgOperand(1), ConstantInt::get(DL->getIntPtrType(*Context), @@ -1858,22 +1874,22 @@ struct FPrintFOpt : public LibCallOptimization { // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumArgOperands() < 3) - return 0; + return nullptr; // Decode the second character of the format string. if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy()) - return 0; + return nullptr; return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); } - return 0; + return nullptr; } Value *callOptimizer(Function *Callee, CallInst *CI, @@ -1883,7 +1899,7 @@ struct FPrintFOpt : public LibCallOptimization { if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1900,7 +1916,7 @@ struct FPrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1917,12 +1933,12 @@ struct FWriteOpt : public LibCallOptimization { !FT->getParamType(2)->isIntegerTy() || !FT->getParamType(3)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; // Get the element size and count. ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); - if (!SizeC || !CountC) return 0; + if (!SizeC || !CountC) return nullptr; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); // If this is writing zero records, remove the call (it's a noop). @@ -1934,10 +1950,10 @@ struct FWriteOpt : public LibCallOptimization { if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; } - return 0; + return nullptr; } }; @@ -1948,18 +1964,18 @@ struct FPutsOpt : public LibCallOptimization { (void) ER.callOptimizer(Callee, CI, B); // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // Require two pointers. Also, we can't optimize if return value is used. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !CI->use_empty()) - return 0; + return nullptr; // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); - if (!Len) return 0; + if (!Len) return nullptr; // Known to have no uses (see above). return EmitFWrite(CI->getArgOperand(0), ConstantInt::get(DL->getIntPtrType(*Context), Len-1), @@ -1975,12 +1991,12 @@ struct PutsOpt : public LibCallOptimization { if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return 0; + return nullptr; // Check for a constant string. 
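Editorial note (not part of the patch): FWriteOpt and FPutsOpt above rely on libc equivalences that only hold when the call's result is unused, which the pass checks via CI->use_empty(). A sketch of the source-level rewrites; the values are illustrative:

    #include <cstdio>
    #include <cstring>

    int main() {
      FILE *F = stdout;
      const char *S = "hi";
      fputc(S[0], F);             // fwrite(S,1,1,F) -> fputc(S[0],F)
      fwrite(S, 1, strlen(S), F); // fputs(S,F) -> fwrite(S,1,strlen(S),F),
                                  // i.e. Len-1 bytes, dropping the NUL
      fwrite(S, 0, 0, F);         // zero records: a no-op, removed outright
      return 0;
    }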
StringRef Str; if (!getConstantStringInfo(CI->getArgOperand(0), Str)) - return 0; + return nullptr; if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') @@ -1989,7 +2005,7 @@ struct PutsOpt : public LibCallOptimization { return B.CreateIntCast(Res, CI->getType(), true); } - return 0; + return nullptr; } }; @@ -2100,7 +2116,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case Intrinsic::exp2: return &Exp2; default: - return 0; + return nullptr; } } @@ -2210,7 +2226,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::trunc: if (hasFloatVersion(FuncName)) return &UnaryDoubleFP; - return 0; + return nullptr; case LibFunc::acos: case LibFunc::acosh: case LibFunc::asin: @@ -2234,16 +2250,16 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::tanh: if (UnsafeFPShrink && hasFloatVersion(FuncName)) return &UnsafeUnaryDoubleFP; - return 0; + return nullptr; case LibFunc::fmin: case LibFunc::fmax: if (hasFloatVersion(FuncName)) return &BinaryDoubleFP; - return 0; + return nullptr; case LibFunc::memcpy_chk: return &MemCpyChk; default: - return 0; + return nullptr; } } @@ -2263,7 +2279,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { return &StrNCpyChk; } - return 0; + return nullptr; } @@ -2273,7 +2289,7 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { IRBuilder<> Builder(CI); return LCO->optimizeCall(CI, DL, TLI, LCS, Builder); } - return 0; + return nullptr; } LibCallSimplifier::LibCallSimplifier(const DataLayout *DL, @@ -2287,7 +2303,7 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { - if (CI->isNoBuiltin()) return 0; + if (CI->isNoBuiltin()) return nullptr; return Impl->optimizeCall(CI); } diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp index c318560..2c6fcd1 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Transforms/Utils/SpecialCaseList.cpp @@ -41,7 +41,7 @@ struct SpecialCaseList::Entry { StringSet<> Strings; Regex *RegEx; - Entry() : RegEx(0) {} + Entry() : RegEx(nullptr) {} bool match(StringRef Query) const { return Strings.count(Query) || (RegEx && RegEx->match(Query)); @@ -57,7 +57,7 @@ SpecialCaseList *SpecialCaseList::create( std::unique_ptr<MemoryBuffer> File; if (error_code EC = MemoryBuffer::getFile(Path, File)) { Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); - return 0; + return nullptr; } return create(File.get(), Error); } @@ -66,7 +66,7 @@ SpecialCaseList *SpecialCaseList::create( const MemoryBuffer *MB, std::string &Error) { std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList()); if (!SCL->parse(MB, Error)) - return 0; + return nullptr; return SCL.release(); } diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 560f581..0c2fc0a 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -59,7 +59,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Then unreachable blocks. if (UnreachableBlocks.empty()) { - UnreachableBlock = 0; + UnreachableBlock = nullptr; } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { @@ -77,7 +77,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Now handle return blocks. 
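Editorial note (not part of the patch): SpecialCaseList::create above is a two-stage factory: open the file into a buffer, then parse it, and each stage signals failure by filling Error and returning nullptr (the spelling this patch standardizes on). The same shape reduced to standalone C++11 with hypothetical types:

    #include <fstream>
    #include <memory>
    #include <sstream>
    #include <string>

    struct List { std::string Data; };

    // Stage 2: parse a buffer; null result plus Error on failure.
    static std::unique_ptr<List> createFromBuffer(const std::string &Buf,
                                                  std::string &Error) {
      if (Buf.empty()) {
        Error = "empty special case list";
        return nullptr;
      }
      std::unique_ptr<List> L(new List());
      L->Data = Buf;
      return L;
    }

    // Stage 1: open the file, then delegate to stage 2.
    static std::unique_ptr<List> createFromFile(const std::string &Path,
                                                std::string &Error) {
      std::ifstream In(Path.c_str());
      if (!In) {
        Error = "Can't open file '" + Path + "'";
        return nullptr;
      }
      std::stringstream SS;
      SS << In.rdbuf();
      return createFromBuffer(SS.str(), Error);
    }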
if (ReturningBlocks.empty()) { - ReturnBlock = 0; + ReturnBlock = nullptr; return false; // No blocks return } else if (ReturningBlocks.size() == 1) { ReturnBlock = ReturningBlocks.front(); // Already has a single return block @@ -91,9 +91,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "UnifiedReturnBlock", &F); - PHINode *PN = 0; + PHINode *PN = nullptr; if (F.getReturnType()->isVoidTy()) { - ReturnInst::Create(F.getContext(), NULL, NewRetBlock); + ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 457fc80..0f20e6d 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -71,12 +71,12 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (OP == 0) continue; + if (!OP) continue; Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. if (Mapped_OP == OP || - (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries))) + (Mapped_OP == nullptr && (Flags & RF_IgnoreMissingEntries))) continue; // Ok, at least one operand needs remapping. @@ -84,13 +84,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Elts.reserve(MD->getNumOperands()); for (i = 0; i != e; ++i) { Value *Op = MD->getOperand(i); - if (Op == 0) - Elts.push_back(0); + if (!Op) + Elts.push_back(nullptr); else { Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. - if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries)) + if (Mapped_Op == nullptr && (Flags & RF_IgnoreMissingEntries)) Mapped_Op = Op; Elts.push_back(Mapped_Op); } @@ -112,8 +112,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Okay, this either must be a constant (which may or may not be mappable) or // is something that is not in the mapping table. Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V)); - if (C == 0) - return 0; + if (!C) + return nullptr; if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { Function *F = @@ -126,7 +126,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Otherwise, we have some other constant to remap. Start by checking to see // if all operands have an identity remapping. unsigned OpNo = 0, NumOperands = C->getNumOperands(); - Value *Mapped = 0; + Value *Mapped = nullptr; for (; OpNo != NumOperands; ++OpNo) { Value *Op = C->getOperand(OpNo); Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer); @@ -187,7 +187,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer); // If we aren't ignoring missing entries, assert that something happened. 
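Editorial note (not part of the patch): every null check the patch touches in ValueMapper.cpp, just above and below this point, implements one rule: an explicit mapping wins, and a missing entry either falls back to the identity mapping under RF_IgnoreMissingEntries or is surfaced as null. A minimal sketch of that rule over a plain std::map:

    #include <map>

    // mapValue: explicit mapping wins; otherwise identity or null, depending
    // on whether missing entries are tolerated (cf. RF_IgnoreMissingEntries).
    template <typename V>
    const V *mapValue(const V *Val, const std::map<const V *, const V *> &VM,
                      bool IgnoreMissingEntries) {
      auto It = VM.find(Val);
      if (It != VM.end())
        return It->second;
      return IgnoreMissingEntries ? Val : nullptr;
    }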
- if (V != 0) + if (V) *op = V; else assert((Flags & RF_IgnoreMissingEntries) && @@ -199,7 +199,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags); // If we aren't ignoring missing entries, assert that something happened. - if (V != 0) + if (V) PN->setIncomingBlock(i, cast<BasicBlock>(V)); else assert((Flags & RF_IgnoreMissingEntries) && diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 71350e7..28ec83b 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -15,7 +15,6 @@ //===----------------------------------------------------------------------===// #define BBV_NAME "bb-vectorize" -#define DEBUG_TYPE BBV_NAME #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -50,6 +49,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE BBV_NAME + static cl::opt<bool> IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), cl::Hidden, cl::desc("Ignore target information")); @@ -122,6 +123,10 @@ NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point math intrinsics")); static cl::opt<bool> + NoBitManipulation("bb-vectorize-no-bitmanip", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize BitManipulation intrinsics")); + +static cl::opt<bool> NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); @@ -202,8 +207,8 @@ namespace { DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &P->getAnalysis<ScalarEvolution>(); DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; - TTI = IgnoreTargetInfo ? 0 : &P->getAnalysis<TargetTransformInfo>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis<TargetTransformInfo>(); } typedef std::pair<Value *, Value *> ValuePair; @@ -279,7 +284,7 @@ namespace { bool trackUsesOfI(DenseSet<Value *> &Users, AliasSetTracker &WriteSet, Instruction *I, Instruction *J, bool UpdateUsers = true, - DenseSet<ValuePair> *LoadMoveSetPairs = 0); + DenseSet<ValuePair> *LoadMoveSetPairs = nullptr); void computePairsConnectedTo( DenseMap<Value *, std::vector<Value *> > &CandidatePairs, @@ -292,8 +297,8 @@ namespace { bool pairsConflict(ValuePair P, ValuePair Q, DenseSet<ValuePair> &PairableInstUsers, DenseMap<ValuePair, std::vector<ValuePair> > - *PairableInstUserMap = 0, - DenseSet<VPPair> *PairableInstUserPairSet = 0); + *PairableInstUserMap = nullptr, + DenseSet<VPPair> *PairableInstUserPairSet = nullptr); bool pairWillFormCycle(ValuePair P, DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers, @@ -438,8 +443,8 @@ namespace { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; - TTI = IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + TTI = IgnoreTargetInfo ? 
nullptr : &getAnalysis<TargetTransformInfo>(); return vectorizeBB(BB); } @@ -674,7 +679,20 @@ namespace { case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::pow: + case Intrinsic::round: + case Intrinsic::copysign: + case Intrinsic::ceil: + case Intrinsic::nearbyint: + case Intrinsic::rint: + case Intrinsic::trunc: + case Intrinsic::floor: + case Intrinsic::fabs: return Config.VectorizeMath; + case Intrinsic::bswap: + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: + return Config.VectorizeBitManipulations; case Intrinsic::fma: case Intrinsic::fmuladd: return Config.VectorizeFMA; @@ -878,7 +896,7 @@ namespace { } // We can't vectorize memory operations without target data - if (DL == 0 && IsSimpleLoadStore) + if (!DL && IsSimpleLoadStore) return false; Type *T1, *T2; @@ -915,7 +933,7 @@ namespace { if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) return false; - if ((!Config.VectorizePointers || DL == 0) && + if ((!Config.VectorizePointers || !DL) && (T1->getScalarType()->isPointerTy() || T2->getScalarType()->isPointerTy())) return false; @@ -1049,7 +1067,7 @@ namespace { (isa<ConstantVector>(JOp) || isa<ConstantDataVector>(JOp))) { Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; Constant *SplatValue = cast<Constant>(IOp)->getSplatValue(); - if (SplatValue != NULL && + if (SplatValue != nullptr && SplatValue == cast<Constant>(JOp)->getSplatValue()) Op2VK = TargetTransformInfo::OK_UniformConstantValue; } @@ -1079,13 +1097,14 @@ namespace { CostSavings = ICost + JCost - VCost; } - // The powi intrinsic is special because only the first argument is - // vectorized, the second arguments must be equal. + // The powi,ctlz,cttz intrinsics are special because only the first + // argument is vectorized, the second arguments must be equal. CallInst *CI = dyn_cast<CallInst>(I); Function *FI; if (CI && (FI = CI->getCalledFunction())) { Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID(); - if (IID == Intrinsic::powi) { + if (IID == Intrinsic::powi || IID == Intrinsic::ctlz || + IID == Intrinsic::cttz) { Value *A1I = CI->getArgOperand(1), *A1J = cast<CallInst>(J)->getArgOperand(1); const SCEV *A1ISCEV = SE->getSCEV(A1I), @@ -1109,7 +1128,8 @@ namespace { assert(CI->getNumArgOperands() == CJ->getNumArgOperands() && "Intrinsic argument counts differ"); for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { - if (IID == Intrinsic::powi && i == 1) + if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz || + IID == Intrinsic::cttz) && i == 1) Tys.push_back(CI->getArgOperand(i)->getType()); else Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(), @@ -1665,8 +1685,9 @@ namespace { C2->first.second == C->first.first || C2->first.second == C->first.second || pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? &PairableInstUserPairSet + : nullptr)) { if (C2->second >= C->second) { CanAdd = false; break; @@ -1686,8 +1707,9 @@ namespace { T->second == C->first.first || T->second == C->first.second || pairsConflict(*T, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? 
&PairableInstUserPairSet + : nullptr)) { CanAdd = false; break; } @@ -1704,8 +1726,9 @@ namespace { C2->first.second == C->first.first || C2->first.second == C->first.second || pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? &PairableInstUserPairSet + : nullptr)) { CanAdd = false; break; } @@ -1720,8 +1743,9 @@ namespace { ChosenPairs.begin(), E2 = ChosenPairs.end(); C2 != E2; ++C2) { if (pairsConflict(*C2, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? &PairableInstUserPairSet + : nullptr)) { CanAdd = false; break; } @@ -1802,8 +1826,8 @@ namespace { for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(), E = ChosenPairs.end(); C != E; ++C) { if (pairsConflict(*C, IJ, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? &PairableInstUserPairSet : nullptr)) { DoesConflict = true; break; } @@ -2373,7 +2397,7 @@ namespace { } while ((LIENext = dyn_cast<InsertElementInst>(LIENext->getOperand(0)))); - LIENext = 0; + LIENext = nullptr; Value *LIEPrev = UndefValue::get(ArgTypeH); for (unsigned i = 0; i < numElemL; ++i) { if (isa<UndefValue>(VectElemts[i])) continue; @@ -2441,14 +2465,14 @@ namespace { if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) { // We can have at most two unique vector inputs. bool CanUseInputs = true; - Value *I1, *I2 = 0; + Value *I1, *I2 = nullptr; if (LEE) { I1 = LEE->getOperand(0); } else { I1 = LSV->getOperand(0); I2 = LSV->getOperand(1); if (I2 == I1 || isa<UndefValue>(I2)) - I2 = 0; + I2 = nullptr; } if (HEE) { @@ -2764,10 +2788,11 @@ namespace { ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType); continue; - } else if (IID == Intrinsic::powi && o == 1) { - // The second argument of powi is a single integer and we've already - // checked that both arguments are equal. As a result, we just keep - // I's second argument. + } else if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz || + IID == Intrinsic::cttz) && o == 1) { + // The second argument of powi/ctlz/cttz is a single integer/constant + // and we've already checked that both arguments are equal. + // As a result, we just keep I's second argument. 
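Editorial note (not part of the patch): ctlz and cttz join powi in this special case because their second operand is not data but a scalar i1 is_zero_undef flag, so only the first operand is widened and the flag must match across the pair, exactly as the checks above enforce. For reference, the defined (flag = false) semantics of ctlz on i32, as a portable sketch:

    #include <cassert>

    // Count leading zeros with the zero input defined, matching
    // llvm.ctlz(x, false); with the flag true, x == 0 would be undefined.
    static unsigned ctlz32(unsigned x) {
      if (x == 0)
        return 32;
      unsigned n = 0;
      while (!(x & 0x80000000u)) {
        x <<= 1;
        ++n;
      }
      return n;
    }

    int main() {
      assert(ctlz32(1) == 31);
      assert(ctlz32(0x80000000u) == 0);
      assert(ctlz32(0) == 32);
      return 0;
    }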
ReplacedOperands[o] = I->getOperand(o); continue; } @@ -2952,7 +2977,7 @@ namespace { switch (Kind) { default: - K->setMetadata(Kind, 0); // Remove unknown metadata + K->setMetadata(Kind, nullptr); // Remove unknown metadata break; case LLVMContext::MD_tbaa: K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); @@ -3123,7 +3148,7 @@ namespace { // Instruction insertion point: Instruction *InsertionPt = K; - Instruction *K1 = 0, *K2 = 0; + Instruction *K1 = nullptr, *K2 = nullptr; replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2); // The use dag of the first original instruction must be moved to after @@ -3213,6 +3238,7 @@ VectorizeConfig::VectorizeConfig() { VectorizePointers = !::NoPointers; VectorizeCasts = !::NoCasts; VectorizeMath = !::NoMath; + VectorizeBitManipulations = !::NoBitManipulation; VectorizeFMA = !::NoFMA; VectorizeSelect = !::NoSelect; VectorizeCmp = !::NoCmp; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 9a98c44..34d8a10 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -42,9 +42,6 @@ // //===----------------------------------------------------------------------===// -#define LV_NAME "loop-vectorize" -#define DEBUG_TYPE LV_NAME - #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" @@ -54,6 +51,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -67,7 +65,9 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -85,16 +85,23 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/VectorUtils.h" #include <algorithm> #include <map> +#include <tuple> using namespace llvm; using namespace llvm::PatternMatch; +#define LV_NAME "loop-vectorize" +#define DEBUG_TYPE LV_NAME + +STATISTIC(LoopsVectorized, "Number of loops vectorized"); +STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); + static cl::opt<unsigned> VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect.")); @@ -223,8 +230,9 @@ public: const TargetLibraryInfo *TLI, unsigned VecWidth, unsigned UnrollFactor) : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI), - VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0), - OldInduction(0), WidenMap(UnrollFactor), Legal(0) {} + VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), + Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), + Legal(nullptr) {} // Perform the actual loop widening (vectorization). 
void vectorize(LoopVectorizationLegality *L) { @@ -469,6 +477,24 @@ static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) { B.SetCurrentDebugLocation(DebugLoc()); } +#ifndef NDEBUG +/// \return string containing a file name and a line # for the given loop. +static std::string getDebugLocString(const Loop *L) { + std::string Result; + if (L) { + raw_string_ostream OS(Result); + const DebugLoc LoopDbgLoc = L->getStartLoc(); + if (!LoopDbgLoc.isUnknown()) + LoopDbgLoc.print(L->getHeader()->getContext(), OS); + else + // Just print the module name. + OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier(); + OS.flush(); + } + return Result; +} +#endif + /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. /// This class does not look at the profitability of vectorization, only the @@ -491,8 +517,8 @@ public: LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, DominatorTree *DT, TargetLibraryInfo *TLI) : NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL), - DT(DT), TLI(TLI), Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false), - MaxSafeDepDistBytes(-1U) {} + DT(DT), TLI(TLI), Induction(nullptr), WidestIndTy(nullptr), + HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -530,7 +556,7 @@ public: /// This struct holds information about reduction variables. struct ReductionDescriptor { - ReductionDescriptor() : StartValue(0), LoopExitInstr(0), + ReductionDescriptor() : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, @@ -602,7 +628,7 @@ public: /// A struct for saving information about induction variables. struct InductionInfo { InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} - InductionInfo() : StartValue(0), IK(IK_NoInduction) {} + InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {} /// Start value. TrackingVH<Value> StartValue; /// Induction kind. @@ -789,7 +815,8 @@ public: /// then this vectorization factor will be selected if vectorization is /// possible. VectorizationFactor selectVectorizationFactor(bool OptForSize, - unsigned UserVF); + unsigned UserVF, + bool ForceVectorization); /// \return The size (in bits) of the widest type in the code that /// needs to be vectorized. We ignore values that remain scalar such as @@ -856,35 +883,32 @@ private: /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. -struct LoopVectorizeHints { - /// Vectorization width. - unsigned Width; - /// Vectorization unroll factor. - unsigned Unroll; - /// Vectorization forced (-1 not selected, 0 force disabled, 1 force enabled) - int Force; +class LoopVectorizeHints { +public: + enum ForceKind { + FK_Undefined = -1, ///< Not selected. + FK_Disabled = 0, ///< Forcing disabled. + FK_Enabled = 1, ///< Forcing enabled. + }; LoopVectorizeHints(const Loop *L, bool DisableUnrolling) - : Width(VectorizationFactor) - , Unroll(DisableUnrolling ? 1 : VectorizationUnroll) - , Force(-1) - , LoopID(L->getLoopID()) { + : Width(VectorizationFactor), + Unroll(DisableUnrolling), + Force(FK_Undefined), + LoopID(L->getLoopID()) { getHints(L); - // The command line options override any loop metadata except for when - // width == 1 which is used to indicate the loop is already vectorized. 
- if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1) - Width = VectorizationFactor; + // force-vector-unroll overrides DisableUnrolling. if (VectorizationUnroll.getNumOccurrences() > 0) Unroll = VectorizationUnroll; - DEBUG(if (DisableUnrolling && Unroll == 1) - dbgs() << "LV: Unrolling disabled by the pass manager\n"); + DEBUG(if (DisableUnrolling && Unroll == 1) dbgs() + << "LV: Unrolling disabled by the pass manager\n"); } /// Return the loop vectorizer metadata prefix. static StringRef Prefix() { return "llvm.vectorizer."; } - MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) { + MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) const { SmallVector<Value*, 2> Vals; Vals.push_back(MDString::get(Context, Name)); Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V)); @@ -918,9 +942,12 @@ struct LoopVectorizeHints { LoopID = NewLoopID; } -private: - MDNode *LoopID; + unsigned getWidth() const { return Width; } + unsigned getUnroll() const { return Unroll; } + enum ForceKind getForce() const { return Force; } + MDNode *getLoopID() const { return LoopID; } +private: /// Find hints specified in the loop metadata. void getHints(const Loop *L) { if (!LoopID) @@ -931,7 +958,7 @@ private: assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - const MDString *S = 0; + const MDString *S = nullptr; SmallVector<Value*, 4> Args; // The expected hint is either a MDString or a MDNode with the first @@ -980,13 +1007,23 @@ private: DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n"); } else if (Hint == "enable") { if (C->getBitWidth() == 1) - Force = Val; + Force = Val == 1 ? LoopVectorizeHints::FK_Enabled + : LoopVectorizeHints::FK_Disabled; else DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n"); } else { DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n'); } } + + /// Vectorization width. + unsigned Width; + /// Vectorization unroll factor. + unsigned Unroll; + /// Vectorization forced + enum ForceKind Force; + + MDNode *LoopID; }; static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) { @@ -1024,7 +1061,7 @@ struct LoopVectorize : public FunctionPass { bool runOnFunction(Function &F) override { SE = &getAnalysis<ScalarEvolution>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; LI = &getAnalysis<LoopInfo>(); TTI = &getAnalysis<TargetTransformInfo>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -1041,8 +1078,9 @@ struct LoopVectorize : public FunctionPass { if (!TTI->getNumberOfRegisters(true)) return false; - if (DL == NULL) { - DEBUG(dbgs() << "LV: Not vectorizing: Missing data layout\n"); + if (!DL) { + DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName() + << ": Missing data layout\n"); return false; } @@ -1054,6 +1092,8 @@ struct LoopVectorize : public FunctionPass { for (Loop *L : *LI) addInnerLoop(*L, Worklist); + LoopsAnalyzed += Worklist.size(); + // Now walk the identified inner loops. 
bool Changed = false; while (!Worklist.empty()) @@ -1065,26 +1105,56 @@ struct LoopVectorize : public FunctionPass { bool processLoop(Loop *L) { assert(L->empty() && "Only process inner loops."); - DEBUG(dbgs() << "LV: Checking a loop in \"" << - L->getHeader()->getParent()->getName() << "\"\n"); + +#ifndef NDEBUG + const std::string DebugLocStr = getDebugLocString(L); +#endif /* NDEBUG */ + + DEBUG(dbgs() << "\nLV: Checking a loop in \"" + << L->getHeader()->getParent()->getName() << "\" from " + << DebugLocStr << "\n"); LoopVectorizeHints Hints(L, DisableUnrolling); - if (Hints.Force == 0) { + DEBUG(dbgs() << "LV: Loop hints:" + << " force=" + << (Hints.getForce() == LoopVectorizeHints::FK_Disabled + ? "disabled" + : (Hints.getForce() == LoopVectorizeHints::FK_Enabled + ? "enabled" + : "?")) << " width=" << Hints.getWidth() + << " unroll=" << Hints.getUnroll() << "\n"); + + if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) { DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); return false; } - if (!AlwaysVectorize && Hints.Force != 1) { + if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) { DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); return false; } - if (Hints.Width == 1 && Hints.Unroll == 1) { + if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) { DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); return false; } + // Check the loop for a trip count threshold: + // do not vectorize loops with a tiny trip count. + BasicBlock *Latch = L->getLoopLatch(); + const unsigned TC = SE->getSmallConstantTripCount(L, Latch); + if (TC > 0u && TC < TinyTripCountVectorThreshold) { + DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " + << "This loop is not worth vectorizing."); + if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) + DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); + else { + DEBUG(dbgs() << "\n"); + return false; + } + } + // Check if it is legal to vectorize the loop. LoopVectorizationLegality LVL(L, SE, DL, DT, TLI); if (!LVL.canVectorize()) { @@ -1098,8 +1168,8 @@ struct LoopVectorize : public FunctionPass { // Check the function attributes to find out if this function should be // optimized for size. Function *F = L->getHeader()->getParent(); - bool OptForSize = - Hints.Force != 1 && F->hasFnAttribute(Attribute::OptimizeForSize); + bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && + F->hasFnAttribute(Attribute::OptimizeForSize); // Compute the weighted frequency of this loop being executed and see if it // is less than 20% of the function entry baseline frequency. Note that we @@ -1108,7 +1178,8 @@ struct LoopVectorize : public FunctionPass { // exactly what block frequency models. if (LoopVectorizeWithBlockFrequency) { BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); - if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq) + if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && + LoopEntryFreq < ColdEntryFreq) OptForSize = true; } @@ -1123,14 +1194,17 @@ struct LoopVectorize : public FunctionPass { } // Select the optimal vectorization factor. - LoopVectorizationCostModel::VectorizationFactor VF; - VF = CM.selectVectorizationFactor(OptForSize, Hints.Width); + const LoopVectorizationCostModel::VectorizationFactor VF = + CM.selectVectorizationFactor(OptForSize, Hints.getWidth(), + Hints.getForce() == + LoopVectorizeHints::FK_Enabled); + // Select the unroll factor. 
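Editorial note (not part of the patch): processLoop above now gates vectorization on four conditions in sequence: the force pragma, the AlwaysVectorize policy, the width==1/unroll==1 "already vectorized" marker, and the tiny-trip-count threshold, which an explicit force now overrides. The decision logic in isolation, with hypothetical names:

    enum ForceKind { FK_Undefined = -1, FK_Disabled = 0, FK_Enabled = 1 };

    static bool worthConsidering(ForceKind Force, bool AlwaysVectorize,
                                 unsigned Width, unsigned Unroll,
                                 unsigned TripCount, unsigned TinyThreshold) {
      if (Force == FK_Disabled)
        return false;                // #pragma vectorize disable
      if (!AlwaysVectorize && Force != FK_Enabled)
        return false;                // no explicit enable, no policy
      if (Width == 1 && Unroll == 1)
        return false;                // disabled / already vectorized
      if (TripCount > 0 && TripCount < TinyThreshold)
        return Force == FK_Enabled;  // tiny loop: only if forced
      return true;
    }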
- unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width, - VF.Cost); + const unsigned UF = + CM.selectUnrollFactor(OptForSize, Hints.getUnroll(), VF.Width, VF.Cost); - DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<< - F->getParent()->getModuleIdentifier() << '\n'); + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n'); if (VF.Width == 1) { @@ -1138,6 +1212,13 @@ struct LoopVectorize : public FunctionPass { if (UF == 1) return false; DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n"); + + // Report the unrolling decision. + emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + Twine("unrolled with interleaving factor " + + Twine(UF) + + " (vectorization not beneficial)")); + // We decided not to vectorize, but we may want to unroll. InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF); Unroller.vectorize(&LVL); @@ -1145,6 +1226,13 @@ struct LoopVectorize : public FunctionPass { // If we decided that it is *legal* to vectorize the loop then do it. InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF); LB.vectorize(&LVL); + ++LoopsVectorized; + + // Report the vectorization decision. + emitOptimizationRemark( + F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) + + ", unrolling interleave factor: " + Twine(UF) + ")"); } // Mark the loop as already vectorized to avoid vectorizing again. @@ -1188,7 +1276,7 @@ static Value *stripIntegerCast(Value *V) { /// \p Ptr. static const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE, ValueToValueMap &PtrToStride, - Value *Ptr, Value *OrigPtr = 0) { + Value *Ptr, Value *OrigPtr = nullptr) { const SCEV *OrigSCEV = SE->getSCEV(Ptr); @@ -1355,7 +1443,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // We can emit wide load/stores only if the last non-zero index is the // induction variable. - const SCEV *Last = 0; + const SCEV *Last = nullptr; if (!Strides.count(Gep)) Last = SE->getSCEV(Gep->getOperand(InductionOperand)); else { @@ -1604,17 +1692,17 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic // Does this instruction return a value ? bool IsVoidRetTy = Instr->getType()->isVoidTy(); - Value *UndefVec = IsVoidRetTy ? 0 : + Value *UndefVec = IsVoidRetTy ? nullptr : UndefValue::get(VectorType::get(Instr->getType(), VF)); // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); Instruction *InsertPt = Builder.GetInsertPoint(); BasicBlock *IfBlock = Builder.GetInsertBlock(); - BasicBlock *CondBlock = 0; + BasicBlock *CondBlock = nullptr; VectorParts Cond; - Loop *VectorLp = 0; + Loop *VectorLp = nullptr; if (IfPredicateStore) { assert(Instr->getParent()->getSinglePredecessor() && "Only support single predecessor blocks"); @@ -1630,7 +1718,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic for (unsigned Width = 0; Width < VF; ++Width) { // Start if-block. 
- Value *Cmp = 0; + Value *Cmp = nullptr; if (IfPredicateStore) { Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width)); Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1)); @@ -1681,21 +1769,21 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, if (FirstInst) return FirstInst; if (Instruction *I = dyn_cast<Instruction>(V)) - return I->getParent() == Loc->getParent() ? I : 0; - return 0; + return I->getParent() == Loc->getParent() ? I : nullptr; + return nullptr; } std::pair<Instruction *, Instruction *> InnerLoopVectorizer::addStrideCheck(Instruction *Loc) { - Instruction *tnullptr = 0; + Instruction *tnullptr = nullptr; if (!Legal->mustCheckStrides()) return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr); IRBuilder<> ChkBuilder(Loc); // Emit checks. - Value *Check = 0; - Instruction *FirstInst = 0; + Value *Check = nullptr; + Instruction *FirstInst = nullptr; for (SmallPtrSet<Value *, 8>::iterator SI = Legal->strides_begin(), SE = Legal->strides_end(); SI != SE; ++SI) { @@ -1727,7 +1815,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) { LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck = Legal->getRuntimePointerCheck(); - Instruction *tnullptr = 0; + Instruction *tnullptr = nullptr; if (!PtrRtCheck->Need) return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr); @@ -1737,7 +1825,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) { LLVMContext &Ctx = Loc->getContext(); SCEVExpander Exp(*SE, "induction"); - Instruction *FirstInst = 0; + Instruction *FirstInst = nullptr; for (unsigned i = 0; i < NumPointers; ++i) { Value *Ptr = PtrRtCheck->Pointers[i]; @@ -1764,7 +1852,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) { IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. - Value *MemoryRuntimeCheck = 0; + Value *MemoryRuntimeCheck = nullptr; for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { // No need to check if two readonly pointers intersect. @@ -2028,7 +2116,7 @@ void InnerLoopVectorizer::createEmptyLoop() { // start value. // This variable saves the new starting index for the scalar loop. - PHINode *ResumeIndex = 0; + PHINode *ResumeIndex = nullptr; LoopVectorizationLegality::InductionList::iterator I, E; LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); // Set builder to point to last bypass block. @@ -2044,9 +2132,9 @@ void InnerLoopVectorizer::createEmptyLoop() { // truncated version for the scalar loop. PHINode *TruncResumeVal = (OrigPhi == OldInduction) ? 
PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val", - MiddleBlock->getTerminator()) : 0; + MiddleBlock->getTerminator()) : nullptr; - Value *EndValue = 0; + Value *EndValue = nullptr; switch (II.IK) { case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); @@ -2209,148 +2297,6 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) { } } -static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I, - Intrinsic::ID ValidIntrinsicID) { - if (I.getNumArgOperands() != 1 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - !I.onlyReadsMemory()) - return Intrinsic::not_intrinsic; - - return ValidIntrinsicID; -} - -static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I, - Intrinsic::ID ValidIntrinsicID) { - if (I.getNumArgOperands() != 2 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - !I.getArgOperand(1)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - I.getType() != I.getArgOperand(1)->getType() || - !I.onlyReadsMemory()) - return Intrinsic::not_intrinsic; - - return ValidIntrinsicID; -} - - -static Intrinsic::ID -getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { - // If we have an intrinsic call, check if it is trivially vectorizable. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { - switch (II->getIntrinsicID()) { - case Intrinsic::sqrt: - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::log: - case Intrinsic::log10: - case Intrinsic::log2: - case Intrinsic::fabs: - case Intrinsic::copysign: - case Intrinsic::floor: - case Intrinsic::ceil: - case Intrinsic::trunc: - case Intrinsic::rint: - case Intrinsic::nearbyint: - case Intrinsic::round: - case Intrinsic::pow: - case Intrinsic::fma: - case Intrinsic::fmuladd: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - return II->getIntrinsicID(); - default: - return Intrinsic::not_intrinsic; - } - } - - if (!TLI) - return Intrinsic::not_intrinsic; - - LibFunc::Func Func; - Function *F = CI->getCalledFunction(); - // We're going to make assumptions on the semantics of the functions, check - // that the target knows that it's available in this environment and it does - // not have local linkage. - if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func)) - return Intrinsic::not_intrinsic; - - // Otherwise check if we have a call to a function that can be turned into a - // vector intrinsic. 
- switch (Func) { - default: - break; - case LibFunc::sin: - case LibFunc::sinf: - case LibFunc::sinl: - return checkUnaryFloatSignature(*CI, Intrinsic::sin); - case LibFunc::cos: - case LibFunc::cosf: - case LibFunc::cosl: - return checkUnaryFloatSignature(*CI, Intrinsic::cos); - case LibFunc::exp: - case LibFunc::expf: - case LibFunc::expl: - return checkUnaryFloatSignature(*CI, Intrinsic::exp); - case LibFunc::exp2: - case LibFunc::exp2f: - case LibFunc::exp2l: - return checkUnaryFloatSignature(*CI, Intrinsic::exp2); - case LibFunc::log: - case LibFunc::logf: - case LibFunc::logl: - return checkUnaryFloatSignature(*CI, Intrinsic::log); - case LibFunc::log10: - case LibFunc::log10f: - case LibFunc::log10l: - return checkUnaryFloatSignature(*CI, Intrinsic::log10); - case LibFunc::log2: - case LibFunc::log2f: - case LibFunc::log2l: - return checkUnaryFloatSignature(*CI, Intrinsic::log2); - case LibFunc::fabs: - case LibFunc::fabsf: - case LibFunc::fabsl: - return checkUnaryFloatSignature(*CI, Intrinsic::fabs); - case LibFunc::copysign: - case LibFunc::copysignf: - case LibFunc::copysignl: - return checkBinaryFloatSignature(*CI, Intrinsic::copysign); - case LibFunc::floor: - case LibFunc::floorf: - case LibFunc::floorl: - return checkUnaryFloatSignature(*CI, Intrinsic::floor); - case LibFunc::ceil: - case LibFunc::ceilf: - case LibFunc::ceill: - return checkUnaryFloatSignature(*CI, Intrinsic::ceil); - case LibFunc::trunc: - case LibFunc::truncf: - case LibFunc::truncl: - return checkUnaryFloatSignature(*CI, Intrinsic::trunc); - case LibFunc::rint: - case LibFunc::rintf: - case LibFunc::rintl: - return checkUnaryFloatSignature(*CI, Intrinsic::rint); - case LibFunc::nearbyint: - case LibFunc::nearbyintf: - case LibFunc::nearbyintl: - return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint); - case LibFunc::round: - case LibFunc::roundf: - case LibFunc::roundl: - return checkUnaryFloatSignature(*CI, Intrinsic::round); - case LibFunc::pow: - case LibFunc::powf: - case LibFunc::powl: - return checkBinaryFloatSignature(*CI, Intrinsic::pow); - } - - return Intrinsic::not_intrinsic; -} - /// This function translates the reduction kind to an LLVM binary operator. static unsigned getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { @@ -2651,7 +2597,7 @@ void InnerLoopVectorizer::vectorizeLoop() { assert(isPowerOf2_32(VF) && "Reduction emission only supported for pow2 vectors!"); Value *TmpVec = ReducedPartRdx; - SmallVector<Constant*, 32> ShuffleMask(VF, 0); + SmallVector<Constant*, 32> ShuffleMask(VF, nullptr); for (unsigned i = VF; i != 1; i >>= 1) { // Move the upper half of the vector to the lower half. for (unsigned j = 0; j != i/2; ++j) @@ -3049,7 +2995,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { VectorParts &A = getVectorValue(it->getOperand(0)); VectorParts &B = getVectorValue(it->getOperand(1)); for (unsigned Part = 0; Part < UF; ++Part) { - Value *C = 0; + Value *C = nullptr; if (FCmp) C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]); else @@ -3275,15 +3221,6 @@ bool LoopVectorizationLegality::canVectorize() { return false; } - // Do not loop-vectorize loops with a tiny trip count. - BasicBlock *Latch = TheLoop->getLoopLatch(); - unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch); - if (TC > 0u && TC < TinyTripCountVectorThreshold) { - DEBUG(dbgs() << "LV: Found a loop with a very small trip count. 
" << - "This loop is not worth vectorizing.\n"); - return false; - } - // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); @@ -3536,14 +3473,14 @@ static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, ///\brief Look for a cast use of the passed value. static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { - Value *UniqueCast = 0; + Value *UniqueCast = nullptr; for (User *U : Ptr->users()) { CastInst *CI = dyn_cast<CastInst>(U); if (CI && CI->getType() == Ty) { if (!UniqueCast) UniqueCast = CI; else - return 0; + return nullptr; } } return UniqueCast; @@ -3556,7 +3493,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, const DataLayout *DL, Loop *Lp) { const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); if (!PtrTy || PtrTy->isAggregateType()) - return 0; + return nullptr; // Try to remove a gep instruction to make the pointer (actually index at this // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the @@ -3576,11 +3513,11 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V); if (!S) - return 0; + return nullptr; V = S->getStepRecurrence(*SE); if (!V) - return 0; + return nullptr; // Strip off the size of access multiplication if we are still analyzing the // pointer. @@ -3588,24 +3525,24 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, DL->getTypeAllocSize(PtrTy->getElementType()); if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) { if (M->getOperand(0)->getSCEVType() != scConstant) - return 0; + return nullptr; const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue(); // Huge step value - give up. if (APStepVal.getBitWidth() > 64) - return 0; + return nullptr; int64_t StepVal = APStepVal.getSExtValue(); if (PtrAccessSize != StepVal) - return 0; + return nullptr; V = M->getOperand(1); } } // Strip off casts. - Type *StripedOffRecurrenceCast = 0; + Type *StripedOffRecurrenceCast = nullptr; if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) { StripedOffRecurrenceCast = C->getType(); V = C->getOperand(); @@ -3614,11 +3551,11 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, // Look for the loop invariant symbolic value. const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V); if (!U) - return 0; + return nullptr; Value *Stride = U->getValue(); if (!Lp->isLoopInvariant(Stride)) - return 0; + return nullptr; // If we have stripped off the recurrence cast we have to make sure that we // return the value that is used in this loop so that we can replace it later. @@ -3629,7 +3566,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, } void LoopVectorizationLegality::collectStridedAcccess(Value *MemAccess) { - Value *Ptr = 0; + Value *Ptr = nullptr; if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess)) Ptr = LI->getPointerOperand(); else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess)) @@ -4628,7 +4565,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // We only allow for a single reduction value to be used outside the loop. // This includes users of the reduction, variables (which form a cycle // which ends in the phi node). - Instruction *ExitInstruction = 0; + Instruction *ExitInstruction = nullptr; // Indicates that we found a reduction operation in our scan. 
bool FoundReduxOp = false; @@ -4642,7 +4579,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // the number of instruction we saw from the recognized min/max pattern, // to make sure we only see exactly the two instructions. unsigned NumCmpSelectPatternInst = 0; - ReductionInstDesc ReduxDesc(false, 0); + ReductionInstDesc ReduxDesc(false, nullptr); SmallPtrSet<Instruction *, 8> VisitedInsts; SmallVector<Instruction *, 8> Worklist; @@ -4725,7 +4662,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // being used. In this case the user uses the value of the previous // iteration, in which case we would loose "VF-1" iterations of the // reduction operation if we vectorize. - if (ExitInstruction != 0 || Cur == Phi) + if (ExitInstruction != nullptr || Cur == Phi) return false; // The instruction used by an outside user must be the last instruction @@ -4741,7 +4678,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Process instructions only once (termination). Each reduction cycle // value must only be used once, except by phi nodes and min/max // reductions which are represented as a cmp followed by a select. - ReductionInstDesc IgnoredVal(false, 0); + ReductionInstDesc IgnoredVal(false, nullptr); if (VisitedInsts.insert(UI)) { if (isa<PHINode>(UI)) PHIs.push_back(UI); @@ -4795,8 +4732,8 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && "Expect a select instruction"); - Instruction *Cmp = 0; - SelectInst *Select = 0; + Instruction *Cmp = nullptr; + SelectInst *Select = nullptr; // We must handle the select(cmp()) as a single instruction. Advance to the // select. @@ -4982,7 +4919,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, LoopVectorizationCostModel::VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, - unsigned UserVF) { + unsigned UserVF, + bool ForceVectorization) { // Width 1 means no vectorize VectorizationFactor Factor = { 1U, 0U }; if (OptForSize && Legal->getRuntimePointerCheck()->Need) { @@ -5052,8 +4990,18 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, } float Cost = expectedCost(1); +#ifndef NDEBUG + const float ScalarCost = Cost; +#endif /* NDEBUG */ unsigned Width = 1; - DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n"); + DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); + + // Ignore scalar width, because the user explicitly wants vectorization. 
+ if (ForceVectorization && VF > 1) { + Width = 2; + Cost = expectedCost(Width) / (float)Width; + } + for (unsigned i=2; i <= VF; i*=2) { // Notice that the vector loop needs to be executed less times, so // we need to divide the cost of the vector loops by the width of @@ -5067,7 +5015,10 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, } } - DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n"); + DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs() + << "LV: Vectorization seems to be not beneficial, " + << "but was forced by a user.\n"); + DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n"); Factor.Width = Width; Factor.Cost = Width * Cost; return Factor; @@ -5516,7 +5467,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { Op2VK = TargetTransformInfo::OK_UniformConstantValue; else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) { Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; - if (cast<Constant>(Op2)->getSplatValue() != NULL) + if (cast<Constant>(Op2)->getSplatValue() != nullptr) Op2VK = TargetTransformInfo::OK_UniformConstantValue; } @@ -5730,17 +5681,17 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, // Does this instruction return a value ? bool IsVoidRetTy = Instr->getType()->isVoidTy(); - Value *UndefVec = IsVoidRetTy ? 0 : + Value *UndefVec = IsVoidRetTy ? nullptr : UndefValue::get(Instr->getType()); // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); Instruction *InsertPt = Builder.GetInsertPoint(); BasicBlock *IfBlock = Builder.GetInsertBlock(); - BasicBlock *CondBlock = 0; + BasicBlock *CondBlock = nullptr; VectorParts Cond; - Loop *VectorLp = 0; + Loop *VectorLp = nullptr; if (IfPredicateStore) { assert(Instr->getParent()->getSinglePredecessor() && "Only support single predecessor blocks"); @@ -5755,7 +5706,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, // For each scalar that we create: // Start an "if (pred) a[i] = ..." block. - Value *Cmp = 0; + Value *Cmp = nullptr; if (IfPredicateStore) { if (Cond[Part]->getType()->isVectorTy()) Cond[Part] = diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ee32227..e13ba95 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15,9 +15,6 @@ // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. 
// //===----------------------------------------------------------------------===// -#define SV_NAME "slp-vectorizer" -#define DEBUG_TYPE "SLP" - #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" @@ -34,6 +31,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" @@ -41,11 +39,15 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/VectorUtils.h" #include <algorithm> #include <map> using namespace llvm; +#define SV_NAME "slp-vectorizer" +#define DEBUG_TYPE "SLP" + static cl::opt<int> SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " @@ -72,8 +74,6 @@ struct BlockNumbering { BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {} - BlockNumbering() : BB(0), Valid(false) {} - void numberInstructions() { unsigned Loc = 0; InstrIdx.clear(); @@ -120,15 +120,15 @@ private: static BasicBlock *getSameBlock(ArrayRef<Value *> VL) { Instruction *I0 = dyn_cast<Instruction>(VL[0]); if (!I0) - return 0; + return nullptr; BasicBlock *BB = I0->getParent(); for (int i = 1, e = VL.size(); i < e; i++) { Instruction *I = dyn_cast<Instruction>(VL[i]); if (!I) - return 0; + return nullptr; if (BB != I->getParent()) - return 0; + return nullptr; } return BB; } @@ -180,7 +180,7 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) { switch (Kind) { default: - MD = 0; // Remove unknown metadata + MD = nullptr; // Remove unknown metadata break; case LLVMContext::MD_tbaa: MD = MDNode::getMostGenericTBAA(MD, IMD); @@ -201,7 +201,7 @@ static Type* getSameType(ArrayRef<Value *> VL) { Type *Ty = VL[0]->getType(); for (int i = 1, e = VL.size(); i < e; i++) if (VL[i]->getType() != Ty) - return 0; + return nullptr; return Ty; } @@ -345,17 +345,10 @@ public: typedef SmallVector<StoreInst *, 8> StoreList; BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl, - TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li, - DominatorTree *Dt) : - F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt), - Builder(Se->getContext()) { - // Setup the block numbering utility for all of the blocks in the - // function. - for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) { - BasicBlock *BB = it; - BlocksNumbers[BB] = BlockNumbering(BB); - } - } + TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, + LoopInfo *Li, DominatorTree *Dt) + : F(Func), SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), + Builder(Se->getContext()) {} /// \brief Vectorize the tree that starts with the elements in \p VL. /// Returns the vectorized root. @@ -365,13 +358,13 @@ public: /// A negative number means that this is profitable. int getTreeCost(); - /// Construct a vectorizable tree that starts at \p Roots and is possibly - /// used by a reduction of \p RdxOps. - void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0); + /// Construct a vectorizable tree that starts at \p Roots, ignoring users for + /// the purpose of scheduling and extraction in the \p UserIgnoreLst. + void buildTree(ArrayRef<Value *> Roots, + ArrayRef<Value *> UserIgnoreLst = None); /// Clear the internal data structures that are created by 'buildTree'. 
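Editorial note (not part of the patch): the BoUpSLP constructor above no longer numbers every block of the function eagerly; getBlockNumbering (added further below) builds the numbering lazily on first use. The underlying trick is that map insert is a no-op on an existing key, sketched here with std::map standing in for the DenseMap keyed by BasicBlock*:

    #include <map>
    #include <string>

    struct Numbering {
      bool Valid;
      Numbering() : Valid(false) {}
    };

    // First call creates the entry; every later call returns the cached one.
    static Numbering &getNumbering(std::map<std::string, Numbering> &M,
                                   const std::string &Block) {
      auto I = M.insert(std::make_pair(Block, Numbering()));
      return I.first->second;
    }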
@@ -365,13 +358,13 @@ public:
   /// A negative number means that this is profitable.
   int getTreeCost();
 
-  /// Construct a vectorizable tree that starts at \p Roots and is possibly
-  /// used by a reduction of \p RdxOps.
-  void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
+  /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
+  /// the purpose of scheduling and extraction in the \p UserIgnoreLst.
+  void buildTree(ArrayRef<Value *> Roots,
+                 ArrayRef<Value *> UserIgnoreLst = None);
 
   /// Clear the internal data structures that are created by 'buildTree'.
   void deleteTree() {
-    RdxOps = 0;
     VectorizableTree.clear();
     ScalarToTreeEntry.clear();
     MustGather.clear();
@@ -446,7 +439,7 @@ private:
   bool isFullyVectorizableTinyTree();
 
   struct TreeEntry {
-    TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0),
+    TreeEntry() : Scalars(), VectorizedValue(nullptr), LastScalarIndex(0),
     NeedToGather(0) {}
 
     /// \returns true if the scalars in VL are equal to this entry.
@@ -527,14 +520,22 @@ private:
   /// Numbers instructions in different blocks.
   DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
 
-  /// Reduction operators.
-  ValueSet *RdxOps;
+  /// \brief Get the corresponding instruction numbering list for a given
+  /// BasicBlock. The list is allocated lazily.
+  BlockNumbering &getBlockNumbering(BasicBlock *BB) {
+    auto I = BlocksNumbers.insert(std::make_pair(BB, BlockNumbering(BB)));
+    return I.first->second;
+  }
+
+  /// List of users to ignore during scheduling and that don't need extracting.
+  ArrayRef<Value *> UserIgnoreList;
 
   // Analysis and block reference.
   Function *F;
   ScalarEvolution *SE;
   const DataLayout *DL;
   TargetTransformInfo *TTI;
+  TargetLibraryInfo *TLI;
   AliasAnalysis *AA;
   LoopInfo *LI;
   DominatorTree *DT;
@@ -542,9 +543,10 @@ private:
   IRBuilder<> Builder;
 };
 
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
+                        ArrayRef<Value *> UserIgnoreLst) {
   deleteTree();
-  RdxOps = Rdx;
+  UserIgnoreList = UserIgnoreLst;
   if (!getSameType(Roots))
     return;
   buildTree_rec(Roots, 0);
@@ -576,8 +578,9 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
       if (!UserInst)
         continue;
 
-      // Ignore uses that are part of the reduction.
-      if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
+      // Ignore users in the user ignore list.
+      if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
+          UserIgnoreList.end())
         continue;
 
       DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
@@ -708,12 +711,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
           continue;
         }
 
-        // This user is part of the reduction.
-        if (RdxOps && RdxOps->count(UI))
+        // Ignore users in the user ignore list.
+        if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) !=
+            UserIgnoreList.end())
          continue;
 
         // Make sure that we can schedule this unknown user.
-        BlockNumbering &BN = BlocksNumbers[BB];
+        BlockNumbering &BN = getBlockNumbering(BB);
         int UserIndex = BN.getIndex(UI);
         if (UserIndex < MyLastIndex) {
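
The eager per-block setup deleted from the BoUpSLP constructor is replaced by getBlockNumbering() above, which allocates a block's numbering the first time that block is queried; DenseMap::insert() leaves an existing entry untouched, so later queries return the same state. A standalone sketch of the idiom, with std::map standing in for DenseMap and illustrative names throughout:

    #include <map>

    struct Numbering {
      int NextIndex = 0; // stand-in for the per-block instruction numbering
    };

    struct BlockInfoCache {
      std::map<const void *, Numbering> PerBlock;

      // insert() is a no-op when the key is already present, so only the
      // first query for a block pays for constructing its entry.
      Numbering &get(const void *BB) {
        auto I = PerBlock.insert({BB, Numbering()});
        return I.first->second;
      }
    };

Functions that never touch a block now never pay for it, which is why the asserts below drop the BlocksNumbers.count(BB) check: the entry simply materializes on demand.
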
@@ -948,32 +952,36 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
     }
     case Instruction::Call: {
       // Check if the calls are all to the same vectorizable intrinsic.
-      IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
-      if (II==NULL) {
+      CallInst *CI = cast<CallInst>(VL[0]);
+      // Check if this is an Intrinsic call or something that can be
+      // represented by an intrinsic call.
+      Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+      if (!isTriviallyVectorizable(ID)) {
        newTreeEntry(VL, false);
        DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
        return;
      }
 
-      Function *Int = II->getCalledFunction();
+      Function *Int = CI->getCalledFunction();
 
      for (unsigned i = 1, e = VL.size(); i != e; ++i) {
-        IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]);
-        if (!II2 || II2->getCalledFunction() != Int) {
+        CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
+        if (!CI2 || CI2->getCalledFunction() != Int ||
+            getIntrinsicIDForCall(CI2, TLI) != ID) {
          newTreeEntry(VL, false);
-          DEBUG(dbgs() << "SLP: mismatched calls:" << *II << "!=" << *VL[i]
+          DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
                        << "\n");
          return;
        }
      }
 
      newTreeEntry(VL, true);
-      for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) {
+      for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
        ValueList Operands;
        // Prepare the operand vector.
        for (unsigned j = 0; j < VL.size(); ++j) {
-          IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[j]);
-          Operands.push_back(II2->getArgOperand(i));
+          CallInst *CI2 = dyn_cast<CallInst>(VL[j]);
+          Operands.push_back(CI2->getArgOperand(i));
        }
        buildTree_rec(Operands, Depth + 1);
      }
@@ -1090,7 +1098,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       // If instead not all operands are constants, then set the operand kind
       // to OK_AnyValue. If all operands are constants but not the same,
       // then set the operand kind to OK_NonUniformConstantValue.
-      ConstantInt *CInt = NULL;
+      ConstantInt *CInt = nullptr;
       for (unsigned i = 0; i < VL.size(); ++i) {
         const Instruction *I = cast<Instruction>(VL[i]);
         if (!isa<ConstantInt>(I->getOperand(1))) {
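
With the change above the SLP vectorizer no longer insists on IntrinsicInst: getIntrinsicIDForCall() (from the newly included VectorUtils.h) also maps library calls such as sinf to an intrinsic ID via TargetLibraryInfo, and a bundle vectorizes only if every call agrees on both callee and ID. A standalone model of the check, with a toy lookup table standing in for getIntrinsicIDForCall()/isTriviallyVectorizable():

    #include <cstddef>
    #include <map>
    #include <string>
    #include <vector>

    enum class IntrinID { None, Sqrt, Sin };

    // Toy stand-in: the intrinsic spelling and the libm name both resolve
    // to the same ID, which is the point of the TLI-based mapping.
    IntrinID intrinsicFor(const std::string &Callee) {
      static const std::map<std::string, IntrinID> Table = {
          {"llvm.sqrt.f32", IntrinID::Sqrt}, {"sqrtf", IntrinID::Sqrt},
          {"llvm.sin.f32", IntrinID::Sin},   {"sinf", IntrinID::Sin}};
      auto I = Table.find(Callee);
      return I == Table.end() ? IntrinID::None : I->second;
    }

    bool bundleIsOneIntrinsic(const std::vector<std::string> &Callees) {
      if (Callees.empty())
        return false;
      IntrinID ID = intrinsicFor(Callees.front());
      if (ID == IntrinID::None)
        return false; // not (representable as) a vectorizable intrinsic
      for (std::size_t i = 1; i < Callees.size(); ++i)
        if (Callees[i] != Callees.front() || intrinsicFor(Callees[i]) != ID)
          return false; // mismatched calls in the bundle
      return true;
    }
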
@@ -1129,12 +1137,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
   }
   case Instruction::Call: {
     CallInst *CI = cast<CallInst>(VL0);
-    IntrinsicInst *II = cast<IntrinsicInst>(CI);
-    Intrinsic::ID ID = II->getIntrinsicID();
+    Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
 
     // Calculate the cost of the scalar and vector calls.
     SmallVector<Type*, 4> ScalarTys, VecTys;
-    for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) {
+    for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) {
       ScalarTys.push_back(CI->getArgOperand(op)->getType());
       VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
                                        VecTy->getNumElements()));
@@ -1147,7 +1154,7 @@
 
     DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
           << " (" << VecCallCost << "-" << ScalarCallCost << ")"
-          << " for " << *II << "\n");
+          << " for " << *CI << "\n");
 
     return VecCallCost - ScalarCallCost;
   }
@@ -1244,7 +1251,7 @@ Value *BoUpSLP::getPointerOperand(Value *I) {
     return LI->getPointerOperand();
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getPointerOperand();
-  return 0;
+  return nullptr;
 }
 
 unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
@@ -1318,13 +1325,13 @@ Value *BoUpSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) {
     if (!A.Ptr || !B.Ptr || AA->alias(A, B))
       return I;
   }
-  return 0;
+  return nullptr;
 }
 
 int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) {
   BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
-  assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");
-  BlockNumbering &BN = BlocksNumbers[BB];
+  assert(BB == getSameBlock(VL) && "Invalid block");
+  BlockNumbering &BN = getBlockNumbering(BB);
 
   int MaxIdx = BN.getIndex(BB->getFirstNonPHI());
   for (unsigned i = 0, e = VL.size(); i < e; ++i)
@@ -1334,8 +1341,8 @@
 
 Instruction *BoUpSLP::getLastInstruction(ArrayRef<Value *> VL) {
   BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
-  assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");
-  BlockNumbering &BN = BlocksNumbers[BB];
+  assert(BB == getSameBlock(VL) && "Invalid block");
+  BlockNumbering &BN = getBlockNumbering(BB);
 
   int MaxIdx = BN.getIndex(cast<Instruction>(VL[0]));
   for (unsigned i = 1, e = VL.size(); i < e; ++i)
@@ -1394,7 +1401,7 @@ Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
     if (En->isSame(VL) && En->VectorizedValue)
       return En->VectorizedValue;
   }
-  return 0;
+  return nullptr;
 }
 
 Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
@@ -1615,6 +1622,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
                                             VecTy->getPointerTo(AS));
       unsigned Alignment = LI->getAlignment();
       LI = Builder.CreateLoad(VecPtr);
+      if (!Alignment)
+        Alignment = DL->getABITypeAlignment(LI->getPointerOperand()->getType());
       LI->setAlignment(Alignment);
       E->VectorizedValue = LI;
       return propagateMetadata(LI, E->Scalars);
@@ -1634,13 +1643,14 @@
       Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                             VecTy->getPointerTo(AS));
       StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
+      if (!Alignment)
+        Alignment = DL->getABITypeAlignment(SI->getPointerOperand()->getType());
       S->setAlignment(Alignment);
       E->VectorizedValue = S;
       return propagateMetadata(S, E->Scalars);
     }
     case Instruction::Call: {
       CallInst *CI = cast<CallInst>(VL0);
-
       setInsertPointAfterBundle(E->Scalars);
       std::vector<Value *> OpVecs;
       for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
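
The two if (!Alignment) additions above handle IR accesses that carry no explicit alignment: alignment 0 means "the ABI alignment of the accessed type", but once the pointer has been bitcast to a vector pointer, the implicit default would become the vector type's (usually larger) ABI alignment, which the original scalar code never guaranteed. The fallback pins an explicit value before the access is retyped. A sketch of the rule, with a hypothetical helper rather than the DataLayout API:

    unsigned vectorizedAccessAlign(unsigned DeclaredAlign,
                                   unsigned ScalarAbiAlign) {
      // Alignment 0 on the scalar access meant "ABI alignment of the scalar
      // type". After the bitcast to a vector pointer, 0 would instead be read
      // as the vector's larger ABI alignment, which the original code never
      // guaranteed: pin the scalar value explicitly.
      return DeclaredAlign ? DeclaredAlign : ScalarAbiAlign;
    }

For example, a float store rewritten to a <4 x float> store keeps vectorizedAccessAlign(0, 4) == 4 rather than silently claiming an alignment of 16.
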
@@ -1656,8 +1666,7 @@
       }
 
       Module *M = F->getParent();
-      IntrinsicInst *II = cast<IntrinsicInst>(CI);
-      Intrinsic::ID ID = II->getIntrinsicID();
+      Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
       Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
       Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
       Value *V = Builder.CreateCall(CF, OpVecs);
@@ -1667,7 +1676,7 @@
     default:
       llvm_unreachable("unknown inst");
   }
-  return 0;
+  return nullptr;
 }
 
 Value *BoUpSLP::vectorizeTree() {
@@ -1746,8 +1755,9 @@
         DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
 
         assert((ScalarToTreeEntry.count(U) ||
-                // It is legal to replace the reduction users by undef.
-                (RdxOps && RdxOps->count(U))) &&
+                // It is legal to replace users in the ignorelist by undef.
+                (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) !=
+                 UserIgnoreList.end())) &&
               "Replacing out-of-tree value with undef");
       }
 #endif
@@ -1759,9 +1769,9 @@
     }
   }
 
-  for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
-    BlocksNumbers[it].forget();
-  }
+  for (auto &BN : BlocksNumbers)
+    BN.second.forget();
+
   Builder.ClearInsertionPoint();
 
   return VectorizableTree[0].VectorizedValue;
@@ -1802,11 +1812,19 @@ void BoUpSLP::optimizeGatherSequence() {
       Insert->moveBefore(PreHeader->getTerminator());
     }
 
+  // Make a list of all reachable blocks in our CSE queue.
+  SmallVector<const DomTreeNode *, 8> CSEWorkList;
+  CSEWorkList.reserve(CSEBlocks.size());
+  for (BasicBlock *BB : CSEBlocks)
+    if (DomTreeNode *N = DT->getNode(BB)) {
+      assert(DT->isReachableFromEntry(N));
+      CSEWorkList.push_back(N);
+    }
+
   // Sort blocks by domination. This ensures we visit a block after all blocks
   // dominating it are visited.
-  SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
   std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(),
-                   [this](const BasicBlock *A, const BasicBlock *B) {
+                   [this](const DomTreeNode *A, const DomTreeNode *B) {
     return DT->properlyDominates(A, B);
   });
 
@@ -1814,12 +1832,10 @@
   // instructions. TODO: We can further optimize this scan if we split the
   // instructions into different buckets based on the insert lane.
   SmallVector<Instruction *, 16> Visited;
-  for (SmallVectorImpl<BasicBlock *>::iterator I = CSEWorkList.begin(),
-                                               E = CSEWorkList.end();
-       I != E; ++I) {
+  for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
     assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
            "Worklist not sorted properly!");
-    BasicBlock *BB = *I;
+    BasicBlock *BB = (*I)->getBlock();
     // For all instructions in blocks containing gather sequences:
     for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
       Instruction *In = it++;
@@ -1835,7 +1851,7 @@
             DT->dominates((*v)->getParent(), In->getParent())) {
           In->replaceAllUsesWith(*v);
           In->eraseFromParent();
-          In = 0;
+          In = nullptr;
           break;
         }
       }
@@ -1864,6 +1880,7 @@ struct SLPVectorizer : public FunctionPass {
   ScalarEvolution *SE;
   const DataLayout *DL;
   TargetTransformInfo *TTI;
+  TargetLibraryInfo *TLI;
   AliasAnalysis *AA;
   LoopInfo *LI;
   DominatorTree *DT;
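
optimizeGatherSequence() above now builds its CSE worklist from DomTreeNodes and drops blocks without a node first: blocks not reachable from the entry have no dominator-tree node, so they cannot take part in a dominance-based visit order at all. Visiting dominators before the blocks they dominate lets a gather sequence seen early replace identical, dominated copies. A standalone miniature of the idea, modelling the dominator tree directly with illustrative structs rather than the LLVM API:

    #include <algorithm>
    #include <string>
    #include <vector>

    struct DomNode {
      std::string Block;
      const DomNode *Parent; // immediate dominator; null at the entry
      unsigned Depth;        // distance from the entry in the dominator tree
    };

    // A properly dominates B iff A is a proper ancestor of B in the tree.
    bool properlyDominates(const DomNode *A, const DomNode *B) {
      for (const DomNode *P = B->Parent; P; P = P->Parent)
        if (P == A)
          return true;
      return false;
    }

    // Order the worklist so that dominators come first; an instruction seen
    // in a dominating block can then replace equal ones in dominated blocks
    // (the properlyDominates test above guards each replacement).
    void cseOrder(std::vector<const DomNode *> &Work) {
      std::stable_sort(Work.begin(), Work.end(),
                       [](const DomNode *A, const DomNode *B) {
                         return A->Depth < B->Depth;
                       });
    }

Sorting by tree depth is one total order that refines dominance; the pass obtains the same guarantee by comparing reachable dominator-tree nodes directly.
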
@@ -1874,8 +1891,9 @@
 
     SE = &getAnalysis<ScalarEvolution>();
     DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-    DL = DLP ? &DLP->getDataLayout() : 0;
+    DL = DLP ? &DLP->getDataLayout() : nullptr;
     TTI = &getAnalysis<TargetTransformInfo>();
+    TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
     AA = &getAnalysis<AliasAnalysis>();
     LI = &getAnalysis<LoopInfo>();
     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1900,8 +1918,8 @@ struct SLPVectorizer : public FunctionPass {
     DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
 
     // Use the bottom up slp vectorizer to construct chains that start with
-    // he store instructions.
-    BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT);
+    // store instructions.
+    BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT);
 
     // Scan the blocks in the function in post order.
     for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
@@ -1951,8 +1969,11 @@ private:
   bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
 
   /// \brief Try to vectorize a list of operands.
+  /// \param BuildVector A list of users to ignore for the purpose of
+  ///                    scheduling and that don't need extracting.
   /// \returns true if a value was vectorized.
-  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
+                          ArrayRef<Value *> BuildVector = None);
 
   /// \brief Try to vectorize a chain that may start at the operands of \p V.
   bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
@@ -2106,7 +2127,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
     // Check that the pointer points to scalars.
     Type *Ty = SI->getValueOperand()->getType();
     if (Ty->isAggregateType() || Ty->isVectorTy())
-      return 0;
+      continue;
 
     // Find the base pointer.
     Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
@@ -2125,7 +2146,8 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
   return tryToVectorizeList(VL, R);
 }
 
-bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
+                                       ArrayRef<Value *> BuildVector) {
   if (VL.size() < 2)
     return false;
 
@@ -2153,7 +2175,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
 
   bool Changed = false;
 
-  // Keep track of values that were delete by vectorizing in the loop below.
+  // Keep track of values that were deleted by vectorizing in the loop below.
   SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
 
   for (unsigned i = 0, e = VL.size(); i < e; ++i) {
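
tryToVectorizeList() above walks the candidate list in VF-sized windows and, when a build vector is being vectorized, slices the parallel BuildVector list with exactly the same bounds, so element i of the operand slice always corresponds to element i of the insertelement slice. A standalone sketch of the windowing, simplified in that the real loop also retries with a narrower trailing width:

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Cut NumCandidates parallel entries into bundles of width VF. The same
    // [begin, end) bounds index both the operand list and the insertelement
    // list, which is what keeps the two in sync for the rewiring step.
    std::vector<std::pair<std::size_t, std::size_t>>
    makeBundles(std::size_t NumCandidates, std::size_t VF) {
      std::vector<std::pair<std::size_t, std::size_t>> Bundles;
      if (VF < 2)
        return Bundles; // a one-wide bundle is just the scalar itself
      for (std::size_t Begin = 0; Begin + VF <= NumCandidates; Begin += VF)
        Bundles.push_back({Begin, Begin + VF});
      return Bundles;
    }
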
@@ -2175,13 +2197,38 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
                  << "\n");
     ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
 
-    R.buildTree(Ops);
+    ArrayRef<Value *> BuildVectorSlice;
+    if (!BuildVector.empty())
+      BuildVectorSlice = BuildVector.slice(i, OpsWidth);
+
+    R.buildTree(Ops, BuildVectorSlice);
     int Cost = R.getTreeCost();
 
     if (Cost < -SLPCostThreshold) {
       DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
-      R.vectorizeTree();
-
+      Value *VectorizedRoot = R.vectorizeTree();
+
+      // Reconstruct the build vector by extracting the vectorized root. This
+      // way we handle the case where some elements of the vector are undefined.
+      // (return (insertelt <4 x i32> (insertelt undef (opd0) 0) (opd1) 2))
+      if (!BuildVectorSlice.empty()) {
+        // The insert point is the last build vector instruction. The vectorized
+        // root will precede it. This guarantees that we get an instruction. The
+        // vectorized tree could have been constant folded.
+        Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+        unsigned VecIdx = 0;
+        for (auto &V : BuildVectorSlice) {
+          IRBuilder<true, NoFolder> Builder(
+              ++BasicBlock::iterator(InsertAfter));
+          InsertElementInst *IE = cast<InsertElementInst>(V);
+          Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
+              VectorizedRoot, Builder.getInt32(VecIdx++)));
+          IE->setOperand(1, Extract);
+          IE->removeFromParent();
+          IE->insertAfter(Extract);
+          InsertAfter = IE;
+        }
+      }
       // Move to the next bundle.
       i += VF - 1;
       Changed = true;
@@ -2290,7 +2337,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
 ///   *p =
 ///
 class HorizontalReduction {
-  SmallPtrSet<Value *, 16> ReductionOps;
+  SmallVector<Value *, 16> ReductionOps;
   SmallVector<Value *, 32> ReducedVals;
 
   BinaryOperator *ReductionRoot;
@@ -2308,7 +2355,7 @@
 
 public:
   HorizontalReduction()
-    : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0),
+    : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
     ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
 
   /// \brief Try to find a reduction tree.
@@ -2323,10 +2370,10 @@ public:
     // In such a case start looking for a tree rooted in the first '+'.
     if (Phi) {
       if (B->getOperand(0) == Phi) {
-        Phi = 0;
+        Phi = nullptr;
         B = dyn_cast<BinaryOperator>(B->getOperand(1));
       } else if (B->getOperand(1) == Phi) {
-        Phi = 0;
+        Phi = nullptr;
        B = dyn_cast<BinaryOperator>(B->getOperand(0));
      }
    }
@@ -2384,7 +2431,7 @@ public:
         // We need to be able to reassociate the adds.
         if (!TreeN->isAssociative())
           return false;
-        ReductionOps.insert(TreeN);
+        ReductionOps.push_back(TreeN);
       }
       // Retract.
       Stack.pop_back();
@@ -2412,7 +2459,7 @@ public:
     if (NumReducedVals < ReduxWidth)
       return false;
 
-    Value *VectorizedTree = 0;
+    Value *VectorizedTree = nullptr;
     IRBuilder<> Builder(ReductionRoot);
     FastMathFlags Unsafe;
     Unsafe.setUnsafeAlgebra();
@@ -2421,7 +2468,7 @@
     for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
       ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
-      V.buildTree(ValsToReduce, &ReductionOps);
+      V.buildTree(ValsToReduce, ReductionOps);
 
       // Estimate cost.
       int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
@@ -2455,13 +2502,13 @@ public:
       }
       // Update users.
       if (ReductionPHI) {
-        assert(ReductionRoot != NULL && "Need a reduction operation");
+        assert(ReductionRoot && "Need a reduction operation");
         ReductionRoot->setOperand(0, VectorizedTree);
         ReductionRoot->setOperand(1, ReductionPHI);
       } else
         ReductionRoot->replaceAllUsesWith(VectorizedTree);
     }
-    return VectorizedTree != 0;
+    return VectorizedTree != nullptr;
   }
 
 private:
@@ -2540,13 +2587,16 @@ private:
 ///
 /// Returns true if it matches
 ///
-static bool findBuildVector(InsertElementInst *IE,
-                            SmallVectorImpl<Value *> &Ops) {
-  if (!isa<UndefValue>(IE->getOperand(0)))
+static bool findBuildVector(InsertElementInst *FirstInsertElem,
+                            SmallVectorImpl<Value *> &BuildVector,
+                            SmallVectorImpl<Value *> &BuildVectorOpds) {
+  if (!isa<UndefValue>(FirstInsertElem->getOperand(0)))
     return false;
 
+  InsertElementInst *IE = FirstInsertElem;
   while (true) {
-    Ops.push_back(IE->getOperand(1));
+    BuildVector.push_back(IE);
+    BuildVectorOpds.push_back(IE->getOperand(1));
 
     if (IE->use_empty())
       return false;
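
tryToReduce() above feeds ReduxWidth-wide slices of the reduced values to the SLP tree builder, now passing ReductionOps as the generic user-ignore list, and then folds each vectorized chunk into a single scalar with log2(ReduxWidth) shuffle-and-add steps. A scalar model of that epilogue, illustrative only:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Fold a power-of-two number of partial results into one scalar in
    // log2(N) halving steps; each step is the scalar analogue of one
    // shuffle-plus-add the vectorizer emits.
    float reduceAdd(std::vector<float> Lanes) {
      assert(!Lanes.empty() && "ReduxWidth is always at least one lane");
      for (std::size_t Half = Lanes.size() / 2; Half >= 1; Half /= 2)
        for (std::size_t i = 0; i < Half; ++i)
          Lanes[i] += Lanes[i + Half];
      return Lanes[0];
    }

Because floating-point reassociation changes results, the real pass only emits this shape under unsafe-algebra fast-math flags, which is what the FastMathFlags setup above arranges.
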
@@ -2641,7 +2691,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       Value *Rdx = (P->getIncomingBlock(0) == BB ? (P->getIncomingValue(0))
-                        : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0));
+                        : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1)
+                                                        : nullptr));
       // Check if this is a Binary Operator.
       BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
       if (!BI)
@@ -2680,7 +2731,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       if (BinaryOperator *BinOp =
               dyn_cast<BinaryOperator>(SI->getValueOperand())) {
         HorizontalReduction HorRdx;
-        if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
+        if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
               HorRdx.tryToReduce(R, TTI)) ||
              tryToVectorize(BinOp, R))) {
           Changed = true;
@@ -2716,12 +2767,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
     }
 
     // Try to vectorize trees that start at insertelement instructions.
-    if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
-      SmallVector<Value *, 8> Ops;
-      if (!findBuildVector(IE, Ops))
+    if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
+      SmallVector<Value *, 16> BuildVector;
+      SmallVector<Value *, 16> BuildVectorOpds;
+      if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
         continue;
 
-      if (tryToVectorizeList(Ops, R)) {
+      // Vectorize starting with the build vector operands ignoring the
+      // BuildVector instructions for the purpose of scheduling and user
+      // extraction.
+      if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
        Changed = true;
        it = BB->begin();
        e = BB->end();
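
findBuildVector() now records the insertelement instructions themselves (BuildVector) alongside the inserted scalars (BuildVectorOpds): the scalars seed the vectorizable tree, while the inserts go into the user-ignore list and are later rewired to extractelements from the vectorized root. A standalone model of the chain walk it performs, with illustrative structs in place of LLVM IR:

    #include <vector>

    struct InsertElt {
      bool IntoUndef;        // operand 0 is undef: a chain can start here
      int Scalar;            // the scalar being inserted (operand 1)
      unsigned NumUses;      // users of the vector this insert produces
      InsertElt *NextInsert; // the user when it is another insert, else null
    };

    bool findBuildVectorModel(InsertElt *First,
                              std::vector<InsertElt *> &Inserts,
                              std::vector<int> &Scalars) {
      if (!First->IntoUndef)
        return false; // must begin by inserting into an undef vector
      for (InsertElt *IE = First;;) {
        Inserts.push_back(IE);
        Scalars.push_back(IE->Scalar);
        if (IE->NumUses == 0)
          return false; // dead chain: nothing consumes the final vector
        if (!IE->NextInsert)
          return true;  // done: the finished vector escapes the chain
        if (IE->NumUses != 1)
          return false; // a partial vector may only feed the next insert
        IE = IE->NextInsert;
      }
    }

The walk fails on a dead chain or when a partial vector has extra users, and succeeds exactly when the last insert's value escapes to a non-insertelement user, mirroring the early returns in the real function.
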