Diffstat (limited to 'lib/Transforms')
37 files changed, 1498 insertions, 540 deletions
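Most of the hunks below make one mechanical change: the allocation and dead-code helpers in llvm/Analysis/MemoryBuiltins.h (isAllocationFn, isAllocLikeFn, isMallocLikeFn, isFreeCall, extractMallocCall, getMallocAllocatedType, getMallocArraySize, getObjectSize) and in llvm/Transforms/Utils/Local.h (isInstructionTriviallyDead, RecursivelyDeleteTriviallyDeadInstructions, DeleteDeadPHIs, SimplifyInstructionsInBlock) now take a const TargetLibraryInfo *, so that calls such as malloc and free are treated as builtins only when the target library actually provides them (e.g. not under -fno-builtin). A minimal sketch of a pass consuming the new signatures; "ExampleDCE" is illustrative and not part of this commit:

    #include "llvm/Function.h"
    #include "llvm/Pass.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    namespace {
    struct ExampleDCE : public FunctionPass {
      static char ID;
      ExampleDCE() : FunctionPass(ID) {}

      virtual bool runOnFunction(Function &F) {
        // TLI may be absent; the helpers accept a null pointer and then
        // decline to treat any call as a known library function.
        const TargetLibraryInfo *TLI =
            getAnalysisIfAvailable<TargetLibraryInfo>();
        bool Changed = false;
        for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
          for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
            Instruction *Inst = I++;
            // With a non-null TLI, an unused call to a library allocator
            // such as malloc can also be recognized as trivially dead.
            if (isInstructionTriviallyDead(Inst, TLI)) {
              Inst->eraseFromParent();
              Changed = true;
            }
          }
        return Changed;
      }
    };
    }
    char ExampleDCE::ID = 0;

Passes that cannot run without the analysis (BoundsChecking below) instead declare AU.addRequired<TargetLibraryInfo>() and use getAnalysis<TargetLibraryInfo>().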
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 6d950d2..b888e95 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -346,7 +346,7 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) { /// Given a value that is stored to a global but never read, determine whether /// it's safe to remove the store and the chain of computation that feeds the /// store. -static bool IsSafeComputationToRemove(Value *V) { +static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { do { if (isa<Constant>(V)) return true; @@ -355,7 +355,7 @@ static bool IsSafeComputationToRemove(Value *V) { if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) return false; - if (isAllocationFn(V)) + if (isAllocationFn(V, TLI)) return true; Instruction *I = cast<Instruction>(V); @@ -376,7 +376,8 @@ static bool IsSafeComputationToRemove(Value *V) { /// of the global and clean up any that obviously don't assign the global a /// value that isn't dynamically allocated. /// -static bool CleanupPointerRootUsers(GlobalVariable *GV) { +static bool CleanupPointerRootUsers(GlobalVariable *GV, + const TargetLibraryInfo *TLI) { // A brief explanation of leak checkers. The goal is to find bugs where // pointers are forgotten, causing an accumulating growth in memory // usage over time. The common strategy for leak checkers is to whitelist the @@ -432,18 +433,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. Dead.clear(); - CleanupPointerRootUsers(GV); + CleanupPointerRootUsers(GV, TLI); return true; } } } for (int i = 0, e = Dead.size(); i != e; ++i) { - if (IsSafeComputationToRemove(Dead[i].first)) { + if (IsSafeComputationToRemove(Dead[i].first, TLI)) { Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { - if (isAllocationFn(I)) + if (isAllocationFn(I, TLI)) break; Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); if (!J) @@ -975,7 +976,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // nor is the global. if (AllNonStoreUsesGone) { if (isLeakCheckerRoot(GV)) { - Changed |= CleanupPointerRootUsers(GV); + Changed |= CleanupPointerRootUsers(GV, TLI); } else { Changed = true; CleanupConstantGlobalUsers(GV, 0, TD, TLI); @@ -1465,9 +1466,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value *NElems, TargetData *TD) { + Value *NElems, TargetData *TD, + const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); - Type *MAT = getMallocAllocatedType(CI); + Type *MAT = getMallocAllocatedType(CI, TLI); StructType *STy = cast<StructType>(MAT); // There is guaranteed to be at least one use of the malloc (storing @@ -1688,7 +1690,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. // We cannot optimize the malloc if we cannot determine malloc array size. 
- Value *NElems = getMallocArraySize(CI, TD, true); + Value *NElems = getMallocArraySize(CI, TD, TLI, true); if (!NElems) return false; @@ -1725,7 +1727,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); @@ -1742,7 +1744,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CI = cast<CallInst>(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD); + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true), + TD, TLI); return true; } @@ -1771,8 +1774,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // Optimize away any trapping uses of the loaded value. if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI)) return true; - } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - Type *MallocType = getMallocAllocatedType(CI); + } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { + Type *MallocType = getMallocAllocatedType(CI, TLI); if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI, TD, TLI)) @@ -1964,7 +1967,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, bool Changed; if (isLeakCheckerRoot(GV)) { // Delete any constant stores to the global. - Changed = CleanupPointerRootUsers(GV); + Changed = CleanupPointerRootUsers(GV, TLI); } else { // Delete any stores we can find to the global. We may not be able to // make it completely dead though. diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 712888a..69a22fb 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -339,6 +340,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraph>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); SmallPtrSet<Function*, 8> SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); @@ -417,7 +419,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. - if (isInstructionTriviallyDead(CS.getInstruction())) { + if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) { DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. 
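The InstCombineMulDivRem.cpp hunk below adds the fold (x lshr C1) udiv C2 --> x udiv (C2 << C1). It rests on the identity floor(floor(x / 2^C1) / C2) == floor(x / (C2 * 2^C1)) for unsigned x, with the caveat that the folded constant C2 << C1 must still fit in the type. A spot-check of one instance (C1 = 2, C2 = 3), written as a standalone test rather than taken from the patch:

    #include <cassert>

    int main() {
      // (x lshr 2) udiv 3 must equal x udiv 12 for every unsigned x.
      for (unsigned x = 0; x < 1000000; ++x)
        assert((x >> 2) / 3 == x / 12);
      return 0;
    }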
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index cbe1ca4..b12fc01 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -168,7 +168,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { /// the heavy lifting. /// Instruction *InstCombiner::visitCallInst(CallInst &CI) { - if (isFreeCall(&CI)) + if (isFreeCall(&CI, TLI)) return visitFree(CI); // If the caller function is nounwind, mark the call as nounwind, even if the @@ -243,7 +243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { default: break; case Intrinsic::objectsize: { uint64_t Size; - if (getObjectSize(II->getArgOperand(0), Size, TD)) + if (getObjectSize(II->getArgOperand(0), Size, TD, TLI)) return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); return 0; } @@ -877,7 +877,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { - if (isAllocLikeFn(CS.getInstruction())) + if (isAllocLikeFn(CS.getInstruction(), TLI)) return visitAllocSite(*CS.getInstruction()); bool Changed = false; diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 35a0bbb..2a7182f 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -462,6 +462,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { } } + // (x lshr C1) udiv C2 --> x udiv (C2 << C1) + if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) { + Value *X; + ConstantInt *C1; + if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) { + APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1)); + return BinaryOperator::CreateUDiv(X, Builder->getInt(NC)); + } + } + // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) { const APInt *CI; Value *N; if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 68ecd51..ff758c4 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1068,7 +1068,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. if (isa<AllocaInst>(BCI->getOperand(0)) || - isAllocationFn(BCI->getOperand(0))) { + isAllocationFn(BCI->getOperand(0), TLI)) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1107,7 +1107,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { static bool -isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { +isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 4> Worklist; Worklist.push_back(AI); @@ -1163,7 +1164,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { } } - if (isFreeCall(I)) { + if (isFreeCall(I, TLI)) { Users.push_back(I); continue; } @@ -1188,7 +1189,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. 
SmallVector<WeakVH, 64> Users; - if (isAllocSiteRemovable(&MI, Users)) { + if (isAllocSiteRemovable(&MI, Users, TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { Instruction *I = cast_or_null<Instruction>(&*Users[i]); if (!I) continue; @@ -1872,7 +1873,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Instruction *Inst = BBI++; // DCE instruction if trivially dead. - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { ++NumDeadInst; DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); @@ -2002,7 +2003,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { if (I == 0) continue; // skip null values. // Check to see if we can DCE the instruction. - if (isInstructionTriviallyDead(I)) { + if (isInstructionTriviallyDead(I, TLI)) { DEBUG(errs() << "IC: DCE: " << *I << '\n'); EraseInstFromFunction(*I); ++NumDeadInst; @@ -2102,7 +2103,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // If the instruction was modified, it's possible that it is now dead. // if so, remove it. - if (isInstructionTriviallyDead(I)) { + if (isInstructionTriviallyDead(I, TLI)) { EraseInstFromFunction(*I); } else { Worklist.Add(I); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 06f4d2f..0775cf4 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -15,7 +15,7 @@ #define DEBUG_TYPE "asan" -#include "FunctionBlackList.h" +#include "BlackList.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" @@ -217,7 +217,7 @@ struct AddressSanitizer : public ModulePass { Function *AsanCtorFunction; Function *AsanInitFunction; Instruction *CtorInsertBefore; - OwningPtr<FunctionBlackList> BL; + OwningPtr<BlackList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; InlineAsm *EmptyAsm; @@ -544,6 +544,7 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { Type *Ty = cast<PointerType>(G->getType())->getElementType(); DEBUG(dbgs() << "GLOBAL: " << *G); + if (BL->isIn(*G)) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; // Touch only those globals that will not be defined in other modules. @@ -643,6 +644,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); // Determine whether this global should be poisoned in initialization. bool GlobalHasDynamicInitializer = HasDynamicInitializer(G); + // Don't check initialization order if this global is blacklisted. + GlobalHasDynamicInitializer &= !BL->isInInit(*G); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -736,7 +739,7 @@ bool AddressSanitizer::runOnModule(Module &M) { TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; - BL.reset(new FunctionBlackList(ClBlackListFile)); + BL.reset(new BlackList(ClBlackListFile)); C = &(M.getContext()); LongSize = TD->getPointerSizeInBits(); @@ -774,7 +777,7 @@ bool AddressSanitizer::runOnModule(Module &M) { /*hasSideEffects=*/true); llvm::Triple targetTriple(M.getTargetTriple()); - bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI; + bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; MappingOffset = isAndroid ? 
kDefaultShadowOffsetAndroid : (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64); diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp new file mode 100644 index 0000000..2cb1199 --- /dev/null +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -0,0 +1,102 @@ +//===-- BlackList.cpp - blacklist for sanitizers --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables based on a user-supplied blacklist. +// +//===----------------------------------------------------------------------===// + +#include <utility> +#include <string> + +#include "BlackList.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + +namespace llvm { + +BlackList::BlackList(const StringRef Path) { + // Validate and open blacklist file. + if (!Path.size()) return; + OwningPtr<MemoryBuffer> File; + if (error_code EC = MemoryBuffer::getFile(Path, File)) { + report_fatal_error("Can't open blacklist file: " + Path + ": " + + EC.message()); + } + + // Iterate through each line in the blacklist file. + SmallVector<StringRef, 16> Lines; + SplitString(File.take()->getBuffer(), Lines, "\n\r"); + StringMap<std::string> Regexps; + for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end(); + I != E; ++I) { + // Get our prefix and unparsed regexp. + std::pair<StringRef, StringRef> SplitLine = I->split(":"); + StringRef Prefix = SplitLine.first; + std::string Regexp = SplitLine.second; + + // Replace * with .* + for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; + pos += strlen(".*")) { + Regexp.replace(pos, strlen("*"), ".*"); + } + + // Check that the regexp is valid. + Regex CheckRE(Regexp); + std::string Error; + if (!CheckRE.isValid(Error)) { + report_fatal_error("malformed blacklist regex: " + SplitLine.second + + ": " + Error); + } + + // Add this regexp into the proper group by its prefix. + if (Regexps[Prefix].size()) + Regexps[Prefix] += "|"; + Regexps[Prefix] += Regexp; + } + + // Iterate through each of the prefixes, and create Regexs for them. + for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end(); + I != E; ++I) { + Entries[I->getKey()] = new Regex(I->getValue()); + } +} + +bool BlackList::isIn(const Function &F) { + return isIn(*F.getParent()) || inSection("fun", F.getName()); +} + +bool BlackList::isIn(const GlobalVariable &G) { + return isIn(*G.getParent()) || inSection("global", G.getName()); +} + +bool BlackList::isIn(const Module &M) { + return inSection("src", M.getModuleIdentifier()); +} + +bool BlackList::isInInit(const GlobalVariable &G) { + return isIn(*G.getParent()) || inSection("global-init", G.getName()); +} + +bool BlackList::inSection(const StringRef Section, + const StringRef Query) { + Regex *FunctionRegex = Entries[Section]; + return FunctionRegex ? 
FunctionRegex->match(Query) : false; +} + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h new file mode 100644 index 0000000..73977fc --- /dev/null +++ b/lib/Transforms/Instrumentation/BlackList.h @@ -0,0 +1,55 @@ +//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables based on a user-supplied blacklist. +// +// The blacklist disables instrumentation of various functions and global +// variables. Each line contains a prefix, followed by a wild card expression. +// --- +// fun:*_ZN4base6subtle* +// global:*global_with_bad_access_or_initialization* +// global-init:*global_with_initialization_issues* +// src:file_with_tricky_code.cc +// --- +// Note that the wild card is in fact an llvm::Regex, but * is automatically +// replaced with .* +// This is similar to the "ignore" feature of ThreadSanitizer. +// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores +// +//===----------------------------------------------------------------------===// +// + +#include "llvm/ADT/StringMap.h" + +namespace llvm { +class Function; +class GlobalVariable; +class Module; +class Regex; +class StringRef; + +class BlackList { + public: + BlackList(const StringRef Path); + // Returns whether either this function or its source file is blacklisted. + bool isIn(const Function &F); + // Returns whether either this global or its source file is blacklisted. + bool isIn(const GlobalVariable &G); + // Returns whether this module is blacklisted by filename. + bool isIn(const Module &M); + // Returns whether a global should be excluded from initialization checking. 
+ bool isInInit(const GlobalVariable &G); + private: + StringMap<Regex*> Entries; + + bool inSection(const StringRef Section, const StringRef Query); +}; + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 09e0f14..6429081 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/TargetFolder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Instrumentation.h" using namespace llvm; @@ -48,10 +49,12 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetData>(); + AU.addRequired<TargetLibraryInfo>(); } private: const TargetData *TD; + const TargetLibraryInfo *TLI; ObjectSizeOffsetEvaluator *ObjSizeEval; BuilderTy *Builder; Instruction *Inst; @@ -166,11 +169,12 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { bool BoundsChecking::runOnFunction(Function &F) { TD = &getAnalysis<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); TrapBB = 0; BuilderTy TheBuilder(F.getContext(), TargetFolder(TD)); Builder = &TheBuilder; - ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext()); + ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext()); ObjSizeEval = &TheObjSizeEval; // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 00de882..058f68c 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,8 +1,8 @@ add_llvm_library(LLVMInstrumentation AddressSanitizer.cpp + BlackList.cpp BoundsChecking.cpp EdgeProfiling.cpp - FunctionBlackList.cpp GCOVProfiling.cpp Instrumentation.cpp OptimalEdgeProfiling.cpp diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp deleted file mode 100644 index 188ea4d..0000000 --- a/lib/Transforms/Instrumentation/FunctionBlackList.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//===-- FunctionBlackList.cpp - blacklist of functions --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions based on -// user-supplied blacklist. 
-// -//===----------------------------------------------------------------------===// - -#include "FunctionBlackList.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Function.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" - -namespace llvm { - -FunctionBlackList::FunctionBlackList(const std::string &Path) { - Functions = NULL; - const char *kFunPrefix = "fun:"; - if (!Path.size()) return; - std::string Fun; - - OwningPtr<MemoryBuffer> File; - if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) { - report_fatal_error("Can't open blacklist file " + Path + ": " + - EC.message()); - } - MemoryBuffer *Buff = File.take(); - const char *Data = Buff->getBufferStart(); - size_t DataLen = Buff->getBufferSize(); - SmallVector<StringRef, 16> Lines; - SplitString(StringRef(Data, DataLen), Lines, "\n\r"); - for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) { - if (Lines[i].startswith(kFunPrefix)) { - std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix)); - std::string ThisFuncRE; - // add ThisFunc replacing * with .* - for (size_t j = 0, n = ThisFunc.size(); j < n; j++) { - if (ThisFunc[j] == '*') - ThisFuncRE += '.'; - ThisFuncRE += ThisFunc[j]; - } - // Check that the regexp is valid. - Regex CheckRE(ThisFuncRE); - std::string Error; - if (!CheckRE.isValid(Error)) - report_fatal_error("malformed blacklist regex: " + ThisFunc + - ": " + Error); - // Append to the final regexp. - if (Fun.size()) - Fun += "|"; - Fun += ThisFuncRE; - } - } - if (Fun.size()) { - Functions = new Regex(Fun); - } -} - -bool FunctionBlackList::isIn(const Function &F) { - if (Functions) { - bool Res = Functions->match(F.getName()); - return Res; - } - return false; -} - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h deleted file mode 100644 index c1239b9..0000000 --- a/lib/Transforms/Instrumentation/FunctionBlackList.h +++ /dev/null @@ -1,37 +0,0 @@ -//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions based on -// user-supplied blacklist. -// -//===----------------------------------------------------------------------===// -// - -#include <string> - -namespace llvm { -class Function; -class Regex; - -// Blacklisted functions are not instrumented. -// The blacklist file contains one or more lines like this: -// --- -// fun:FunctionWildCard -// --- -// This is similar to the "ignore" feature of ThreadSanitizer. 
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores -class FunctionBlackList { - public: - FunctionBlackList(const std::string &Path); - bool isIn(const Function &F); - private: - Regex *Functions; -}; - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 264a6a6..9fcde31 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -88,11 +88,10 @@ namespace { // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *, - MDNode *>, 8> &); + void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); - std::string mangleName(DICompileUnit CU, std::string NewStem); + std::string mangleName(DICompileUnit CU, const char *NewStem); bool EmitNotes; bool EmitData; @@ -329,7 +328,7 @@ namespace { }; } -std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) { +std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) { for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) { MDNode *N = GCov->getOperand(i); @@ -630,7 +629,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { } void GCOVProfiler::insertCounterWriteout( - SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) { + ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = Function::Create(WriteoutFTy, @@ -652,7 +651,7 @@ void GCOVProfiler::insertCounterWriteout( std::string FilenameGcda = mangleName(compile_unit, "gcda"); Builder.CreateCall(StartFile, Builder.CreateGlobalStringPtr(FilenameGcda)); - for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator + for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator I = CountersBySP.begin(), E = CountersBySP.end(); I != E; ++I) { DISubprogram SP(I->second); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index dc0fa71..17b7775 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -21,7 +21,7 @@ #define DEBUG_TYPE "tsan" -#include "FunctionBlackList.h" +#include "BlackList.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" @@ -50,7 +50,7 @@ static cl::opt<std::string> ClBlackListFile("tsan-blacklist", STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); -STATISTIC(NumOmittedReadsBeforeWrite, +STATISTIC(NumOmittedReadsBeforeWrite, "Number of reads ignored due to following writes"); STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); @@ -77,7 +77,7 @@ struct ThreadSanitizer : public FunctionPass { int getMemoryAccessFuncIndex(Value *Addr); TargetData *TD; - OwningPtr<FunctionBlackList> BL; + OwningPtr<BlackList> BL; IntegerType *OrdTy; // Callbacks to run-time library are computed in doInitialization. 
Function *TsanFuncEntry; @@ -121,7 +121,7 @@ bool ThreadSanitizer::doInitialization(Module &M) { TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; - BL.reset(new FunctionBlackList(ClBlackListFile)); + BL.reset(new BlackList(ClBlackListFile)); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); @@ -186,7 +186,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { NumOmittedReadsFromConstantGlobals++; return true; } - } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) { + } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) { if (isVtableAccess(L)) { // Reads from a vtable pointer can not race with any writes. NumOmittedReadsFromVtable++; @@ -344,7 +344,7 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { case NotAtomic: assert(false); case Unordered: // Fall-through. case Monotonic: v = 1 << 0; break; - // case Consume: v = 1 << 1; break; // Not specified yet. + // case Consume: v = 1 << 1; break; // Not specified yet. case Acquire: v = 1 << 2; break; case Release: v = 1 << 3; break; case AcquireRelease: v = 1 << 4; break; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index a8deda8..5912107 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -43,6 +43,7 @@ #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -148,7 +149,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) { PFI = getAnalysisIfAvailable<ProfileInfo>(); OptSize = F.hasFnAttr(Attribute::OptimizeForSize); - // First pass, eliminate blocks that contain only PHI nodes and an + /// This optimization identifies DIV instructions that can be + /// profitably bypassed and carried out with a shorter, faster divide. + if (TLI && TLI->isSlowDivBypassed()) { + const DenseMap<Type *, Type *> &BypassTypeMap = TLI->getBypassSlowDivTypes(); + + for (Function::iterator I = F.begin(); I != F.end(); I++) { + EverMadeChange |= bypassSlowDivision(F, + I, + BypassTypeMap); + } + } + + // Eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -988,7 +1001,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, WeakVH IterHandle(CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); - RecursivelyDeleteTriviallyDeadInstructions(Repl); + RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); if (IterHandle != CurInstIterator) { // If the iterator instruction was recursively deleted, start over at the @@ -1174,17 +1187,32 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { } +/// If we have a SelectInst that will likely profit from branch prediction, +/// turn it into a branch. bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { - // If we have a SelectInst that will likely profit from branch prediction, - // turn it into a branch. - if (DisableSelectToBranch || OptSize || !TLI || - !TLI->isPredictableSelectExpensive()) - return false; + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); - if (!SI->getCondition()->getType()->isIntegerTy(1) || - !isFormingBranchFromSelectProfitable(SI)) + // Can we convert the 'select' to CF ? 
+ if (DisableSelectToBranch || OptSize || !TLI || VectorCond) return false; + TargetLowering::SelectSupportKind SelectKind; + if (VectorCond) + SelectKind = TargetLowering::VectorMaskSelect; + else if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + + // Do we have efficient codegen support for this kind of 'selects' ? + if (TLI->isSelectSupported(SelectKind)) { + // We have efficient codegen support for the select instruction. + // Check if it is profitable to keep this 'select'. + if (!TLI->isPredictableSelectExpensive() || + !isFormingBranchFromSelectProfitable(SI)) + return false; + } + ModifiedDT = true; // First, we split the block containing the select into 2 blocks. diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 8dbcc23..086f0a1 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -22,6 +22,7 @@ #include "llvm/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -38,10 +39,11 @@ namespace { initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry()); } virtual bool runOnBasicBlock(BasicBlock &BB) { + TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { Instruction *Inst = DI++; - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { Inst->eraseFromParent(); Changed = true; ++DIEEliminated; @@ -87,6 +89,8 @@ char DCE::ID = 0; INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false) bool DCE::runOnFunction(Function &F) { + TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); + // Start out with all of the instructions in the worklist... std::vector<Instruction*> WorkList; for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) @@ -101,7 +105,7 @@ bool DCE::runOnFunction(Function &F) { Instruction *I = WorkList.back(); WorkList.pop_back(); - if (isInstructionTriviallyDead(I)) { // If the instruction is dead. + if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead. // Loop over all of the values that the instruction uses, if there are // instructions being used, add them to the worklist, because they might // go dead after this one is removed. 
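For reference, the BlackList class added above (replacing FunctionBlackList) reads one prefix:wildcard pair per line and rewrites each * to the regex .*. Reusing the sample entries from the BlackList.h header comment, an input file looks like:

    fun:*_ZN4base6subtle*
    global:*global_with_bad_access_or_initialization*
    global-init:*global_with_initialization_issues*
    src:file_with_tricky_code.cc

The constructor joins all patterns sharing a prefix into one alternation (the fun line above becomes the single regex .*_ZN4base6subtle.*), and isIn(Function&) first matches the "src" group against the module identifier, then the "fun" group against the function name.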
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 8b1283f..1ff4329 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -106,6 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, + const TargetLibraryInfo *TLI, SmallSetVector<Value*, 16> *ValueSet = 0) { SmallVector<Instruction*, 32> NowDeadInsts; @@ -130,7 +131,7 @@ static void DeleteDeadInstruction(Instruction *I, if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) NowDeadInsts.push_back(OpI); } @@ -276,7 +277,7 @@ static Value *getStoredPointerOperand(Instruction *I) { static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { uint64_t Size; - if (getObjectSize(V, Size, AA.getTargetData())) + if (getObjectSize(V, Size, AA.getTargetData(), AA.getTargetLibraryInfo())) return Size; return AliasAnalysis::UnknownSize; } @@ -454,7 +455,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(Inst)) { + if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) { MadeChange |= HandleFree(F); continue; } @@ -483,7 +484,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI, *MD); + DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo()); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -530,7 +531,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD); + DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo()); ++NumFastStores; MadeChange = true; @@ -640,7 +641,7 @@ bool DSE::HandleFree(CallInst *F) { Instruction *Next = llvm::next(BasicBlock::iterator(Dependency)); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD); + DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo()); ++NumFastStores; MadeChange = true; @@ -680,7 +681,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true)) + else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) && + !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } @@ -724,7 +726,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. - DeleteDeadInstruction(Dead, *MD, &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(), + &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -732,9 +735,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. 
- if (isInstructionTriviallyDead(BBI)) { + if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) { Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, &DeadStackObjects); + DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(), + &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -750,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI)) + if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo())) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our @@ -771,15 +775,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { LiveAllocas.push_back(*I); } - for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), - E = LiveAllocas.end(); I != E; ++I) - DeadStackObjects.remove(*I); - // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. - if (DeadStackObjects.empty()) + if (DeadStackObjects.size() == LiveAllocas.size()) break; + for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), + E = LiveAllocas.end(); I != E; ++I) + DeadStackObjects.remove(*I); + continue; } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 9759549..2627113 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -374,7 +374,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { Instruction *Inst = I++; // Dead instructions should just be removed. - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); Inst->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 4822fd0..16ae6ad 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -271,16 +271,16 @@ void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } -uint32_t ValueTable::lookup_or_add_call(CallInst* C) { +uint32_t ValueTable::lookup_or_add_call(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = create_expression(C); - uint32_t& e = expressionNumbering[exp]; + uint32_t &e = expressionNumbering[exp]; if (!e) e = nextValueNumber++; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = create_expression(C); - uint32_t& e = expressionNumbering[exp]; + uint32_t &e = expressionNumbering[exp]; if (!e) { e = nextValueNumber++; valueNumbering[C] = e; @@ -413,7 +413,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::LShr: case Instruction::AShr: case Instruction::And: - case Instruction::Or : + case Instruction::Or: case Instruction::Xor: case Instruction::ICmp: case Instruction::FCmp: @@ -632,6 +632,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) +#ifndef NDEBUG void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), @@ -641,6 +642,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) { } errs() << "}\n"; } +#endif /// IsValueFullyAvailableInBlock - Return true if we can prove that the value /// we're analyzing is fully available in the specified block. 
As we go, keep @@ -1436,7 +1438,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) || + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || // Loading immediately after lifetime begin -> undef. isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, @@ -1951,7 +1953,7 @@ bool GVN::processLoad(LoadInst *L) { // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) { + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); markInstructionForDeletion(L); ++NumGVNLoad; @@ -2231,12 +2233,20 @@ bool GVN::processInstruction(Instruction *I) { Value *SwitchCond = SI->getCondition(); BasicBlock *Parent = SI->getParent(); bool Changed = false; + + // Remember how many outgoing edges there are to every successor. + SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; + for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i) + ++SwitchEdges[SI->getSuccessor(i)]; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { BasicBlock *Dst = i.getCaseSuccessor(); - BasicBlockEdge E(Parent, Dst); - if (E.isSingleEdge()) + // If there is only a single edge, propagate the case value into it. + if (SwitchEdges.lookup(Dst) == 1) { + BasicBlockEdge E(Parent, Dst); Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); + } } return Changed; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 37f8bdf..c933a17 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -44,6 +44,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -68,6 +69,7 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; TargetData *TD; + TargetLibraryInfo *TLI; SmallVector<WeakVH, 16> DeadInsts; bool Changed; @@ -414,11 +416,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // new comparison. NewCompare->takeName(Compare); Compare->replaceAllUsesWith(NewCompare); - RecursivelyDeleteTriviallyDeadInstructions(Compare); + RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI); // Delete the old floating point increment. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - RecursivelyDeleteTriviallyDeadInstructions(Incr); + RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI); // If the FP induction variable still has uses, this is because something else // in the loop uses its value. 
In order to canonicalize the induction @@ -431,7 +433,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", PN->getParent()->getFirstInsertionPt()); PN->replaceAllUsesWith(Conv); - RecursivelyDeleteTriviallyDeadInstructions(PN); + RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } Changed = true; } @@ -550,14 +552,14 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. - RecursivelyDeleteTriviallyDeadInstructions(Inst); + RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (NumPreds == 1) { // Completely replace a single-pred PHI. This is safe, because the // NewVal won't be variant in the loop, so we don't need an LCSSA phi // node anymore. PN->replaceAllUsesWith(ExitVal); - RecursivelyDeleteTriviallyDeadInstructions(PN); + RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } } if (NumPreds != 1) { @@ -1697,6 +1699,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); DeadInsts.clear(); Changed = false; @@ -1763,7 +1766,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); + RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); // The Rewriter may not be used from this point on. @@ -1772,7 +1775,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SinkUnusedInvariants(L); // Clean up dead instructions. - Changed |= DeleteDeadPHIs(L->getHeader()); + Changed |= DeleteDeadPHIs(L->getHeader(), TLI); // Check a post-condition. assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index dd42c59..20844c6 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1455,7 +1455,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. - SimplifyInstructionsInBlock(NewBB, TD); + SimplifyInstructionsInBlock(NewBB, TD, TLI); // Threaded an edge! ++NumThreads; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 0192e92..99bedce 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -108,6 +108,9 @@ namespace { BasicBlock *Preheader; // The preheader block of the current loop... Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... + bool MayThrow; // The current loop contains an instruction which + // may throw, thus preventing code motion of + // instructions with side effects. DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. @@ -240,6 +243,15 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { CurAST->add(*BB); // Incorporate the specified basic block } + MayThrow = false; + // TODO: We've already searched for instructions which may throw in subloops. 
+ // We may want to reuse this information. + for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end(); + (BB != BBE) && !MayThrow ; ++BB) + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); + (I != E) && !MayThrow; ++I) + MayThrow |= I->mayThrow(); + // We want to visit all of the instructions in this loop... that are not parts // of our subloops (they have already had their invariants hoisted out of // their loop, into this loop, so there is no need to process the BODIES of @@ -307,7 +319,7 @@ void LICM::SinkRegion(DomTreeNode *N) { // If the instruction is dead, we would try to sink it because it isn't used // in the loop, instead, just delete it. - if (isInstructionTriviallyDead(&I)) { + if (isInstructionTriviallyDead(&I, TLI)) { DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); ++II; CurAST->deleteValue(&I); @@ -418,17 +430,22 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { if (!FoundMod) return true; } - // FIXME: This should use mod/ref information to see if we can hoist or sink - // the call. + // FIXME: This should use mod/ref information to see if we can hoist or + // sink the call. return false; } - // Otherwise these instructions are hoistable/sinkable - return isa<BinaryOperator>(I) || isa<CastInst>(I) || - isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || - isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || - isa<ShuffleVectorInst>(I); + // Only these instructions are hoistable/sinkable. + bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) || + isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || + isa<CmpInst>(I) || isa<InsertElementInst>(I) || + isa<ExtractElementInst>(I) || + isa<ShuffleVectorInst>(I)); + if (!HoistableKind) + return false; + + return isSafeToExecuteUnconditionally(I); } /// isNotUsedInLoop - Return true if the only users of this instruction are @@ -604,6 +621,12 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { } bool LICM::isGuaranteedToExecute(Instruction &Inst) { + + // Somewhere in this loop there is an instruction which may throw and make us + // exit the loop. + if (MayThrow) + return false; + // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index ac1082c..a72e288 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -132,7 +132,8 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } /// and zero out all the operands of this instruction. If any of them become /// dead, delete them and the computation tree that feeds them. 
/// -static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { +static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -153,7 +154,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) NowDeadInsts.push_back(OpI); } @@ -164,10 +165,11 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { /// deleteIfDeadInstruction - If the specified value is a dead instruction, /// delete it and any recursively used instructions. -static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { +static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) { if (Instruction *I = dyn_cast<Instruction>(V)) - if (isInstructionTriviallyDead(I)) - deleteDeadInstruction(I, SE); + if (isInstructionTriviallyDead(I, TLI)) + deleteDeadInstruction(I, SE, TLI); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -490,7 +492,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(BasePtr, *SE); + deleteIfDeadInstruction(BasePtr, *SE, TLI); return false; } @@ -538,7 +540,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - deleteDeadInstruction(TheStore, *SE); + deleteDeadInstruction(TheStore, *SE, TLI); ++NumMemSet; return true; } @@ -579,7 +581,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, getAnalysis<AliasAnalysis>(), SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(StoreBasePtr, *SE); + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); return false; } @@ -594,8 +596,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, StoreSize, getAnalysis<AliasAnalysis>(), SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(LoadBasePtr, *SE); - deleteIfDeadInstruction(StoreBasePtr, *SE); + deleteIfDeadInstruction(LoadBasePtr, *SE, TLI); + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); return false; } @@ -628,7 +630,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. 
- deleteDeadInstruction(SI, *SE); + deleteDeadInstruction(SI, *SE, TLI); ++NumMemCpy; return true; } diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 982400c..f5daa7b 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -120,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumSimplified; } } - LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I); + LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); if (IsSubloopHeader && !isa<PHINode>(I)) break; diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7eeb152..abe07aa 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) { return false; BasicBlock *OrigHeader = L->getHeader(); + BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); if (BI == 0 || BI->isUnconditional()) @@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) { if (!L->isLoopExiting(OrigHeader)) return false; - // Updating PHInodes in loops with multiple exits adds complexity. - // Keep it simple, and restrict loop rotation to loops with one exit only. - // In future, lift this restriction and support for multiple exits if - // required. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - if (ExitBlocks.size() > 1) + // If the loop latch already contains a branch that leaves the loop then the + // loop is already rotated. + if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) return false; // Check size of original header and reject loop if it is very big. @@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); - BasicBlock *OrigLatch = L->getLoopLatch(); // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (OrigPreheader == 0 || OrigLatch == 0) + if (OrigPreheader == 0) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes @@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) { if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) SE->forgetLoop(L); + DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. @@ -408,10 +407,19 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update DominatorTree to reflect the CFG change we just made. Then split // edges as necessary to preserve LoopSimplify form. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { - // Since OrigPreheader now has the conditional branch to Exit block, it is - // the dominator of Exit. - DT->changeImmediateDominator(Exit, OrigPreheader); - DT->changeImmediateDominator(NewHeader, OrigPreheader); + // Everything that was dominated by the old loop header is now dominated + // by the original loop preheader. 
Conceptually the header was merged + // into the preheader, even though we reuse the actual block as a new + // loop latch. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader); + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) + DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode); + + assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode); + assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode); // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(OrigHeader, OrigLatch); @@ -440,6 +448,35 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(NewHeader, OrigPreheader); DT->changeImmediateDominator(OrigHeader, OrigLatch); + + // Brute force incremental dominator tree update. Call + // findNearestCommonDominator on all CFG predecessors of each child of the + // original header. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + bool Changed; + do { + Changed = false; + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) { + DomTreeNode *Node = HeaderChildren[I]; + BasicBlock *BB = Node->getBlock(); + + pred_iterator PI = pred_begin(BB); + BasicBlock *NearestDom = *PI; + for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) + NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); + + // Remember if this changes the DomTree. + if (Node->getIDom()->getBlock() != NearestDom) { + DT->changeImmediateDominator(BB, NearestDom); + Changed = true; + } + } + + // If the dominator changed, this may have an effect on other + // predecessors, continue until we reach a fixpoint. + } while (Changed); } } @@ -452,6 +489,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // emitted code isn't too gross in this common case. MergeBlockIntoPredecessor(OrigHeader, this); + DEBUG(dbgs() << "LoopRotation: into "; L->dump()); + ++NumRotated; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 0ae7a51..d7495da 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -121,9 +121,11 @@ void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; } +#ifndef NDEBUG void RegSortData::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -414,9 +416,11 @@ void Formula::print(raw_ostream &OS) const { } } +#ifndef NDEBUG void Formula::dump() const { print(errs()); errs() << '\n'; } +#endif /// isAddRecSExtable - Return true if the given addrec can be sign-extended /// without changing its value. 
@@ -974,9 +978,11 @@ void Cost::print(raw_ostream &OS) const { OS << ", plus " << SetupCost << " setup cost"; } +#ifndef NDEBUG void Cost::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1060,9 +1066,11 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", Offset=" << Offset; } +#ifndef NDEBUG void LSRFixup::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1252,9 +1260,11 @@ void LSRUse::print(raw_ostream &OS) const { OS << ", widest fixup type: " << *WidestFixupType; } +#ifndef NDEBUG void LSRUse::dump() const { print(errs()); errs() << '\n'; } +#endif /// isLegalUse - Test whether the use described by AM is "legal", meaning it can /// be completely folded into the user instruction at isel time. This includes @@ -3436,9 +3446,11 @@ void WorkItem::print(raw_ostream &OS) const { << " , add offset " << Imm; } +#ifndef NDEBUG void WorkItem::dump() const { print(errs()); errs() << '\n'; } +#endif /// GenerateCrossUseConstantOffsets - Look for registers which are a constant /// distance apart and try to form reuse opportunities between them. @@ -4731,9 +4743,11 @@ void LSRInstance::print(raw_ostream &OS) const { print_uses(OS); } +#ifndef NDEBUG void LSRInstance::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 3222f20..dce8e8b 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1236,16 +1236,19 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { // An ObjC-Identified object can't alias a load if it is never locally stored. if (AIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(B)) + return isStoredObjCPointer(A); if (BIsIdentified) { - // If both pointers have provenance, they can be directly compared. - if (A != B) - return false; - } else { - if (isa<LoadInst>(B)) - return isStoredObjCPointer(A); + // Check for an obvious escape. + if (isa<LoadInst>(A)) + return isStoredObjCPointer(B); + // Both pointers are identified and escapes aren't an evident problem. + return false; } - } else { - if (BIsIdentified && isa<LoadInst>(A)) + } else if (BIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(A)) + return isStoredObjCPointer(B); } @@ -1381,9 +1384,6 @@ namespace { /// PtrState - This class summarizes several per-pointer runtime properties /// which are propagated through the flow graph. class PtrState { - /// NestCount - The known minimum level of retain+release nesting. - unsigned NestCount; - /// KnownPositiveRefCount - True if the reference count is known to /// be incremented. bool KnownPositiveRefCount; @@ -1401,7 +1401,7 @@ namespace { /// TODO: Encapsulate this better.
RRInfo RRI; - PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false), + PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {} void SetKnownPositiveRefCount() { @@ -1416,18 +1416,6 @@ namespace { return KnownPositiveRefCount; } - void IncrementNestCount() { - if (NestCount != UINT_MAX) ++NestCount; - } - - void DecrementNestCount() { - if (NestCount != 0) --NestCount; - } - - bool IsKnownNested() const { - return NestCount > 0; - } - void SetSeq(Sequence NewSeq) { Seq = NewSeq; } @@ -1454,7 +1442,6 @@ void PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - NestCount = std::min(NestCount, Other.NestCount); // We can't merge a plain objc_retain with an objc_retainBlock. if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) @@ -1868,6 +1855,26 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { return AutoreleaseCallee; } +/// IsPotentialUse - Test whether the given value is possibly a +/// reference-counted pointer, including tests which utilize AliasAnalysis. +static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { + // First make the rudimentary check. + if (!IsPotentialUse(Op)) + return false; + + // Objects in constant memory are not reference-counted. + if (AA.pointsToConstantMemory(Op)) + return false; + + // Pointers in constant memory are not pointing to reference-counted objects. + if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst. + return true; +} + /// CanAlterRefCount - Test whether the given instruction can result in a /// reference count modification (positive or negative) for the pointer's /// object. @@ -1894,7 +1901,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr, for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) { const Value *Op = *I; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -1919,14 +1926,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, // Comparing a pointer with null, or any other constant, isn't really a use, // because we don't care what the pointer points to, or about the values // of any other dynamic reference-counted pointers. - if (!IsPotentialUse(ICI->getOperand(1))) + if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA())) return false; } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { // For calls, just check the arguments (and not the callee operand). for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -1936,14 +1943,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); // If we can't tell what the underlying object was, assume there is a // dependence. - return IsPotentialUse(Op) && PA.related(Op, Ptr); + return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr); } // Check each operand for a match.
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -2612,11 +2619,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); + S.RRI.KnownSafe = S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); - S.IncrementNestCount(); + S.SetKnownPositiveRefCount(); break; } case IC_RetainBlock: @@ -2631,7 +2638,6 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, PtrState &S = MyStates.getPtrBottomUpState(Arg); S.SetKnownPositiveRefCount(); - S.DecrementNestCount(); switch (S.GetSeq()) { case S_Stop: @@ -2747,8 +2753,9 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, // Merge the states from each successor to compute the initial state // for the current block. - for (BBState::edge_iterator SI(MyStates.succ_begin()), - SE(MyStates.succ_end()); SI != SE; ++SI) { + BBState::edge_iterator SI(MyStates.succ_begin()), + SE(MyStates.succ_end()); + if (SI != SE) { const BasicBlock *Succ = *SI; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); assert(I != BBStates.end()); @@ -2760,7 +2767,6 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, assert(I != BBStates.end()); MyStates.MergeSucc(I->second); } - break; } // Visit all the instructions, bottom-up. @@ -2823,12 +2829,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, S.ResetSequenceProgress(S_Retain); S.RRI.IsRetainBlock = Class == IC_RetainBlock; - // Don't check S.IsKnownIncremented() here because it's not sufficient. - S.RRI.KnownSafe = S.IsKnownNested(); + S.RRI.KnownSafe = S.IsKnownIncremented(); S.RRI.Calls.insert(Inst); } - S.IncrementNestCount(); + S.SetKnownPositiveRefCount(); // A retain can be a potential use; proceed to the generic checking // code below. @@ -2838,7 +2843,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); - S.DecrementNestCount(); + S.ClearRefCount(); switch (S.GetSeq()) { case S_Retain: @@ -2935,8 +2940,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // Merge the states from each predecessor to compute the initial state // for the current block. - for (BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); PI != PE; ++PI) { + BBState::edge_iterator PI(MyStates.pred_begin()), + PE(MyStates.pred_end()); + if (PI != PE) { const BasicBlock *Pred = *PI; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); assert(I != BBStates.end()); @@ -2948,7 +2954,6 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, assert(I != BBStates.end()); MyStates.MergePred(I->second); } - break; } // Visit all the instructions, top-down.
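The VisitBottomUp/VisitTopDown hunks above replace a loop that always broke out after its first iteration with an explicit "copy the first edge's state, then merge the rest" shape. The same shape, reduced to a self-contained sketch over plain integers (all names below are illustrative, with std::min standing in for the PtrState merge):

    #include <algorithm>
    #include <vector>

    // Initialize the block state from the first successor, then merge each
    // remaining successor into it, mirroring how MyStates is seeded from the
    // first edge and then MergeSucc'ed with the rest.
    static int mergeSuccessorStates(const std::vector<int> &SuccStates,
                                    int DefaultState) {
      std::vector<int>::const_iterator I = SuccStates.begin(),
                                       E = SuccStates.end();
      if (I == E)
        return DefaultState;         // No successors: keep the default state.
      int State = *I;                // Copy the first successor's state.
      for (++I; I != E; ++I)
        State = std::min(State, *I); // Merge each remaining successor.
      return State;
    }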
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index d13e4ab..6d27db1 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -59,9 +59,9 @@ FunctionPass *llvm::createCFGSimplificationPass() { return new CFGSimplifyPass(); } -/// ChangeToUnreachable - Insert an unreachable instruction before the specified +/// changeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -87,8 +87,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { } } -/// ChangeToCall - Convert the specified invoke into a normal call. -static void ChangeToCall(InvokeInst *II) { +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); NewCall->takeName(II); @@ -105,7 +105,7 @@ static void ChangeToCall(InvokeInst *II) { II->eraseFromParent(); } -static bool MarkAliveBlocks(BasicBlock *BB, +static bool markAliveBlocks(BasicBlock *BB, SmallPtrSet<BasicBlock*, 128> &Reachable) { SmallVector<BasicBlock*, 128> Worklist; @@ -129,7 +129,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, ++BBI; if (!isa<UnreachableInst>(BBI)) { // Don't insert a call to llvm.trap right before the unreachable. - ChangeToUnreachable(BBI, false); + changeToUnreachable(BBI, false); Changed = true; } break; @@ -148,7 +148,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && SI->getPointerAddressSpace() == 0)) { - ChangeToUnreachable(SI, true); + changeToUnreachable(SI, true); Changed = true; break; } @@ -159,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Value *Callee = II->getCalledValue(); if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { - ChangeToUnreachable(II, true); + changeToUnreachable(II, true); Changed = true; } else if (II->doesNotThrow()) { if (II->use_empty() && II->onlyReadsMemory()) { @@ -168,7 +168,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, II->getUnwindDest()->removePredecessor(II->getParent()); II->eraseFromParent(); } else - ChangeToCall(II); + changeToCall(II); Changed = true; } } @@ -180,12 +180,12 @@ static bool MarkAliveBlocks(BasicBlock *BB, return Changed; } -/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. -static bool RemoveUnreachableBlocksFromFn(Function &F) { +static bool removeUnreachableBlocksFromFn(Function &F) { SmallPtrSet<BasicBlock*, 128> Reachable; - bool Changed = MarkAliveBlocks(F.begin(), Reachable); + bool Changed = markAliveBlocks(F.begin(), Reachable); // If there are unreachable blocks in the CFG... 
if (Reachable.size() == F.size()) @@ -215,9 +215,9 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) { return true; } -/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi +/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi /// node) return blocks, merge them together to promote recursive block merging. -static bool MergeEmptyReturnBlocks(Function &F) { +static bool mergeEmptyReturnBlocks(Function &F) { bool Changed = false; BasicBlock *RetBlock = 0; @@ -291,9 +291,9 @@ static bool MergeEmptyReturnBlocks(Function &F) { return Changed; } -/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function, +/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { +static bool iterativelySimplifyCFG(Function &F, const TargetData *TD) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -317,24 +317,24 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { // bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetData *TD = getAnalysisIfAvailable<TargetData>(); - bool EverChanged = RemoveUnreachableBlocksFromFn(F); - EverChanged |= MergeEmptyReturnBlocks(F); - EverChanged |= IterativeSimplifyCFG(F, TD); + bool EverChanged = removeUnreachableBlocksFromFn(F); + EverChanged |= mergeEmptyReturnBlocks(F); + EverChanged |= iterativelySimplifyCFG(F, TD); // If neither pass changed anything, we're done. if (!EverChanged) return false; - // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens, - // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should + // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, + // removeUnreachableBlocksFromFn is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to - // avoid reruning IterativeSimplifyCFG if the second pass of - // RemoveUnreachableBlocksFromFn doesn't do anything. - if (!RemoveUnreachableBlocksFromFn(F)) + // avoid reruning iterativelySimplifyCFG if the second pass of + // removeUnreachableBlocksFromFn doesn't do anything. 
+ if (!removeUnreachableBlocksFromFn(F)) return true; do { - EverChanged = IterativeSimplifyCFG(F, TD); - EverChanged |= RemoveUnreachableBlocksFromFn(F); + EverChanged = iterativelySimplifyCFG(F, TD); + EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); return true; diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 3904419..65311fe 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" @@ -38,6 +39,10 @@ using namespace llvm; STATISTIC(NumSimplified, "Number of library calls simplified"); STATISTIC(NumAnnotated, "Number of attributes added to library functions"); +static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); //===----------------------------------------------------------------------===// // Optimizer Base Class //===----------------------------------------------------------------------===// @@ -893,16 +898,56 @@ struct MemSetOpt : public LibCallOptimization { //===----------------------------------------------------------------------===// //===---------------------------------------===// -// 'cos*' Optimizations +// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' + +struct UnaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) + return 0; + + if (CheckRetType) { + // Check if all the uses of a function like 'sin' are converted to float. + for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); + ++UseI) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); + if (Cast == 0 || !Cast->getType()->isFloatTy()) + return 0; + } + } + + // If this is something like 'floor((double)floatval)', convert to floorf. + FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) + return 0; + + // floor((double)floatval) -> (double)floorf(floatval) + Value *V = Cast->getOperand(0); + V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; +//===---------------------------------------===// +// 'cos*' Optimizations struct CosOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "cos" && + TLI->has(LibFunc::cosf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; // cos(-x) -> cos(x) Value *Op1 = CI->getArgOperand(0); @@ -910,7 +955,7 @@ struct CosOpt : public LibCallOptimization { BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); } - return 0; + return Ret; } }; @@ -919,13 +964,20 @@ struct CosOpt : public LibCallOptimization { struct PowOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "pow" && + TLI->has(LibFunc::powf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { @@ -936,7 +988,7 @@ struct PowOpt : public LibCallOptimization { } ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (Op2C == 0) return 0; + if (Op2C == 0) return Ret; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); @@ -974,12 +1026,19 @@ struct PowOpt : public LibCallOptimization { struct Exp2Opt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "exp2" && + TLI->has(LibFunc::exp2)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 @@ -1016,29 +1075,7 @@ struct Exp2Opt : public LibCallOptimization { return CI; } - return 0; - } -}; - -//===---------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' - -struct UnaryDoubleFPOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || - !FT->getParamType(0)->isDoubleTy()) - return 0; - - // If this is something like 'floor((double)floatval)', convert to floorf. 
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; - - // floor((double)floatval) -> (double)floorf(floatval) - Value *V = Cast->getOperand(0); - V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); - return B.CreateFPExt(V, B.getDoubleTy()); + return Ret; } }; @@ -1534,7 +1571,8 @@ namespace { StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations - CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; + CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; + UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; // Integer Optimizations FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii; ToAsciiOpt ToAscii; @@ -1547,10 +1585,13 @@ namespace { public: static char ID; // Pass identification SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true), - StpCpy(false), StpCpyChk(true) { + StpCpy(false), StpCpyChk(true), + UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } void AddOpt(LibFunc::Func F, LibCallOptimization* Opt); + void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); + void InitOptimizations(); bool runOnFunction(Function &F); @@ -1586,6 +1627,12 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) { Optimizations[TLI->getName(F)] = Opt; } +void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, + LibCallOptimization* Opt) { + if (TLI->has(F1) && TLI->has(F2)) + Optimizations[TLI->getName(F1)] = Opt; +} + /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. 
void SimplifyLibCalls::InitOptimizations() { @@ -1641,20 +1688,37 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; - if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf)) - Optimizations["fabs"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf)) - Optimizations["floor"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf)) - Optimizations["ceil"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf)) - Optimizations["round"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf)) - Optimizations["rint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf)) - Optimizations["nearbyint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf)) - Optimizations["trunc"] = &UnaryDoubleFP; + AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); + AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); + AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); + AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); + AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); + AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); + AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); + + if (UnsafeFPShrink) { + AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); + } // Integer Optimizations Optimizations["ffs"] = &FFS; diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index d831452..1e6586b 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -55,10 +55,12 @@ void ExtAddrMode::print(raw_ostream &OS) const { OS << ']'; } +#ifndef NDEBUG void ExtAddrMode::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2679b93..75a7817 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -94,7 +94,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { /// is dead.
Also recursively delete any operands that become dead as /// a result. This includes tracing the def-use list from the PHI to see if /// it is ultimately unused or if it reaches an unused cycle. -bool llvm::DeleteDeadPHIs(BasicBlock *BB) { +bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. SmallVector<WeakVH, 8> PHIs; @@ -105,7 +105,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) { bool Changed = false; for (unsigned i = 0, e = PHIs.size(); i != e; ++i) if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*())) - Changed |= RecursivelyDeleteDeadPHINode(PN); + Changed |= RecursivelyDeleteDeadPHINode(PN, TLI); return Changed; } diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp new file mode 100644 index 0000000..30d60be --- /dev/null +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -0,0 +1,253 @@ +//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an optimization for div and rem on architectures that +// execute short instructions significantly faster than longer instructions. +// For example, on Intel Atom 32-bit divides are slow enough that during +// runtime it is profitable to check the value of the operands, and if they are +// positive and less than 256 use an unsigned 8-bit divide. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "bypass-slow-division" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" + +using namespace llvm; + +namespace { + struct DivOpInfo { + bool SignedOp; + Value *Dividend; + Value *Divisor; + + DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) + : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} + }; + + struct DivPhiNodes { + PHINode *Quotient; + PHINode *Remainder; + + DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; +} + +namespace llvm { + template<> + struct DenseMapInfo<DivOpInfo> { + static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { + return Val1.SignedOp == Val2.SignedOp && + Val1.Dividend == Val2.Dividend && + Val1.Divisor == Val2.Divisor; + } + + static DivOpInfo getEmptyKey() { + return DivOpInfo(false, 0, 0); + } + + static DivOpInfo getTombstoneKey() { + return DivOpInfo(true, 0, 0); + } + + static unsigned getHashValue(const DivOpInfo &Val) { + return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ + reinterpret_cast<uintptr_t>(Val.Divisor)) ^ + (unsigned)Val.SignedOp; + } + }; + + typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; +} + +// insertFastDiv - Substitutes the div/rem instruction with code that checks the +// value of the operands and uses a shorter-faster div/rem instruction when +// possible and the longer-slower div/rem instruction otherwise. 
+static bool insertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + Value *Dividend = Instr->getOperand(0); + Value *Divisor = Instr->getOperand(1); + + if (isa<ConstantInt>(Divisor) || + (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { + // Operations with immediate values should have + // been solved and replaced during compile time. + return false; + } + + // Basic Block is split before divide + BasicBlock *MainBB = I; + BasicBlock *SuccessorBB = I->splitBasicBlock(J); + ++I; //advance iterator I to successorBB + + // Add new basic block for slow divide operation + BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + SlowBB->moveBefore(SuccessorBB); + IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); + Value *SlowQuotientV; + Value *SlowRemainderV; + if (UseSignedOp) { + SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + } else { + SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + } + SlowBuilder.CreateBr(SuccessorBB); + + // Add new basic block for fast divide operation + BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + FastBB->moveBefore(SlowBB); + IRBuilder<> FastBuilder(FastBB, FastBB->begin()); + Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, + BypassType); + Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, + BypassType); + + // udiv/urem because optimization only handles positive numbers + Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, + ShortDivisorV); + Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, + ShortDivisorV); + Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, + ShortQuotientV, + Dividend->getType()); + Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, + ShortRemainderV, + Dividend->getType()); + FastBuilder.CreateBr(SuccessorBB); + + // Phi nodes for result of div and rem + IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); + PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + QuoPhi->addIncoming(SlowQuotientV, SlowBB); + QuoPhi->addIncoming(FastQuotientV, FastBB); + PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + RemPhi->addIncoming(SlowRemainderV, SlowBB); + RemPhi->addIncoming(FastRemainderV, FastBB); + + // Replace Instr with appropriate phi node + if (UseDivOp) + Instr->replaceAllUsesWith(QuoPhi); + else + Instr->replaceAllUsesWith(RemPhi); + Instr->eraseFromParent(); + + // Combine operands into a single value with OR for value testing below + MainBB->getInstList().back().eraseFromParent(); + IRBuilder<> MainBuilder(MainBB, MainBB->end()); + Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + + // Compare operand values and branch + Value *ZeroV = MainBuilder.getInt32(0); + Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); + MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); + + // point iterator J at first instruction of successorBB + J = I->begin(); + + // Cache phi nodes to 
be used later in place of other instances + // of div or rem with the same sign, dividend, and divisor + DivOpInfo Key(UseSignedOp, Dividend, Divisor); + DivPhiNodes Value(QuoPhi, RemPhi); + PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); + return true; +} + +// reuseOrInsertFastDiv - Reuses previously computed quotient or remainder if +// operands and operation are identical. Otherwise call insertFastDiv to perform +// the optimization and cache the resulting quotient and remainder. +static bool reuseOrInsertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); + DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); + + if (CacheI == PerBBDivCache.end()) { + // If previous instance does not exist, insert fast div + return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, + PerBBDivCache); + } + + // Replace operation value with previously generated phi node + DivPhiNodes &Value = CacheI->second; + if (UseDivOp) { + // Replace all uses of div instruction with quotient phi node + J->replaceAllUsesWith(Value.Quotient); + } else { + // Replace all uses of rem instruction with remainder phi node + J->replaceAllUsesWith(Value.Remainder); + } + + // Advance to next operation + ++J; + + // Remove redundant operation + Instr->eraseFromParent(); + return true; +} + +// bypassSlowDivision - This optimization identifies DIV instructions that can +// be profitably bypassed and carried out with a shorter, faster divide. +bool llvm::bypassSlowDivision(Function &F, + Function::iterator &I, + const DenseMap<Type *, Type *> &BypassTypeMap) { + DivCacheTy DivCache; + + bool MadeChange = false; + for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { + + // Get instruction details + unsigned Opcode = J->getOpcode(); + bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; + bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; + bool UseSignedOp = Opcode == Instruction::SDiv || + Opcode == Instruction::SRem; + + // Only optimize div or rem ops + if (!UseDivOp && !UseRemOp) + continue; + + // Continue if div/rem type is not bypassed + DenseMap<Type *, Type *>::const_iterator BT = + BypassTypeMap.find(J->getType()); + if (BT == BypassTypeMap.end()) + continue; + + IntegerType *BypassType = cast<IntegerType>(BT->second); + MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, + UseSignedOp, DivCache); + } + + return MadeChange; +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 4ff31ca..215a16f 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils BasicBlockUtils.cpp BreakCriticalEdges.cpp BuildLibCalls.cpp + BypassSlowDivision.cpp CloneFunction.cpp CloneModule.cpp CmpInstAnalysis.cpp diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index bed7d72..0601433 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -52,7 +52,8 @@ using namespace llvm; /// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true.
-bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, + const TargetLibraryInfo *TLI) { TerminatorInst *T = BB->getTerminator(); IRBuilder<> Builder(T); @@ -96,7 +97,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = BI->getCondition(); BI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } return false; @@ -161,7 +162,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = SI->getCondition(); SI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } @@ -205,7 +206,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Address = IBI->getAddress(); IBI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Address); + RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an @@ -230,7 +231,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { /// isInstructionTriviallyDead - Return true if the result produced by the /// instruction is not used, and the instruction has no side effects. /// -bool llvm::isInstructionTriviallyDead(Instruction *I) { +bool llvm::isInstructionTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; // We don't want the landingpad instruction removed by anything this general. @@ -265,9 +267,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { return isa<UndefValue>(II->getArgOperand(1)); } - if (isAllocLikeFn(I)) return true; + if (isAllocLikeFn(I, TLI)) return true; - if (CallInst *CI = isFreeCall(I)) + if (CallInst *CI = isFreeCall(I, TLI)) if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) return C->isNullValue() || isa<UndefValue>(C); @@ -278,9 +280,11 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { /// trivially dead instruction, delete it. If that makes any of its operands /// trivially dead, delete them too, recursively. Return true if any /// instructions were deleted. -bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { +bool +llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, + const TargetLibraryInfo *TLI) { Instruction *I = dyn_cast<Instruction>(V); - if (!I || !I->use_empty() || !isInstructionTriviallyDead(I)) + if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector<Instruction*, 16> DeadInsts; @@ -301,7 +305,7 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { // operand, and if it is 'trivially' dead, delete it in a future loop // iteration. if (Instruction *OpI = dyn_cast<Instruction>(OpV)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) DeadInsts.push_back(OpI); } @@ -334,19 +338,20 @@ static bool areAllUsesEqual(Instruction *I) { /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them /// too, recursively. Return true if a change was made. 
-bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { +bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, + const TargetLibraryInfo *TLI) { SmallPtrSet<Instruction*, 4> Visited; for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); I = cast<Instruction>(*I->use_begin())) { if (I->use_empty()) - return RecursivelyDeleteTriviallyDeadInstructions(I); + return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); // If we find an instruction more than once, we're on a cycle that // won't prove fruitful. if (!Visited.insert(I)) { // Break the cycle and delete the instruction and its operands. I->replaceAllUsesWith(UndefValue::get(I->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(I); + (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI); return true; } } @@ -358,7 +363,8 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. -bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD, + const TargetLibraryInfo *TLI) { bool MadeChange = false; #ifndef NDEBUG @@ -381,7 +387,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { continue; } - MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (BIHandle != BI) BI = BB->begin(); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 518df7c..32d7fa1 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -22,6 +22,7 @@ #include "llvm/LLVMContext.h" #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -54,6 +55,7 @@ DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -101,14 +103,14 @@ public: /// static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { if (SI1 == SI2) return false; // Can't merge with self! - + // It is not safe to merge these two switch instructions if they have a common // successor, and if that successor has a PHI node, and if *that* PHI node has // conflicting incoming values from the two switch blocks. 
BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) if (SI1Succs.count(*I)) for (BasicBlock::iterator BBI = (*I)->begin(); @@ -118,7 +120,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { PN->getIncomingValueForBlock(SI2BB)) return false; } - + return true; } @@ -135,7 +137,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, assert(SI1->isUnconditional() && SI2->isConditional()); // We fold the unconditional branch if we can easily update all PHI nodes in - // common successors: + // common successors: // 1> We have a constant incoming value for the conditional branch; // 2> We have "Cond" as the incoming value for the unconditional branch; // 3> SI2->getCondition() and Cond have same operands. @@ -170,7 +172,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do - + PHINode *PN; for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I) @@ -222,7 +224,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // doesn't dominate BB. if (Pred2->getSinglePredecessor() == 0) return 0; - + // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && @@ -252,7 +254,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // Otherwise, if this is a conditional branch, then we can use it! BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); if (BI == 0) return 0; - + assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { IfTrue = Pred1; @@ -345,7 +347,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. if (AggressiveInsts == 0) return false; - + // If we have seen this instruction before, don't count it again. if (AggressiveInsts->count(I)) return true; @@ -411,7 +413,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, const TargetData *TD, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return 0; - + // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { @@ -420,21 +422,21 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Vals.push_back(C); return I->getOperand(0); } - + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to // the set. ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); - + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) Span = Span.inverse(); - + // If there are a ton of values, we don't want to make a ginormous switch. 
if (Span.getSetSize().ugt(8) || Span.isEmptySet()) return 0; - + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; @@ -442,11 +444,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, } return 0; } - + // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) return 0; - + unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, @@ -467,12 +469,12 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Extra = I->getOperand(1); return LHS; } - + Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; return 0; } - + // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. if (Extra == 0 || Extra == I->getOperand(0)) { @@ -484,7 +486,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, assert(Vals.size() == NumValsBeforeLHS); Extra = OldExtra; } - + return 0; } @@ -615,6 +617,9 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisVal && "This isn't a value comparison!!"); if (ThisVal != PredVal) return false; // Different predicates. + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Find out information about when control will move from Pred to TI's block. std::vector<ValueEqualityComparisonCase> PredCases; BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), @@ -634,7 +639,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // can simplify TI. if (!ValuesOverlap(PredCases, ThisCases)) return false; - + if (isa<BranchInst>(TI)) { // Okay, one of the successors of this condbr is dead. Convert it to a // uncond br. @@ -652,7 +657,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, EraseTerminatorInstAndDCECond(TI); return true; } - + SwitchInst *SI = cast<SwitchInst>(TI); // Okay, TI has cases that are statically dead, prune them away. SmallPtrSet<Constant*, 16> DeadCases; @@ -673,7 +678,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } - + // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. ConstantInt *TIV = 0; @@ -729,8 +734,8 @@ namespace { } static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt**)P1; - const ConstantInt *RHS = *(const ConstantInt**)P2; + const ConstantInt *LHS = *(const ConstantInt*const*)P1; + const ConstantInt *RHS = *(const ConstantInt*const*)P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) @@ -738,6 +743,67 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) { return -1; } +static inline bool HasBranchWeights(const Instruction* I) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) + return MDS->getString().equals("branch_weights"); + + return false; +} + +/// Tries to get a branch weight for the given instruction, returns NULL if it +/// can't. Pos starts at 0. 
+static ConstantInt* GetWeight(Instruction* I, int Pos) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) { + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) { + if (MDS->getString().equals("branch_weights")) { + assert(ProfMD->getNumOperands() >= 3); + return dyn_cast<ConstantInt>(ProfMD->getOperand(1 + Pos)); + } + } + } + + return 0; +} + +/// Scale the given weights based on the successor TI's metadata. Scaling is +/// done by multiplying every weight by the sum of the successor's weights. +static void ScaleWeights(Instruction* STI, MutableArrayRef<uint64_t> Weights) { + // Sum the successor's weights + assert(HasBranchWeights(STI)); + unsigned Scale = 0; + MDNode* ProfMD = STI->getMetadata(LLVMContext::MD_prof); + for (unsigned i = 1; i < ProfMD->getNumOperands(); ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(ProfMD->getOperand(i)); + assert(CI); + Scale += CI->getValue().getZExtValue(); + } + + // Skip default, as it's replaced during the folding + for (unsigned i = 1; i < Weights.size(); ++i) { + Weights[i] *= Scale; + } +} + +/// Sees if any of the weights are too big for a uint32_t, and halves all the +/// weights if any are. +static void FitWeights(MutableArrayRef<uint64_t> Weights) { + bool Halve = false; + for (unsigned i = 0; i < Weights.size(); ++i) + if (Weights[i] > UINT_MAX) { + Halve = true; + break; + } + + if (! Halve) + return; + + for (unsigned i = 0; i < Weights.size(); ++i) + Weights[i] /= 2; +} + /// FoldValueComparisonIntoPredecessors - The specified terminator is a value /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons @@ -770,6 +836,55 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // build. SmallVector<BasicBlock*, 8> NewSuccessors; + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + uint64_t PredDefaultWeight = 0; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + MDNode* MD = PTI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If the predecessor is a conditional eq, then swap the default weight + // to be the first entry. 
+ if (BranchInst* BI = dyn_cast<BranchInst>(PTI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { + std::swap(Weights.front(), Weights.back()); + } + } + + PredDefaultWeight = Weights.front(); + } else if (SuccHasWeights) { + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + PredDefaultWeight = 1; + } + + uint64_t SuccDefaultWeight = 0; + if (SuccHasWeights) { + int Index = 0; + if (BranchInst* BI = dyn_cast<BranchInst>(TI)) { + ICmpInst* ICI = dyn_cast<ICmpInst>(BI->getCondition()); + assert(ICI); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + Index = 1; + } + + SuccDefaultWeight = GetWeight(TI, Index)->getValue().getZExtValue(); + } + if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI // that don't occur in PTI, or that branch to BB will be activated. @@ -780,6 +895,12 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, else { // The default destination is BB, we don't need explicit targets. std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights) { + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + PredCases.pop_back(); --i; --e; } @@ -790,14 +911,34 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, PredDefault = BBDefault; NewSuccessors.push_back(BBDefault); } + + if (SuccHasWeights) { + ScaleWeights(TI, Weights); + Weights.front() *= SuccDefaultWeight; + } else if (PredHasWeights) { + Weights.front() /= (1 + BBCases.size()); + } + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); + if (SuccHasWeights) { + Weights.push_back(PredDefaultWeight * + GetWeight(TI, i)->getValue().getZExtValue()); + } else if (PredHasWeights) { + // Split the old default's weight amongst the children + Weights.push_back(PredDefaultWeight / (1 + BBCases.size())); + } } } else { + // FIXME: preserve branch weight metadata, similarly to the 'then' + // above. For now, drop it. + PredHasWeights = false; + SuccHasWeights = false; + // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. @@ -822,7 +963,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // If there are any constants vectored to BB that TI doesn't handle, // they must go to the default destination of TI. - for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = + for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); @@ -851,6 +992,17 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + + NewSI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). 
+ createBranchWeights(MDWeights)); + } + EraseTerminatorInstAndDCECond(PTI); // Okay, last check. If BB is still a successor of PSI, then we must @@ -984,11 +1136,11 @@ HoistTerminator: Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; - + // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (SI == 0) SI = cast<SelectInst> (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1056,7 +1208,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from // being sunk into the use block. - for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast<Instruction>(*i); if (OpI && OpI->getParent() == BIParent && @@ -1112,7 +1264,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // as well. if (PHIs.empty()) return false; - + // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); @@ -1162,13 +1314,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); unsigned Size = 0; - + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (isa<DbgInfoIntrinsic>(BBI)) continue; if (Size > 10) return false; // Don't clone large BB's. ++Size; - + // We can only support instructions that do not define values that are // live outside of the current basic block. for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); @@ -1176,7 +1328,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { Instruction *U = cast<Instruction>(*UI); if (U->getParent() != BB || isa<PHINode>(U)) return false; } - + // Looks ok, continue checking. } @@ -1194,31 +1346,31 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // outside of the block. if (!PN || PN->getParent() != BB || !PN->hasOneUse()) return false; - + // Degenerate case of a single entry PHI. if (PN->getNumIncomingValues() == 1) { FoldSingleEntryPHINodes(PN->getParent()); - return true; + return true; } // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; - + // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - + if (RealDest == BB) continue; // Skip self loops. // Skip if the predecessor's terminator is an indirect branch. if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; - + // The dest block might have PHI nodes, other predecessors and other // difficult cases. 
Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -1227,7 +1379,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { RealDest->getName()+".critedge", RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); - + // Update PHI nodes. AddPredecessorToBlock(RealDest, EdgeBB, BB); @@ -1244,7 +1396,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // Clone the instruction. Instruction *N = BBI->clone(); if (BBI->hasName()) N->setName(BBI->getName()+".c"); - + // Update operands due to translation. for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { @@ -1252,7 +1404,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { if (PI != TranslateMap.end()) *i = PI->second; } - + // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, TD)) { TranslateMap[BBI] = V; @@ -1297,7 +1449,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // Don't bother if the branch will be constant folded trivially. isa<ConstantInt>(IfCond)) return false; - + // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. // At some point this becomes non-profitable (particularly if the target @@ -1307,14 +1459,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) if (NumPhis > 2) return false; - + // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet<Instruction*, 4> AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; - + for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); if (Value *V = SimplifyInstruction(PN, TD)) { @@ -1322,19 +1474,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { PN->eraseFromParent(); continue; } - + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, MaxCostVal0) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, MaxCostVal1)) return false; } - + // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast<PHINode>(BB->begin()); if (PN == 0) return true; - + // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. if (PN->getType()->isIntegerTy(1) && @@ -1342,7 +1494,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { isa<BinaryOperator>(PN->getIncomingValue(1)) || isa<BinaryOperator>(IfCond))) return false; - + // If all PHI nodes are promotable, check to make sure that all // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not // worth promoting to select instructions. BasicBlock *DomBlock = 0; @@ -1362,7 +1514,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { IfBlock2 = 0; } else { @@ -1375,15 +1527,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + DEBUG(dbgs() << "FOUND IF CONDITION!
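FoldCondBranchOnPHI, whose body continues above, fires when the branch condition is a phi with constant i1 entries: on any edge that contributes a constant, the branch outcome is already decided, so that predecessor can be routed straight to the taken successor through a fresh edge block. A source-level analogue, with a hypothetical compute() standing in for the non-constant phi input:

static bool compute() { return false; }  // placeholder for the unknown input

int before(bool p) {
  bool c;
  if (p) c = true;        // constant incoming value on this edge
  else   c = compute();
  if (c) return 1;        // branch on the phi
  return 0;
}

int after(bool p) {
  if (p) return 1;        // the constant-true edge is threaded through
  return compute() ? 1 : 0;
}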
" << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); - + // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. Instruction *InsertPt = DomBlock->getTerminator(); IRBuilder<true, NoFolder> Builder(InsertPt); - + // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) @@ -1394,19 +1546,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { DomBlock->getInstList().splice(InsertPt, IfBlock2->getInstList(), IfBlock2->begin(), IfBlock2->getTerminator()); - + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { // Change the PHI node into a select instruction. Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - - SelectInst *NV = + + SelectInst *NV = cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); PN->replaceAllUsesWith(NV); NV->takeName(PN); PN->eraseFromParent(); } - + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. @@ -1420,14 +1572,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes /// to two returning blocks, try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); - + // Check to ensure both blocks are empty (just a return) or optionally empty // with PHI nodes. If there are other instructions, merging would cause extra // computation on one path or the other. @@ -1447,12 +1599,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, EraseTerminatorInstAndDCECond(BI); return true; } - + // Otherwise, figure out what the true and false return values are // so we can insert a new select instruction. Value *TrueValue = TrueRet->getReturnValue(); Value *FalseValue = FalseRet->getReturnValue(); - + // Unwrap any PHI nodes in the return blocks. if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) if (TVPN->getParent() == TrueSucc) @@ -1460,7 +1612,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) if (FVPN->getParent() == FalseSucc) FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); - + // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is // normally the case, but we could have a potentially-trapping @@ -1472,12 +1624,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue)) if (FCV->canTrap()) return false; - + // Okay, we collected all the mapped values and checked them for sanity, and // decided to really do this transformation. First, update the CFG.
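What FoldTwoEntryPHINode is driving at, written by hand at the source level: the two side blocks of an if/else diamond are spliced into the dominating block and every merge phi becomes a select. (The pass only does this when the moved instructions are cheap enough to pass the DominatesMergePoint cost checks above.)

int before(bool c, int a, int b) {
  int x, y;
  if (c) { x = a + 1; y = a; }        // "then" side
  else   { x = b;     y = b + 2; }    // "else" side
  return x + y;                       // x and y are two-entry phis here
}

int after(bool c, int a, int b) {
  int xt = a + 1, yt = a;             // both sides now run unconditionally
  int xf = b,     yf = b + 2;
  int x = c ? xt : xf;                // one select per former phi
  int y = c ? yt : yf;
  return x + y;
}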
TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - + // Insert select instructions where needed. Value *BrCond = BI->getCondition(); if (TrueValue) { @@ -1491,15 +1643,15 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, } } - Value *RI = !TrueValue ? + Value *RI = !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); (void) RI; - + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); - + EraseTerminatorInstAndDCECond(BI); return true; @@ -1600,7 +1752,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (Cond == 0) return false; } - + if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; @@ -1623,7 +1775,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; - + // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; } @@ -1631,13 +1783,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Only a single bonus inst is allowed. if (&*FrontIt != Cond) return false; - + // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; - + if (&*CondIt != BI) return false; @@ -1649,7 +1801,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1))) if (CE->canTrap()) return false; - + // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; @@ -1659,22 +1811,22 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); - + // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. SmallVector<PHINode*, 4> PHIs; if (PBI == 0 || PBI->isUnconditional() || - (BI->isConditional() && + (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) continue; - + // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond = false; - + if (BI->isConditional()) { if (PBI->getSuccessor(0) == TrueDest) Opc = Instruction::Or; @@ -1693,7 +1845,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values - // must already have been resolved, so we won't be inhibiting the + // must already have been resolved, so we won't be inhibiting the // out-of-order core by speculating them earlier. if (BonusInst) { // Collect the values used by the bonus inst @@ -1707,47 +1859,47 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { SmallVector<std::pair<Value*, unsigned>, 4> Worklist; Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); - + // Walk up to four levels back up the use-def chain of the predecessor's // terminator to see if all those values were used.
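SimplifyCondBranchToTwoReturns, completed just above, has an equally small source-level picture: a conditional branch whose two targets do nothing but return collapses to a single return of a select (the canTrap checks guard the now-unconditional evaluation of both values):

int before(bool c, int a, int b) {
  if (c) return a;
  return b;
}

int after(bool c, int a, int b) {
  return c ? a : b;   // one ret of a select; both return blocks go away
}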
The choice of four // levels is arbitrary, to provide a compile-time-cost bound. while (!Worklist.empty()) { std::pair<Value*, unsigned> Pair = Worklist.back(); Worklist.pop_back(); - + if (Pair.second >= 4) continue; UsedValues.erase(Pair.first); if (UsedValues.empty()) break; - + if (Instruction *I = dyn_cast<Instruction>(Pair.first)) { for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); - } + } } - + if (!UsedValues.empty()) return false; } DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); + IRBuilder<> Builder(PBI); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { Value *NewCond = PBI->getCondition(); - + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { CmpInst *CI = cast<CmpInst>(NewCond); CI->setPredicate(CI->getInversePredicate()); } else { - NewCond = Builder.CreateNot(NewCond, + NewCond = Builder.CreateNot(NewCond, PBI->getCondition()->getName()+".not"); } - + PBI->setCondition(NewCond); PBI->swapSuccessors(); } - + // If we have a bonus inst, clone it into the predecessor block. Instruction *NewBonus = 0; if (BonusInst) { @@ -1756,7 +1908,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { NewBonus->takeName(BonusInst); BonusInst->setName(BonusInst->getName()+".old"); } - + // Clone Cond into the predecessor basic block, and or/and the // two conditions together. Instruction *New = Cond->clone(); @@ -1764,9 +1916,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); Cond->setName(New->getName()+".old"); - + if (BI->isConditional()) { - Instruction *NewCond = + Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); PBI->setCondition(NewCond); @@ -1806,7 +1958,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) // is false: PBI_Cond and BI_Value - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::And, PBI->getCondition(), New, "and.cond")); @@ -1814,7 +1966,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *NotCond = cast<Instruction>(Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::Or, NotCond, MergedCond, "or.cond")); @@ -1921,7 +2073,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa<DbgInfoIntrinsic>(*I)) I->clone()->insertBefore(PBI); - + return true; } return false; @@ -1936,7 +2088,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { BasicBlock *BB = BI->getParent(); // If this block ends with a branch instruction, and if there is a - // predecessor that ends on a branch of the same condition, make + // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { @@ -1945,11 +2097,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. 
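Stepping back, the overall effect of FoldBranchToCommonDest, hand-written at the source level: two conditional branches sharing a destination become one branch on the combined condition. Note the bitwise '|' rather than '||' below: the second condition (and any single "bonus" instruction feeding it) is cloned into the predecessor and evaluated unconditionally, which is why the pass insists it be cheap and non-trapping. The diff then resumes with SimplifyCondBranchToCondBranch's branch-on-constant case.

int before(bool a, int x) {
  if (a) return 1;          // predecessor branch
  bool b = x > 0;           // the lone "bonus" instruction
  if (b) return 1;          // branches to the same destination
  return 0;
}

int after(bool a, int x) {
  bool b = x > 0;           // bonus inst, cloned ahead of the test
  if (a | b) return 1;      // one branch on the or'd condition
  return 0;
}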
bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } - + // Otherwise, if there are multiple predecessors, insert a PHI that merges // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. @@ -1969,18 +2121,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), P); } else { NewPN->addIncoming(BI->getCondition(), P); } } - + BI->setCondition(NewPN); return true; } } - + // If this is a conditional branch in an empty block, and if any // predecessor is a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -1991,11 +2143,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition())) if (CE->canTrap()) return false; - + int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -2007,31 +2159,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBIOp = BIOp = 1; else return false; - + // Check to make sure that the other destination of this branch // isn't BB itself. If so, this is an infinite loop that will // keep getting unwound. if (PBI->getSuccessor(PBIOp) == BB) return false; - - // Do not perform this transformation if it would require + + // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); - + unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); ++II, ++NumPhis) if (NumPhis > 2) // Disable this xform. return false; - + // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); - - + + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -2046,13 +2198,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; - } - + } + DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. - + // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); IRBuilder<true, NoFolder> Builder(PBI); @@ -2065,16 +2217,16 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Merge the conditions. Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); - + // Modify PBI to branch on the new condition to the new dests.
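The single-predecessor case handled at the top of this hunk comes down to a successor re-testing the very condition its predecessor already branched on; in source terms (hand-written illustration):

int before(bool c) {
  if (c) {
    if (c)        // always true on this path
      return 1;
    return 2;     // dead: the inner branch folds to a constant
  }
  return 0;
}

int after(bool c) {
  if (c) return 1;
  return 0;
}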
PBI->setCondition(Cond); PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); - + // OtherDest may have phi nodes. If so, add entries from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); - + // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make @@ -2092,10 +2244,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PN->setIncomingValue(PBBIdx, NV); } } - + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); DEBUG(dbgs() << *PBI->getParent()->getParent()); - + // This basic block is probably dead. We know it has at least // one fewer predecessor. return true; @@ -2214,7 +2366,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] -/// +/// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, @@ -2228,17 +2380,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, Value *V = ICI->getOperand(0); ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); - + // The pattern we're looking for is where our only predecessor is a switch on // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; - + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); if (SI->getCondition() != V) return false; - + // If BB is reachable on a non-default case, then we simply know the value of // V in this block. Substitute it and constant fold the icmp instruction // away. @@ -2246,7 +2398,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, ConstantInt *VVal = SI->findCaseDest(BB); assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - + if (Value *V = SimplifyInstruction(ICI, TD)) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); @@ -2254,7 +2406,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. @@ -2264,13 +2416,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, V = ConstantInt::getFalse(BB->getContext()); else V = ConstantInt::getTrue(BB->getContext()); - + ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); @@ -2297,7 +2449,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); SI->addCase(Cst, NewBB); - + // NewBB branches to the phi block, add the uncond branch and the phi entry.
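TryToSimplifyUncondBranchWithICmpInIt, set up above, targets a switch whose default block merely re-compares the switched value against a fresh constant. A hand-written sketch of the payoff: the compare becomes a real case with its own edge into the merge phi.

int before(int v) {
  switch (v) {
  case 1: return 10;
  case 2: return 20;
  default:
    return v == 7;    // icmp eq against a constant in the default block
  }
}

int after(int v) {
  switch (v) {
  case 1: return 10;
  case 2: return 20;
  case 7: return 1;   // new case edge feeds 'true' into the phi
  default: return 0;  // default now feeds the folded 'false'
  }
}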
Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); @@ -2313,8 +2465,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (Cond == 0) return false; - - + + // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. @@ -2323,7 +2475,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, bool TrueWhenEqual = true; Value *ExtraCase = 0; unsigned UsedICmps = 0; - + if (Cond->getOpcode() == Instruction::Or) { CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, UsedICmps); @@ -2332,7 +2484,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, UsedICmps); TrueWhenEqual = false; } - + // If we didn't find a value compared against multiple constants, fail. if (CompVal == 0) return false; @@ -2344,21 +2496,24 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, // instruction can't handle, remove them now. array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - + // If Extra was used, we require at least two switch values to do the // transformation. A switch with one value is just a cond branch. if (ExtraCase && Values.size() < 2) return false; - + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); - + BasicBlock *BB = BI->getParent(); - + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); - + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. @@ -2372,13 +2527,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); else Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - + OldTI->eraseFromParent(); - + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); - + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase << "\nEXTRABB = " << *BB); BB = NewBB; @@ -2392,14 +2547,14 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, TD->getIntPtrType(CompVal->getContext()), "magicptr"); } - + // Create the new switch instruction now. SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); // Add all of the 'cases' to the switch instruction. for (unsigned i = 0, e = Values.size(); i != e; ++i) New->addCase(Values[i], EdgeBB); - + // We added edges from PI to the EdgeBB. As such, if there were any // PHI nodes in EdgeBB, they need entries to be added corresponding to // the number of edges added. @@ -2410,10 +2565,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, for (unsigned i = 0, e = Values.size()-1; i != e; ++i) PN->addIncoming(InVal, BB); } - + // Erase the old branch instruction.
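SimplifyBranchOnICmpChain's before/after, hand-written at the source level (the bitwise '|' mirrors the IR, where all the compares are evaluated and or'd before the single branch):

int before(int x) {
  if ((x == 0) | (x == 1) | (x == 7))
    return 1;
  return 0;
}

int after(int x) {
  switch (x) {          // one switch, one case per collected constant
  case 0: case 1: case 7:
    return 1;
  default:
    return 0;
  }
}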
EraseTerminatorInstAndDCECond(BI); - + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; } @@ -2467,7 +2622,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; - + // Find predecessors that end with branches. SmallVector<BasicBlock*, 8> UncondBranchPreds; SmallVector<BranchInst*, 8> CondBranchPreds; @@ -2481,7 +2636,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { CondBranchPreds.push_back(BI); } } - + // If we found some, do the transformation! if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { @@ -2490,21 +2645,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { << "INTO UNCOND BRANCH PRED: " << *Pred); (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } - + // If we eliminated all predecessors of the block, delete the block now. if (pred_begin(BB) == pred_end(BB)) // We know there are no successors, so just nuke the block. BB->eraseFromParent(); - + return true; } - + // Check out all of the conditional branches going to this return // instruction. If any of them just select between returns, change the // branch itself into a select/return pair. while (!CondBranchPreds.empty()) { BranchInst *BI = CondBranchPreds.pop_back_val(); - + // Check to see if the non-BB successor is also a return block. if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && @@ -2516,9 +2671,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); - + bool Changed = false; - + // If there are any instructions immediately before the unreachable that can // be removed, do so. while (UI != BB->begin()) { @@ -2558,11 +2713,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BBI->eraseFromParent(); Changed = true; } - + // If the unreachable instruction is the first in the block, take a gander // at all of the predecessors of this instruction, and simplify them. if (&BB->front() != UI) return Changed; - + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); @@ -2615,7 +2770,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *MaxBlock = 0; for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second.first > MaxPop || + if (I->second.first > MaxPop || (I->second.first == MaxPop && MaxIndex > I->second.second)) { MaxPop = I->second.first; MaxIndex = I->second.second; @@ -2627,13 +2782,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // edges to it. SI->setDefaultDest(MaxBlock); Changed = true; - + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from // it. if (isa<PHINode>(MaxBlock->begin())) for (unsigned i = 0; i != MaxPop-1; ++i) MaxBlock->removePredecessor(SI->getParent()); - + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) if (i.getCaseSuccessor() == MaxBlock) { @@ -2648,7 +2803,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // place to note that the call does not throw though. 
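SimplifyReturn's first transformation above, sketched by hand: when return duplication (the DupRet option) is enabled, each unconditional-branch predecessor of a return block gets its own copy of the return, and the shared block dies once it has no predecessors left.

int before(int x) {
  int r;
  if (x > 0) { r = 1;  goto done; }
  if (x < 0) { r = -1; goto done; }
  r = 0;
done:
  return r;             // one return block with three predecessors
}

int after(int x) {
  if (x > 0) return 1;  // return folded into each predecessor
  if (x < 0) return -1;
  return 0;
}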
BranchInst *BI = Builder.CreateBr(II->getNormalDest()); II->removeFromParent(); // Take out of symbol table - + // Insert the call now... SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3); Builder.SetInsertPoint(BI); @@ -2663,7 +2818,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } - + // If this block is now dead, remove it. if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) { @@ -2823,6 +2978,285 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { return Changed; } +/// ValidLookupTableConstant - Return true if the backend will be able to handle +/// initializing an array of constants like C. +static bool ValidLookupTableConstant(Constant *C) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return CE->isGEPWithNoNotionalOverIndexing(); + + return isa<ConstantFP>(C) || + isa<ConstantInt>(C) || + isa<ConstantPointerNull>(C) || + isa<GlobalValue>(C) || + isa<UndefValue>(C); +} + +/// GetCaseResults - Try to determine the resulting constant values in phi +/// nodes at the common destination basic block for one of the case +/// destinations of a switch instruction. +static bool GetCaseResults(SwitchInst *SI, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty, continue to its successor. + if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() && + !isa<PHINode>(CaseDest->begin())) { + + TerminatorInst *Terminator = CaseDest->getTerminator(); + if (Terminator->getNumSuccessors() != 1) + return false; + + Pred = CaseDest; + CaseDest = Terminator->getSuccessor(0); + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. + BasicBlock::iterator I = (*CommonDest)->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx)); + if (!ConstVal) + return false; + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal)) + return false; + + Res.push_back(std::make_pair(PHI, ConstVal)); + } + + return true; +} + +/// BuildLookupTable - Build a lookup table with the contents of Results, using +/// DefaultResult to fill the holes in the table. If the table ends up +/// containing the same result in each element, set *SingleResult to that value +/// and return NULL. +static GlobalVariable *BuildLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results, + Constant *DefaultResult, + Constant **SingleResult) { + assert(Results.size() && "Need values to build lookup table"); + assert(TableSize >= Results.size() && "Table needs to hold all values"); + + // If all values in the table are equal, this is that value. + Constant *SameResult = Results.begin()->second; + + // Build up the table contents.
+ std::vector<Constant*> TableContents(TableSize); + for (size_t I = 0, E = Results.size(); I != E; ++I) { + ConstantInt *CaseVal = Results[I].first; + Constant *CaseRes = Results[I].second; + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SameResult) + SameResult = NULL; + } + + // Fill in any holes in the table with the default result. + if (Results.size() < TableSize) { + for (unsigned i = 0; i < TableSize; ++i) { + if (!TableContents[i]) + TableContents[i] = DefaultResult; + } + + if (DefaultResult != SameResult) + SameResult = NULL; + } + + // Same result was used in the entire table; just return that. + if (SameResult) { + *SingleResult = SameResult; + return NULL; + } + + ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + GV->setUnnamedAddr(true); + return GV; +} + +/// SwitchToLookupTable - If the switch is only used to initialize one or more +/// phi nodes in a common successor block with different constant values, +/// replace the switch with lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, + IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + // FIXME: Handle unreachable cases. + + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: If the results are all integers and the lookup table would fit in a + // target-legal register, we should store them as a bitmap and use shift/mask + // to look up the result. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. + + // Ignore the switch if the number of cases is too small. + // This is similar to the check when building jump tables in + // SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Determine the best cut-off. + if (SI->getNumCases() < 4) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the min and max case values. + assert(SI->case_begin() != SI->case_end()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI.getCaseValue(); + ConstantInt *MaxCaseVal = CI.getCaseValue(); + + BasicBlock *CommonDest = NULL; + typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; + SmallDenseMap<PHINode*, ResultListTy> ResultLists; + SmallDenseMap<PHINode*, Constant*> DefaultResults; + SmallDenseMap<PHINode*, Type*> ResultTypes; + SmallVector<PHINode*, 4> PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; + ResultsTy Results; + if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results)) + return false; + + // Append the result from this case to the list for each phi.
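BuildLookupTable's construction loop, restated as a standalone sketch on plain ints (illustrative names; the real code builds a ConstantArray and reports the single-result case through an out-parameter): each case result lands at its offset from the smallest case value, holes take the default, and a table whose slots all match needs no array at all.

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Returns true when every slot holds the same value, in which case the
// caller can use Table[0] directly instead of emitting a table.
static bool buildTable(const std::vector<std::pair<uint64_t, int> > &Cases,
                       uint64_t MinCase, uint64_t TableSize, int DefaultRes,
                       std::vector<int> &Table) {
  Table.assign(TableSize, DefaultRes);           // holes get the default
  for (size_t i = 0, e = Cases.size(); i != e; ++i)
    Table[Cases[i].first - MinCase] = Cases[i].second;

  for (uint64_t i = 1; i < TableSize; ++i)
    if (Table[i] != Table[0])
      return false;
  return true;
}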
+ for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) { + if (!ResultLists.count(I->first)) + PHIs.push_back(I->first); + ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second)); + } + } + + // Get the resulting values for the default case. + SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; + if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList)) + return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { + PHINode *PHI = DefaultResultsList[I].first; + Constant *Result = DefaultResultsList[I].second; + DefaultResults[PHI] = Result; + ResultTypes[PHI] = Result->getType(); + } + + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + // Be careful to avoid overflow in the density computation. + if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) + return false; + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + if (SI->getNumCases() * 10 < TableSize * 4) + return false; + + // Build the lookup tables. + SmallDenseMap<PHINode*, GlobalVariable*> LookupTables; + SmallDenseMap<PHINode*, Constant*> SingleResults; + + Module &Mod = *CommonDest->getParent()->getParent(); + for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); + I != E; ++I) { + PHINode *PHI = *I; + + Constant *SingleResult = NULL; + LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, + ResultLists[PHI], DefaultResults[PHI], + &SingleResult); + SingleResults[PHI] = SingleResult; + } + + // Create the BB that does the lookups. + BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Check whether the condition value is within the case range, and branch to + // the new BB. + Builder.SetInsertPoint(SI); + Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Populate the BB that does the lookups. + Builder.SetInsertPoint(LookupBB); + bool ReturnedEarly = false; + for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); + I != E; ++I) { + PHINode *PHI = *I; + // There was a single result for this phi; just use that. + if (Constant *SingleResult = SingleResults[PHI]) { + PHI->addIncoming(SingleResult, LookupBB); + continue; + } + + Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; + Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, + "switch.gep"); + Value *Result = Builder.CreateLoad(GEP, "switch.load"); + + // If the result is only going to be used to return from the function, + // we want to do that right here. + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) { + if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + } + } + + if (!ReturnedEarly) + PHI->addIncoming(Result, LookupBB); + } + + if (!ReturnedEarly) + Builder.CreateBr(CommonDest); + + // Remove the switch.
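The profitability guard above, reduced to plain integers (a sketch mirroring the patch's checks, not shared code). The early test keeps the '+ 1' and '* 4' below from overflowing 64 bits:

#include <cstdint>

static bool worthALookupTable(uint64_t NumCases, uint64_t MinCase,
                              uint64_t MaxCase) {
  if (NumCases < 4)
    return false;                         // same cut-off as the pass
  uint64_t Spread = MaxCase - MinCase;
  if (Spread > UINT64_MAX / 4 - 1)
    return false;                         // density math would overflow
  uint64_t TableSize = Spread + 1;
  return NumCases * 10 >= TableSize * 4;  // density of at least 40%
}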
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == SI->getDefaultDest()) continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + + ++NumLookupTables; + return true; +} + bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // If this switch is too complex to want to look at, ignore it. if (!isValueEqualityComparison(SI)) @@ -2862,13 +3296,16 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; + if (SwitchToLookupTable(SI, Builder)) + return SimplifyCFG(BB) | true; + return false; } bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; - + // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { @@ -2879,7 +3316,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { --i; --e; Changed = true; } - } + } if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. @@ -2887,14 +3324,14 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { EraseTerminatorInstAndDCECond(IBI); return true; } - + if (IBI->getNumDestinations() == 1) { // If the indirectbr has one successor, change it to a direct branch. BranchInst::Create(IBI->getDestination(0), IBI); EraseTerminatorInstAndDCECond(IBI); return true; } - + if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) return SimplifyCFG(BB) | true; @@ -2904,13 +3341,13 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); - + // If the Terminator is the only non-phi instruction, simplify the block. BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; - + // If the only instruction in the block is a seteq/setne comparison // against a constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) @@ -2921,7 +3358,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) return true; } - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value @@ -2934,7 +3371,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); - + // Conditional branch if (isValueEqualityComparison(BI)) { // If we only have one predecessor, and if it is a branch on this value, @@ -2943,7 +3380,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) return SimplifyCFG(BB) | true; - + // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. 
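End to end, the new SwitchToLookupTable transformation turns a dense switch over constants into a bounds check plus an array load. A hand-written source-level picture (shrunk below the pass's four-case minimum for brevity; the real table is the private unnamed_addr global named switch.table, and the wrap-around of the unsigned subtraction is exactly what the ICmpULT bounds check exploits):

static const int Table[6] = {10, 20, 5, 5, 5, 30}; // holes hold the default

int before(int x) {
  switch (x) {
  case 0: return 10;
  case 1: return 20;
  case 5: return 30;
  default: return 5;
  }
}

int after(int x) {
  unsigned Idx = (unsigned)(x - 0);   // x - MinCaseVal ("switch.tableidx")
  if (Idx < 6)                        // TableSize bounds check
    return Table[Idx];                // "switch.load"
  return 5;                           // original default
}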
BasicBlock::iterator I = BB->begin(); @@ -2962,17 +3399,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return SimplifyCFG(BB) | true; } } - + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. if (SimplifyBranchOnICmpChain(BI, TD, Builder)) return true; - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. if (FoldBranchToCommonDest(BI)) return SimplifyCFG(BB) | true; - + // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we @@ -2999,14 +3436,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) return SimplifyCFG(BB) | true; } - + // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, TD)) return SimplifyCFG(BB) | true; - + // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) @@ -3114,7 +3551,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // if (MergeBlockIntoPredecessor(BB)) return true; - + IRBuilder<> Builder(BB); // If there is a trivial two-entry PHI node in this basic block, and we can diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 81eb9e0..528e6a1 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -72,7 +72,7 @@ namespace { ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I); + Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); } // Place the list of instructions to simplify on the next loop iteration diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 62d23cb..c09dcd2 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -601,7 +601,7 @@ namespace { // It is important to clean up here so that future iterations of this // function have less work to do. - (void) SimplifyInstructionsInBlock(&BB, TD); + (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo()); return true; }