| author | Stephen Hines <srhines@google.com> | 2012-08-23 19:08:53 -0700 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2012-08-23 19:08:53 -0700 |
| commit | 31675153bd2d7617db8cb6aeb58054934c7b9f73 | |
| tree | c1970fcebc736d4f731db0559a79a7ac5cb0f8bf /lib/Transforms | |
| parent | 416bb6a168a9316547db6ce3909c515f70a84f52 | |
| parent | 75dd7f0c4a2b3fb9e9d4d5a0517591810c57ed92 | |
Merge branch 'upstream' into merge_2
Conflicts:
lib/Target/ARM/ARMCodeEmitter.cpp
Change-Id: I6702d340c733e9721499b5d85b13b96ad9c14eb5
Diffstat (limited to 'lib/Transforms')
20 files changed, 855 insertions, 545 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 60ce958..6d950d2 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -352,7 +352,8 @@ static bool IsSafeComputationToRemove(Value *V) { return true; if (!V->hasOneUse()) return false; - if (isa<LoadInst>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) + if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || + isa<GlobalValue>(V)) return false; if (isAllocationFn(V)) return true; @@ -442,12 +443,14 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) { Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { + if (isAllocationFn(I)) + break; Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); if (!J) break; I->eraseFromParent(); I = J; - } while (!isAllocationFn(I)); + } while (1); I->eraseFromParent(); } } diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index d8e8cf7..80bfc1c 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -27,6 +27,7 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/TypeFinder.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" @@ -175,8 +176,8 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { // Strip any named types of their names. static void StripTypeNames(Module &M, bool PreserveDbgInfo) { - std::vector<StructType*> StructTypes; - M.findUsedStructTypes(StructTypes); + TypeFinder StructTypes; + StructTypes.run(M, false); for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *STy = StructTypes[i]; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index c1d9d01..cbe1ca4 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -51,8 +51,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // if the size is something we can handle with a single primitive load/store. // A single load+store correctly handles overlapping memory in the memmove // case. - unsigned Size = MemOpLength->getZExtValue(); - if (Size == 0) return MI; // Delete this mem transfer. + uint64_t Size = MemOpLength->getLimitedValue(); + assert(Size && "0-sized memory transfering should be removed already."); if (Size > 8 || (Size&(Size-1))) return 0; // If not 1/2/4/8 bytes, exit. 
@@ -133,11 +133,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return 0; - uint64_t Len = LenC->getZExtValue(); + uint64_t Len = LenC->getLimitedValue(); Alignment = MI->getAlignment(); - - // If the length is zero, this is a no-op - if (Len == 0) return MI; // memset(d,c,0,a) -> noop + assert(Len && "0-sized memory setting should be removed already."); // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { @@ -795,7 +793,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { if (CI->getCalledFunction() == 0) return 0; InstCombineFortifiedLibCalls Simplifier(this); - Simplifier.fold(CI, TD); + Simplifier.fold(CI, TD, TLI); return Simplifier.NewInstruction; } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7076d88..c3fc18c 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -2824,7 +2825,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case ICmpInst::ICMP_UGE: // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) + if (RHS.isNegative()) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); Pred = ICmpInst::ICMP_UGT; break; @@ -2985,6 +2986,44 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return Res; } break; + case Instruction::Call: { + CallInst *CI = cast<CallInst>(LHSI); + LibFunc::Func Func; + // Various optimization for fabs compared with zero. 
+ if (RHSC->isNullValue() && CI->getCalledFunction() && + TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && + TLI->has(Func)) { + if (Func == LibFunc::fabs || Func == LibFunc::fabsf || + Func == LibFunc::fabsl) { + switch (I.getPredicate()) { + default: break; + // fabs(x) < 0 --> false + case FCmpInst::FCMP_OLT: + return ReplaceInstUsesWith(I, Builder->getFalse()); + // fabs(x) > 0 --> x != 0 + case FCmpInst::FCMP_OGT: + return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), + RHSC); + // fabs(x) <= 0 --> x == 0 + case FCmpInst::FCMP_OLE: + return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), + RHSC); + // fabs(x) >= 0 --> !isnan(x) + case FCmpInst::FCMP_OGE: + return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), + RHSC); + // fabs(x) == 0 --> x == 0 + // fabs(x) != 0 --> x != 0 + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: + return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), + RHSC); + } + } + } + } } } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index c485844..6ecb4c5 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -20,7 +20,154 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); + +/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to +/// some part of a constant global variable. This intentionally only accepts +/// constant expressions because we can't rewrite arbitrary instructions. +static bool pointsToConstantGlobal(Value *V) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return GV->isConstant(); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr) + return pointsToConstantGlobal(CE->getOperand(0)); + return false; +} + +/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) +/// pointer to an alloca. Ignore any reads of the pointer, return false if we +/// see any stores or other unknown uses. If we see pointer arithmetic, keep +/// track of whether it moves the pointer (with IsOffset) but otherwise traverse +/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to +/// the alloca, and if the source pointer is a pointer to a constant global, we +/// can optimize this. +static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + SmallVectorImpl<Instruction *> &ToDelete, + bool IsOffset = false) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { + User *U = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Ignore non-volatile loads, they are always ok. + if (!LI->isSimple()) return false; + continue; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + // If uses of the bitcast are ok, we are ok. 
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) + return false; + continue; + } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + // If the GEP has all zero indices, it doesn't offset the pointer. If it + // doesn't, it does. + if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete, + IsOffset || !GEP->hasAllZeroIndices())) + return false; + continue; + } + + if (CallSite CS = U) { + // If this is the function being called then we treat it like a load and + // ignore it. + if (CS.isCallee(UI)) + continue; + + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + unsigned ArgNo = CS.getArgumentNo(UI); + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) + continue; + + // If this is being passed as a byval argument, the caller is making a + // copy, so it is only a read of the alloca. + if (CS.isByValArgument(ArgNo)) + continue; + } + + // Lifetime intrinsics can be handled by the caller. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + assert(II->use_empty() && "Lifetime markers have no result to use!"); + ToDelete.push_back(II); + continue; + } + } + + // If this is isn't our memcpy/memmove, reject it as something we can't + // handle. + MemTransferInst *MI = dyn_cast<MemTransferInst>(U); + if (MI == 0) + return false; + + // If the transfer is using the alloca as a source of the transfer, then + // ignore it since it is a load (unless the transfer is volatile). + if (UI.getOperandNo() == 1) { + if (MI->isVolatile()) return false; + continue; + } + + // If we already have seen a copy, reject the second one. + if (TheCopy) return false; + + // If the pointer has been offset from the start of the alloca, we can't + // safely handle this. + if (IsOffset) return false; + + // If the memintrinsic isn't using the alloca as the dest, reject it. + if (UI.getOperandNo() != 0) return false; + + // If the source of the memcpy/move is not a constant global, reject it. + if (!pointsToConstantGlobal(MI->getSource())) + return false; + + // Otherwise, the transform is safe. Remember the copy instruction. + TheCopy = MI; + } + return true; +} + +/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only +/// modified by a copy from a constant global. If we can prove this, we can +/// replace any uses of the alloca with uses of the global directly. +static MemTransferInst * +isOnlyCopiedFromConstantGlobal(AllocaInst *AI, + SmallVectorImpl<Instruction *> &ToDelete) { + MemTransferInst *TheCopy = 0; + if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) + return TheCopy; + return 0; +} + +/// getPointeeAlignment - Compute the minimum alignment of the value pointed +/// to by the given pointer. 
+static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + (CE->getOpcode() == Instruction::GetElementPtr && + cast<GEPOperator>(CE)->hasAllZeroIndices())) + return getPointeeAlignment(CE->getOperand(0), TD); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + if (!GV->isDeclaration()) + return TD.getPreferredAlignment(GV); + + if (PointerType *PT = dyn_cast<PointerType>(V->getType())) + return TD.getABITypeAlignment(PT->getElementType()); + + return 0; +} Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that @@ -113,6 +260,29 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Instruction *NewI + = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, + AI.getType())); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } + } + // At last, use the generic allocation site handler to aggressively remove // unused allocas. 
return visitAllocSite(AI); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index eb9945b..291e800 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -881,12 +881,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { if (TrueSI->getCondition() == CondVal) { + if (SI.getTrueValue() == TrueSI->getTrueValue()) + return 0; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } } if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { if (FalseSI->getCondition() == CondVal) { + if (SI.getFalseValue() == FalseSI->getFalseValue()) + return 0; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } @@ -899,5 +903,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } + if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + unsigned VWidth = VecTy->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) { + if (V != &SI) + return ReplaceInstUsesWith(SI, V); + return &SI; + } + } + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 125c74a..54be8ed 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -989,6 +989,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } break; } + case Instruction::Select: { + APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts); + if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) { + for (unsigned i = 0; i < VWidth; i++) { + if (CV->getAggregateElement(i)->isNullValue()) + LeftDemanded.clearBit(i); + else + RightDemanded.clearBit(i); + } + } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } + + // Output elements are undefined if both are undefined. + UndefElts &= UndefElts2; + break; + } case Instruction::BitCast: { // Vector->vector casts only. VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); @@ -1074,6 +1097,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // like undef&0. The result is known zero, not undef. 
UndefElts &= UndefElts2; break; + case Instruction::FPTrunc: + case Instruction::FPExt: + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; case Instruction::Call: { IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 3368026..06f4d2f 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -61,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1; static const char *kAsanReportErrorTemplate = "__asan_report_"; static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; +static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; +static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *kAsanInitName = "__asan_init"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; @@ -86,8 +88,8 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes", static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClMergeCallbacks("asan-merge-callbacks", - cl::desc("merge __asan_report_ callbacks to create fewer BBs"), +static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path", + cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, cl::init(false)); // This flag limits the number of instructions to be instrumented // in any given BB. Normally, this should be set to unlimited (INT_MAX), @@ -106,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", // This flag may need to be replaced with -f[no]asan-globals. static cl::opt<bool> ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInitializers("asan-initialization-order", + cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt<bool> ClMemIntrin("asan-memintrin", cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); // This flag may need to be replaced with -fasan-blacklist. @@ -145,24 +149,11 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), namespace { -/// When the crash callbacks are merged, they receive some amount of arguments -/// that are merged in a PHI node. This struct represents arguments from one -/// call site. -struct CrashArg { - Value *Arg1; - Value *Arg2; -}; - /// An object of this type is created while instrumenting every function. struct AsanFunctionContext { - AsanFunctionContext(Function &Function) : F(Function), CrashBlock() { } + AsanFunctionContext(Function &Function) : F(Function) { } Function &F; - // These are initially zero. If we require at least one call to - // __asan_report_{read,write}{1,2,4,8,16}, an appropriate BB is created. - BasicBlock *CrashBlock[2][kNumberOfAccessSizes]; - typedef SmallVector<CrashArg, 8> CrashArgsVec; - CrashArgsVec CrashArgs[2][kNumberOfAccessSizes]; }; /// AddressSanitizer: instrument the code in module to find memory bugs. 
@@ -175,7 +166,7 @@ struct AddressSanitizer : public ModulePass { Value *Addr, uint32_t TypeSize, bool IsWrite); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); - Instruction *generateCrashCode(BasicBlock *BB, Value *Addr, Value *PC, + Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex); bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI); void instrumentMemIntrinsicParam(AsanFunctionContext &AFC, @@ -184,6 +175,8 @@ struct AddressSanitizer : public ModulePass { Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool handleFunction(Module &M, Function &F); + void createInitializerPoisonCalls(Module &M, + Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool poisonStackInFunction(Module &M, Function &F); virtual bool runOnModule(Module &M); @@ -191,7 +184,6 @@ struct AddressSanitizer : public ModulePass { static char ID; // Pass identification, replacement for typeid private: - uint64_t getAllocaSizeInBytes(AllocaInst *AI) { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); @@ -207,9 +199,12 @@ struct AddressSanitizer : public ModulePass { } Function *checkInterfaceFunction(Constant *FuncOrBitcast); + bool ShouldInstrumentGlobal(GlobalVariable *G); void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison); bool LooksLikeCodeInBug11395(Instruction *I); + void FindDynamicInitializers(Module &M); + bool HasDynamicInitializer(GlobalVariable *G); LLVMContext *C; TargetData *TD; @@ -226,6 +221,7 @@ struct AddressSanitizer : public ModulePass { // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; InlineAsm *EmptyAsm; + SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals; }; } // namespace @@ -267,24 +263,24 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { // ThenBlock // Tail // -// If ThenBlock is zero, a new block is created and its terminator is returned. -// Otherwize 0 is returned. -static BranchInst *splitBlockAndInsertIfThen(Value *Cmp, - BasicBlock *ThenBlock = 0) { +// ThenBlock block is created and its terminator is returned. +// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise +// it is terminated with BranchInst to Tail. 
+static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) { Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode(); BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); TerminatorInst *HeadOldTerm = Head->getTerminator(); - BranchInst *CheckTerm = 0; - if (!ThenBlock) { - LLVMContext &C = Head->getParent()->getParent()->getContext(); - ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + LLVMContext &C = Head->getParent()->getParent()->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + TerminatorInst *CheckTerm; + if (Unreachable) + CheckTerm = new UnreachableInst(C, ThenBlock); + else CheckTerm = BranchInst::Create(Tail, ThenBlock); - } BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); - return CheckTerm; } @@ -336,7 +332,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC, Value *Cmp = IRB.CreateICmpNE(Length, Constant::getNullValue(Length->getType())); - InsertBefore = splitBlockAndInsertIfThen(Cmp); + InsertBefore = splitBlockAndInsertIfThen(Cmp, false); } instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true); @@ -371,14 +367,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) { return NULL; } +void AddressSanitizer::FindDynamicInitializers(Module& M) { + // Clang generates metadata identifying all dynamically initialized globals. + NamedMDNode *DynamicGlobals = + M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); + if (!DynamicGlobals) + return; + for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { + MDNode *MDN = DynamicGlobals->getOperand(i); + assert(MDN->getNumOperands() == 1); + Value *VG = MDN->getOperand(0); + // The optimizer may optimize away a global entirely, in which case we + // cannot instrument access to it. + if (!VG) + continue; + + GlobalVariable *G = cast<GlobalVariable>(VG); + DynamicallyInitializedGlobals.insert(G); + } +} +// Returns true if a global variable is initialized dynamically in this TU. +bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) { + return DynamicallyInitializedGlobals.count(G); +} + void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) { bool IsWrite; Value *Addr = isInterestingMemoryAccess(I, &IsWrite); assert(Addr); - if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) { - // We are accessing a global scalar variable. Nothing to catch here. - return; + if (ClOpt && ClOptGlobals) { + if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { + // If initialization order checking is disabled, a simple access to a + // dynamically initialized global is always valid. + if (!ClInitializers) + return; + // If a global variable does not have dynamic initialization we don't + // have to instrument it. However, if a global has external linkage, we + // assume it has dynamic initialization, as it may have an initializer + // in a different TU. 
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage && + !HasDynamicInitializer(G)) + return; + } } + Type *OrigPtrTy = Addr->getType(); Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); @@ -407,15 +439,11 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) { } Instruction *AddressSanitizer::generateCrashCode( - BasicBlock *BB, Value *Addr, Value *PC, + Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex) { - IRBuilder<> IRB(BB->getFirstNonPHI()); - CallInst *Call; - if (PC) - Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][AccessSizeIndex], - Addr, PC); - else - Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr); + IRBuilder<> IRB(InsertBefore); + CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], + Addr); // We don't do Call->setDoesNotReturn() because the BB already has // UnreachableInst at the end. // This EmptyAsm is required to avoid callback merge. @@ -436,7 +464,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); // (uint8_t) ((Addr & (Granularity-1)) + size - 1) LastAccessedByte = IRB.CreateIntCast( - LastAccessedByte, IRB.getInt8Ty(), false); + LastAccessedByte, ShadowValue->getType(), false); // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); } @@ -456,112 +484,129 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); - - BasicBlock *CrashBlock = 0; - if (ClMergeCallbacks) { - size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); - BasicBlock **Cached = &AFC.CrashBlock[IsWrite][AccessSizeIndex]; - if (!*Cached) { - std::string BBName("crash_bb-"); - BBName += (IsWrite ? "w-" : "r-") + itostr(1 << AccessSizeIndex); - BasicBlock *BB = BasicBlock::Create(*C, BBName, &AFC.F); - new UnreachableInst(*C, BB); - *Cached = BB; - } - CrashBlock = *Cached; - // We need to pass the PC as the second parameter to __asan_report_*. - // There are few problems: - // - Some architectures (e.g. x86_32) don't have a cheap way to get the PC. - // - LLVM doesn't have the appropriate intrinsic. - // For now, put a random number into the PC, just to allow experiments. 
- Value *PC = ConstantInt::get(IntptrTy, rand()); - CrashArg Arg = {AddrLong, PC}; - AFC.CrashArgs[IsWrite][AccessSizeIndex].push_back(Arg); - } else { - CrashBlock = BasicBlock::Create(*C, "crash_bb", &AFC.F); - new UnreachableInst(*C, CrashBlock); - size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); - Instruction *Crash = - generateCrashCode(CrashBlock, AddrLong, 0, IsWrite, AccessSizeIndex); - Crash->setDebugLoc(OrigIns->getDebugLoc()); - } - + size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); size_t Granularity = 1 << MappingScale; - if (TypeSize < 8 * Granularity) { - BranchInst *CheckTerm = splitBlockAndInsertIfThen(Cmp); - assert(CheckTerm->isUnconditional()); + TerminatorInst *CrashTerm = 0; + + if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { + TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false); + assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); + BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB); + CrashTerm = new UnreachableInst(*C, CrashBlock); BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); ReplaceInstWithInst(CheckTerm, NewTerm); } else { - splitBlockAndInsertIfThen(Cmp, CrashBlock); + CrashTerm = splitBlockAndInsertIfThen(Cmp, true); + } + + Instruction *Crash = + generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex); + Crash->setDebugLoc(OrigIns->getDebugLoc()); +} + +void AddressSanitizer::createInitializerPoisonCalls(Module &M, + Value *FirstAddr, + Value *LastAddr) { + // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. + Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); + // If that function is not present, this TU contains no globals, or they have + // all been optimized away + if (!GlobalInit) + return; + + // Set up the arguments to our poison/unpoison functions. + IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); + + // Declare our poisoning and unpoisoning functions. + Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); + Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL)); + AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); + + // Add a call to poison all external globals before the given function starts. + IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr); + + // Add calls to unpoison all globals before each return instruction. + for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); + I != E; ++I) { + if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) { + CallInst::Create(AsanUnpoisonGlobals, "", RI); + } } } +bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { + Type *Ty = cast<PointerType>(G->getType())->getElementType(); + DEBUG(dbgs() << "GLOBAL: " << *G); + + if (!Ty->isSized()) return false; + if (!G->hasInitializer()) return false; + // Touch only those globals that will not be defined in other modules. + // Don't handle ODR type linkages since other modules may be built w/o asan. 
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage && + G->getLinkage() != GlobalVariable::PrivateLinkage && + G->getLinkage() != GlobalVariable::InternalLinkage) + return false; + // Two problems with thread-locals: + // - The address of the main thread's copy can't be computed at link-time. + // - Need to poison all copies, not just the main thread's one. + if (G->isThreadLocal()) + return false; + // For now, just ignore this Alloca if the alignment is large. + if (G->getAlignment() > RedzoneSize) return false; + + // Ignore all the globals with the names starting with "\01L_OBJC_". + // Many of those are put into the .cstring section. The linker compresses + // that section by removing the spare \0s after the string terminator, so + // our redzones get broken. + if ((G->getName().find("\01L_OBJC_") == 0) || + (G->getName().find("\01l_OBJC_") == 0)) { + DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); + return false; + } + + if (G->hasSection()) { + StringRef Section(G->getSection()); + // Ignore the globals from the __OBJC section. The ObjC runtime assumes + // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to + // them. + if ((Section.find("__OBJC,") == 0) || + (Section.find("__DATA, __objc_") == 0)) { + DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); + return false; + } + // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 + // Constant CFString instances are compiled in the following way: + // -- the string buffer is emitted into + // __TEXT,__cstring,cstring_literals + // -- the constant NSConstantString structure referencing that buffer + // is placed into __DATA,__cfstring + // Therefore there's no point in placing redzones into __DATA,__cfstring. + // Moreover, it causes the linker to crash on OS X 10.7 + if (Section.find("__DATA,__cfstring") == 0) { + DEBUG(dbgs() << "Ignoring CFString: " << *G); + return false; + } + } + + return true; +} + // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. bool AddressSanitizer::insertGlobalRedzones(Module &M) { SmallVector<GlobalVariable *, 16> GlobalsToChange; - for (Module::GlobalListType::iterator G = M.getGlobalList().begin(), - E = M.getGlobalList().end(); G != E; ++G) { - Type *Ty = cast<PointerType>(G->getType())->getElementType(); - DEBUG(dbgs() << "GLOBAL: " << *G); - - if (!Ty->isSized()) continue; - if (!G->hasInitializer()) continue; - // Touch only those globals that will not be defined in other modules. - // Don't handle ODR type linkages since other modules may be built w/o asan. - if (G->getLinkage() != GlobalVariable::ExternalLinkage && - G->getLinkage() != GlobalVariable::PrivateLinkage && - G->getLinkage() != GlobalVariable::InternalLinkage) - continue; - // Two problems with thread-locals: - // - The address of the main thread's copy can't be computed at link-time. - // - Need to poison all copies, not just the main thread's one. - if (G->isThreadLocal()) - continue; - // For now, just ignore this Alloca if the alignment is large. - if (G->getAlignment() > RedzoneSize) continue; - - // Ignore all the globals with the names starting with "\01L_OBJC_". - // Many of those are put into the .cstring section. The linker compresses - // that section by removing the spare \0s after the string terminator, so - // our redzones get broken. 
- if ((G->getName().find("\01L_OBJC_") == 0) || - (G->getName().find("\01l_OBJC_") == 0)) { - DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); - continue; - } - - if (G->hasSection()) { - StringRef Section(G->getSection()); - // Ignore the globals from the __OBJC section. The ObjC runtime assumes - // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to - // them. - if ((Section.find("__OBJC,") == 0) || - (Section.find("__DATA, __objc_") == 0)) { - DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); - continue; - } - // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 - // Constant CFString instances are compiled in the following way: - // -- the string buffer is emitted into - // __TEXT,__cstring,cstring_literals - // -- the constant NSConstantString structure referencing that buffer - // is placed into __DATA,__cfstring - // Therefore there's no point in placing redzones into __DATA,__cfstring. - // Moreover, it causes the linker to crash on OS X 10.7 - if (Section.find("__DATA,__cfstring") == 0) { - DEBUG(dbgs() << "Ignoring CFString: " << *G); - continue; - } - } - - GlobalsToChange.push_back(G); + for (Module::GlobalListType::iterator G = M.global_begin(), + E = M.global_end(); G != E; ++G) { + if (ShouldInstrumentGlobal(G)) + GlobalsToChange.push_back(G); } size_t n = GlobalsToChange.size(); @@ -572,13 +617,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, - IntptrTy, IntptrTy, NULL); - SmallVector<Constant *, 16> Initializers(n); + IntptrTy, IntptrTy, + IntptrTy, NULL); + SmallVector<Constant *, 16> Initializers(n), DynamicInit; IRBuilder<> IRB(CtorInsertBefore); + if (ClInitializers) + FindDynamicInitializers(M); + + // The addresses of the first and last dynamically initialized globals in + // this TU. Used in initialization order checking. + Value *FirstDynamic = 0, *LastDynamic = 0; + for (size_t i = 0; i < n; i++) { GlobalVariable *G = GlobalsToChange[i]; PointerType *PtrTy = cast<PointerType>(G->getType()); @@ -587,6 +641,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { uint64_t RightRedzoneSize = RedzoneSize + (RedzoneSize - (SizeInBytes % RedzoneSize)); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); + // Determine whether this global should be poisoned in initialization. + bool GlobalHasDynamicInitializer = HasDynamicInitializer(G); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -621,7 +677,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); + + // Populate the first and last globals declared in this TU. 
+ if (ClInitializers && GlobalHasDynamicInitializer) { + LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); + if (FirstDynamic == 0) + FirstDynamic = LastDynamic; + } + DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal); } @@ -630,8 +695,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage, ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); + // Create calls for poisoning before initializers run and unpoisoning after. + if (ClInitializers && FirstDynamic && LastDynamic) + createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); + Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + kAsanRegisterGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, NULL)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); IRB.CreateCall2(AsanRegisterGlobals, @@ -694,12 +764,7 @@ bool AddressSanitizer::runOnModule(Module &M) { std::string FunctionName = std::string(kAsanReportErrorTemplate) + (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); // If we are merging crash callbacks, they have two parameters. - if (ClMergeCallbacks) - AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( - M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, - IntptrTy, NULL)); - else - AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( + AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); } } @@ -845,33 +910,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { NumInstrumented++; } - // Create PHI nodes and crash callbacks if we are merging crash callbacks. - if (NumInstrumented) { - for (size_t IsWrite = 0; IsWrite <= 1; IsWrite++) { - for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; - AccessSizeIndex++) { - BasicBlock *BB = AFC.CrashBlock[IsWrite][AccessSizeIndex]; - if (!BB) continue; - assert(ClMergeCallbacks); - AsanFunctionContext::CrashArgsVec &Args = - AFC.CrashArgs[IsWrite][AccessSizeIndex]; - IRBuilder<> IRB(BB->getFirstNonPHI()); - size_t n = Args.size(); - PHINode *PN1 = IRB.CreatePHI(IntptrTy, n); - PHINode *PN2 = IRB.CreatePHI(IntptrTy, n); - // We need to match crash parameters and the predecessors. - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - PI != PE; ++PI) { - n--; - PN1->addIncoming(Args[n].Arg1, *PI); - PN2->addIncoming(Args[n].Arg2, *PI); - } - assert(n == 0); - generateCrashCode(BB, PN1, PN2, IsWrite, AccessSizeIndex); - } - } - } - DEBUG(dbgs() << F); bool ChangedStack = poisonStackInFunction(M, F); diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index f76c77e..a4bb5a6 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -26,30 +26,6 @@ namespace llvm { /// The type parameter T determines the type of the nodes of the graph. template <typename T> class MaximumSpanningTree { - - // A comparing class for comparing weighted edges. 
- template <typename CT> - struct EdgeWeightCompare { - bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X, - typename MaximumSpanningTree<CT>::EdgeWeight Y) const { - if (X.second > Y.second) return true; - if (X.second < Y.second) return false; - if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) { - if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) { - if (BBX->size() > BBY->size()) return true; - if (BBX->size() < BBY->size()) return false; - } - } - if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) { - if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) { - if (BBX->size() > BBY->size()) return true; - if (BBX->size() < BBY->size()) return false; - } - } - return false; - } - }; - public: typedef std::pair<const T*, const T*> Edge; typedef std::pair<Edge, double> EdgeWeight; @@ -59,6 +35,33 @@ namespace llvm { MaxSpanTree MST; + private: + // A comparing class for comparing weighted edges. + struct EdgeWeightCompare { + static bool getBlockSize(const T *X) { + const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X); + return BB ? BB->size() : 0; + } + + bool operator()(EdgeWeight X, EdgeWeight Y) const { + if (X.second > Y.second) return true; + if (X.second < Y.second) return false; + + // Equal edge weights: break ties by comparing block sizes. + size_t XSizeA = getBlockSize(X.first.first); + size_t YSizeA = getBlockSize(Y.first.first); + if (XSizeA > YSizeA) return true; + if (XSizeA < YSizeA) return false; + + size_t XSizeB = getBlockSize(X.first.second); + size_t YSizeB = getBlockSize(Y.first.second); + if (XSizeB > YSizeB) return true; + if (XSizeB < YSizeB) return false; + + return false; + } + }; + public: static char ID; // Class identification, replacement for typeinfo @@ -66,7 +69,7 @@ namespace llvm { /// spanning tree. MaximumSpanningTree(EdgeWeights &EdgeVector) { - std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>()); + std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare()); // Create spanning tree, Forest contains a special data structure // that makes checking if two nodes are already in a common (sub-)tree diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 277c4d5..a8deda8 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -66,11 +66,6 @@ static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); -// FIXME: Remove this abomination once all of the tests pass without it! 
-static cl::opt<bool> DisableDeleteDeadBlocks( - "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false), - cl::desc("Disable deleting dead blocks in CodeGenPrepare")); - static cl::opt<bool> DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); @@ -116,6 +111,7 @@ namespace { } private: + bool EliminateFallThrough(Function &F); bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void EliminateMostlyEmptyBlock(BasicBlock *BB); @@ -187,10 +183,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { WorkList.insert(*II); } - if (!DisableDeleteDeadBlocks) - for (SmallPtrSet<BasicBlock*, 8>::iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) - DeleteDeadBlock(*I); + for (SmallPtrSet<BasicBlock*, 8>::iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) + DeleteDeadBlock(*I); + + // Merge pairs of basic blocks with unconditional branches, connected by + // a single edge. + if (EverMadeChange || MadeChange) + MadeChange |= EliminateFallThrough(F); if (MadeChange) ModifiedDT = true; @@ -203,6 +203,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } +/// EliminateFallThrough - Merge basic blocks which are connected +/// by a single edge, where one of the basic blocks has a single successor +/// pointing to the other basic block, which has a single predecessor. +bool CodeGenPrepare::EliminateFallThrough(Function &F) { + bool Changed = false; + // Scan all of the blocks in the function, except for the entry block. + for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + // If the destination block has a single pred, then this is a trivial + // edge, just collapse it. + BasicBlock *SinglePred = BB->getSinglePredecessor(); + + if (!SinglePred || SinglePred == BB) continue; + + BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); + if (Term && !Term->isConditional()) { + Changed = true; + DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); + // Remember if SinglePred was the entry block of the function. + // If so, we will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(BB, this); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + // We have erased a block. Update the iterator. + I = BB; + } + } + return Changed; +} + /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, /// debug info directives, and an unconditional branch. Passes before isel /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for @@ -610,7 +643,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // that have the default "don't know" as the objectsize. Anything else // should be left alone. CodeGenPrepareFortifiedLibCalls Simplifier; - return Simplifier.fold(CI, TD); + return Simplifier.fold(CI, TD, TLInfo); } /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return @@ -645,10 +678,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; + PHINode *PN = 0; + BitCastInst *BCI = 0; Value *V = RI->getReturnValue(); - PHINode *PN = V ? 
dyn_cast<PHINode>(V) : NULL; - if (V && !PN) - return false; + if (V) { + BCI = dyn_cast<BitCastInst>(V); + if (BCI) + V = BCI->getOperand(0); + + PN = dyn_cast<PHINode>(V); + if (!PN) + return false; + } BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) @@ -666,6 +707,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI == BCI) + // Also skip over the bitcast. + ++BI; if (&*BI != RI) return false; } else { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 5eff0e5..8b1283f 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -378,7 +378,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // // We have to be careful here as *Off is signed while *.Size is unsigned. if (EarlierOff >= LaterOff && - Later.Size > Earlier.Size && + Later.Size >= Earlier.Size && uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) return OverwriteComplete; @@ -740,12 +740,19 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } - if (isa<AllocaInst>(BBI) || isAllocLikeFn(BBI)) { + if (isa<AllocaInst>(BBI)) { + // Remove allocas from the list of dead stack objects; there can't be + // any references before the definition. DeadStackObjects.remove(BBI); continue; } if (CallSite CS = cast<Value>(BBI)) { + // Remove allocation function calls from the list of dead stack objects; + // there can't be any references before the definition. + if (isAllocLikeFn(BBI)) + DeadStackObjects.remove(BBI); + // If this call does not access memory, it can't be loading any of our // pointers. if (AA->doesNotAccessMemory(CS)) @@ -771,7 +778,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. if (DeadStackObjects.empty()) - return MadeChange; + break; continue; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 140864d..4822fd0 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -512,7 +512,7 @@ namespace { /// have that value number. Use findLeader to query it. struct LeaderTableEntry { Value *Val; - BasicBlock *BB; + const BasicBlock *BB; LeaderTableEntry *Next; }; DenseMap<uint32_t, LeaderTableEntry> LeaderTable; @@ -542,7 +542,7 @@ namespace { private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. 
- void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; @@ -608,13 +608,13 @@ namespace { void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); bool performPRE(Function &F); - Value *findLeader(BasicBlock *BB, uint32_t num); + Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); unsigned replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root); - bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root); + const BasicBlockEdge &Root); + bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); }; char GVN::ID = 0; @@ -1977,7 +1977,7 @@ bool GVN::processLoad(LoadInst *L) { // and then scan the list to find one whose block dominates the block in // question. This is fast because dominator tree queries consist of only // a few comparisons of DFS numbers. -Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { +Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { LeaderTableEntry Vals = LeaderTable[num]; if (!Vals.Val) return 0; @@ -2004,22 +2004,13 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// use is dominated by the given basic block. Returns the number of uses that /// were replaced. unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root) { + const BasicBlockEdge &Root) { unsigned Count = 0; for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); UI != UE; ) { Use &U = (UI++).getUse(); - // If From occurs as a phi node operand then the use implicitly lives in the - // corresponding incoming block. Otherwise it is the block containing the - // user that must be dominated by Root. - BasicBlock *UsingBlock; - if (PHINode *PN = dyn_cast<PHINode>(U.getUser())) - UsingBlock = PN->getIncomingBlock(U); - else - UsingBlock = cast<Instruction>(U.getUser())->getParent(); - - if (DT->dominates(Root, UsingBlock)) { + if (DT->dominates(Root, U)) { U.set(To); ++Count; } @@ -2027,13 +2018,34 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, return Count; } +/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return +/// true if every path from the entry block to 'Dst' passes via this edge. In +/// particular 'Dst' must not be reachable via another edge from 'Src'. +static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, + DominatorTree *DT) { + // While in theory it is interesting to consider the case in which Dst has + // more than one predecessor, because Dst might be part of a loop which is + // only reachable from Src, in practice it is pointless since at the time + // GVN runs all such loops have preheaders, which means that Dst will have + // been changed to have only one predecessor, namely Src. + const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); + const BasicBlock *Src = E.getStart(); + assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); + (void)Src; + return Pred != 0; +} + /// propagateEquality - The given values are known to be equal in every block /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. 
-bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { +bool GVN::propagateEquality(Value *LHS, Value *RHS, + const BasicBlockEdge &Root) { SmallVector<std::pair<Value*, Value*>, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; + // For speed, compute a conservative fast approximation to + // DT->dominates(Root, Root.getEnd()); + bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); while (!Worklist.empty()) { std::pair<Value*, Value*> Item = Worklist.pop_back_val(); @@ -2065,9 +2077,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { LVN = RVN; } } - assert((!isa<Instruction>(RHS) || - DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) && - "Instruction doesn't dominate scope!"); // If value numbering later sees that an instruction in the scope is equal // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve @@ -2076,8 +2085,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { // if RHS is an instruction (if an instruction in the scope is morphed into // LHS then it will be turned into RHS by the next GVN iteration anyway, so // using the leader table is about compiling faster, not optimizing better). - if (!isa<Instruction>(RHS)) - addToLeaderTable(LVN, RHS, Root); + // The leader table only tracks basic blocks, not edges. Only add to if we + // have the simple case where the edge dominates the end. + if (RootDominatesEnd && !isa<Instruction>(RHS)) + addToLeaderTable(LVN, RHS, Root.getEnd()); // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As // LHS always has at least one use that is not dominated by Root, this will @@ -2136,7 +2147,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { // If the number we were assigned was brand new then there is no point in // looking for an instruction realizing it: there cannot be one! if (Num < NextNum) { - Value *NotCmp = findLeader(Root, Num); + Value *NotCmp = findLeader(Root.getEnd(), Num); if (NotCmp && isa<Instruction>(NotCmp)) { unsigned NumReplacements = replaceAllDominatedUsesWith(NotCmp, NotVal, Root); @@ -2146,7 +2157,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { } // Ensure that any instruction in scope that gets the "A < B" value number // is replaced with false. - addToLeaderTable(Num, NotVal, Root); + // The leader table only tracks basic blocks, not edges. Only add to if we + // have the simple case where the edge dominates the end. + if (RootDominatesEnd) + addToLeaderTable(Num, NotVal, Root.getEnd()); continue; } @@ -2155,22 +2169,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { return Changed; } -/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return -/// true if every path from the entry block to 'Dst' passes via this edge. In -/// particular 'Dst' must not be reachable via another edge from 'Src'. -static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst, - DominatorTree *DT) { - // While in theory it is interesting to consider the case in which Dst has - // more than one predecessor, because Dst might be part of a loop which is - // only reachable from Src, in practice it is pointless since at the time - // GVN runs all such loops have preheaders, which means that Dst will have - // been changed to have only one predecessor, namely Src. 
- BasicBlock *Pred = Dst->getSinglePredecessor(); - assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); - (void)Src; - return Pred != 0; -} - /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I) { @@ -2210,18 +2208,20 @@ bool GVN::processInstruction(Instruction *I) { BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); + // Avoid multiple edges early. + if (TrueSucc == FalseSucc) + return false; + BasicBlock *Parent = BI->getParent(); bool Changed = false; - if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT)) - Changed |= propagateEquality(BranchCond, - ConstantInt::getTrue(TrueSucc->getContext()), - TrueSucc); + Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); + BasicBlockEdge TrueE(Parent, TrueSucc); + Changed |= propagateEquality(BranchCond, TrueVal, TrueE); - if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT)) - Changed |= propagateEquality(BranchCond, - ConstantInt::getFalse(FalseSucc->getContext()), - FalseSucc); + Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); + BasicBlockEdge FalseE(Parent, FalseSucc); + Changed |= propagateEquality(BranchCond, FalseVal, FalseE); return Changed; } @@ -2234,8 +2234,9 @@ bool GVN::processInstruction(Instruction *I) { for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { BasicBlock *Dst = i.getCaseSuccessor(); - if (isOnlyReachableViaThisEdge(Parent, Dst, DT)) - Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst); + BasicBlockEdge E(Parent, Dst); + if (E.isSingleEdge()) + Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); } return Changed; } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 582948e..0192e92 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -175,7 +175,9 @@ namespace { bool canSinkOrHoistInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - void PromoteAliasSet(AliasSet &AS); + void PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts); }; } @@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + SmallVector<BasicBlock *, 8> ExitBlocks; + SmallVector<Instruction *, 8> InsertPts; + // Loop over all of the alias sets in the tracker object. for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E; ++I) - PromoteAliasSet(*I); + PromoteAliasSet(*I, ExitBlocks, InsertPts); } // Clear out loops state information for the next iteration @@ -631,6 +636,7 @@ namespace { Value *SomePtr; // Designated pointer to store to. 
SmallPtrSet<Value*, 4> &PointerMustAliases; SmallVectorImpl<BasicBlock*> &LoopExitBlocks; + SmallVectorImpl<Instruction*> &LoopInsertPts; AliasSetTracker &AST; DebugLoc DL; int Alignment; @@ -638,11 +644,12 @@ namespace { LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, - SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, - DebugLoc dl, int alignment) + SmallVectorImpl<BasicBlock*> &LEB, + SmallVectorImpl<Instruction*> &LIP, + AliasSetTracker &ast, DebugLoc dl, int alignment) : LoadAndStorePromoter(Insts, S), SomePtr(SP), - PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), - Alignment(alignment) {} + PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), + AST(ast), DL(dl), Alignment(alignment) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { @@ -662,7 +669,7 @@ namespace { for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); + Instruction *InsertPos = LoopInsertPts[i]; StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); @@ -684,7 +691,9 @@ namespace { /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. /// -void LICM::PromoteAliasSet(AliasSet &AS) { +void LICM::PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. @@ -794,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); + // Figure out the loop exits and their insertion points, if this is the + // first promotion. + if (ExitBlocks.empty()) { + CurLoop->getUniqueExitBlocks(ExitBlocks); + InsertPts.resize(ExitBlocks.size()); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt(); + } // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST, DL, Alignment); + InsertPts, *CurAST, DL, Alignment); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. 
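The GVN hunks above change propagateEquality to reason about a BasicBlockEdge rather than a destination block, so a branch condition can be propagated along an edge even when the destination has other predecessors; the leader table, which is still block-based, is only updated when the edge dominates its end. Below is a condensed, hedged sketch of the new caller contract for conditional branches, pulled out of the hunk into a standalone helper for readability. The helper name, the forward declaration, and the include paths (2012-era tree layout) are illustrative assumptions; the calls themselves mirror what is visible in the diff.

// Condensed sketch (not the pass itself) of how a conditional branch now
// feeds edge-based equality propagation. 2012-era include paths assumed.
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"   // BasicBlockEdge
using namespace llvm;

// Stand-in for GVN::propagateEquality as modified above; pass state omitted.
bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);

bool processConditionalBranch(BranchInst *BI) {
  if (!BI->isConditional())
    return false;
  Value *Cond = BI->getCondition();
  BasicBlock *TrueSucc = BI->getSuccessor(0);
  BasicBlock *FalseSucc = BI->getSuccessor(1);
  // A branch whose successors coincide carries no per-edge information,
  // so bail out early -- this is the new TrueSucc == FalseSucc guard.
  if (TrueSucc == FalseSucc)
    return false;

  BasicBlock *Parent = BI->getParent();
  bool Changed = false;
  // Along the taken edge the condition is known to be true...
  BasicBlockEdge TrueE(Parent, TrueSucc);
  Changed |= propagateEquality(Cond,
                               ConstantInt::getTrue(TrueSucc->getContext()),
                               TrueE);
  // ...and along the fall-through edge it is known to be false.
  BasicBlockEdge FalseE(Parent, FalseSucc);
  Changed |= propagateEquality(Cond,
                               ConstantInt::getFalse(FalseSucc->getContext()),
                               FalseE);
  return Changed;
}

Switches follow the same shape in the hunk above: each case destination gets its own BasicBlockEdge, and the case value is propagated only when E.isSingleEdge() holds.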
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index b14a713..0ae7a51 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -738,7 +738,8 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()); + Value *V = DeadInsts.pop_back_val(); + Instruction *I = dyn_cast_or_null<Instruction>(V); if (I == 0 || !isInstructionTriviallyDead(I)) continue; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index ffcf97c..09687d8 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -543,6 +543,7 @@ static bool LinearizeExprTree(BinaryOperator *I, // Update the number of paths to the leaf. IncorporateWeight(It->second, Weight, Opcode); +#if 0 // TODO: Re-enable once PR13021 is fixed. // The leaf already has one use from inside the expression. As we want // exactly one such use, drop this new use of the leaf. assert(!Op->hasOneUse() && "Only one use, but we got here twice!"); @@ -559,6 +560,7 @@ static bool LinearizeExprTree(BinaryOperator *I, Leaves.erase(It); continue; } +#endif // If we still have uses that are not accounted for by the expression // then it is not safe to modify the value. diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index ec835b1..8090fdf 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -56,7 +56,6 @@ STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); STATISTIC(NumConverted, "Number of aggregates converted to scalar"); -STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { @@ -183,9 +182,6 @@ namespace { void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); bool ShouldAttemptScalarRepl(AllocaInst *AI); - - static MemTransferInst *isOnlyCopiedFromConstantGlobal( - AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete); }; // SROA_DT - SROA that uses DominatorTree. @@ -612,11 +608,16 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - if (!GEP->hasAllConstantIndices()) - NonConstantIdx = Indices.pop_back_val(); + Value* GEPNonConstantIdx = 0; + if (!GEP->hasAllConstantIndices()) { + assert(!NonConstantIdx && + "Dynamic GEP reading from dynamic GEP unsupported"); + GEPNonConstantIdx = Indices.pop_back_val(); + } else + GEPNonConstantIdx = NonConstantIdx; uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), Indices); - ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx); + ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx); GEP->eraseFromParent(); continue; } @@ -1460,26 +1461,6 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { return false; } -/// getPointeeAlignment - Compute the minimum alignment of the value pointed -/// to by the given pointer. 
-static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::BitCast || - (CE->getOpcode() == Instruction::GetElementPtr && - cast<GEPOperator>(CE)->hasAllZeroIndices())) - return getPointeeAlignment(CE->getOperand(0), TD); - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - if (!GV->isDeclaration()) - return TD.getPreferredAlignment(GV); - - if (PointerType *PT = dyn_cast<PointerType>(V->getType())) - return TD.getABITypeAlignment(PT->getElementType()); - - return 0; -} - - // performScalarRepl - This algorithm is a simple worklist driven algorithm, // which runs on all of the alloca instructions in the function, removing them // if they are only used by getelementptr instructions. @@ -1511,29 +1492,6 @@ bool SROA::performScalarRepl(Function &F) { if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) continue; - // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global whose alignment is equal to or exceeds that of the - // allocation. If this is the case, we can change all users to use - // the constant global instead. This is commonly produced by the CFE by - // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' - // is only subsequently read. - SmallVector<Instruction *, 4> ToDelete; - if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { - if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { - DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - ToDelete[i]->eraseFromParent(); - Constant *TheSrc = cast<Constant>(Copy->getSource()); - AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); - Copy->eraseFromParent(); // Don't mutate the global. - AI->eraseFromParent(); - ++NumGlobals; - Changed = true; - continue; - } - } - // Check to see if we can perform the core SROA transformation. We cannot // transform the allocation instruction if it is an array allocation // (allocations OF arrays are ok though), and an allocation of a scalar @@ -2651,134 +2609,3 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return true; } - - - -/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to -/// some part of a constant global variable. This intentionally only accepts -/// constant expressions because we don't can't rewrite arbitrary instructions. -static bool PointsToConstantGlobal(Value *V) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - return GV->isConstant(); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::GetElementPtr) - return PointsToConstantGlobal(CE->getOperand(0)); - return false; -} - -/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) -/// pointer to an alloca. Ignore any reads of the pointer, return false if we -/// see any stores or other unknown uses. If we see pointer arithmetic, keep -/// track of whether it moves the pointer (with isOffset) but otherwise traverse -/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to -/// the alloca, and if the source pointer is a pointer to a constant global, we -/// can optimize this. 
-static bool -isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - bool isOffset, - SmallVector<Instruction *, 4> &LifetimeMarkers) { - // We track lifetime intrinsics as we encounter them. If we decide to go - // ahead and replace the value with the global, this lets the caller quickly - // eliminate the markers. - - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - User *U = cast<Instruction>(*UI); - - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - // Ignore non-volatile loads, they are always ok. - if (!LI->isSimple()) return false; - continue; - } - - if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset, - LifetimeMarkers)) - return false; - continue; - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { - // If the GEP has all zero indices, it doesn't offset the pointer. If it - // doesn't, it does. - if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, - isOffset || !GEP->hasAllZeroIndices(), - LifetimeMarkers)) - return false; - continue; - } - - if (CallSite CS = U) { - // If this is the function being called then we treat it like a load and - // ignore it. - if (CS.isCallee(UI)) - continue; - - // If this is a readonly/readnone call site, then we know it is just a - // load (but one that potentially returns the value itself), so we can - // ignore it if we know that the value isn't captured. - unsigned ArgNo = CS.getArgumentNo(UI); - if (CS.onlyReadsMemory() && - (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) - continue; - - // If this is being passed as a byval argument, the caller is making a - // copy, so it is only a read of the alloca. - if (CS.isByValArgument(ArgNo)) - continue; - } - - // Lifetime intrinsics can be handled by the caller. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - assert(II->use_empty() && "Lifetime markers have no result to use!"); - LifetimeMarkers.push_back(II); - continue; - } - } - - // If this is isn't our memcpy/memmove, reject it as something we can't - // handle. - MemTransferInst *MI = dyn_cast<MemTransferInst>(U); - if (MI == 0) - return false; - - // If the transfer is using the alloca as a source of the transfer, then - // ignore it since it is a load (unless the transfer is volatile). - if (UI.getOperandNo() == 1) { - if (MI->isVolatile()) return false; - continue; - } - - // If we already have seen a copy, reject the second one. - if (TheCopy) return false; - - // If the pointer has been offset from the start of the alloca, we can't - // safely handle this. - if (isOffset) return false; - - // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != 0) return false; - - // If the source of the memcpy/move is not a constant global, reject it. - if (!PointsToConstantGlobal(MI->getSource())) - return false; - - // Otherwise, the transform is safe. Remember the copy instruction. - TheCopy = MI; - } - return true; -} - -/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only -/// modified by a copy from a constant global. If we can prove this, we can -/// replace any uses of the alloca with uses of the global directly. 
-MemTransferInst * -SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI, - SmallVector<Instruction*, 4> &ToDelete) { - MemTransferInst *TheCopy = 0; - if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete)) - return TheCopy; - return 0; -} diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index a1a8a41..3904419 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -157,14 +157,15 @@ struct StrCatOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitStrLenMemCpy(Src, Dst, Len, B); - return Dst; + return EmitStrLenMemCpy(Src, Dst, Len, B); } - void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { + Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B, TD); + Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + if (!DstLen) + return 0; // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -175,6 +176,7 @@ struct StrCatOpt : public LibCallOptimization { // concatenation for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(CpyDst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + return Dst; } }; @@ -221,8 +223,7 @@ struct StrNCatOpt : public StrCatOpt { // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further - EmitStrLenMemCpy(Src, Dst, SrcLen, B); - return Dst; + return EmitStrLenMemCpy(Src, Dst, SrcLen, B); } }; @@ -254,7 +255,7 @@ struct StrChrOpt : public LibCallOptimization { return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(TD->getIntPtrType(*Context), Len), - B, TD); + B, TD, TLI); } // Otherwise, the character is a constant, see if the first argument is @@ -299,7 +300,7 @@ struct StrRChrOpt : public LibCallOptimization { if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) if (TD && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, TD); + return EmitStrChr(SrcStr, '\0', B, TD, TLI); return 0; } @@ -355,7 +356,7 @@ struct StrCmpOpt : public LibCallOptimization { return EmitMemCmp(Str1P, Str2P, ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B, TD); + std::min(Len1, Len2)), B, TD, TLI); } return 0; @@ -391,7 +392,7 @@ struct StrNCmpOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), 0); if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD); + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -447,11 +448,10 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
- if (OptChkCall) - EmitMemCpyChk(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), - CI->getArgOperand(2), B, TD); - else + if (!OptChkCall || + !EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), + CI->getArgOperand(2), B, TD, TLI)) B.CreateMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); return Dst; @@ -480,8 +480,10 @@ struct StpCpyOpt: public LibCallOptimization { if (!TD) return 0; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); - if (Dst == Src) // stpcpy(x,x) -> x+strlen(x) - return B.CreateInBoundsGEP(Dst, EmitStrLen(Src, B, TD)); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); @@ -494,9 +496,8 @@ struct StpCpyOpt: public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - if (OptChkCall) - EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD); - else + if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, + TD, TLI)) B.CreateMemCpy(Dst, Src, LenV, 1); return DstEnd; } @@ -609,7 +610,7 @@ struct StrPBrkOpt : public LibCallOptimization { // strpbrk(s, "a") -> strchr(s, 'a') if (TD && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD); + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); return 0; } @@ -698,7 +699,7 @@ struct StrCSpnOpt : public LibCallOptimization { // strcspn(s, "") -> strlen(s) if (TD && HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, TD); + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); return 0; } @@ -722,9 +723,13 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD); + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, TD); + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); UI != UE; ) { ICmpInst *Old = cast<ICmpInst>(*UI++); @@ -760,9 +765,10 @@ struct StrStrOpt : public LibCallOptimization { } // fold strstr(x, "y") -> strchr(x, 'y'). - if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), - ToFindStr[0], B, TD), CI->getType()); + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + } return 0; } }; @@ -1179,8 +1185,8 @@ struct PrintFOpt : public LibCallOptimization { // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD); - if (CI->use_empty()) return CI; + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1191,26 +1197,26 @@ struct PrintFOpt : public LibCallOptimization { // pass to be run after this pass, to merge duplicate strings. 
FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - EmitPutS(GV, B, TD); - return CI->use_empty() ? (Value*)CI : - ConstantInt::get(CI->getType(), FormatStr.size()+1); + Value *NewCI = EmitPutS(GV, B, TD, TLI); + return (CI->use_empty() || !NewCI) ? + NewCI : + ConstantInt::get(CI->getType(), FormatStr.size()+1); } // Optimize specific format strings. // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); - if (CI->use_empty()) return CI; + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - EmitPutS(CI->getArgOperand(1), B, TD); - return CI; + return EmitPutS(CI->getArgOperand(1), B, TD, TLI); } return 0; } @@ -1297,7 +1303,9 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + if (!Len) + return 0; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); @@ -1364,8 +1372,8 @@ struct FWriteOpt : public LibCallOptimization { // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - EmitFPutC(Char, CI->getArgOperand(3), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; } return 0; @@ -1390,10 +1398,10 @@ struct FPutsOpt : public LibCallOptimization { // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; - EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD, TLI); - return CI; // Known to have no uses (see above). + // Known to have no uses (see above). + return EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD, TLI); } }; @@ -1417,11 +1425,11 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return ConstantInt::get(CI->getType(), FormatStr.size()); + Value *NewCI = EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); + return NewCI ? 
ConstantInt::get(CI->getType(), FormatStr.size()) : 0; } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1434,16 +1442,16 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, + TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); - return CI; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } return 0; } @@ -1494,8 +1502,8 @@ struct PutsOpt : public LibCallOptimization { if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, TD); - if (CI->use_empty()) return CI; + Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1633,6 +1641,8 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; + if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf)) + Optimizations["fabs"] = &UnaryDoubleFP; if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf)) Optimizations["floor"] = &UnaryDoubleFP; if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf)) @@ -1643,6 +1653,8 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["rint"] = &UnaryDoubleFP; if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf)) Optimizations["nearbyint"] = &UnaryDoubleFP; + if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf)) + Optimizations["trunc"] = &UnaryDoubleFP; // Integer Optimizations Optimizations["ffs"] = &FFS; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 5576432..2679b93 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -659,10 +659,26 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // If the return instruction returns a value, and if the value was a // PHI node in "BB", propagate the right value into the return. for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); - i != e; ++i) - if (PHINode *PN = dyn_cast<PHINode>(*i)) - if (PN->getParent() == BB) - *i = PN->getIncomingValueForBlock(Pred); + i != e; ++i) { + Value *V = *i; + Instruction *NewBC = 0; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { + // Return value might be bitcasted. Clone and insert it before the + // return instruction. + V = BCI->getOperand(0); + NewBC = BCI->clone(); + Pred->getInstList().insert(NewRet, NewBC); + *i = NewBC; + } + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (PN->getParent() == BB) { + if (NewBC) + NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); + else + *i = PN->getIncomingValueForBlock(Pred); + } + } + } // Update any PHI nodes in the returning block to realize that we no // longer branch to them. 
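A recurring pattern across the SimplifyLibCalls hunks above: the Emit* helpers from BuildLibCalls can now return 0 when TargetLibraryInfo reports the underlying C routine as unavailable, so every simplifier must check each emitted value and treat a null as "no transformation" rather than committing to a half-finished rewrite. The sketch below condenses that caller-side discipline into one standalone function modeled on StrStrOpt; the function name and include paths (2012-era layout) are illustrative, while the Emit* signatures match the BuildLibCalls changes further down.

// Caller-side null checking, condensed from StrStrOpt above.
// Illustrative wrapper; 2012-era include paths assumed.
#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"   // EmitStrLen, EmitStrNCmp
using namespace llvm;

// Returns the strncmp value for a "strstr(a, b) == a" style fold, or 0 if the
// fold must be abandoned because a required libc routine cannot be emitted.
static Value *emitStrStrAsStrNCmp(CallInst *CI, IRBuilder<> &B,
                                  const TargetData *TD,
                                  const TargetLibraryInfo *TLI) {
  // strstr(a, b) == a  -->  strncmp(a, b, strlen(b)) == 0
  Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
  if (!StrLen)
    return 0;          // strlen() not available on this target: no fold
  Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
                               StrLen, B, TD, TLI);
  if (!StrNCmp)
    return 0;          // strncmp() not available either: give up
  return StrNCmp;      // the real fold then rewrites comparisons against CI
}

The companion change on the callee side (in BuildLibCalls.cpp below) is what makes the null possible: each Emit* helper now takes the TargetLibraryInfo and returns 0 up front when the corresponding TLI->has(LibFunc::...) check fails.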
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 27f7724..e13fd71 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -34,7 +34,11 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. This always returns an integer value of size intptr_t. -Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strlen)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -53,11 +57,41 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { return CI; } +/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the +/// specified pointer. Ptr is required to be some pointer type, MaxLen must +/// be of size_t type, and the return value has 'intptr_t' type. +Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strnlen)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + NULL); + CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); + if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitStrChr - Emit a call to the strchr function to the builder, for the /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const TargetData *TD) { + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strchr)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); @@ -75,7 +109,11 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, /// EmitStrNCmp - Emit a call to the strncmp function to the builder. Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strncmp)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -101,7 +139,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. 
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const TargetData *TD, StringRef Name) { + const TargetData *TD, const TargetLibraryInfo *TLI, + StringRef Name) { + if (!TLI->has(LibFunc::strcpy)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); @@ -119,7 +161,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const TargetData *TD, StringRef Name) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI, StringRef Name) { + if (!TLI->has(LibFunc::strncpy)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); @@ -139,7 +185,11 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcpy_chk)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind); @@ -162,7 +212,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *llvm::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B, const TargetData *TD) { + Value *Len, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memchr)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); @@ -183,7 +237,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, /// EmitMemCmp - Emit a call to the memcmp function. Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B, const TargetData *TD) { + Value *Len, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcmp)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -236,7 +294,11 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::putchar)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), B.getInt32Ty(), NULL); @@ -254,7 +316,11 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. 
-void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::puts)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -267,13 +333,16 @@ void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); - + return CI; } /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an integer and File is a pointer to FILE. -void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const TargetData *TD) { +Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputc)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); @@ -295,12 +364,16 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. -void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { +Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputs)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -321,13 +394,17 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. -void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const TargetData *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fwrite)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -354,11 +431,13 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } -bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { +bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI) { // We really need TargetData for later. if (!TD) return false; @@ -446,7 +525,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { // string lengths for varying. 
if (isFoldable(2, 1, true)) { Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, - Name.substr(2, 6)); + TLI, Name.substr(2, 6)); + if (!Ret) + return false; replaceCall(Ret); return true; } @@ -464,7 +545,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TD, Name.substr(2, 7)); + CI->getArgOperand(2), B, TD, TLI, + Name.substr(2, 7)); + if (!Ret) + return false; replaceCall(Ret); return true; } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index b3f5289..72d4199 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -39,7 +39,7 @@ SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast<AvailableValsTy*>(AV); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -214,6 +214,11 @@ void SSAUpdater::RewriteUse(Use &U) { else V = GetValueInMiddleOfBlock(User->getParent()); + // Notify that users of the existing value that it is being replaced. + Value *OldVal = U.get(); + if (OldVal != V && OldVal->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(OldVal, V); + U.set(V); } |
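The final hunk makes SSAUpdater::RewriteUse behave like a proper RAUW from the perspective of value handles: before a use is overwritten, any handle watching the old value is told about the replacement. Below is a minimal standalone sketch of that step; the helper name is illustrative (it is not an LLVM API) and the include paths assume the 2012 tree layout, but the notification calls are exactly the ones added in the hunk.

// Overwrite a use, but first notify value handles (WeakVH, CallbackVH, ...)
// watching the old value, mirroring the RewriteUse change above.
#include "llvm/Use.h"
#include "llvm/Value.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;

static void setUseAndNotify(Use &U, Value *NewV) {
  Value *OldVal = U.get();
  // Skip the notification when nothing actually changes or when no handle
  // is tracking the old value; ValueIsRAUWd walks the handle list for OldVal.
  if (OldVal != NewV && OldVal->hasValueHandle())
    ValueHandleBase::ValueIsRAUWd(OldVal, NewV);
  U.set(NewV);
}

This matches what Value::replaceAllUsesWith already does for whole-value replacement, so passes holding WeakVHs across an SSAUpdater rewrite (for example, worklists of instructions) no longer end up with silently stale handles.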