Diffstat (limited to 'lib/Transforms')
20 files changed, 1055 insertions, 624 deletions
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 8a6bfc6..fa3d72d 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -60,7 +60,7 @@ namespace { continue; } - bool Local = I->hasLocalLinkage(); + bool Local = I->isDiscardableIfUnused(); if (Local) I->setVisibility(GlobalValue::HiddenVisibility); @@ -80,7 +80,7 @@ namespace { continue; } - bool Local = I->hasLocalLinkage(); + bool Local = I->isDiscardableIfUnused(); if (Local) I->setVisibility(GlobalValue::HiddenVisibility); @@ -97,7 +97,7 @@ namespace { Module::alias_iterator CurI = I; ++I; - if (CurI->hasLocalLinkage()) { + if (CurI->isDiscardableIfUnused()) { CurI->setVisibility(GlobalValue::HiddenVisibility); CurI->setLinkage(GlobalValue::ExternalLinkage); } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 47b2b51..027a9f2 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -391,9 +391,9 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM, - bool Internalize, - bool RunInliner) { + LLVMBool Internalize, + LLVMBool RunInliner) { PassManagerBuilder *Builder = unwrap(PMB); PassManagerBase *LPM = unwrap(PM); - Builder->populateLTOPassManager(*LPM, Internalize, RunInliner); + Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0); } diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c6d60d6..3c5781c 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -150,7 +150,9 @@ namespace { typedef SmallVector<const FAddend*, 4> AddendVect; Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); - + + Value *performFactorization(Instruction *I); + /// Convert given addend to a Value Value *createAddendVal(const FAddend &A, bool& NeedNeg); @@ -159,6 +161,7 @@ namespace { Value *createFSub(Value *Opnd0, Value *Opnd1); Value *createFAdd(Value *Opnd0, Value *Opnd1); Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); @@ -388,6 +391,78 @@ unsigned FAddend::drillAddendDownOneStep return BreakNum; } +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. 
+// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? + createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + Value *FAddCombine::simplify(Instruction *I) { assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); @@ -471,7 +546,8 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - return 0; + // step 6: Try factorization as the last resort, + return performFactorization(I); } Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { @@ -627,7 +703,8 @@ Value *FAddCombine::createNaryFAdd Value *FAddCombine::createFSub (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFSub(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -639,13 +716,22 @@ Value *FAddCombine::createFNeg(Value *V) { Value *FAddCombine::createFAdd (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFAdd(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFMul(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4332467..990cbc3 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ 
b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -22,8 +22,8 @@ using namespace PatternMatch; /// AddOne - Add one to a ConstantInt. -static Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +static Constant *AddOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue() + 1); } /// SubOne - Subtract one from a ConstantInt. static Constant *SubOne(ConstantInt *C) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index a960ab2..d162223 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); if (CastElTySize == 0 || AllocElTySize == 0) return 0; + // If the allocation has multiple uses, only promote it if we're not + // shrinking the amount of memory being allocated. + uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy); + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + // See if we can satisfy the modulus by pulling a scale out of the array // size argument. unsigned ArraySizeScale; diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bad46b4..32fdb9b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1333,13 +1333,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // Transform (icmp pred iM (shl iM %v, N), CI) - // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N)) - // Transform the shl to a trunc if (trunc (CI>>N)) has no loss. + // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. // This enables to get rid of the shift in favor of a trunc which can be // free on the target. It has the additional benefit of comparing to a // smaller constant, which will be target friendly. unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); - if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) { + if (LHSI->hasOneUse() && + Amt != 0 && RHSV.countTrailingZeros() >= Amt) { Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); Constant *NCI = ConstantExpr::getTrunc( ConstantExpr::getAShr(RHS, diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 8e4267f..173f2bf 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -402,7 +402,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); } - // (MDC +/- C1) * C2 => (MDC * C2) +/- (C1 * C2) + // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C) Instruction *FAddSub = dyn_cast<Instruction>(Op0); if (FAddSub && (FAddSub->getOpcode() == Instruction::FAdd || @@ -420,8 +420,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (C1 && C1->getValueAPF().isNormal() && isFMulOrFDivWithConstant(Opnd0)) { - Value *M0 = ConstantExpr::getFMul(C1, C); - Value *M1 = isNormalFp(cast<ConstantFP>(M0)) ? + Value *M1 = ConstantExpr::getFMul(C1, C); + Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? 
foldFMulConst(cast<Instruction>(Opnd0), C, &I) : 0; if (M0 && M1) { diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6877475..92b42ee 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *kAsanInitName = "__asan_init_v1"; +static const char *kAsanInitName = "__asan_init_v2"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) { /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { - AddressSanitizer(bool CheckInitOrder = false, + AddressSanitizer(bool CheckInitOrder = true, bool CheckUseAfterReturn = false, bool CheckLifetime = false, StringRef BlacklistFile = StringRef(), @@ -315,7 +315,7 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: - AddressSanitizerModule(bool CheckInitOrder = false, + AddressSanitizerModule(bool CheckInitOrder = true, StringRef BlacklistFile = StringRef(), bool ZeroBaseShadow = false) : ModulePass(ID), @@ -531,9 +531,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { // Create a constant for Str so that we can pass it to the run-time lib. static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); - return new GlobalVariable(M, StrConst->getType(), true, + GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix); + GV->setUnnamedAddr(true); // Ok to merge these. + GV->setAlignment(1); // Strings may not be merged w/o setting align 1. + return GV; } static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { @@ -885,11 +888,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // const char *module_name; // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, - IntptrTy, NULL); + IntptrTy, IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n), DynamicInit; @@ -901,6 +905,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // this TU. Used in initialization order checking. 
Value *FirstDynamic = 0, *LastDynamic = 0; + GlobalVariable *ModuleName = createPrivateGlobalForString( + M, M.getModuleIdentifier()); + for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; @@ -930,11 +937,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), NULL); - SmallString<2048> DescriptionOfGlobal = G->getName(); - DescriptionOfGlobal += " ("; - DescriptionOfGlobal += M.getModuleIdentifier(); - DescriptionOfGlobal += ")"; - GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal); + GlobalVariable *Name = createPrivateGlobalForString(M, G->getName()); // Create a new global variable with enough space for a redzone. GlobalVariable *NewGlobal = new GlobalVariable( @@ -958,6 +961,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); @@ -1095,6 +1099,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index eb0dc1e..e8d4ac8 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLoc.h" #include "llvm/Support/InstIterator.h" @@ -39,31 +40,57 @@ #include <utility> using namespace llvm; +static cl::opt<std::string> +DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, + cl::ValueRequired); + +GCOVOptions GCOVOptions::getDefault() { + GCOVOptions Options; + Options.EmitNotes = true; + Options.EmitData = true; + Options.UseCfgChecksum = false; + Options.NoRedZone = false; + Options.FunctionNamesInData = true; + + if (DefaultGCOVVersion.size() != 4) { + llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + + DefaultGCOVVersion); + } + memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); + return Options; +} + namespace { class GCOVProfiler : public ModulePass { public: static char ID; - GCOVProfiler() - : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false), - UseExtraChecksum(false), NoRedZone(false) { + GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) { + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } - GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format, - bool useExtraChecksum, bool NoRedZone_) - : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), - Use402Format(use402Format), UseExtraChecksum(useExtraChecksum), - NoRedZone(NoRedZone_) { - assert((EmitNotes || EmitData) && 
"GCOVProfiler asked to do nothing?"); + GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){ + assert((Options.EmitNotes || Options.EmitData) && + "GCOVProfiler asked to do nothing?"); + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } virtual const char *getPassName() const { return "GCOV Profiler"; } + private: bool runOnModule(Module &M); - // Create the GCNO files for the Module based on DebugInfo. - void emitGCNO(); + // Create the .gcno files for the Module based on DebugInfo. + void emitProfileNotes(); // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. @@ -74,6 +101,7 @@ namespace { Constant *getIncrementIndirectCounterFunc(); Constant *getEmitFunctionFunc(); Constant *getEmitArcsFunc(); + Constant *getDeleteFlushFunctionListFunc(); Constant *getEndFileFunc(); // Create or retrieve an i32 state value that is used to represent the @@ -84,22 +112,22 @@ namespace { // block number. GlobalVariable *buildEdgeLookupTable(Function *F, GlobalVariable *Counter, - const UniqueVector<BasicBlock *> &Preds, - const UniqueVector<BasicBlock *> &Succs); + const UniqueVector<BasicBlock *>&Preds, + const UniqueVector<BasicBlock*>&Succs); // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); + Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, + MDNode*> >); + Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); - void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); std::string mangleName(DICompileUnit CU, const char *NewStem); - bool EmitNotes; - bool EmitData; - bool Use402Format; - bool UseExtraChecksum; - bool NoRedZone; + GCOVOptions Options; + + // Reversed, NUL-terminated copy of Options.Version. + char ReversedVersion[5]; Module *M; LLVMContext *Ctx; @@ -110,12 +138,8 @@ char GCOVProfiler::ID = 0; INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) -ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, - bool Use402Format, - bool UseExtraChecksum, - bool NoRedZone) { - return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum, - NoRedZone); +ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { + return new GCOVProfiler(Options); } namespace { @@ -253,8 +277,8 @@ namespace { // object users can construct, the blocks and lines will be rooted here. 
class GCOVFunction : public GCOVRecord { public: - GCOVFunction(DISubprogram SP, raw_ostream *os, - bool Use402Format, bool UseExtraChecksum) { + GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, + bool UseCfgChecksum) { this->os = os; Function *F = SP.getFunction(); @@ -268,13 +292,12 @@ namespace { writeBytes(FunctionTag, 4); uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (UseExtraChecksum) + if (UseCfgChecksum) ++BlockLen; write(BlockLen); - uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP); write(Ident); write(0); // lineno checksum - if (UseExtraChecksum) + if (UseCfgChecksum) write(0); // cfg checksum writeGCOVString(SP.getName()); writeGCOVString(SP.getFilename()); @@ -358,12 +381,12 @@ bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); - if (EmitNotes) emitGCNO(); - if (EmitData) return emitProfileArcs(); + if (Options.EmitNotes) emitProfileNotes(); + if (Options.EmitData) return emitProfileArcs(); return false; } -void GCOVProfiler::emitGCNO() { +void GCOVProfiler::emitProfileNotes() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -376,10 +399,9 @@ void GCOVProfiler::emitGCNO() { std::string ErrorInfo; raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, raw_fd_ostream::F_Binary); - if (!Use402Format) - out.write("oncg*404MVLL", 12); - else - out.write("oncg*204MVLL", 12); + out.write("oncg", 4); + out.write(ReversedVersion, 4); + out.write("MVLL", 4); DIArray SPs = CU.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { @@ -388,7 +410,7 @@ void GCOVProfiler::emitGCNO() { Function *F = SP.getFunction(); if (!F) continue; - GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum); + GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { GCOVBlock &Block = Func.getBlock(BB); @@ -469,21 +491,18 @@ bool GCOVProfiler::emitProfileArcs() { Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge); Value *Count = Builder.CreateLoad(Counter); - Count = Builder.CreateAdd(Count, - ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - Value *Sel = Builder.CreateSelect( - BI->getCondition(), - ConstantInt::get(Type::getInt64Ty(*Ctx), Edge), - ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1)); + Value *Sel = Builder.CreateSelect(BI->getCondition(), + Builder.getInt64(Edge), + Builder.getInt64(Edge + 1)); SmallVector<Value *, 2> Idx; - Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); + Idx.push_back(Builder.getInt64(0)); Idx.push_back(Sel); Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); Value *Count = Builder.CreateLoad(Counter); - Count = Builder.CreateAdd(Count, - ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); } else { ComplexEdgePreds.insert(BB); @@ -500,10 +519,9 @@ bool GCOVProfiler::emitProfileArcs() { ComplexEdgePreds, ComplexEdgeSuccs); GlobalVariable *EdgeState = getEdgeStateValue(); - Type *Int32Ty = Type::getInt32Ty(*Ctx); for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); - Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); + Builder.CreateStore(Builder.getInt32(i), 
EdgeState); } for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { // call runtime to perform increment @@ -522,8 +540,42 @@ bool GCOVProfiler::emitProfileArcs() { } } - insertCounterWriteout(CountersBySP); - insertFlush(CountersBySP); + Function *WriteoutF = insertCounterWriteout(CountersBySP); + Function *FlushF = insertFlush(CountersBySP); + + // Create a small bit of code that registers the "__llvm_gcov_writeout" to + // be executed at exit and the "__llvm_gcov_flush" function to be executed + // when "__gcov_flush" is called. + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_init", M); + F->setUnnamedAddr(true); + F->setLinkage(GlobalValue::InternalLinkage); + F->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + F->addFnAttr(Attribute::NoRedZone); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + IRBuilder<> Builder(BB); + + FTy = FunctionType::get(Builder.getInt32Ty(), + PointerType::get(FTy, 0), false); + Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); + Builder.CreateCall(AtExitFn, WriteoutF); + + // Register the local flush function. + FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + FTy = FunctionType::get(Builder.getVoidTy(), + PointerType::get(FTy, 0), false); + Constant *RegFlush = + M->getOrInsertFunction("llvm_register_flush_function", FTy); + Builder.CreateCall(RegFlush, FlushF); + + // Make sure that all the flush function list is deleted. + Builder.CreateCall(AtExitFn, getDeleteFlushFunctionListFunc()); + Builder.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); } if (InsertIndCounterIncrCode) @@ -560,8 +612,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) { for (int i = 0; i != Successors; ++i) { BasicBlock *Succ = TI->getSuccessor(i); - IRBuilder<> builder(Succ); - Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0, + IRBuilder<> Builder(Succ); + Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge + i); EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + (Preds.idFor(BB)-1)] = cast<Constant>(Counter); @@ -581,8 +633,11 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( } Constant *GCOVProfiler::getStartFileFunc() { - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Type::getInt8PtrTy(*Ctx), false); + Type *Args[] = { + Type::getInt8PtrTy(*Ctx), // const char *orig_filename + Type::getInt8PtrTy(*Ctx), // const char version[4] + }; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_start_file", FTy); } @@ -598,9 +653,10 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { } Constant *GCOVProfiler::getEmitFunctionFunc() { - Type *Args[2] = { + Type *Args[3] = { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name + Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum }; FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); @@ -611,11 +667,15 @@ Constant *GCOVProfiler::getEmitArcsFunc() { Type::getInt32Ty(*Ctx), // uint32_t num_counters Type::getInt64PtrTy(*Ctx), // uint64_t *counters }; - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Args, false); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); } 
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy); +} + Constant *GCOVProfiler::getEndFileFunc() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_end_file", FTy); @@ -634,7 +694,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { return GV; } -void GCOVProfiler::insertCounterWriteout( +Function *GCOVProfiler::insertCounterWriteout( ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); @@ -643,7 +703,7 @@ void GCOVProfiler::insertCounterWriteout( "__llvm_gcov_writeout", M); WriteoutF->setUnnamedAddr(true); WriteoutF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) WriteoutF->addFnAttr(Attribute::NoRedZone); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); @@ -659,50 +719,31 @@ void GCOVProfiler::insertCounterWriteout( for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CU(CU_Nodes->getOperand(i)); std::string FilenameGcda = mangleName(CU, "gcda"); - Builder.CreateCall(StartFile, - Builder.CreateGlobalStringPtr(FilenameGcda)); - for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator - I = CountersBySP.begin(), E = CountersBySP.end(); - I != E; ++I) { - DISubprogram SP(I->second); - intptr_t ident = reinterpret_cast<intptr_t>(I->second); - Builder.CreateCall2(EmitFunction, - ConstantInt::get(Type::getInt32Ty(*Ctx), ident), - Builder.CreateGlobalStringPtr(SP.getName())); - - GlobalVariable *GV = I->first; + Builder.CreateCall2(StartFile, + Builder.CreateGlobalStringPtr(FilenameGcda), + Builder.CreateGlobalStringPtr(ReversedVersion)); + for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { + DISubprogram SP(CountersBySP[j].second); + Builder.CreateCall3(EmitFunction, + Builder.getInt32(j), + Options.FunctionNamesInData ? + Builder.CreateGlobalStringPtr(SP.getName()) : + Constant::getNullValue(Builder.getInt8PtrTy()), + Builder.getInt8(Options.UseCfgChecksum)); + + GlobalVariable *GV = CountersBySP[j].first; unsigned Arcs = cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); Builder.CreateCall2(EmitArcs, - ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), + Builder.getInt32(Arcs), Builder.CreateConstGEP2_64(GV, 0, 0)); } Builder.CreateCall(EndFile); } } - Builder.CreateRetVoid(); - // Create a small bit of code that registers the "__llvm_gcov_writeout" - // function to be executed at exit. 
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, - "__llvm_gcov_init", M); - F->setUnnamedAddr(true); - F->setLinkage(GlobalValue::InternalLinkage); - F->addFnAttr(Attribute::NoInline); - if (NoRedZone) - F->addFnAttr(Attribute::NoRedZone); - - BB = BasicBlock::Create(*Ctx, "entry", F); - Builder.SetInsertPoint(BB); - - FTy = FunctionType::get(Type::getInt32Ty(*Ctx), - PointerType::get(FTy, 0), false); - Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); - Builder.CreateCall(AtExitFn, WriteoutF); Builder.CreateRetVoid(); - - appendToGlobalCtors(*M, F, 0); + return WriteoutF; } void GCOVProfiler::insertIndirectCounterIncrement() { @@ -711,13 +752,9 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Fn->setUnnamedAddr(true); Fn->setLinkage(GlobalValue::InternalLinkage); Fn->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) Fn->addFnAttr(Attribute::NoRedZone); - Type *Int32Ty = Type::getInt32Ty(*Ctx); - Type *Int64Ty = Type::getInt64Ty(*Ctx); - Constant *NegOne = ConstantInt::get(Int32Ty, 0xffffffff); - // Create basic blocks for function. BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn); IRBuilder<> Builder(BB); @@ -731,26 +768,27 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Argument *Arg = Fn->arg_begin(); Arg->setName("predecessor"); Value *Pred = Builder.CreateLoad(Arg, "pred"); - Value *Cond = Builder.CreateICmpEQ(Pred, NegOne); + Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff)); BranchInst::Create(Exit, PredNotNegOne, Cond, BB); Builder.SetInsertPoint(PredNotNegOne); // uint64_t *counter = counters[pred]; // if (!counter) return; - Value *ZExtPred = Builder.CreateZExt(Pred, Int64Ty); + Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty()); Arg = llvm::next(Fn->arg_begin()); Arg->setName("counters"); Value *GEP = Builder.CreateGEP(Arg, ZExtPred); Value *Counter = Builder.CreateLoad(GEP, "counter"); Cond = Builder.CreateICmpEQ(Counter, - Constant::getNullValue(Int64Ty->getPointerTo())); + Constant::getNullValue( + Builder.getInt64Ty()->getPointerTo())); Builder.CreateCondBr(Cond, Exit, CounterEnd); // ++*counter; Builder.SetInsertPoint(CounterEnd); Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter), - ConstantInt::get(Int64Ty, 1)); + Builder.getInt64(1)); Builder.CreateStore(Add, Counter); Builder.CreateBr(Exit); @@ -759,18 +797,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Builder.CreateRetVoid(); } -void GCOVProfiler:: +Function *GCOVProfiler:: insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *FlushF = M->getFunction("__gcov_flush"); + Function *FlushF = M->getFunction("__llvm_gcov_flush"); if (!FlushF) FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, - "__gcov_flush", M); + "__llvm_gcov_flush", M); else FlushF->setLinkage(GlobalValue::InternalLinkage); FlushF->setUnnamedAddr(true); FlushF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) FlushF->addFnAttr(Attribute::NoRedZone); BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); @@ -795,8 +833,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { if (RetTy == Type::getVoidTy(*Ctx)) Builder.CreateRetVoid(); else if (RetTy->isIntegerTy()) - // Used if __gcov_flush was implicitly declared. + // Used if __llvm_gcov_flush was implicitly declared. 
Builder.CreateRet(ConstantInt::get(RetTy, 0)); else - report_fatal_error("invalid return type for __gcov_flush"); + report_fatal_error("invalid return type for __llvm_gcov_flush"); + + return FlushF; } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 80705af..fce6513 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -418,13 +418,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes; ValueMap<Value*, Value*> ShadowMap, OriginMap; bool InsertChecks; + bool LoadShadow; OwningPtr<VarArgHelper> VAHelper; - // An unfortunate workaround for asymmetric lowering of va_arg stuff. - // See a comment in visitCallSite for more details. - static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 - static const unsigned AMD64FpEndOffset = 176; - struct ShadowOriginAndInsertPoint { Instruction *Shadow; Instruction *Origin; @@ -437,11 +433,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { SmallVector<Instruction*, 16> StoreList; MemorySanitizerVisitor(Function &F, MemorySanitizer &MS) - : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) { - InsertChecks = !MS.BL->isIn(F); + : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) { + LoadShadow = InsertChecks = + !MS.BL->isIn(F) && + F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::SanitizeMemory); + DEBUG(if (!InsertChecks) - dbgs() << "MemorySanitizer is not inserting checks into '" - << F.getName() << "'\n"); + dbgs() << "MemorySanitizer is not inserting checks into '" + << F.getName() << "'\n"); } void materializeStores() { @@ -836,15 +836,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRBuilder<> IRB(&I); Type *ShadowTy = getShadowTy(&I); Value *Addr = I.getPointerOperand(); - Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); - setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld")); + if (LoadShadow) { + Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); + setShadow(&I, + IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld")); + } else { + setShadow(&I, getCleanShadow(&I)); + } if (ClCheckAccessAddress) insertCheck(I.getPointerOperand(), &I); if (MS.TrackOrigins) { - unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); - setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment)); + if (LoadShadow) { + unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); + setOrigin(&I, + IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment)); + } else { + setOrigin(&I, getCleanOrigin()); + } } } @@ -1410,16 +1420,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *Addr = I.getArgOperand(0); Type *ShadowTy = getShadowTy(&I); - Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); - // We don't know the pointer alignment (could be unaligned SSE load!). - // Have to assume to worst case. - setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld")); + if (LoadShadow) { + Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); + // We don't know the pointer alignment (could be unaligned SSE load!). + // Have to assume to worst case. 
+ setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld")); + } else { + setShadow(&I, getCleanShadow(&I)); + } + if (ClCheckAccessAddress) insertCheck(Addr, &I); - if (MS.TrackOrigins) - setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB))); + if (MS.TrackOrigins) { + if (LoadShadow) + setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB))); + else + setOrigin(&I, getCleanOrigin()); + } return true; } diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index d71dd5d..015fd2e 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. - if (TLI && TLI->isSlowDivBypassed()) { + if (!OptSize && TLI && TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); for (Function::iterator I = F.begin(); I != F.end(); I++) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c04b447..129af8d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } -static void patchReplacementInstruction(Value *Repl, Instruction *I) { +static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. BinaryOperator *Op = dyn_cast<BinaryOperator>(I); @@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) { } } -static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) { - patchReplacementInstruction(Repl, I); +static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { + patchReplacementInstruction(I, Repl); I->replaceAllUsesWith(Repl); } @@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) { } // Remove it! - patchAndReplaceAllUsesWith(AvailableVal, L); + patchAndReplaceAllUsesWith(L, AvailableVal); if (DepLI->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); markInstructionForDeletion(L); @@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) { } // Remove it! 
- patchAndReplaceAllUsesWith(repl, I); + patchAndReplaceAllUsesWith(I, repl); if (MD && repl->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); markInstructionForDeletion(I); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 1601a8d..14e463a 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -53,6 +53,7 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -60,14 +61,22 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +static cl::opt<bool> +EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, + cl::desc("Enable global merge pass on constants"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -78,6 +87,23 @@ namespace { bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Check if the given variable has been identified as must keep + /// \pre setMustKeepGlobalVariables must have been called on the Module that + /// contains GV + bool isMustKeepGlobalVariable(const GlobalVariable *GV) const { + return MustKeepGlobalVariables.count(GV); + } + + /// Collect every variables marked as "used" or used in a landing pad + /// instruction for this Module. + void setMustKeepGlobalVariables(Module &M); + + /// Collect every variables marked as "used" + void collectUsedGlobalVariables(Module &M); + + /// Keep track of the GlobalVariable that are marked as "used" + SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; + public: static char ID; // Pass identification, replacement for typeid. explicit GlobalMerge(const TargetLowering *tli = 0) @@ -169,6 +195,46 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, return true; } +void GlobalMerge::collectUsedGlobalVariables(Module &M) { + // Extract global variables from llvm.used array + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (const GlobalVariable *G = + dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) + MustKeepGlobalVariables.insert(G); +} + +void GlobalMerge::setMustKeepGlobalVariables(Module &M) { + // If we already processed a Module, UsedGlobalVariables may have been + // populated. Reset the information for this module. 
+ MustKeepGlobalVariables.clear(); + collectUsedGlobalVariables(M); + + for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; + ++IFn) { + for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); + IBB != IEndBB; ++IBB) { + // Follow the inwoke link to find the landing pad instruction + const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); + if (!II) continue; + + const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); + // Look for globals in the clauses of the landing pad instruction + for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); + Idx != NumClauses; ++Idx) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(LPInst->getClause(Idx) + ->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } +} bool GlobalMerge::doInitialization(Module &M) { DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, @@ -176,6 +242,7 @@ bool GlobalMerge::doInitialization(Module &M) { const DataLayout *TD = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; + setMustKeepGlobalVariables(M); // Grab all non-const globals. for (Module::global_iterator I = M.global_begin(), @@ -200,6 +267,12 @@ bool GlobalMerge::doInitialization(Module &M) { I->getName().startswith(".llvm.")) continue; + // Ignore all "required" globals: + // - the ones used for EH + // - the ones marked with "used" attribute + if (isMustKeepGlobalVariable(I)) + continue; + if (TD->getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine()) .isBSSLocal()) @@ -221,11 +294,11 @@ bool GlobalMerge::doInitialization(Module &M) { if (I->second.size() > 1) Changed |= doMerge(I->second, M, false, I->first); - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore - // them in the future. - // if (ConstGlobals.size() > 1) - // Changed |= doMerge(ConstGlobals, M, true); + if (EnableGlobalMergeOnConst) + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, true, I->first); return Changed; } diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index e90fe90..0e57e5c 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -69,112 +69,130 @@ static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden); namespace { -/// \brief Alloca partitioning representation. -/// -/// This class represents a partitioning of an alloca into slices, and -/// information about the nature of uses of each slice of the alloca. The goal -/// is that this information is sufficient to decide if and how to split the -/// alloca apart and replace slices with scalars. It is also intended that this -/// structure can capture the relevant information needed both to decide about -/// and to enact these transformations. -class AllocaPartitioning { -public: - /// \brief A common base class for representing a half-open byte range. - struct ByteRange { - /// \brief The beginning offset of the range. - uint64_t BeginOffset; +/// \brief A common base class for representing a half-open byte range. +struct ByteRange { + /// \brief The beginning offset of the range. + uint64_t BeginOffset; - /// \brief The ending offset, not included in the range. 
- uint64_t EndOffset; + /// \brief The ending offset, not included in the range. + uint64_t EndOffset; - ByteRange() : BeginOffset(), EndOffset() {} - ByteRange(uint64_t BeginOffset, uint64_t EndOffset) - : BeginOffset(BeginOffset), EndOffset(EndOffset) {} + ByteRange() : BeginOffset(), EndOffset() {} + ByteRange(uint64_t BeginOffset, uint64_t EndOffset) + : BeginOffset(BeginOffset), EndOffset(EndOffset) {} - /// \brief Support for ordering ranges. - /// - /// This provides an ordering over ranges such that start offsets are - /// always increasing, and within equal start offsets, the end offsets are - /// decreasing. Thus the spanning range comes first in a cluster with the - /// same start position. - bool operator<(const ByteRange &RHS) const { - if (BeginOffset < RHS.BeginOffset) return true; - if (BeginOffset > RHS.BeginOffset) return false; - if (EndOffset > RHS.EndOffset) return true; - return false; - } + /// \brief Support for ordering ranges. + /// + /// This provides an ordering over ranges such that start offsets are + /// always increasing, and within equal start offsets, the end offsets are + /// decreasing. Thus the spanning range comes first in a cluster with the + /// same start position. + bool operator<(const ByteRange &RHS) const { + if (BeginOffset < RHS.BeginOffset) return true; + if (BeginOffset > RHS.BeginOffset) return false; + if (EndOffset > RHS.EndOffset) return true; + return false; + } - /// \brief Support comparison with a single offset to allow binary searches. - friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { - return LHS.BeginOffset < RHSOffset; - } + /// \brief Support comparison with a single offset to allow binary searches. + friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { + return LHS.BeginOffset < RHSOffset; + } - friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, - const ByteRange &RHS) { - return LHSOffset < RHS.BeginOffset; - } + friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, + const ByteRange &RHS) { + return LHSOffset < RHS.BeginOffset; + } - bool operator==(const ByteRange &RHS) const { - return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; - } - bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } - }; + bool operator==(const ByteRange &RHS) const { + return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; + } + bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } +}; - /// \brief A partition of an alloca. +/// \brief A partition of an alloca. +/// +/// This structure represents a contiguous partition of the alloca. These are +/// formed by examining the uses of the alloca. During formation, they may +/// overlap but once an AllocaPartitioning is built, the Partitions within it +/// are all disjoint. +struct Partition : public ByteRange { + /// \brief Whether this partition is splittable into smaller partitions. /// - /// This structure represents a contiguous partition of the alloca. These are - /// formed by examining the uses of the alloca. During formation, they may - /// overlap but once an AllocaPartitioning is built, the Partitions within it - /// are all disjoint. - struct Partition : public ByteRange { - /// \brief Whether this partition is splittable into smaller partitions. - /// - /// We flag partitions as splittable when they are formed entirely due to - /// accesses by trivially splittable operations such as memset and memcpy. 
- bool IsSplittable; - - /// \brief Test whether a partition has been marked as dead. - bool isDead() const { - if (BeginOffset == UINT64_MAX) { - assert(EndOffset == UINT64_MAX); - return true; - } - return false; + /// We flag partitions as splittable when they are formed entirely due to + /// accesses by trivially splittable operations such as memset and memcpy. + bool IsSplittable; + + /// \brief Test whether a partition has been marked as dead. + bool isDead() const { + if (BeginOffset == UINT64_MAX) { + assert(EndOffset == UINT64_MAX); + return true; } + return false; + } - /// \brief Kill a partition. - /// This is accomplished by setting both its beginning and end offset to - /// the maximum possible value. - void kill() { - assert(!isDead() && "He's Dead, Jim!"); - BeginOffset = EndOffset = UINT64_MAX; - } + /// \brief Kill a partition. + /// This is accomplished by setting both its beginning and end offset to + /// the maximum possible value. + void kill() { + assert(!isDead() && "He's Dead, Jim!"); + BeginOffset = EndOffset = UINT64_MAX; + } - Partition() : ByteRange(), IsSplittable() {} - Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) - : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} - }; + Partition() : ByteRange(), IsSplittable() {} + Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) + : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} +}; + +/// \brief A particular use of a partition of the alloca. +/// +/// This structure is used to associate uses of a partition with it. They +/// mark the range of bytes which are referenced by a particular instruction, +/// and includes a handle to the user itself and the pointer value in use. +/// The bounds of these uses are determined by intersecting the bounds of the +/// memory use itself with a particular partition. As a consequence there is +/// intentionally overlap between various uses of the same partition. +class PartitionUse : public ByteRange { + /// \brief Combined storage for both the Use* and split state. + PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit; - /// \brief A particular use of a partition of the alloca. +public: + PartitionUse() : ByteRange(), UsePtrAndIsSplit() {} + PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U, + bool IsSplit) + : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {} + + /// \brief The use in question. Provides access to both user and used value. /// - /// This structure is used to associate uses of a partition with it. They - /// mark the range of bytes which are referenced by a particular instruction, - /// and includes a handle to the user itself and the pointer value in use. - /// The bounds of these uses are determined by intersecting the bounds of the - /// memory use itself with a particular partition. As a consequence there is - /// intentionally overlap between various uses of the same partition. - struct PartitionUse : public ByteRange { - /// \brief The use in question. Provides access to both user and used value. - /// - /// Note that this may be null if the partition use is *dead*, that is, it - /// should be ignored. - Use *U; + /// Note that this may be null if the partition use is *dead*, that is, it + /// should be ignored. 
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); } - PartitionUse() : ByteRange(), U() {} - PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U) - : ByteRange(BeginOffset, EndOffset), U(U) {} - }; + /// \brief Set the use for this partition use range. + void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); } + + /// \brief Whether this use is split across multiple partitions. + bool isSplit() const { return UsePtrAndIsSplit.getInt(); } +}; +} + +namespace llvm { +template <> struct isPodLike<Partition> : llvm::true_type {}; +template <> struct isPodLike<PartitionUse> : llvm::true_type {}; +} +namespace { +/// \brief Alloca partitioning representation. +/// +/// This class represents a partitioning of an alloca into slices, and +/// information about the nature of uses of each slice of the alloca. The goal +/// is that this information is sufficient to decide if and how to split the +/// alloca apart and replace slices with scalars. It is also intended that this +/// structure can capture the relevant information needed both to decide about +/// and to enact these transformations. +class AllocaPartitioning { +public: /// \brief Construct a partitioning of a particular alloca. /// /// Construction does most of the work for partitioning the alloca. This @@ -456,10 +474,10 @@ private: // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. - // NOTE! This may appear superficially to be something we could ignore - // entirely, but that is not so! There may be PHI-node uses where some - // instructions are dead but not others. We can't completely ignore the - // PHI node, and so have to record at least the information here. + // This may appear superficially to be something we could ignore entirely, + // but that is not so! There may be widened loads or PHI-node uses where + // some instructions are dead but not others. We can't completely ignore + // them, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. if (Size > AllocSize - BeginOffset) { DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset @@ -474,33 +492,17 @@ private: } void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, - bool IsVolatile) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. We also try to handle cases which might run the - // risk of overflow. - // FIXME: We should instead consider the pointer to have escaped if this - // function is being instrumented for addressing bugs or race conditions. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) { - DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte " - << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset - << " which extends past the end of the " << AllocSize - << " byte alloca:\n" - << " alloca: " << P.AI << "\n" - << " use: " << I << "\n"); - return; - } - + uint64_t Size, bool IsVolatile) { // We allow splitting of loads and stores where the type is an integer type - // and which cover the entire alloca. Such integer loads and stores - // often require decomposition into fine grained loads and stores. 
- bool IsSplittable = false; - if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) - IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8; + // and cover the entire alloca. This prevents us from splitting over + // eagerly. + // FIXME: In the great blue eventually, we should eagerly split all integer + // loads and stores, and then have a separate step that merges adjacent + // alloca partitions into a single partition suitable for integer widening. + // Or we should skip the merge step and rely on GVN and other passes to + // merge adjacent loads and stores that survive mem2reg. + bool IsSplittable = + Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; insertUse(I, Offset, Size, IsSplittable); } @@ -512,7 +514,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&LI); - return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile()); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } void visitStoreInst(StoreInst &SI) { @@ -522,9 +525,28 @@ private: if (!IsOffsetKnown) return PI.setAborted(&SI); + uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. We also try to handle cases which might run the + // risk of overflow. + // FIXME: We should instead consider the pointer to have escaped if this + // function is being instrumented for addressing bugs or race conditions. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) { + DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset + << " which extends past the end of the " << AllocSize + << " byte alloca:\n" + << " alloca: " << P.AI << "\n" + << " use: " << SI << "\n"); + return; + } + assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); - handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); + handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); } @@ -795,13 +817,14 @@ private: EndOffset = AllocSize; // NB: This only works if we have zero overlapping partitions. - iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset); - if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset) - B = llvm::prior(B); - for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset; - ++I) { + iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset); + if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset) + I = llvm::prior(I); + iterator E = P.end(); + bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset; + for (; I != E && I->BeginOffset < EndOffset; ++I) { PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset), - std::min(I->EndOffset, EndOffset), U); + std::min(I->EndOffset, EndOffset), U, IsSplit); P.use_push_back(I, NewPU); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[U] @@ -809,20 +832,6 @@ private: } } - void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. 
Note that this is more strict than the generic clamping - // behavior of insertUse. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) - return markAsDead(I); - - insertUse(I, Offset, Size); - } - void visitBitCastInst(BitCastInst &BC) { if (BC.use_empty()) return markAsDead(BC); @@ -839,12 +848,23 @@ private: void visitLoadInst(LoadInst &LI) { assert(IsOffsetKnown); - handleLoadOrStore(LI.getType(), LI, Offset); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + insertUse(LI, Offset, Size); } void visitStoreInst(StoreInst &SI) { assert(IsOffsetKnown); - handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset); + uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) + return markAsDead(SI); + + insertUse(SI, Offset, Size); } void visitMemSetInst(MemSetInst &II) { @@ -1089,21 +1109,21 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) Type *AllocaPartitioning::getCommonType(iterator I) const { Type *Ty = 0; for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + Use *U = UI->getUse(); + if (!U) continue; // Skip dead uses. - if (isa<IntrinsicInst>(*UI->U->getUser())) + if (isa<IntrinsicInst>(*U->getUser())) continue; if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset) continue; Type *UserTy = 0; - if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) UserTy = LI->getType(); - } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) { + else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) UserTy = SI->getValueOperand()->getType(); - } else { + else return 0; // Bail if we have weird uses. - } if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) { // If the type is larger than the partition, skip it. We only encounter @@ -1140,11 +1160,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + if (!UI->getUse()) continue; // Skip dead uses. OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " - << "used by: " << *UI->U->getUser() << "\n"; - if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) { + << "used by: " << *UI->getUse()->getUser() << "\n"; + if (MemTransferInst *II = + dyn_cast<MemTransferInst>(UI->getUse()->getUser())) { const MemTransferOffsets &MTO = MemTransferInstData.lookup(II); bool IsDest; if (!MTO.IsSplittable) @@ -1375,11 +1396,11 @@ public: // may be grown during speculation. However, we never need to re-visit the // new uses, and so we can use the initial size bound. for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) { - const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx); - if (!PU.U) + const PartitionUse &PU = P.getUse(PI, Idx); + if (!PU.getUse()) continue; // Skip dead use. - visit(cast<Instruction>(PU.U->getUser())); + visit(cast<Instruction>(PU.getUse()->getUser())); } } @@ -1523,8 +1544,8 @@ private: // inside the load. 
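Worth calling out from the use-builder hunks above: out-of-bounds accesses are no longer handled uniformly. A store that provably runs past the end of the alloca is undefined and is marked dead, while a load in the same position is still recorded, since its range was already clamped when the partitions were built. A condensed restatement, with AllocSize, markAsDead and insertUse assumed from the surrounding class:

    void visitStoreSketch(llvm::StoreInst &SI, const llvm::APInt &Offset,
                          uint64_t Size) {
      // Statically out-of-bounds stores have undefined behaviour: drop them.
      if (Offset.isNegative() || Size > AllocSize ||
          Offset.ugt(AllocSize - Size))
        return markAsDead(SI);
      insertUse(SI, Offset, Size);
    }

    void visitLoadSketch(llvm::LoadInst &LI, const llvm::APInt &Offset,
                         uint64_t Size) {
      // Loads are kept even when widened past the end; insertUse clamps them.
      insertUse(LI, Offset, Size);
    }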
AllocaPartitioning::use_iterator UI = P.findPartitionUseForPHIOrSelectOperand(InUse); - assert(isa<PHINode>(*UI->U->getUser())); - UI->U = &Load->getOperandUse(Load->getPointerOperandIndex()); + assert(isa<PHINode>(*UI->getUse()->getUser())); + UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex())); } DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); } @@ -1571,16 +1592,16 @@ private: void visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); // If the select isn't safe to speculate, just use simple logic to emit it. SmallVector<LoadInst *, 4> Loads; if (!isSafeSelectToSpeculate(SI, Loads)) return; + IRBuilder<> IRB(&SI); Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; AllocaPartitioning::iterator PIs[2]; - AllocaPartitioning::PartitionUse PUs[2]; + PartitionUse PUs[2]; for (unsigned i = 0, e = 2; i != e; ++i) { PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); if (PIs[i] != P.end()) { @@ -1591,7 +1612,7 @@ private: PUs[i] = *UI; // Clear out the use here so that the offsets into the use list remain // stable but this use is ignored when rewriting. - UI->U = 0; + UI->setUse(0); } } @@ -1623,8 +1644,8 @@ private: for (unsigned i = 0, e = 2; i != e; ++i) { if (PIs[i] != P.end()) { Use *LoadUse = &Loads[i]->getOperandUse(0); - assert(PUs[i].U->get() == LoadUse->get()); - PUs[i].U = LoadUse; + assert(PUs[i].getUse()->get() == LoadUse->get()); + PUs[i].setUse(LoadUse); P.use_push_back(PIs[i], PUs[i]); } } @@ -1911,6 +1932,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { if (OldTy == NewTy) return true; + if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) + if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) + if (NewITy->getBitWidth() >= OldITy->getBitWidth()) + return true; if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) return false; if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) @@ -1939,6 +1964,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V, "Value not convertable to type"); if (V->getType() == Ty) return V; + if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType())) + if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty)) + if (NewITy->getBitWidth() > OldITy->getBitWidth()) + return IRB.CreateZExt(V, NewITy); if (V->getType()->isIntegerTy() && Ty->isPointerTy()) return IRB.CreateIntToPtr(V, Ty); if (V->getType()->isPointerTy() && Ty->isIntegerTy()) @@ -1977,7 +2006,8 @@ static bool isVectorPromotionViable(const DataLayout &TD, ElementSize /= 8; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; @@ -1997,24 +2027,24 @@ static bool isVectorPromotionViable(const DataLayout &TD, = (NumElements == 1) ? 
Ty->getElementType() : VectorType::get(Ty->getElementType(), NumElements); - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile()) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { + } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. return false; - } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (!canConvertValue(TD, PartitionTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { if (SI->isVolatile()) return false; if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) @@ -2063,7 +2093,8 @@ static bool isIntegerWideningViable(const DataLayout &TD, // unsplittable entry (which we may make splittable later). bool WholeAllocaOp = false; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t RelBegin = I->BeginOffset - AllocBeginOffset; @@ -2074,7 +2105,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelEnd > Size) return false; - if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (RelBegin == 0 && RelEnd == Size) @@ -2089,7 +2120,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, AllocaTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { Type *ValueTy = SI->getValueOperand()->getType(); if (SI->isVolatile()) return false; @@ -2105,16 +2136,16 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, ValueTy, AllocaTy)) return false; - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) { + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { if (II->getIntrinsicID() != Intrinsic::lifetime_start && II->getIntrinsicID() != Intrinsic::lifetime_end) return false; @@ -2297,6 +2328,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, // The offset of the partition user currently being rewritten. 
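The canConvertValue/convertValue hunks a little further up now accept integer widening as a legal conversion. A hedged sketch of just that path (the helper name is made up; the IRBuilder call matches the patch):

    #include "llvm/IR/IRBuilder.h"

    // Widening an iN value to a wider iM (M >= N) is lowered with a
    // zero-extension; same-width values pass through unchanged.
    static llvm::Value *widenIntSketch(llvm::IRBuilder<> &IRB, llvm::Value *V,
                                       llvm::IntegerType *NewITy) {
      llvm::IntegerType *OldITy = llvm::cast<llvm::IntegerType>(V->getType());
      assert(NewITy->getBitWidth() >= OldITy->getBitWidth() &&
             "Sketch only covers widening");
      if (NewITy->getBitWidth() == OldITy->getBitWidth())
        return V;
      return IRB.CreateZExt(V, NewITy);
    }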
uint64_t BeginOffset, EndOffset; + bool IsSplit; Use *OldUse; Instruction *OldPtr; @@ -2314,7 +2346,7 @@ public: NewAllocaEndOffset(NewEndOffset), NewAllocaTy(NewAI.getAllocatedType()), VecTy(), ElementTy(), ElementSize(), IntTy(), - BeginOffset(), EndOffset() { + BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr() { } /// \brief Visit the users of the alloca partition and rewrite them. @@ -2336,14 +2368,15 @@ public: } bool CanSROA = true; for (; I != E; ++I) { - if (!I->U) + if (!I->getUse()) continue; // Skip dead uses. BeginOffset = I->BeginOffset; EndOffset = I->EndOffset; - OldUse = I->U; - OldPtr = cast<Instruction>(I->U->get()); + IsSplit = I->isSplit(); + OldUse = I->getUse(); + OldPtr = cast<Instruction>(OldUse->get()); NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str(); - CanSROA &= visit(cast<Instruction>(I->U->getUser())); + CanSROA &= visit(cast<Instruction>(OldUse->getUser())); } if (VecTy) { assert(CanSROA); @@ -2450,29 +2483,12 @@ private: DEBUG(dbgs() << " original: " << LI << "\n"); Value *OldOp = LI.getOperand(0); assert(OldOp == OldPtr); - IRBuilder<> IRB(&LI); uint64_t Size = EndOffset - BeginOffset; - bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); - // If this memory access can be shown to *statically* extend outside the - // bounds of the original allocation it's behavior is undefined. Rather - // than trying to transform it, just replace it with undef. - // FIXME: We should do something more clever for functions being - // instrumented by asan. - // FIXME: Eventually, once ASan and friends can flush out bugs here, this - // should be transformed to a load of null making it unreachable. - uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType()); - if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) { - LI.replaceAllUsesWith(UndefValue::get(LI.getType())); - Pass.DeadInsts.insert(&LI); - deleteIfTriviallyDead(OldOp); - DEBUG(dbgs() << " to: undef!!\n"); - return true; - } - - Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) - : LI.getType(); + IRBuilder<> IRB(&LI); + Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); bool IsPtrAdjusted = false; Value *V; if (VecTy) { @@ -2492,16 +2508,15 @@ private: } V = convertValue(TD, IRB, V, TargetTy); - if (IsSplitIntLoad) { + if (IsSplit) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); + assert(Size < TD.getTypeStoreSize(LI.getType()) && + "Split load isn't smaller than original load"); assert(LI.getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(LI.getType()) && "Non-byte-multiple bit width"); - assert(LI.getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide loads can be split and recomposed"); // Move the insertion point just past the load so that we can refer to it. 
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); // Create a placeholder value with the same type as LI to use as the @@ -2588,14 +2603,12 @@ private: uint64_t Size = EndOffset - BeginOffset; if (Size < TD.getTypeStoreSize(V->getType())) { assert(!SI.isVolatile()); + assert(IsSplit && "A seemingly split store isn't splittable"); assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); assert(V->getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); - assert(V->getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, getName(".extract")); @@ -3381,7 +3394,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, for (AllocaPartitioning::use_iterator UI = P.use_begin(PI), UE = P.use_end(PI); UI != UE && !IsLive; ++UI) - if (UI->U) + if (UI->getUse()) IsLive = true; if (!IsLive) return false; // No live uses left of this partition. diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 00cda8e..1f517d0 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F, Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); // Compare operand values and branch - Value *ZeroV = MainBuilder.getInt32(0); + Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); @@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F, // Get bitwidth of div/rem instruction IntegerType *T = cast<IntegerType>(J->getType()); - int bitwidth = T->getBitWidth(); + unsigned int bitwidth = T->getBitWidth(); // Continue if bitwidth is not bypassed DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index a309bce..63d7a1d 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -94,9 +94,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, //Some arguments were deleted with the VMap. Copy arguments one by one for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) - if (Argument* Anew = dyn_cast<Argument>(VMap[I])) - Anew->addAttr(OldFunc->getAttributes() - .getParamAttributes(I->getArgNo() + 1)); + if (Argument* Anew = dyn_cast<Argument>(VMap[I])) { + AttributeSet attrs = OldFunc->getAttributes() + .getParamAttributes(I->getArgNo() + 1); + if (attrs.getNumSlots() > 0) + Anew->addAttr(attrs); + } NewFunc->setAttributes(NewFunc->getAttributes() .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex, diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index a63d31d..681bf9c 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2789,9 +2789,20 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; // Turn all invokes that unwind here into calls and delete the basic block. 
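On the CloneFunctionInto hunk just above: parameter attributes are now copied to the cloned argument only when the source attribute set is non-empty. A condensed restatement with illustrative names (OldFunc and the cloned Argument come from the surrounding loop; parameter attribute indices are 1-based, hence ArgNo + 1):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    static void copyParamAttrsSketch(const llvm::Function *OldFunc,
                                     llvm::Argument *NewArg, unsigned ArgNo) {
      // Skip empty attribute sets instead of attaching an empty entry.
      llvm::AttributeSet Attrs =
          OldFunc->getAttributes().getParamAttributes(ArgNo + 1);
      if (Attrs.getNumSlots() > 0)
        NewArg->addAttr(Attrs);
    }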
+ bool InvokeRequiresTableEntry = false; + bool Changed = false; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); + + if (II->hasFnAttr(Attribute::UWTable)) { + // Don't remove an `invoke' instruction if the ABI requires an entry into + // the table. + InvokeRequiresTableEntry = true; + continue; + } + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + // Insert a call instruction before the invoke. CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); Call->takeName(II); @@ -2811,11 +2822,14 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { // Finally, delete the invoke instruction! II->eraseFromParent(); + Changed = true; } - // The landingpad is now unreachable. Zap it. - BB->eraseFromParent(); - return true; + if (!InvokeRequiresTableEntry) + // The landingpad is now unreachable. Zap it. + BB->eraseFromParent(); + + return Changed; } bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { @@ -3944,11 +3958,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { // Load from null is undefined. if (LoadInst *LI = dyn_cast<LoadInst>(Use)) - return LI->getPointerAddressSpace() == 0; + if (!LI->isVolatile()) + return LI->getPointerAddressSpace() == 0; // Store to null is undefined. if (StoreInst *SI = dyn_cast<StoreInst>(Use)) - return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; + if (!SI->isVolatile()) + return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; } return false; } diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 8ad566c..c231704 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -15,14 +15,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Allocator.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" @@ -797,8 +800,7 @@ struct StrToOpt : public LibCallOptimization { if (isa<ConstantPointerNull>(EndPtr)) { // With a null EndPtr, this function won't capture the main argument. // It would be readonly too, except that it still may write to errno. - CI->addAttribute(1, Attribute::get(Callee->getContext(), - Attribute::NoCapture)); + CI->addAttribute(1, Attribute::NoCapture); } return 0; @@ -1672,67 +1674,16 @@ class LibCallSimplifierImpl { const TargetLibraryInfo *TLI; const LibCallSimplifier *LCS; bool UnsafeFPShrink; - StringMap<LibCallOptimization*> Optimizations; - - // Fortified library call optimizations. - MemCpyChkOpt MemCpyChk; - MemMoveChkOpt MemMoveChk; - MemSetChkOpt MemSetChk; - StrCpyChkOpt StrCpyChk; - StpCpyChkOpt StpCpyChk; - StrNCpyChkOpt StrNCpyChk; - - // String library call optimizations. 
- StrCatOpt StrCat; - StrNCatOpt StrNCat; - StrChrOpt StrChr; - StrRChrOpt StrRChr; - StrCmpOpt StrCmp; - StrNCmpOpt StrNCmp; - StrCpyOpt StrCpy; - StpCpyOpt StpCpy; - StrNCpyOpt StrNCpy; - StrLenOpt StrLen; - StrPBrkOpt StrPBrk; - StrToOpt StrTo; - StrSpnOpt StrSpn; - StrCSpnOpt StrCSpn; - StrStrOpt StrStr; - - // Memory library call optimizations. - MemCmpOpt MemCmp; - MemCpyOpt MemCpy; - MemMoveOpt MemMove; - MemSetOpt MemSet; // Math library call optimizations. - UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; - CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; - - // Integer library call optimizations. - FFSOpt FFS; - AbsOpt Abs; - IsDigitOpt IsDigit; - IsAsciiOpt IsAscii; - ToAsciiOpt ToAscii; - - // Formatting and IO library call optimizations. - PrintFOpt PrintF; - SPrintFOpt SPrintF; - FPrintFOpt FPrintF; - FWriteOpt FWrite; - FPutsOpt FPuts; - PutsOpt Puts; - - void initOptimizations(); - void addOpt(LibFunc::Func F, LibCallOptimization* Opt); - void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); + CosOpt Cos; + PowOpt Pow; + Exp2Opt Exp2; public: LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, const LibCallSimplifier *LCS, bool UnsafeFPShrink = false) - : UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true), - Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { + : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { this->TD = TD; this->TLI = TLI; this->LCS = LCS; @@ -1740,126 +1691,234 @@ public: } Value *optimizeCall(CallInst *CI); + LibCallOptimization *lookupOptimization(CallInst *CI); + bool hasFloatVersion(StringRef FuncName); }; -void LibCallSimplifierImpl::initOptimizations() { - // Fortified library call optimizations. - Optimizations["__memcpy_chk"] = &MemCpyChk; - Optimizations["__memmove_chk"] = &MemMoveChk; - Optimizations["__memset_chk"] = &MemSetChk; - Optimizations["__strcpy_chk"] = &StrCpyChk; - Optimizations["__stpcpy_chk"] = &StpCpyChk; - Optimizations["__strncpy_chk"] = &StrNCpyChk; - Optimizations["__stpncpy_chk"] = &StrNCpyChk; - - // String library call optimizations. - addOpt(LibFunc::strcat, &StrCat); - addOpt(LibFunc::strncat, &StrNCat); - addOpt(LibFunc::strchr, &StrChr); - addOpt(LibFunc::strrchr, &StrRChr); - addOpt(LibFunc::strcmp, &StrCmp); - addOpt(LibFunc::strncmp, &StrNCmp); - addOpt(LibFunc::strcpy, &StrCpy); - addOpt(LibFunc::stpcpy, &StpCpy); - addOpt(LibFunc::strncpy, &StrNCpy); - addOpt(LibFunc::strlen, &StrLen); - addOpt(LibFunc::strpbrk, &StrPBrk); - addOpt(LibFunc::strtol, &StrTo); - addOpt(LibFunc::strtod, &StrTo); - addOpt(LibFunc::strtof, &StrTo); - addOpt(LibFunc::strtoul, &StrTo); - addOpt(LibFunc::strtoll, &StrTo); - addOpt(LibFunc::strtold, &StrTo); - addOpt(LibFunc::strtoull, &StrTo); - addOpt(LibFunc::strspn, &StrSpn); - addOpt(LibFunc::strcspn, &StrCSpn); - addOpt(LibFunc::strstr, &StrStr); - - // Memory library call optimizations. - addOpt(LibFunc::memcmp, &MemCmp); - addOpt(LibFunc::memcpy, &MemCpy); - addOpt(LibFunc::memmove, &MemMove); - addOpt(LibFunc::memset, &MemSet); +bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) { + LibFunc::Func Func; + SmallString<20> FloatFuncName = FuncName; + FloatFuncName += 'f'; + if (TLI->getLibFunc(FloatFuncName, Func)) + return TLI->has(Func); + return false; +} - // Math library call optimizations. 
- addOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); - addOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); - addOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); - addOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); - addOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); - addOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); - addOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); - - if(UnsafeFPShrink) { - addOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); +// Fortified library call optimizations. +static MemCpyChkOpt MemCpyChk; +static MemMoveChkOpt MemMoveChk; +static MemSetChkOpt MemSetChk; +static StrCpyChkOpt StrCpyChk; +static StpCpyChkOpt StpCpyChk; +static StrNCpyChkOpt StrNCpyChk; + +// String library call optimizations. +static StrCatOpt StrCat; +static StrNCatOpt StrNCat; +static StrChrOpt StrChr; +static StrRChrOpt StrRChr; +static StrCmpOpt StrCmp; +static StrNCmpOpt StrNCmp; +static StrCpyOpt StrCpy; +static StpCpyOpt StpCpy; +static StrNCpyOpt StrNCpy; +static StrLenOpt StrLen; +static StrPBrkOpt StrPBrk; +static StrToOpt StrTo; +static StrSpnOpt StrSpn; +static StrCSpnOpt StrCSpn; +static StrStrOpt StrStr; + +// Memory library call optimizations. +static MemCmpOpt MemCmp; +static MemCpyOpt MemCpy; +static MemMoveOpt MemMove; +static MemSetOpt MemSet; + +// Math library call optimizations. +static UnaryDoubleFPOpt UnaryDoubleFP(false); +static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + + // Integer library call optimizations. +static FFSOpt FFS; +static AbsOpt Abs; +static IsDigitOpt IsDigit; +static IsAsciiOpt IsAscii; +static ToAsciiOpt ToAscii; + +// Formatting and IO library call optimizations. +static PrintFOpt PrintF; +static SPrintFOpt SPrintF; +static FPrintFOpt FPrintF; +static FWriteOpt FWrite; +static FPutsOpt FPuts; +static PutsOpt Puts; + +LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { + LibFunc::Func Func; + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + + // Next check for intrinsics. 
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + switch (II->getIntrinsicID()) { + case Intrinsic::pow: + return &Pow; + case Intrinsic::exp2: + return &Exp2; + default: + return 0; + } } - addOpt(LibFunc::cosf, &Cos); - addOpt(LibFunc::cos, &Cos); - addOpt(LibFunc::cosl, &Cos); - addOpt(LibFunc::powf, &Pow); - addOpt(LibFunc::pow, &Pow); - addOpt(LibFunc::powl, &Pow); - Optimizations["llvm.pow.f32"] = &Pow; - Optimizations["llvm.pow.f64"] = &Pow; - Optimizations["llvm.pow.f80"] = &Pow; - Optimizations["llvm.pow.f128"] = &Pow; - Optimizations["llvm.pow.ppcf128"] = &Pow; - addOpt(LibFunc::exp2l, &Exp2); - addOpt(LibFunc::exp2, &Exp2); - addOpt(LibFunc::exp2f, &Exp2); - Optimizations["llvm.exp2.ppcf128"] = &Exp2; - Optimizations["llvm.exp2.f128"] = &Exp2; - Optimizations["llvm.exp2.f80"] = &Exp2; - Optimizations["llvm.exp2.f64"] = &Exp2; - Optimizations["llvm.exp2.f32"] = &Exp2; + // Then check for known library functions. + if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) { + switch (Func) { + case LibFunc::strcat: + return &StrCat; + case LibFunc::strncat: + return &StrNCat; + case LibFunc::strchr: + return &StrChr; + case LibFunc::strrchr: + return &StrRChr; + case LibFunc::strcmp: + return &StrCmp; + case LibFunc::strncmp: + return &StrNCmp; + case LibFunc::strcpy: + return &StrCpy; + case LibFunc::stpcpy: + return &StpCpy; + case LibFunc::strncpy: + return &StrNCpy; + case LibFunc::strlen: + return &StrLen; + case LibFunc::strpbrk: + return &StrPBrk; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + return &StrTo; + case LibFunc::strspn: + return &StrSpn; + case LibFunc::strcspn: + return &StrCSpn; + case LibFunc::strstr: + return &StrStr; + case LibFunc::memcmp: + return &MemCmp; + case LibFunc::memcpy: + return &MemCpy; + case LibFunc::memmove: + return &MemMove; + case LibFunc::memset: + return &MemSet; + case LibFunc::cosf: + case LibFunc::cos: + case LibFunc::cosl: + return &Cos; + case LibFunc::powf: + case LibFunc::pow: + case LibFunc::powl: + return &Pow; + case LibFunc::exp2l: + case LibFunc::exp2: + case LibFunc::exp2f: + return &Exp2; + case LibFunc::ffs: + case LibFunc::ffsl: + case LibFunc::ffsll: + return &FFS; + case LibFunc::abs: + case LibFunc::labs: + case LibFunc::llabs: + return &Abs; + case LibFunc::isdigit: + return &IsDigit; + case LibFunc::isascii: + return &IsAscii; + case LibFunc::toascii: + return &ToAscii; + case LibFunc::printf: + return &PrintF; + case LibFunc::sprintf: + return &SPrintF; + case LibFunc::fprintf: + return &FPrintF; + case LibFunc::fwrite: + return &FWrite; + case LibFunc::fputs: + return &FPuts; + case LibFunc::puts: + return &Puts; + case LibFunc::ceil: + case LibFunc::fabs: + case LibFunc::floor: + case LibFunc::rint: + case LibFunc::round: + case LibFunc::nearbyint: + case LibFunc::trunc: + if (hasFloatVersion(FuncName)) + return &UnaryDoubleFP; + return 0; + case LibFunc::acos: + case LibFunc::acosh: + case LibFunc::asin: + case LibFunc::asinh: + case LibFunc::atan: + case LibFunc::atanh: + case LibFunc::cbrt: + case LibFunc::cosh: + case LibFunc::exp: + case LibFunc::exp10: + case LibFunc::expm1: + case LibFunc::log: + case LibFunc::log10: + case LibFunc::log1p: + case LibFunc::log2: + case LibFunc::logb: + case LibFunc::sin: + case LibFunc::sinh: + case LibFunc::sqrt: + case LibFunc::tan: + case LibFunc::tanh: + if (UnsafeFPShrink && hasFloatVersion(FuncName)) + return &UnsafeUnaryDoubleFP; + 
return 0; + case LibFunc::memcpy_chk: + return &MemCpyChk; + default: + return 0; + } + } + + // Finally check for fortified library calls. + if (FuncName.endswith("_chk")) { + if (FuncName == "__memmove_chk") + return &MemMoveChk; + else if (FuncName == "__memset_chk") + return &MemSetChk; + else if (FuncName == "__strcpy_chk") + return &StrCpyChk; + else if (FuncName == "__stpcpy_chk") + return &StpCpyChk; + else if (FuncName == "__strncpy_chk") + return &StrNCpyChk; + else if (FuncName == "__stpncpy_chk") + return &StrNCpyChk; + } + + return 0; - // Integer library call optimizations. - addOpt(LibFunc::ffs, &FFS); - addOpt(LibFunc::ffsl, &FFS); - addOpt(LibFunc::ffsll, &FFS); - addOpt(LibFunc::abs, &Abs); - addOpt(LibFunc::labs, &Abs); - addOpt(LibFunc::llabs, &Abs); - addOpt(LibFunc::isdigit, &IsDigit); - addOpt(LibFunc::isascii, &IsAscii); - addOpt(LibFunc::toascii, &ToAscii); - - // Formatting and IO library call optimizations. - addOpt(LibFunc::printf, &PrintF); - addOpt(LibFunc::sprintf, &SPrintF); - addOpt(LibFunc::fprintf, &FPrintF); - addOpt(LibFunc::fwrite, &FWrite); - addOpt(LibFunc::fputs, &FPuts); - addOpt(LibFunc::puts, &Puts); } Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { - if (Optimizations.empty()) - initOptimizations(); - - Function *Callee = CI->getCalledFunction(); - LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); + LibCallOptimization *LCO = lookupOptimization(CI); if (LCO) { IRBuilder<> Builder(CI); return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); @@ -1867,17 +1926,6 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { return 0; } -void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) { - if (TLI->has(F)) - Optimizations[TLI->getName(F)] = Opt; -} - -void LibCallSimplifierImpl::addOpt(LibFunc::Func F1, LibFunc::Func F2, - LibCallOptimization* Opt) { - if (TLI->has(F1) && TLI->has(F2)) - Optimizations[TLI->getName(F1)] = Opt; -} - LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI, bool UnsafeFPShrink) { diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 7636541..17900da 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1771,7 +1771,7 @@ namespace { size_t MaxDepth = DAG.lookup(IJ); DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {" - << IJ.first << " <-> " << IJ.second << "} of depth " << + << *IJ.first << " <-> " << *IJ.second << "} of depth " << MaxDepth << " and size " << DAG.size() << "\n"); // At this point the DAG has been constructed, but, may contain @@ -2086,7 +2086,7 @@ namespace { DEBUG(if (DebugPairSelection) dbgs() << "BBV: found pruned DAG for pair {" - << IJ.first << " <-> " << IJ.second << "} of depth " << + << *IJ.first << " <-> " << *IJ.second << "} of depth " << MaxDepth << " and size " << PrunedDAG.size() << " (effective size: " << EffSize << ")\n"); if (((TTI && !UseChainDepthWithTI) || diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f489393..930d9c4 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -79,6 +79,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -115,6 
+116,12 @@ static const unsigned TinyTripCountUnrollThreshold = 128; /// number of pointers. Notice that the check is quadratic! static const unsigned RuntimeMemoryCheckThreshold = 4; +/// We use a metadata with this name to indicate that a scalar loop was +/// vectorized and that we don't need to re-vectorize it if we run into it +/// again. +static const char* +AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized"; + namespace { // Forward declarations. @@ -138,10 +145,11 @@ class LoopVectorizationCostModel; class InnerLoopVectorizer { public: InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, DataLayout *DL, unsigned VecWidth, + DominatorTree *DT, DataLayout *DL, + const TargetLibraryInfo *TLI, unsigned VecWidth, unsigned UnrollFactor) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), VF(VecWidth), - UF(UnrollFactor), Builder(SE->getContext()), Induction(0), + : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI), + VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0), OldInduction(0), WidenMap(UnrollFactor) {} // Perform the actual loop widening (vectorization). @@ -268,6 +276,9 @@ private: DominatorTree *DT; /// Data Layout. DataLayout *DL; + /// Target Library Info. + const TargetLibraryInfo *TLI; + /// The vectorization SIMD factor to use. Each vector will have this many /// vector elements. unsigned VF; @@ -320,8 +331,9 @@ class LoopVectorizationLegality { public: LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL, DominatorTree *DT, TargetTransformInfo* TTI, - AliasAnalysis* AA) - : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), Induction(0) {} + AliasAnalysis *AA, TargetLibraryInfo *TLI) + : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), + Induction(0) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -407,7 +419,7 @@ public: /// Alias(Multi)Map stores the values (GEPs or underlying objects and their /// respective Store/Load instruction(s) to calculate aliasing. - typedef DenseMap<Value*, Instruction* > AliasMap; + typedef MapVector<Value*, Instruction* > AliasMap; typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap; /// Returns true if it is legal to vectorize this loop. @@ -504,6 +516,8 @@ private: TargetTransformInfo *TTI; /// Alias Analysis. AliasAnalysis *AA; + /// Target Library Info. + TargetLibraryInfo *TLI; // --- vectorization state --- // @@ -540,8 +554,8 @@ public: LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, - DataLayout *DL) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL) {} + DataLayout *DL, const TargetLibraryInfo *TLI) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {} /// Information about vectorization costs struct VectorizationFactor { @@ -614,6 +628,8 @@ private: const TargetTransformInfo &TTI; /// Target data layout information. DataLayout *DL; + /// Target Library Info. + const TargetLibraryInfo *TLI; }; /// The LoopVectorize Pass. @@ -631,6 +647,7 @@ struct LoopVectorize : public LoopPass { TargetTransformInfo *TTI; DominatorTree *DT; AliasAnalysis *AA; + TargetLibraryInfo *TLI; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. 
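The llvm.vectorizer.already_vectorized name introduced above is used later in this patch both to tag the scalar loop once it has been vectorized and to bail out early when the tag is seen again. A minimal sketch of that round trip (function names invented for illustration):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Metadata.h"

    static const char *AlreadyVectorizedMDName =
        "llvm.vectorizer.already_vectorized";

    // Attach an empty metadata node to the block's terminator as a marker.
    static void markAlreadyVectorized(llvm::BasicBlock *BB) {
      llvm::MDNode *MD =
          llvm::MDNode::get(BB->getContext(), llvm::ArrayRef<llvm::Value *>());
      BB->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
    }

    // Subsequent runs check the marker and skip the loop.
    static bool isAlreadyVectorized(const llvm::BasicBlock *Header) {
      return Header->getTerminator()->getMetadata(AlreadyVectorizedMDName) != 0;
    }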
@@ -643,19 +660,20 @@ struct LoopVectorize : public LoopPass { TTI = &getAnalysis<TargetTransformInfo>(); DT = &getAnalysis<DominatorTree>(); AA = getAnalysisIfAvailable<AliasAnalysis>(); + TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); // Check if it is legal to vectorize the loop. - LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA); + LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing.\n"); return false; } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL); + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI); // Check the function attributes to find out if this function should be // optimized for size. @@ -689,7 +707,7 @@ struct LoopVectorize : public LoopPass { DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n"); // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF.Width, UF); + InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF); LB.vectorize(&LVL); DEBUG(verifyFunction(*L->getHeader()->getParent())); @@ -1147,6 +1165,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(ExitBlock && "Must have an exit block"); + // Mark the old scalar loop with metadata that tells us not to vectorize this + // loop again if we run into it. + MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>()); + OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD); + // Some loops have a single integer induction variable, while other loops // don't. One example is c++ iterators that often have multiple pointer // induction variables. In the code below we also support a case where we @@ -1438,34 +1461,108 @@ getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) { } } -static bool -isTriviallyVectorizableIntrinsic(Instruction *Inst) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); - if (!II) - return false; - switch (II->getIntrinsicID()) { - case Intrinsic::sqrt: - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::log: - case Intrinsic::log10: - case Intrinsic::log2: - case Intrinsic::fabs: - case Intrinsic::floor: - case Intrinsic::ceil: - case Intrinsic::trunc: - case Intrinsic::rint: - case Intrinsic::nearbyint: - case Intrinsic::pow: - case Intrinsic::fma: - case Intrinsic::fmuladd: - return true; - default: - return false; +static Intrinsic::ID +getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { + // If we have an intrinsic call, check if it is trivially vectorizable. 
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + switch (II->getIntrinsicID()) { + case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::log: + case Intrinsic::log10: + case Intrinsic::log2: + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + case Intrinsic::pow: + case Intrinsic::fma: + case Intrinsic::fmuladd: + return II->getIntrinsicID(); + default: + return Intrinsic::not_intrinsic; + } } - return false; + + if (!TLI) + return Intrinsic::not_intrinsic; + + LibFunc::Func Func; + Function *F = CI->getCalledFunction(); + // We're going to make assumptions on the semantics of the functions, check + // that the target knows that it's available in this environment. + if (!F || !TLI->getLibFunc(F->getName(), Func)) + return Intrinsic::not_intrinsic; + + // Otherwise check if we have a call to a function that can be turned into a + // vector intrinsic. + switch (Func) { + default: + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + return Intrinsic::sin; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + return Intrinsic::cos; + case LibFunc::exp: + case LibFunc::expf: + case LibFunc::expl: + return Intrinsic::exp; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + return Intrinsic::exp2; + case LibFunc::log: + case LibFunc::logf: + case LibFunc::logl: + return Intrinsic::log; + case LibFunc::log10: + case LibFunc::log10f: + case LibFunc::log10l: + return Intrinsic::log10; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + return Intrinsic::log2; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + return Intrinsic::fabs; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + return Intrinsic::floor; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + return Intrinsic::ceil; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + return Intrinsic::trunc; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + return Intrinsic::rint; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + return Intrinsic::nearbyint; + case LibFunc::pow: + case LibFunc::powf: + case LibFunc::powl: + return Intrinsic::pow; + } + + return Intrinsic::not_intrinsic; } /// This function translates the reduction kind to an LLVM binary operator. @@ -1546,7 +1643,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // To do so, we need to generate the 'identity' vector and overide // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator()); + Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator()); // This is the vector-clone of the value that leaves the loop. VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); @@ -1991,17 +2088,21 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, } case Instruction::Call: { - assert(isTriviallyVectorizableIntrinsic(it)); + // Ignore dbg intrinsics. 
+ if (isa<DbgInfoIntrinsic>(it)) + break; + Module *M = BB->getParent()->getParent(); - IntrinsicInst *II = cast<IntrinsicInst>(it); - Intrinsic::ID ID = II->getIntrinsicID(); + CallInst *CI = cast<CallInst>(it); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); + assert(ID && "Not an intrinsic call!"); for (unsigned Part = 0; Part < UF; ++Part) { SmallVector<Value*, 4> Args; - for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) { - VectorParts &Arg = getVectorValue(II->getArgOperand(i)); + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { + VectorParts &Arg = getVectorValue(CI->getArgOperand(i)); Args.push_back(Arg[Part]); } - Type *Tys[] = { VectorType::get(II->getType()->getScalarType(), VF) }; + Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) }; Function *F = Intrinsic::getDeclaration(M, ID, Tys); Entry[Part] = Builder.CreateCall(F, Args); } @@ -2138,6 +2239,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); + // If we marked the scalar loop as "already vectorized" then no need + // to vectorize it again. + if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) { + DEBUG(dbgs() << "LV: This loop was vectorized before\n"); + return false; + } + // For each block in the loop. for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { @@ -2220,9 +2328,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; }// end of PHI handling - // We still don't handle functions. + // We still don't handle functions. However, we can ignore dbg intrinsic + // calls and we do handle certain intrinsic and libm functions. CallInst *CI = dyn_cast<CallInst>(it); - if (CI && !isTriviallyVectorizableIntrinsic(it)) { + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) { DEBUG(dbgs() << "LV: Found a call site.\n"); return false; } @@ -2638,6 +2747,9 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Is this a bin op ? FoundBinOp |= !isa<PHINode>(Iter); + // Remember the current instruction. + Instruction *OldIter = Iter; + // For each of the *users* of iter. for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end(); it != e; ++it) { @@ -2663,7 +2775,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, if (isa<PHINode>(Iter) && isa<PHINode>(U) && U->getParent() != TheLoop->getHeader() && TheLoop->contains(U) && - Iter->getNumUses() > 1) + Iter->hasNUsesOrMore(2)) continue; // We can't have multiple inside users. @@ -2683,6 +2795,10 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, Iter = U; } + // If all uses were skipped this can't be a reduction variable. + if (Iter == OldIter) + return false; + // We found a reduction var if we have reached the original // phi node and we only have a single instruction with out-of-loop // users. @@ -3152,6 +3268,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + // Skip dbg intrinsics. 
+ if (isa<DbgInfoIntrinsic>(it)) + continue; + unsigned C = getInstructionCost(it, VF); Cost += C; DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " << @@ -3218,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); - if (ScalarCond) + if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); @@ -3305,13 +3425,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } case Instruction::Call: { - assert(isTriviallyVectorizableIntrinsic(I)); - IntrinsicInst *II = cast<IntrinsicInst>(I); - Type *RetTy = ToVectorTy(II->getType(), VF); + CallInst *CI = cast<CallInst>(I); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); + assert(ID && "Not an intrinsic call!"); + Type *RetTy = ToVectorTy(CI->getType(), VF); SmallVector<Type*, 4> Tys; - for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) - Tys.push_back(ToVectorTy(II->getArgOperand(i)->getType(), VF)); - return TTI.getIntrinsicInstrCost(II->getIntrinsicID(), RetTy, Tys); + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) + Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); + return TTI.getIntrinsicInstrCost(ID, RetTy, Tys); } default: { // We are scalarizing the instruction. Return the cost of the scalar |
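Putting the LoopVectorize pieces together: once getIntrinsicIDForCall maps a recognized libm call (say floorf) to an intrinsic ID, both the widening code and the cost model treat it exactly like a call to that intrinsic. A hedged sketch of the widening step, with getIntrinsicIDForCall taken from the patch and the module, builder, VF and widened operands passed in as assumptions:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Target/TargetLibraryInfo.h"

    // Sketch: widen one recognized libm/intrinsic call. WideArgs are the
    // already-vectorized operands for the current unroll part.
    static llvm::Value *widenCallSketch(llvm::CallInst *CI,
                                        const llvm::TargetLibraryInfo *TLI,
                                        llvm::IRBuilder<> &Builder,
                                        llvm::Module *M, unsigned VF,
                                        llvm::ArrayRef<llvm::Value *> WideArgs) {
      llvm::Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
      assert(ID != llvm::Intrinsic::not_intrinsic && "checked by legality");
      // e.g. floorf maps to llvm.floor.v4f32 when VF == 4.
      llvm::Type *VecTy =
          llvm::VectorType::get(CI->getType()->getScalarType(), VF);
      llvm::Function *VecF = llvm::Intrinsic::getDeclaration(M, ID, VecTy);
      return Builder.CreateCall(VecF, WideArgs);
    }

On the cost side, getInstructionCost now queries TTI.getIntrinsicInstrCost with the same ID and vectorized types, so a loop calling floorf is priced the same as one calling llvm.floor directly.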