From 38c4e535493363b96eac47af9e7c056530137bea Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 2 Mar 2011 04:14:42 +0000 Subject: Add a special streamer to libLTO that just records symbols definitions and uses. The result produced by the streamer is used to give the linker more accurate information and to add to llvm.compiler.used. The second improvement removes the need for the user to add __attribute__((used)) to functions only used in inline asm. The first one lets us build firefox with LTO on Darwin :-) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126830 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/lto/LTOCodeGenerator.cpp | 108 ++++++++++++----- tools/lto/LTOCodeGenerator.h | 6 + tools/lto/LTOModule.cpp | 268 +++++++++++++++++++++++++++++++++++------ tools/lto/LTOModule.h | 12 +- 4 files changed, 322 insertions(+), 72 deletions(-) (limited to 'tools/lto') diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index f72fdb0..6739e06 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -75,7 +75,6 @@ LTOCodeGenerator::LTOCodeGenerator() { InitializeAllTargets(); InitializeAllAsmPrinters(); - InitializeAllAsmParsers(); } LTOCodeGenerator::~LTOCodeGenerator() @@ -88,7 +87,13 @@ LTOCodeGenerator::~LTOCodeGenerator() bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { - return _linker.LinkInModule(mod->getLLVVMModule(), &errMsg); + bool ret = _linker.LinkInModule(mod->getLLVVMModule(), &errMsg); + + const std::vector &undefs = mod->getAsmUndefinedRefs(); + for (int i = 0, e = undefs.size(); i != e; ++i) + _asmUndefinedRefs[undefs[i]] = 1; + + return ret; } @@ -249,6 +254,34 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg) return false; } +void LTOCodeGenerator::applyRestriction(GlobalValue &GV, + std::vector &mustPreserveList, + SmallPtrSet &asmUsed, + Mangler &mangler) { + SmallString<64> Buffer; + mangler.getNameWithPrefix(Buffer, &GV, false); + + if 
(GV.isDeclaration()) + return; + if (_mustPreserveSymbols.count(Buffer)) + mustPreserveList.push_back(GV.getName().data()); + if (_asmUndefinedRefs.count(Buffer)) + asmUsed.insert(&GV); +} + +static void findUsedValues(GlobalVariable *LLVMUsed, + SmallPtrSet &UsedValues) { + if (LLVMUsed == 0) return; + + ConstantArray *Inits = dyn_cast(LLVMUsed->getInitializer()); + if (Inits == 0) return; + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) + if (GlobalValue *GV = + dyn_cast(Inits->getOperand(i)->stripPointerCasts())) + UsedValues.insert(GV); +} + void LTOCodeGenerator::applyScopeRestrictions() { if (_scopeRestrictionsDone) return; Module *mergedModule = _linker.getModule(); @@ -258,38 +291,47 @@ void LTOCodeGenerator::applyScopeRestrictions() { passes.add(createVerifierPass()); // mark which symbols can not be internalized - if (!_mustPreserveSymbols.empty()) { - MCContext Context(*_target->getMCAsmInfo(), NULL); - Mangler mangler(Context, *_target->getTargetData()); - std::vector mustPreserveList; - SmallString<64> Buffer; - for (Module::iterator f = mergedModule->begin(), - e = mergedModule->end(); f != e; ++f) { - Buffer.clear(); - mangler.getNameWithPrefix(Buffer, f, false); - if (!f->isDeclaration() && - _mustPreserveSymbols.count(Buffer)) - mustPreserveList.push_back(f->getName().data()); - } - for (Module::global_iterator v = mergedModule->global_begin(), - e = mergedModule->global_end(); v != e; ++v) { - Buffer.clear(); - mangler.getNameWithPrefix(Buffer, v, false); - if (!v->isDeclaration() && - _mustPreserveSymbols.count(Buffer)) - mustPreserveList.push_back(v->getName().data()); - } - for (Module::alias_iterator a = mergedModule->alias_begin(), - e = mergedModule->alias_end(); a != e; ++a) { - Buffer.clear(); - mangler.getNameWithPrefix(Buffer, a, false); - if (!a->isDeclaration() && - _mustPreserveSymbols.count(Buffer)) - mustPreserveList.push_back(a->getName().data()); - } - passes.add(createInternalizePass(mustPreserveList)); + 
MCContext Context(*_target->getMCAsmInfo(), NULL); + Mangler mangler(Context, *_target->getTargetData()); + std::vector mustPreserveList; + SmallPtrSet asmUsed; + + for (Module::iterator f = mergedModule->begin(), + e = mergedModule->end(); f != e; ++f) + applyRestriction(*f, mustPreserveList, asmUsed, mangler); + for (Module::global_iterator v = mergedModule->global_begin(), + e = mergedModule->global_end(); v != e; ++v) + applyRestriction(*v, mustPreserveList, asmUsed, mangler); + for (Module::alias_iterator a = mergedModule->alias_begin(), + e = mergedModule->alias_end(); a != e; ++a) + applyRestriction(*a, mustPreserveList, asmUsed, mangler); + + GlobalVariable *LLVMCompilerUsed = + mergedModule->getGlobalVariable("llvm.compiler.used"); + findUsedValues(LLVMCompilerUsed, asmUsed); + if (LLVMCompilerUsed) + LLVMCompilerUsed->eraseFromParent(); + + const llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(_context); + std::vector asmUsed2; + for (SmallPtrSet::const_iterator i = asmUsed.begin(), + e = asmUsed.end(); i !=e; ++i) { + GlobalValue *GV = *i; + Constant *c = ConstantExpr::getBitCast(GV, i8PTy); + asmUsed2.push_back(c); } - + + llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size()); + LLVMCompilerUsed = + new llvm::GlobalVariable(*mergedModule, ATy, false, + llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, asmUsed2), + "llvm.compiler.used"); + + LLVMCompilerUsed->setSection("llvm.metadata"); + + passes.add(createInternalizePass(mustPreserveList)); + // apply scope restrictions passes.run(*mergedModule); diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h index 0556520..7798db9 100644 --- a/tools/lto/LTOCodeGenerator.h +++ b/tools/lto/LTOCodeGenerator.h @@ -19,6 +19,7 @@ #include "llvm/LLVMContext.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include @@ -46,6 +47,10 @@ private: bool generateObjectFile(llvm::raw_ostream& out, std::string& errMsg); void 
applyScopeRestrictions(); + void applyRestriction(llvm::GlobalValue &GV, + std::vector &mustPreserveList, + llvm::SmallPtrSet &asmUsed, + llvm::Mangler &mangler); bool determineTarget(std::string& errMsg); typedef llvm::StringMap StringSet; @@ -57,6 +62,7 @@ private: bool _scopeRestrictionsDone; lto_codegen_model _codeModel; StringSet _mustPreserveSymbols; + StringSet _asmUndefinedRefs; llvm::MemoryBuffer* _nativeObjectFile; std::vector _codegenOptions; std::string _mCpu; diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp index 1eac22c..bdea0c3 100644 --- a/tools/lto/LTOModule.cpp +++ b/tools/lto/LTOModule.cpp @@ -26,11 +26,18 @@ #include "llvm/Support/Host.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/system_error.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/SubtargetFeature.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetAsmParser.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetSelect.h" @@ -73,7 +80,7 @@ bool LTOModule::isTargetMatch(MemoryBuffer *buffer, const char *triplePrefix) { LTOModule::LTOModule(Module *m, TargetMachine *t) - : _module(m), _target(t), _symbolsParsed(false) + : _module(m), _target(t) { } @@ -123,7 +130,12 @@ LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length, LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, std::string &errMsg) { - InitializeAllTargets(); + static bool Initialized = false; + if (!Initialized) { + InitializeAllTargets(); + InitializeAllAsmParsers(); + Initialized = true; + } // parse bitcode buffer OwningPtr m(ParseBitcodeFile(buffer, getGlobalContext(), &errMsg)); @@ -144,7 +156,13 @@ LTOModule 
*LTOModule::makeLTOModule(MemoryBuffer *buffer, Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple)); std::string FeatureStr = Features.getString(); TargetMachine *target = march->createTargetMachine(Triple, FeatureStr); - return new LTOModule(m.take(), target); + LTOModule *Ret = new LTOModule(m.take(), target); + bool Err = Ret->ParseSymbols(); + if (Err) { + delete Ret; + return NULL; + } + return Ret; } @@ -383,7 +401,8 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler, _symbols.push_back(info); } -void LTOModule::addAsmGlobalSymbol(const char *name) { +void LTOModule::addAsmGlobalSymbol(const char *name, + lto_symbol_attributes scope) { StringSet::value_type &entry = _defines.GetOrCreateValue(name); // only add new define if not already defined @@ -393,13 +412,32 @@ void LTOModule::addAsmGlobalSymbol(const char *name) { entry.setValue(1); const char *symbolName = entry.getKey().data(); uint32_t attr = LTO_SYMBOL_DEFINITION_REGULAR; - attr |= LTO_SYMBOL_SCOPE_DEFAULT; + attr |= scope; NameAndAttributes info; info.name = symbolName; info.attributes = (lto_symbol_attributes)attr; _symbols.push_back(info); } +void LTOModule::addAsmGlobalSymbolUndef(const char *name) { + StringMap::value_type &entry = + _undefines.GetOrCreateValue(name); + + _asm_undefines.push_back(entry.getKey().data()); + + // we already have the symbol + if (entry.getValue().name) + return; + + uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;; + attr |= LTO_SYMBOL_SCOPE_DEFAULT; + NameAndAttributes info; + info.name = entry.getKey().data(); + info.attributes = (lto_symbol_attributes)attr; + + entry.setValue(info); +} + void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl, Mangler &mangler) { // ignore all llvm.* symbols @@ -454,12 +492,194 @@ void LTOModule::findExternalRefs(Value *value, Mangler &mangler) { } } -void LTOModule::lazyParseSymbols() { - if (_symbolsParsed) - return; +namespace { + class RecordStreamer : public MCStreamer { + 
public: + enum State { NeverSeen, Global, Defined, DefinedGlobal, Used}; + + private: + StringMap Symbols; + + void markDefined(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Global: + S = DefinedGlobal; + break; + case NeverSeen: + case Defined: + case Used: + S = Defined; + break; + } + } + void markGlobal(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + S = DefinedGlobal; + break; + + case NeverSeen: + case Global: + case Used: + S = Global; + break; + } + } + void markUsed(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + case Global: + break; + + case NeverSeen: + case Used: + S = Used; + break; + } + } + + // FIXME: mostly copied from the obj streamer. + void AddValueSymbols(const MCExpr *Value) { + switch (Value->getKind()) { + case MCExpr::Target: + // FIXME: What should we do in here? 
+ break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Value); + AddValueSymbols(BE->getLHS()); + AddValueSymbols(BE->getRHS()); + break; + } + + case MCExpr::SymbolRef: + markUsed(cast(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbols(cast(Value)->getSubExpr()); + break; + } + } + + public: + typedef StringMap::const_iterator const_iterator; + + const_iterator begin() { + return Symbols.begin(); + } + + const_iterator end() { + return Symbols.end(); + } - _symbolsParsed = true; + RecordStreamer(MCContext &Context) : MCStreamer(Context) {} + virtual void ChangeSection(const MCSection *Section) {} + virtual void InitSections() {} + virtual void EmitLabel(MCSymbol *Symbol) { + Symbol->setSection(*getCurrentSection()); + markDefined(*Symbol); + } + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {} + virtual void EmitThumbFunc(MCSymbol *Func) {} + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // FIXME: should we handle aliases? 
+ markDefined(*Symbol); + } + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { + if (Attribute == MCSA_Global) + markGlobal(*Symbol); + } + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {} + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {} + virtual void EmitCOFFSymbolStorageClass(int StorageClass) {} + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size , unsigned ByteAlignment) { + markDefined(*Symbol); + } + virtual void EmitCOFFSymbolType(int Type) {} + virtual void EndCOFFSymbolDef() {} + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + markDefined(*Symbol); + } + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + bool isPCRel, unsigned AddrSpace) {} + virtual void EmitULEB128Value(const MCExpr *Value, + unsigned AddrSpace = 0) {} + virtual void EmitSLEB128Value(const MCExpr *Value, + unsigned AddrSpace = 0) {} + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) {} + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) {} + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value ) {} + virtual void EmitFileDirective(StringRef Filename) {} + virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label) {} + + virtual void EmitInstruction(const MCInst &Inst) { + // Scan for values. 
+ for (unsigned i = Inst.getNumOperands(); i--; ) + if (Inst.getOperand(i).isExpr()) + AddValueSymbols(Inst.getOperand(i).getExpr()); + } + virtual void Finish() {} + }; +} + +bool LTOModule::addAsmGlobalSymbols(MCContext &Context) { + const std::string &inlineAsm = _module->getModuleInlineAsm(); + + OwningPtr Streamer(new RecordStreamer(Context)); + MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(inlineAsm); + SourceMgr SrcMgr; + SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + OwningPtr Parser(createMCAsmParser(_target->getTarget(), SrcMgr, + Context, *Streamer, + *_target->getMCAsmInfo())); + OwningPtr + TAP(_target->getTarget().createAsmParser(*Parser.get(), *_target.get())); + Parser->setTargetParser(*TAP); + int Res = Parser->Run(false); + if (Res) + return true; + + for (RecordStreamer::const_iterator i = Streamer->begin(), + e = Streamer->end(); i != e; ++i) { + StringRef Key = i->first(); + RecordStreamer::State Value = i->second; + if (Value == RecordStreamer::DefinedGlobal) + addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_DEFAULT); + else if (Value == RecordStreamer::Defined) + addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_INTERNAL); + else if (Value == RecordStreamer::Global || + Value == RecordStreamer::Used) + addAsmGlobalSymbolUndef(Key.data()); + } + return false; +} + +bool LTOModule::ParseSymbols() { // Use mangler to add GlobalPrefix to names to match linker names. 
MCContext Context(*_target->getMCAsmInfo(), NULL); Mangler mangler(Context, *_target->getTargetData()); @@ -482,30 +702,8 @@ void LTOModule::lazyParseSymbols() { } // add asm globals - const std::string &inlineAsm = _module->getModuleInlineAsm(); - const std::string glbl = ".globl"; - std::string asmSymbolName; - std::string::size_type pos = inlineAsm.find(glbl, 0); - while (pos != std::string::npos) { - // eat .globl - pos = pos + 6; - - // skip white space between .globl and symbol name - std::string::size_type pbegin = inlineAsm.find_first_not_of(' ', pos); - if (pbegin == std::string::npos) - break; - - // find end-of-line - std::string::size_type pend = inlineAsm.find_first_of('\n', pbegin); - if (pend == std::string::npos) - break; - - asmSymbolName.assign(inlineAsm, pbegin, pend - pbegin); - addAsmGlobalSymbol(asmSymbolName.c_str()); - - // search next .globl - pos = inlineAsm.find(glbl, pend); - } + if (addAsmGlobalSymbols(Context)) + return true; // add aliases for (Module::alias_iterator i = _module->alias_begin(), @@ -526,17 +724,16 @@ void LTOModule::lazyParseSymbols() { _symbols.push_back(info); } } + return false; } uint32_t LTOModule::getSymbolCount() { - lazyParseSymbols(); return _symbols.size(); } lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index) { - lazyParseSymbols(); if (index < _symbols.size()) return _symbols[index].attributes; else @@ -544,7 +741,6 @@ lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index) { } const char *LTOModule::getSymbolName(uint32_t index) { - lazyParseSymbols(); if (index < _symbols.size()) return _symbols[index].name; else diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h index 1794d81..21e8475 100644 --- a/tools/lto/LTOModule.h +++ b/tools/lto/LTOModule.h @@ -64,11 +64,14 @@ struct LTOModule { const char* getSymbolName(uint32_t index); llvm::Module * getLLVVMModule() { return _module.get(); } + const std::vector &getAsmUndefinedRefs() { + return _asm_undefines; + } private: 
LTOModule(llvm::Module* m, llvm::TargetMachine* t); - void lazyParseSymbols(); + bool ParseSymbols(); void addDefinedSymbol(llvm::GlobalValue* def, llvm::Mangler& mangler, bool isFunction); @@ -80,7 +83,10 @@ private: llvm::Mangler &mangler); void addDefinedDataSymbol(llvm::GlobalValue* v, llvm::Mangler &mangler); - void addAsmGlobalSymbol(const char *); + bool addAsmGlobalSymbols(llvm::MCContext &Context); + void addAsmGlobalSymbol(const char *, + lto_symbol_attributes scope); + void addAsmGlobalSymbolUndef(const char *); void addObjCClass(llvm::GlobalVariable* clgv); void addObjCCategory(llvm::GlobalVariable* clgv); void addObjCClassRef(llvm::GlobalVariable* clgv); @@ -103,11 +109,11 @@ private: llvm::OwningPtr _module; llvm::OwningPtr _target; - bool _symbolsParsed; std::vector _symbols; // _defines and _undefines only needed to disambiguate tentative definitions StringSet _defines; llvm::StringMap _undefines; + std::vector _asm_undefines; }; #endif // LTO_MODULE_H -- cgit v1.1