diff options
Diffstat (limited to 'tools/dsymutil')
-rw-r--r-- | tools/dsymutil/BinaryHolder.cpp | 111 | ||||
-rw-r--r-- | tools/dsymutil/BinaryHolder.h | 104 | ||||
-rw-r--r-- | tools/dsymutil/CMakeLists.txt | 14 | ||||
-rw-r--r-- | tools/dsymutil/DebugMap.cpp | 92 | ||||
-rw-r--r-- | tools/dsymutil/DebugMap.h | 142 | ||||
-rw-r--r-- | tools/dsymutil/DwarfLinker.cpp | 667 | ||||
-rw-r--r-- | tools/dsymutil/LLVMBuild.txt | 22 | ||||
-rw-r--r-- | tools/dsymutil/MachODebugMapParser.cpp | 241 | ||||
-rw-r--r-- | tools/dsymutil/Makefile | 17 | ||||
-rw-r--r-- | tools/dsymutil/dsymutil.cpp | 71 | ||||
-rw-r--r-- | tools/dsymutil/dsymutil.h | 39 |
11 files changed, 1520 insertions, 0 deletions
diff --git a/tools/dsymutil/BinaryHolder.cpp b/tools/dsymutil/BinaryHolder.cpp new file mode 100644 index 0000000..ad66105 --- /dev/null +++ b/tools/dsymutil/BinaryHolder.cpp @@ -0,0 +1,111 @@ +//===-- BinaryHolder.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This program is a utility that aims to be a drop-in replacement for +// Darwin's dsymutil. +// +//===----------------------------------------------------------------------===// + +#include "BinaryHolder.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace dsymutil { + +ErrorOr<MemoryBufferRef> +BinaryHolder::GetMemoryBufferForFile(StringRef Filename) { + if (Verbose) + outs() << "trying to open '" << Filename << "'\n"; + + // Try the currently mapped archive first as it doesn't involve any + // filesystem access. + if (auto ErrOrArchiveMember = GetArchiveMemberBuffer(Filename)) + return *ErrOrArchiveMember; + + // If the name ends with a closing paren, there is a good chance + // it is an archive member specification. + if (Filename.endswith(")")) + if (auto ErrOrArchiveMember = MapArchiveAndGetMemberBuffer(Filename)) + return *ErrOrArchiveMember; + + // Otherwise, just try opening a standard file. If this is an + // archive member specification and any of the above didn't handle it + // (either because the archive is not there anymore, or because the + // archive doesn't contain the requested member), this will still + // provide a sensible error message.
+ auto ErrOrFile = MemoryBuffer::getFileOrSTDIN(Filename); + if (auto Err = ErrOrFile.getError()) + return Err; + + if (Verbose) + outs() << "\tloaded file.\n"; + // A plain file replaces any previously mapped archive. + CurrentArchive.reset(); + CurrentMemoryBuffer = std::move(ErrOrFile.get()); + return CurrentMemoryBuffer->getMemBufferRef(); +} + +ErrorOr<MemoryBufferRef> +BinaryHolder::GetArchiveMemberBuffer(StringRef Filename) { + if (!CurrentArchive) + return make_error_code(errc::no_such_file_or_directory); + + // The request must start with the name of the current archive + // followed by an opening paren. + StringRef CurArchiveName = CurrentArchive->getFileName(); + if (!Filename.startswith(Twine(CurArchiveName, "(").str())) + return make_error_code(errc::no_such_file_or_directory); + + // Remove the archive name and the parens around the archive member name. + Filename = Filename.substr(CurArchiveName.size() + 1).drop_back(); + + for (const auto &Child : CurrentArchive->children()) { + if (auto NameOrErr = Child.getName()) + if (*NameOrErr == Filename) { + if (Verbose) + outs() << "\tfound member in current archive.\n"; + return Child.getMemoryBufferRef(); + } + } + + return make_error_code(errc::no_such_file_or_directory); +} + +ErrorOr<MemoryBufferRef> +BinaryHolder::MapArchiveAndGetMemberBuffer(StringRef Filename) { + // Everything before the first opening paren is the archive path. + StringRef ArchiveFilename = Filename.substr(0, Filename.find('(')); + + auto ErrOrBuff = MemoryBuffer::getFileOrSTDIN(ArchiveFilename); + if (auto Err = ErrOrBuff.getError()) + return Err; + + if (Verbose) + outs() << "\topened new archive '" << ArchiveFilename << "'\n"; + auto ErrOrArchive = object::Archive::create((*ErrOrBuff)->getMemBufferRef()); + if (auto Err = ErrOrArchive.getError()) + return Err; + + CurrentArchive = std::move(*ErrOrArchive); + CurrentMemoryBuffer = std::move(*ErrOrBuff); + + // The new archive is now the current one, so the member lookup + // against the current archive applies. + return GetArchiveMemberBuffer(Filename); +} + +ErrorOr<const object::ObjectFile &> +BinaryHolder::GetObjectFile(StringRef Filename) { + auto ErrOrMemBufferRef = GetMemoryBufferForFile(Filename); + if (auto Err = ErrOrMemBufferRef.getError()) + return Err; + + auto ErrOrObjectFile =
+ object::ObjectFile::createObjectFile(*ErrOrMemBufferRef); + if (auto Err = ErrOrObjectFile.getError()) + return Err; + + CurrentObjectFile = std::move(*ErrOrObjectFile); + return *CurrentObjectFile; +} +} +} diff --git a/tools/dsymutil/BinaryHolder.h b/tools/dsymutil/BinaryHolder.h new file mode 100644 index 0000000..04871b5 --- /dev/null +++ b/tools/dsymutil/BinaryHolder.h @@ -0,0 +1,104 @@ +//===-- BinaryHolder.h - Utility class for accessing binaries -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This program is a utility that aims to be a drop-in replacement for +// Darwin's dsymutil. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TOOLS_DSYMUTIL_BINARYHOLDER_H +#define LLVM_TOOLS_DSYMUTIL_BINARYHOLDER_H + +#include "llvm/Object/Archive.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorOr.h" + +namespace llvm { +namespace dsymutil { + +/// \brief The BinaryHolder class is responsible for creating and +/// owning ObjectFile objects and their underlying MemoryBuffer. This +/// is different from a simple OwningBinary in that it handles +/// access to archive members. +/// +/// As an optimization, this class will reuse an already mapped and +/// parsed Archive object if 2 successive requests target the same +/// archive file (which is always the case in debug maps). +/// Currently it only owns one memory buffer at any given time, +/// meaning that a mapping request will invalidate the previous memory +/// mapping.
+class BinaryHolder { + std::unique_ptr<object::Archive> CurrentArchive; + std::unique_ptr<MemoryBuffer> CurrentMemoryBuffer; + std::unique_ptr<object::ObjectFile> CurrentObjectFile; + /// Print a trace of the files and archives being opened to outs(). + bool Verbose; + + /// \brief Get the MemoryBufferRef for the file specification in \p + /// Filename from the current archive. + /// + /// This function performs no system calls; it just looks up a + /// potential match for the given \p Filename in the currently + /// mapped archive if there is one. + /// \returns an error if there is no current archive or if it has no + /// matching member. + ErrorOr<MemoryBufferRef> GetArchiveMemberBuffer(StringRef Filename); + + /// \brief Interpret \p Filename as an archive member specification, + /// map the corresponding archive to memory and return the + /// MemoryBufferRef corresponding to the described member. + ErrorOr<MemoryBufferRef> MapArchiveAndGetMemberBuffer(StringRef Filename); + + /// \brief Return the MemoryBufferRef that holds the memory + /// mapping for the given \p Filename. This function will try to + /// parse archive member specifications of the form + /// /path/to/archive.a(member.o). + /// + /// The returned MemoryBufferRef points to a buffer owned by this + /// object. The buffer is valid until the next call to + /// GetMemoryBufferForFile() on this object. + ErrorOr<MemoryBufferRef> GetMemoryBufferForFile(StringRef Filename); + +public: + BinaryHolder(bool Verbose) : Verbose(Verbose) {} + + /// \brief Get the ObjectFile designated by the \p Filename. This + /// might be an archive member specification of the form + /// /path/to/archive.a(member.o). + /// + /// Calling this function invalidates the previous mapping owned by + /// the BinaryHolder. + ErrorOr<const object::ObjectFile &> GetObjectFile(StringRef Filename); + + /// \brief Wraps GetObjectFile() to return a derived ObjectFile type.
+ template <typename ObjectFileType> + ErrorOr<const ObjectFileType &> GetFileAs(StringRef Filename) { + auto ErrOrObjFile = GetObjectFile(Filename); + if (auto Err = ErrOrObjFile.getError()) + return Err; + if (const auto *Derived = dyn_cast<ObjectFileType>(CurrentObjectFile.get())) + return *Derived; + // The file was loaded but is not of the requested type. + return make_error_code(object::object_error::invalid_file_type); + } + + /// \brief Access the currently owned ObjectFile. A successful + /// call to GetObjectFile() or GetFileAs() must have been performed + /// before calling this. + const object::ObjectFile &Get() { + assert(CurrentObjectFile); + return *CurrentObjectFile; + } + + /// \brief Access a derived version of the currently owned + /// ObjectFile. The conversion must be known to be valid. + template <typename ObjectFileType> const ObjectFileType &GetAs() { + return cast<ObjectFileType>(*CurrentObjectFile); + } +}; +} +} +#endif diff --git a/tools/dsymutil/CMakeLists.txt b/tools/dsymutil/CMakeLists.txt new file mode 100644 index 0000000..5e1f37f --- /dev/null +++ b/tools/dsymutil/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LLVM_LINK_COMPONENTS + DebugInfoDWARF + Object + Support + ) + +add_llvm_tool(llvm-dsymutil + dsymutil.cpp + BinaryHolder.cpp + DebugMap.cpp + DwarfLinker.cpp + MachODebugMapParser.cpp + ) + diff --git a/tools/dsymutil/DebugMap.cpp b/tools/dsymutil/DebugMap.cpp new file mode 100644 index 0000000..c04b2fe --- /dev/null +++ b/tools/dsymutil/DebugMap.cpp @@ -0,0 +1,92 @@ +//===- tools/dsymutil/DebugMap.cpp - Generic debug map representation -----===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +#include "DebugMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +namespace llvm { +namespace dsymutil { + +using namespace llvm::object; + +DebugMapObject::DebugMapObject(StringRef ObjectFilename) + : Filename(ObjectFilename) {} + +bool DebugMapObject::addSymbol(StringRef Name, uint64_t ObjectAddress, + uint64_t LinkedAddress) { + auto InsertResult = Symbols.insert( + std::make_pair(Name, SymbolMapping(ObjectAddress, LinkedAddress))); + + // Only a newly inserted symbol gets indexed by its object file + // address. + if (InsertResult.second) + AddressToMapping[ObjectAddress] = &*InsertResult.first; + return InsertResult.second; +} + +void DebugMapObject::print(raw_ostream &OS) const { + OS << getObjectFilename() << ":\n"; + // Sort the symbols in alphabetical order, like llvm-nm (and to get + // deterministic output for testing).
+ typedef std::pair<StringRef, SymbolMapping> Entry; + std::vector<Entry> Entries; + Entries.reserve(Symbols.getNumItems()); + for (const auto &Sym : make_range(Symbols.begin(), Symbols.end())) + Entries.push_back(std::make_pair(Sym.getKey(), Sym.getValue())); + std::sort( + Entries.begin(), Entries.end(), + [](const Entry &LHS, const Entry &RHS) { return LHS.first < RHS.first; }); + // Print one "object address => binary address, name" row per symbol. + // NOTE(review): %s needs a NUL-terminated string; presumably fine + // because the names are StringMap keys -- confirm. + for (const auto &Sym : Entries) { + OS << format("\t%016" PRIx64 " => %016" PRIx64 "\t%s\n", + Sym.second.ObjectAddress, Sym.second.BinaryAddress, + Sym.first.data()); + } + OS << '\n'; +} + +#ifndef NDEBUG +void DebugMapObject::dump() const { print(errs()); } +#endif + +/// Append a new DebugMapObject for \p ObjectFilePath to Objects and +/// return a reference to it. +DebugMapObject &DebugMap::addDebugMapObject(StringRef ObjectFilePath) { + Objects.emplace_back(new DebugMapObject(ObjectFilePath)); + return *Objects.back(); +} + +const DebugMapObject::DebugMapEntry * +DebugMapObject::lookupSymbol(StringRef SymbolName) const { + StringMap<SymbolMapping>::const_iterator Sym = Symbols.find(SymbolName); + if (Sym == Symbols.end()) + return nullptr; + return &*Sym; +} + +const DebugMapObject::DebugMapEntry * +DebugMapObject::lookupObjectAddress(uint64_t Address) const { + auto Mapping = AddressToMapping.find(Address); + if (Mapping == AddressToMapping.end()) + return nullptr; + return Mapping->getSecond(); +} + +void DebugMap::print(raw_ostream &OS) const { + OS << "DEBUG MAP: " << BinaryTriple.getTriple() + << "\n\tobject addr => executable addr\tsymbol name\n"; + for (const auto &Obj : objects()) + Obj->print(OS); + OS << "END DEBUG MAP\n"; +} + +#ifndef NDEBUG +void DebugMap::dump() const { print(errs()); } +#endif +} +} diff --git a/tools/dsymutil/DebugMap.h b/tools/dsymutil/DebugMap.h new file mode 100644 index 0000000..ff2b27e --- /dev/null +++ b/tools/dsymutil/DebugMap.h @@ -0,0 +1,142 @@ +//===- tools/dsymutil/DebugMap.h - Generic debug map representation -------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License.
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// This file contains the class declaration of the DebugMap +/// entity. A DebugMap lists all the object files linked together to +/// produce an executable along with the linked address of all the +/// atoms used in these object files. +/// The DebugMap is an input to the DwarfLinker class that will +/// extract the Dwarf debug information from the referenced object +/// files and link their useful debug info together. +/// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TOOLS_DSYMUTIL_DEBUGMAP_H +#define LLVM_TOOLS_DSYMUTIL_DEBUGMAP_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Format.h" +#include <vector> + +namespace llvm { +class raw_ostream; + +namespace dsymutil { +class DebugMapObject; + +/// \brief The DebugMap object stores the list of object files to +/// query for debug information along with the mapping from the +/// symbols' addresses in the object file to their linked address in +/// the linked binary. +/// +/// A DebugMap producer could look like this: +/// DebugMap *DM = new DebugMap(BinaryTriple); +/// for (const auto &Obj: LinkedObjects) { +/// DebugMapObject &DMO = DM->addDebugMapObject(Obj.getPath()); +/// for (const auto &Sym: Obj.getLinkedSymbols()) +/// DMO.addSymbol(Sym.getName(), Sym.getObjectFileAddress(), +/// Sym.getBinaryAddress()); +/// } +/// +/// A DebugMap consumer can then use the map to link the debug +/// information.
For example something along the lines of: +/// for (const auto &DMO: DM->objects()) { +/// auto Obj = createBinary(DMO->getObjectFilename()); +/// for (auto &DIE: Obj.getDwarfDIEs()) { +/// if (const auto *Sym = DMO->lookupSymbol(DIE.getName())) +/// DIE.relocate(Sym->getValue().ObjectAddress, +/// Sym->getValue().BinaryAddress); +/// else +/// DIE.discardSubtree(); +/// } +/// } +class DebugMap { + Triple BinaryTriple; + typedef std::vector<std::unique_ptr<DebugMapObject>> ObjectContainer; + ObjectContainer Objects; + +public: + DebugMap(const Triple &BinaryTriple) : BinaryTriple(BinaryTriple) {} + + typedef ObjectContainer::const_iterator const_iterator; + + iterator_range<const_iterator> objects() const { + return make_range(begin(), end()); + } + + const_iterator begin() const { return Objects.begin(); } + + const_iterator end() const { return Objects.end(); } + + /// This function adds a DebugMapObject to the list owned by this + /// debug map. + DebugMapObject &addDebugMapObject(StringRef ObjectFilePath); + + /// \returns the triple this debug map was constructed with. + /// NOTE(review): not const-qualified, unlike the other accessors. + const Triple &getTriple() { return BinaryTriple; } + + void print(raw_ostream &OS) const; + +#ifndef NDEBUG + void dump() const; +#endif +}; + +/// \brief The DebugMapObject represents one object file described by +/// the DebugMap. It contains a list of mappings between addresses in +/// the object file and in the linked binary for all the linked atoms +/// in this object file. +class DebugMapObject { +public: + struct SymbolMapping { + uint64_t ObjectAddress; + uint64_t BinaryAddress; + SymbolMapping(uint64_t ObjectAddress, uint64_t BinaryAddress) + : ObjectAddress(ObjectAddress), BinaryAddress(BinaryAddress) {} + }; + + typedef StringMapEntry<SymbolMapping> DebugMapEntry; + + /// \brief Adds a symbol mapping to this DebugMapObject. + /// \returns false if the symbol was already registered. The request + /// is discarded in this case. + bool addSymbol(llvm::StringRef SymName, uint64_t ObjectAddress, + uint64_t LinkedAddress); + + /// \brief Lookup a symbol mapping.
+ /// \returns null if the symbol isn't found. + const DebugMapEntry *lookupSymbol(StringRef SymbolName) const; + + /// \brief Look up an object file address. + /// \returns null if the address isn't found. + const DebugMapEntry *lookupObjectAddress(uint64_t Address) const; + + llvm::StringRef getObjectFilename() const { return Filename; } + + void print(raw_ostream &OS) const; +#ifndef NDEBUG + void dump() const; +#endif +private: + friend class DebugMap; + /// DebugMapObjects can only be constructed by the owning DebugMap. + DebugMapObject(StringRef ObjectFilename); + + std::string Filename; + StringMap<SymbolMapping> Symbols; + /// Maps an object file address to the entry of Symbols registered + /// with that address. + DenseMap<uint64_t, DebugMapEntry *> AddressToMapping; +}; +} +} + +#endif // LLVM_TOOLS_DSYMUTIL_DEBUGMAP_H diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp new file mode 100644 index 0000000..3c0bc0b --- /dev/null +++ b/tools/dsymutil/DwarfLinker.cpp @@ -0,0 +1,667 @@ +//===- tools/dsymutil/DwarfLinker.cpp - Dwarf debug info linker -----------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "DebugMap.h" +#include "BinaryHolder.h" +#include "DebugMap.h" +#include "dsymutil.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/LEB128.h" +#include <string> + +namespace llvm { +namespace dsymutil { + +namespace { + +/// \brief Stores all information relating to a compile unit, be it in +/// its original instance in the object file or its brand new cloned +/// and linked DIE tree. +class CompileUnit { +public: + /// \brief Information gathered about a DIE in the object file.
+ struct DIEInfo { + uint64_t Address; ///< Linked address of the described entity. + uint32_t ParentIdx; ///< The index of this DIE's parent. + bool Keep; ///< Is the DIE part of the linked output? + bool InDebugMap; ///< Was this DIE's entity found in the map? + }; + + /// Create one DIEInfo slot per DIE of \p OrigUnit. + CompileUnit(DWARFUnit &OrigUnit) : OrigUnit(OrigUnit) { + Info.resize(OrigUnit.getNumDIEs()); + } + + DWARFUnit &getOrigUnit() const { return OrigUnit; } + + DIEInfo &getInfo(unsigned Idx) { return Info[Idx]; } + const DIEInfo &getInfo(unsigned Idx) const { return Info[Idx]; } + +private: + DWARFUnit &OrigUnit; + std::vector<DIEInfo> Info; ///< DIE info indexed by DIE index. +}; + +/// \brief The core of the Dwarf linking logic. +/// +/// The link of the dwarf information from the object files will be +/// driven by the selection of 'root DIEs', which are DIEs that +/// describe variables or functions that are present in the linked +/// binary (and thus have entries in the debug map). All the debug +/// information that will be linked (the DIEs, but also the line +/// tables, ranges, ...) is derived from that set of root DIEs. +/// +/// The root DIEs are identified because they contain relocations that +/// correspond to a debug map entry at specific places (the low_pc for +/// a function, the location for a variable). These relocations are +/// called ValidRelocs in the DwarfLinker and are gathered as a very +/// first step when we start processing a DebugMapObject. +class DwarfLinker { +public: + /// NOTE(review): NextValidReloc and CurrentDebugObject are not + /// initialized here; presumably they are set before being read -- + /// confirm. + DwarfLinker(StringRef OutputFilename, bool Verbose) + : OutputFilename(OutputFilename), Verbose(Verbose), BinHolder(Verbose) {} + + /// \brief Link the contents of the DebugMap. + bool link(const DebugMap &); + +private: + /// \brief Called at the start of a debug object link. + void startDebugObject(DWARFContext &); + + /// \brief Called at the end of a debug object link.
+ void endDebugObject(); + + /// \defgroup FindValidRelocations Translate debug map into a list + /// of relevant relocations + /// + /// @{ + struct ValidReloc { + uint32_t Offset; + uint32_t Size; + uint64_t Addend; + const DebugMapObject::DebugMapEntry *Mapping; + + ValidReloc(uint32_t Offset, uint32_t Size, uint64_t Addend, + const DebugMapObject::DebugMapEntry *Mapping) + : Offset(Offset), Size(Size), Addend(Addend), Mapping(Mapping) {} + + /// Relocations are ordered by their offset only. + bool operator<(const ValidReloc &RHS) const { return Offset < RHS.Offset; } + }; + + /// \brief The valid relocations for the current DebugMapObject. + /// This vector is sorted by relocation offset. + std::vector<ValidReloc> ValidRelocs; + + /// \brief Index into ValidRelocs of the next relocation to + /// consider. As we walk the DIEs in ascending file offset and as + /// ValidRelocs is sorted by file offset, keeping this index + /// up to date is all we have to do to have a cheap lookup during the + /// root DIE selection. + unsigned NextValidReloc; + + bool findValidRelocsInDebugInfo(const object::ObjectFile &Obj, + const DebugMapObject &DMO); + + bool findValidRelocs(const object::SectionRef &Section, + const object::ObjectFile &Obj, + const DebugMapObject &DMO); + + void findValidRelocsMachO(const object::SectionRef &Section, + const object::MachOObjectFile &Obj, + const DebugMapObject &DMO); + /// @} + + /// \defgroup FindRootDIEs Find DIEs corresponding to debug map entries. + /// + /// @{ + /// \brief Recursively walk the \p DIE tree and look for DIEs to + /// keep. Store that information in \p CU's DIEInfo. + void lookForDIEsToKeep(const DWARFDebugInfoEntryMinimal &DIE, + const DebugMapObject &DMO, CompileUnit &CU, + unsigned Flags); + + /// \brief Flags passed to DwarfLinker::lookForDIEsToKeep + enum TravesalFlags { + TF_Keep = 1 << 0, ///< Mark the traversed DIEs as kept. + TF_InFunctionScope = 1 << 1, ///< Current scope is a function scope.
+ TF_DependencyWalk = 1 << 2, ///< Walking the dependencies of a kept DIE. + TF_ParentWalk = 1 << 3, ///< Walking up the parents of a kept DIE. + }; + + /// \brief Mark the passed DIE as well as all the ones it depends on + /// as kept. + /// NOTE(review): the name misspells "Dependencies"; fixing it means + /// renaming the definition and every caller too. + void keepDIEAndDenpendencies(const DWARFDebugInfoEntryMinimal &DIE, + CompileUnit::DIEInfo &MyInfo, + const DebugMapObject &DMO, CompileUnit &CU, + unsigned Flags); + + unsigned shouldKeepDIE(const DWARFDebugInfoEntryMinimal &DIE, + CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo, + unsigned Flags); + + unsigned shouldKeepVariableDIE(const DWARFDebugInfoEntryMinimal &DIE, + CompileUnit &Unit, + CompileUnit::DIEInfo &MyInfo, unsigned Flags); + + unsigned shouldKeepSubprogramDIE(const DWARFDebugInfoEntryMinimal &DIE, + CompileUnit &Unit, + CompileUnit::DIEInfo &MyInfo, + unsigned Flags); + + bool hasValidRelocation(uint32_t StartOffset, uint32_t EndOffset, + CompileUnit::DIEInfo &Info); + /// @} + + /// \defgroup Helpers Various helper methods. + /// + /// @{ + const DWARFDebugInfoEntryMinimal * + resolveDIEReference(DWARFFormValue &RefValue, const DWARFUnit &Unit, + const DWARFDebugInfoEntryMinimal &DIE, + CompileUnit *&ReferencedCU); + + CompileUnit *getUnitForOffset(unsigned Offset); + + void reportWarning(const Twine &Warning, const DWARFUnit *Unit = nullptr, + const DWARFDebugInfoEntryMinimal *DIE = nullptr); + /// @} + +private: + std::string OutputFilename; + bool Verbose; + BinaryHolder BinHolder; + + /// The units of the current debug map object. + std::vector<CompileUnit> Units; + + /// The debug map object currently under consideration. + DebugMapObject *CurrentDebugObject; +}; + +/// \brief Similar to DWARFUnitSection::getUnitForOffset(), but +/// returning our CompileUnit object instead.
+CompileUnit *DwarfLinker::getUnitForOffset(unsigned Offset) { + // Find the first unit whose next-unit offset is greater than + // \p Offset. + auto CU = + std::upper_bound(Units.begin(), Units.end(), Offset, + [](uint32_t LHS, const CompileUnit &RHS) { + return LHS < RHS.getOrigUnit().getNextUnitOffset(); + }); + return CU != Units.end() ? &*CU : nullptr; +} + +/// \brief Resolve the DIE attribute reference that has been +/// extracted in \p RefValue. The resulting DIE might be in another +/// CompileUnit which is stored into \p RefCU. +/// \returns null if resolving fails for any reason. +const DWARFDebugInfoEntryMinimal *DwarfLinker::resolveDIEReference( + DWARFFormValue &RefValue, const DWARFUnit &Unit, + const DWARFDebugInfoEntryMinimal &DIE, CompileUnit *&RefCU) { + assert(RefValue.isFormClass(DWARFFormValue::FC_Reference)); + // NOTE(review): the result of getAsReference() is dereferenced + // unchecked; presumably the assert above guarantees it holds a + // value -- confirm. + uint64_t RefOffset = *RefValue.getAsReference(&Unit); + + if ((RefCU = getUnitForOffset(RefOffset))) + if (const auto *RefDie = RefCU->getOrigUnit().getDIEForOffset(RefOffset)) + return RefDie; + + reportWarning("could not find referenced DIE", &Unit, &DIE); + return nullptr; +} + +/// \brief Report a warning to the user, optionally including +/// information about a specific \p DIE related to the warning. +void DwarfLinker::reportWarning(const Twine &Warning, const DWARFUnit *Unit, + const DWARFDebugInfoEntryMinimal *DIE) { + if (CurrentDebugObject) + errs() << Twine("while processing ") + + CurrentDebugObject->getObjectFilename() + ":\n"; + errs() << Twine("warning: ") + Warning + "\n"; + + // The DIE is only dumped in verbose mode. + if (!Verbose || !DIE) + return; + + errs() << " in DIE:\n"; + DIE->dump(errs(), const_cast<DWARFUnit *>(Unit), 0 /* RecurseDepth */, + 6 /* Indent */); +} + +/// \brief Recursive helper to gather the child->parent relationships in the +/// original compile unit.
+static void gatherDIEParents(const DWARFDebugInfoEntryMinimal *DIE, + unsigned ParentIdx, CompileUnit &CU) { + unsigned MyIdx = CU.getOrigUnit().getDIEIndex(DIE); + CU.getInfo(MyIdx).ParentIdx = ParentIdx; + + if (DIE->hasChildren()) + for (auto *Child = DIE->getFirstChild(); Child && !Child->isNULL(); + Child = Child->getSibling()) + gatherDIEParents(Child, MyIdx, CU); +} + +/// \brief Tell whether \p Tag is one of the subprogram, lexical block, +/// subroutine type, structure type, class type or union type tags. +static bool dieNeedsChildrenToBeMeaningful(uint32_t Tag) { + switch (Tag) { + default: + return false; + case dwarf::DW_TAG_subprogram: + case dwarf::DW_TAG_lexical_block: + case dwarf::DW_TAG_subroutine_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_union_type: + return true; + } + llvm_unreachable("Invalid Tag"); +} + +/// \brief Reserve room for the compile units of \p Dwarf and reset +/// the NextValidReloc index. +void DwarfLinker::startDebugObject(DWARFContext &Dwarf) { + Units.reserve(Dwarf.getNumCompileUnits()); + NextValidReloc = 0; +} + +/// \brief Drop the units and valid relocations of the object that was +/// just processed. +void DwarfLinker::endDebugObject() { + Units.clear(); + ValidRelocs.clear(); +} + +/// \brief Iterate over the relocations of the given \p Section and +/// store the ones that correspond to debug map entries into the +/// ValidRelocs array. +void DwarfLinker::findValidRelocsMachO(const object::SectionRef &Section, + const object::MachOObjectFile &Obj, + const DebugMapObject &DMO) { + StringRef Contents; + // NOTE(review): the result of getContents() is not checked. + Section.getContents(Contents); + DataExtractor Data(Contents, Obj.isLittleEndian(), 0); + + for (const object::RelocationRef &Reloc : Section.relocations()) { + object::DataRefImpl RelocDataRef = Reloc.getRawDataRefImpl(); + MachO::any_relocation_info MachOReloc = Obj.getRelocation(RelocDataRef); + unsigned RelocSize = 1 << Obj.getAnyRelocationLength(MachOReloc); + uint64_t Offset64; + // Only 4 and 8 byte relocations with a readable offset are handled. + if ((RelocSize != 4 && RelocSize != 8) || Reloc.getOffset(Offset64)) { + reportWarning(" unsupported relocation in debug_info section."); + continue; + } + uint32_t Offset = Offset64; + // Mach-O uses REL relocations, the addend is at the relocation offset.
+ uint64_t Addend = Data.getUnsigned(&Offset, RelocSize); + + auto Sym = Reloc.getSymbol(); + if (Sym != Obj.symbol_end()) { + StringRef SymbolName; + if (Sym->getName(SymbolName)) { + reportWarning("error getting relocation symbol name."); + continue; + } + if (const auto *Mapping = DMO.lookupSymbol(SymbolName)) + ValidRelocs.emplace_back(Offset64, RelocSize, Addend, Mapping); + } else if (const auto *Mapping = DMO.lookupObjectAddress(Addend)) { + // Do not store the addend. The addend was the address of the + // symbol in the object file, the address in the binary that is + // stored in the debug map doesn't need to be offset. + ValidRelocs.emplace_back(Offset64, RelocSize, 0, Mapping); + } + } +} + +/// \brief Dispatch the valid relocation finding logic to the +/// appropriate handler depending on the object file format. +/// \returns true if ValidRelocs is not empty afterwards. +bool DwarfLinker::findValidRelocs(const object::SectionRef &Section, + const object::ObjectFile &Obj, + const DebugMapObject &DMO) { + // Dispatch to the right handler depending on the file type. + if (auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Obj)) + findValidRelocsMachO(Section, *MachOObj, DMO); + else + reportWarning(Twine("unsupported object file type: ") + Obj.getFileName()); + + if (ValidRelocs.empty()) + return false; + + // Sort the relocations by offset. We will walk the DIEs linearly in + // the file, this allows us to just keep an index in the relocation + // array that we advance during our walk, rather than resorting to + // some associative container. See DwarfLinker::NextValidReloc. + std::sort(ValidRelocs.begin(), ValidRelocs.end()); + return true; +} + +/// \brief Look for relocations in the debug_info section that match +/// entries in the debug map. These relocations will drive the Dwarf +/// link by indicating which DIEs refer to symbols present in the +/// linked binary. +/// \returns whether there are any valid relocations in the debug info.
+bool DwarfLinker::findValidRelocsInDebugInfo(const object::ObjectFile &Obj, + const DebugMapObject &DMO) { + // Find the debug_info section. + for (const object::SectionRef &Section : Obj.sections()) { + StringRef SectionName; + Section.getName(SectionName); + // Ignore any leading '.' or '_' characters in the section name. + SectionName = SectionName.substr(SectionName.find_first_not_of("._")); + if (SectionName != "debug_info") + continue; + return findValidRelocs(Section, Obj, DMO); + } + return false; +} + +/// \brief Checks that there is a relocation against an actual debug +/// map entry between \p StartOffset and \p EndOffset. +/// +/// This function must be called with offsets in strictly ascending +/// order because it never looks back at relocations it already 'went past'. +/// \returns true and sets Info.Address and Info.InDebugMap if it is +/// the case. +bool DwarfLinker::hasValidRelocation(uint32_t StartOffset, uint32_t EndOffset, + CompileUnit::DIEInfo &Info) { + assert(NextValidReloc == 0 || + StartOffset > ValidRelocs[NextValidReloc - 1].Offset); + if (NextValidReloc >= ValidRelocs.size()) + return false; + + uint64_t RelocOffset = ValidRelocs[NextValidReloc].Offset; + + // We might need to skip some relocs that we didn't consider. For + // example the high_pc of a discarded DIE might contain a reloc that + // is in the list because it actually corresponds to the start of a + // function that is in the debug map.
+ while (RelocOffset < StartOffset && NextValidReloc < ValidRelocs.size() - 1) + RelocOffset = ValidRelocs[++NextValidReloc].Offset; + + // No relocation falls inside [StartOffset, EndOffset). + if (RelocOffset < StartOffset || RelocOffset >= EndOffset) + return false; + + // Consume the matching relocation. + const auto &ValidReloc = ValidRelocs[NextValidReloc++]; + if (Verbose) + outs() << "Found valid debug map entry: " << ValidReloc.Mapping->getKey() + << " " << format("\t%016" PRIx64 " => %016" PRIx64, + ValidReloc.Mapping->getValue().ObjectAddress, + ValidReloc.Mapping->getValue().BinaryAddress); + + Info.Address = + ValidReloc.Mapping->getValue().BinaryAddress + ValidReloc.Addend; + Info.InDebugMap = true; + return true; +} + +/// \brief Get the starting and ending (exclusive) offset for the +/// attribute with index \p Idx described by \p Abbrev. \p Offset is +/// supposed to point to the position of the first attribute described +/// by \p Abbrev. +/// \return [StartOffset, EndOffset) as a pair. +static std::pair<uint32_t, uint32_t> +getAttributeOffsets(const DWARFAbbreviationDeclaration *Abbrev, unsigned Idx, + unsigned Offset, const DWARFUnit &Unit) { + DataExtractor Data = Unit.getDebugInfoExtractor(); + + // Skip the attributes that precede the requested one. + for (unsigned i = 0; i < Idx; ++i) + DWARFFormValue::skipValue(Abbrev->getFormByIndex(i), Data, &Offset, &Unit); + + uint32_t End = Offset; + DWARFFormValue::skipValue(Abbrev->getFormByIndex(Idx), Data, &End, &Unit); + + return std::make_pair(Offset, End); +} + +/// \brief Check if a variable describing DIE should be kept. +/// \returns updated TraversalFlags. +unsigned DwarfLinker::shouldKeepVariableDIE( + const DWARFDebugInfoEntryMinimal &DIE, CompileUnit &Unit, + CompileUnit::DIEInfo &MyInfo, unsigned Flags) { + const auto *Abbrev = DIE.getAbbreviationDeclarationPtr(); + + // Global variables with constant value can always be kept.
+ if (!(Flags & TF_InFunctionScope) && + Abbrev->findAttributeIndex(dwarf::DW_AT_const_value) != -1U) { + MyInfo.InDebugMap = true; + return Flags | TF_Keep; + } + + uint32_t LocationIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_location); + if (LocationIdx == -1U) + return Flags; + + uint32_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode()); + const DWARFUnit &OrigUnit = Unit.getOrigUnit(); + uint32_t LocationOffset, LocationEndOffset; + std::tie(LocationOffset, LocationEndOffset) = + getAttributeOffsets(Abbrev, LocationIdx, Offset, OrigUnit); + + // See if there is a relocation to a valid debug map entry inside + // this variable's location. The order is important here. We want to + // always check in the variable has a valid relocation, so that the + // DIEInfo is filled. However, we don't want a static variable in a + // function to force us to keep the enclosing function. + if (!hasValidRelocation(LocationOffset, LocationEndOffset, MyInfo) || + (Flags & TF_InFunctionScope)) + return Flags; + + if (Verbose) + DIE.dump(outs(), const_cast<DWARFUnit *>(&OrigUnit), 0, 8 /* Indent */); + + return Flags | TF_Keep; +} + +/// \brief Check if a function describing DIE should be kept. +/// \returns updated TraversalFlags. 
+unsigned DwarfLinker::shouldKeepSubprogramDIE(
+    const DWARFDebugInfoEntryMinimal &DIE, CompileUnit &Unit,
+    CompileUnit::DIEInfo &MyInfo, unsigned Flags) {
+  const auto *Abbrev = DIE.getAbbreviationDeclarationPtr();
+
+  // TF_InFunctionScope is added to the returned flags on every path.
+  Flags |= TF_InFunctionScope;
+
+  uint32_t LowPcIdx = Abbrev->findAttributeIndex(dwarf::DW_AT_low_pc);
+  if (LowPcIdx == -1U)
+    return Flags;
+
+  // The first attribute starts right after the ULEB128 abbreviation code.
+  uint32_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode());
+  const DWARFUnit &OrigUnit = Unit.getOrigUnit();
+  uint32_t LowPcOffset, LowPcEndOffset;
+  std::tie(LowPcOffset, LowPcEndOffset) =
+      getAttributeOffsets(Abbrev, LowPcIdx, Offset, OrigUnit);
+
+  // -1ULL is the failure value requested from getAttributeValueAsAddress().
+  uint64_t LowPc =
+      DIE.getAttributeValueAsAddress(&OrigUnit, dwarf::DW_AT_low_pc, -1ULL);
+  assert(LowPc != -1ULL && "low_pc attribute is not an address.");
+  if (LowPc == -1ULL ||
+      !hasValidRelocation(LowPcOffset, LowPcEndOffset, MyInfo))
+    return Flags;
+
+  if (Verbose)
+    DIE.dump(outs(), const_cast<DWARFUnit *>(&OrigUnit), 0, 8 /* Indent */);
+
+  return Flags | TF_Keep;
+}
+
+/// \brief Check if a DIE should be kept.
+///
+/// Variables and constants are handled by shouldKeepVariableDIE(),
+/// subprograms by shouldKeepSubprogramDIE(). Any tag that is not listed
+/// in the switch leaves \p Flags untouched.
+/// \returns updated TraversalFlags.
+unsigned DwarfLinker::shouldKeepDIE(const DWARFDebugInfoEntryMinimal &DIE,
+                                    CompileUnit &Unit,
+                                    CompileUnit::DIEInfo &MyInfo,
+                                    unsigned Flags) {
+  switch (DIE.getTag()) {
+  case dwarf::DW_TAG_constant:
+  case dwarf::DW_TAG_variable:
+    return shouldKeepVariableDIE(DIE, Unit, MyInfo, Flags);
+  case dwarf::DW_TAG_subprogram:
+    return shouldKeepSubprogramDIE(DIE, Unit, MyInfo, Flags);
+  case dwarf::DW_TAG_module:
+  case dwarf::DW_TAG_imported_module:
+  case dwarf::DW_TAG_imported_declaration:
+  case dwarf::DW_TAG_imported_unit:
+    // We always want to keep these.
+    return Flags | TF_Keep;
+  }
+
+  return Flags;
+}
+
+
+/// \brief Mark the passed DIE as well as all the ones it depends on
+/// as kept.
+///
+/// This function is called by lookForDIEsToKeep on DIEs that are
+/// newly discovered to be needed in the link. It recursively calls
+/// back to lookForDIEsToKeep while adding TF_DependencyWalk to the
+/// TraversalFlags to inform it that it's not doing the primary DIE
+/// tree walk.
+void DwarfLinker::keepDIEAndDenpendencies(const DWARFDebugInfoEntryMinimal &DIE,
+                                          CompileUnit::DIEInfo &MyInfo,
+                                          const DebugMapObject &DMO,
+                                          CompileUnit &CU, unsigned Flags) {
+  const DWARFUnit &Unit = CU.getOrigUnit();
+  MyInfo.Keep = true;
+
+  // First mark all the parent chain as kept. The walk stops at the first
+  // ancestor that is already marked.
+  unsigned AncestorIdx = MyInfo.ParentIdx;
+  while (!CU.getInfo(AncestorIdx).Keep) {
+    lookForDIEsToKeep(*Unit.getDIEAtIndex(AncestorIdx), DMO, CU,
+                      TF_ParentWalk | TF_Keep | TF_DependencyWalk);
+    AncestorIdx = CU.getInfo(AncestorIdx).ParentIdx;
+  }
+
+  // Then we need to mark all the DIEs referenced by this DIE's
+  // attributes as kept.
+  DataExtractor Data = Unit.getDebugInfoExtractor();
+  const auto *Abbrev = DIE.getAbbreviationDeclarationPtr();
+  uint32_t Offset = DIE.getOffset() + getULEB128Size(Abbrev->getCode());
+
+  // Mark all DIEs referenced through attributes as kept.
+  for (const auto &AttrSpec : Abbrev->attributes()) {
+    DWARFFormValue Val(AttrSpec.Form);
+
+    // Non-reference attributes are only skipped so that Offset stays in
+    // sync with the attribute being examined.
+    if (!Val.isFormClass(DWARFFormValue::FC_Reference)) {
+      DWARFFormValue::skipValue(AttrSpec.Form, Data, &Offset, &Unit);
+      continue;
+    }
+
+    Val.extractValue(Data, &Offset, &Unit);
+    // NOTE(review): ReferencedCU is only read when resolveDIEReference()
+    // returns a DIE; presumably it is always set in that case - confirm.
+    CompileUnit *ReferencedCU;
+    if (const auto *RefDIE = resolveDIEReference(Val, Unit, DIE, ReferencedCU))
+      lookForDIEsToKeep(*RefDIE, DMO, *ReferencedCU,
+                        TF_Keep | TF_DependencyWalk);
+  }
+}
+
+/// \brief Recursively walk the \p DIE tree and look for DIEs to
+/// keep. Store that information in \p CU's DIEInfo.
+///
+/// This function is the entry point of the DIE selection
+/// algorithm. It is expected to walk the DIE tree in file order and
+/// (through the mediation of its helpers) call hasValidRelocation() on
+/// each DIE that might be a 'root DIE' (See DwarfLinker class
+/// comment).
+/// While walking the dependencies of root DIEs, this function is
+/// also called, but during these dependency walks the file order is
+/// not respected. The TF_DependencyWalk flag tells us which kind of
+/// traversal we are currently doing.
+void DwarfLinker::lookForDIEsToKeep(const DWARFDebugInfoEntryMinimal &DIE,
+                                    const DebugMapObject &DMO, CompileUnit &CU,
+                                    unsigned Flags) {
+  unsigned Idx = CU.getOrigUnit().getDIEIndex(&DIE);
+  CompileUnit::DIEInfo &MyInfo = CU.getInfo(Idx);
+  bool AlreadyKept = MyInfo.Keep;
+
+  // If the TF_DependencyWalk flag is set, we are marking a required
+  // DIE's dependencies. If our target is already marked as kept, we're
+  // all set.
+  if ((Flags & TF_DependencyWalk) && AlreadyKept)
+    return;
+
+  // We must not call shouldKeepDIE while called from keepDIEAndDenpendencies,
+  // because it would screw up the relocation finding logic.
+  if (!(Flags & TF_DependencyWalk))
+    Flags = shouldKeepDIE(DIE, CU, MyInfo, Flags);
+
+  // If it is a newly kept DIE mark it as well as all its dependencies as kept.
+  if (!AlreadyKept && (Flags & TF_Keep))
+    keepDIEAndDenpendencies(DIE, MyInfo, DMO, CU, Flags);
+
+  // The TF_ParentWalk flag tells us that we are currently walking up
+  // the parent chain of a required DIE, and we don't want to mark all
+  // the children of the parents as kept (consider for example a
+  // DW_TAG_namespace node in the parent chain). There are however a
+  // set of DIE types for which we want to ignore that directive and still
+  // walk their children.
+  if (dieNeedsChildrenToBeMeaningful(DIE.getTag()))
+    Flags &= ~TF_ParentWalk;
+
+  if (!DIE.hasChildren() || (Flags & TF_ParentWalk))
+    return;
+
+  // Recurse into the children, stopping at the terminating NULL entry.
+  for (auto *Child = DIE.getFirstChild(); Child && !Child->isNULL();
+       Child = Child->getSibling())
+    lookForDIEsToKeep(*Child, DMO, CU, Flags);
+}
+
+/// \brief Process every object file of the debug map \p Map and mark
+/// the DIEs that need to be kept.
+///
+/// Objects that cannot be opened are reported with a warning and
+/// skipped, as are objects for which findValidRelocsInDebugInfo()
+/// returns false.
+/// \returns false if \p Map is empty, true otherwise.
+bool DwarfLinker::link(const DebugMap &Map) {
+
+  if (Map.begin() == Map.end()) {
+    errs() << "Empty debug map.\n";
+    return false;
+  }
+
+  for (const auto &Obj : Map.objects()) {
+    CurrentDebugObject = Obj.get();
+
+    if (Verbose)
+      outs() << "DEBUG MAP OBJECT: " << Obj->getObjectFilename() << "\n";
+    auto ErrOrObj = BinHolder.GetObjectFile(Obj->getObjectFilename());
+    if (std::error_code EC = ErrOrObj.getError()) {
+      reportWarning(Twine(Obj->getObjectFilename()) + ": " + EC.message());
+      continue;
+    }
+
+    // Look for relocations that correspond to debug map entries.
+    if (!findValidRelocsInDebugInfo(*ErrOrObj, *Obj)) {
+      if (Verbose)
+        outs() << "No valid relocations found. Skipping.\n";
+      continue;
+    }
+
+    // Setup access to the debug info.
+    DWARFContextInMemory DwarfContext(*ErrOrObj);
+    startDebugObject(DwarfContext);
+
+    // In a first phase, just read in the debug info and store the DIE
+    // parent links that we will use during the next phase.
+    for (const auto &CU : DwarfContext.compile_units()) {
+      auto *CUDie = CU->getCompileUnitDIE(false);
+      if (Verbose) {
+        outs() << "Input compilation unit:";
+        CUDie->dump(outs(), CU.get(), 0);
+      }
+      Units.emplace_back(*CU);
+      gatherDIEParents(CUDie, 0, Units.back());
+    }
+
+    // Then mark all the DIEs that need to be present in the linked
+    // output and collect some information about them. Note that this
+    // loop can not be merged with the previous one because cross-cu
+    // references require the ParentIdx to be setup for every CU in
+    // the object file before calling this.
+    for (auto &CurrentUnit : Units)
+      lookForDIEsToKeep(*CurrentUnit.getOrigUnit().getCompileUnitDIE(), *Obj,
+                        CurrentUnit, 0);
+
+    // Clean-up before starting working on the next object.
+    endDebugObject();
+  }
+
+  return true;
+}
+}
+
+/// Construct a DwarfLinker for \p OutputFilename and run it on \p DM.
+/// \returns the result of DwarfLinker::link().
+bool linkDwarf(StringRef OutputFilename, const DebugMap &DM, bool Verbose) {
+  DwarfLinker Linker(OutputFilename, Verbose);
+  return Linker.link(DM);
+}
+}
+}
diff --git a/tools/dsymutil/LLVMBuild.txt b/tools/dsymutil/LLVMBuild.txt
new file mode 100644
index 0000000..c995291
--- /dev/null
+++ b/tools/dsymutil/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/dsymutil/LLVMBuild.txt ---------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-dsymutil
+parent = Tools
+required_libraries = DebugInfoDWARF Object Support
diff --git a/tools/dsymutil/MachODebugMapParser.cpp b/tools/dsymutil/MachODebugMapParser.cpp
new file mode 100644
index 0000000..7bb0011
--- /dev/null
+++ b/tools/dsymutil/MachODebugMapParser.cpp
@@ -0,0 +1,241 @@
+//===- tools/dsymutil/MachODebugMapParser.cpp - Parse STABS debug maps ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryHolder.h"
+#include "DebugMap.h"
+#include "dsymutil.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace {
+using namespace llvm;
+using namespace llvm::dsymutil;
+using namespace llvm::object;
+
+/// Builds a DebugMap from the STAB entries found in the symbol table of
+/// a MachO binary.
+class MachODebugMapParser {
+public:
+  MachODebugMapParser(StringRef BinaryPath, StringRef PathPrefix = "",
+                      bool Verbose = false)
+      : BinaryPath(BinaryPath), PathPrefix(PathPrefix),
+        MainBinaryHolder(Verbose), CurrentObjectHolder(Verbose),
+        CurrentDebugMapObject(nullptr) {}
+
+  /// \brief Parses and returns the DebugMap of the input binary.
+  /// \returns an error in case the provided BinaryPath doesn't exist
+  /// or isn't of a supported type.
+  ErrorOr<std::unique_ptr<DebugMap>> parse();
+
+private:
+  std::string BinaryPath;
+  /// Prefix prepended to the object file paths found in the debug map.
+  std::string PathPrefix;
+
+  /// Owns the MemoryBuffer for the main binary.
+  BinaryHolder MainBinaryHolder;
+  /// Map of the binary symbol addresses.
+  StringMap<uint64_t> MainBinarySymbolAddresses;
+  /// String table of the main binary; STAB name indices point into it.
+  StringRef MainBinaryStrings;
+  /// The constructed DebugMap.
+  std::unique_ptr<DebugMap> Result;
+
+  /// Owns the MemoryBuffer for the currently handled object file.
+  BinaryHolder CurrentObjectHolder;
+  /// Map of the currently processed object file symbol addresses.
+  StringMap<uint64_t> CurrentObjectAddresses;
+  /// Element of the debug map corresponding to the current object file.
+  DebugMapObject *CurrentDebugMapObject;
+
+  void switchToNewDebugMapObject(StringRef Filename);
+  void resetParserState();
+  uint64_t getMainBinarySymbolAddress(StringRef Name);
+  void loadMainBinarySymbols();
+  void loadCurrentObjectFileSymbols();
+  void handleStabSymbolTableEntry(uint32_t StringIndex, uint8_t Type,
+                                  uint8_t SectionIndex, uint16_t Flags,
+                                  uint64_t Value);
+
+  /// Unpack the fields of a 32 or 64 bit symbol table entry and forward
+  /// them to handleStabSymbolTableEntry().
+  template <typename STEType> void handleStabDebugMapEntry(const STEType &STE) {
+    handleStabSymbolTableEntry(STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc,
+                               STE.n_value);
+  }
+};
+
+/// Print \p Msg on the error stream, prefixed with "warning: " and
+/// followed by a newline.
+static void Warning(const Twine &Msg) { errs() << "warning: " + Msg + "\n"; }
+}
+
+/// Reset the parser state corresponding to the current object
+/// file. This is to be called after an object file is finished
+/// processing.
+void MachODebugMapParser::resetParserState() {
+  CurrentObjectAddresses.clear();
+  CurrentDebugMapObject = nullptr;
+}
+
+/// Create a new DebugMapObject. This function resets the state of the
+/// parser that was referring to the last object file and sets
+/// everything up to add symbols to the new one.
+void MachODebugMapParser::switchToNewDebugMapObject(StringRef Filename) {
+  resetParserState();
+
+  SmallString<80> Path(PathPrefix);
+  sys::path::append(Path, Filename);
+
+  auto MachOOrError = CurrentObjectHolder.GetFileAs<MachOObjectFile>(Path);
+  if (auto Error = MachOOrError.getError()) {
+    // Warning() already terminates the line, do not add another newline.
+    Warning(Twine("cannot open debug object \"") + Path.str() + "\": " +
+            Error.message());
+    return;
+  }
+
+  loadCurrentObjectFileSymbols();
+  CurrentDebugMapObject = &Result->addDebugMapObject(Path);
+}
+
+/// Build the Triple of \p Obj: only the architecture and the MachO
+/// object format are set, every other component stays unknown.
+static Triple getTriple(const object::MachOObjectFile &Obj) {
+  Triple TheTriple("unknown-unknown-unknown");
+  TheTriple.setArch(Triple::ArchType(Obj.getArch()));
+  TheTriple.setObjectFormat(Triple::MachO);
+  return TheTriple;
+}
+
+/// This main parsing routine tries to open the main binary and if
+/// successful iterates over the STAB entries. The real parsing is
+/// done in handleStabSymbolTableEntry.
+ErrorOr<std::unique_ptr<DebugMap>> MachODebugMapParser::parse() {
+  auto MainBinOrError = MainBinaryHolder.GetFileAs<MachOObjectFile>(BinaryPath);
+  if (auto Error = MainBinOrError.getError())
+    return Error;
+
+  const MachOObjectFile &MainBinary = *MainBinOrError;
+  loadMainBinarySymbols();
+  Result = make_unique<DebugMap>(getTriple(MainBinary));
+  MainBinaryStrings = MainBinary.getStringTableData();
+  for (const SymbolRef &Symbol : MainBinary.symbols()) {
+    const DataRefImpl &DRI = Symbol.getRawDataRefImpl();
+    if (MainBinary.is64Bit())
+      handleStabDebugMapEntry(MainBinary.getSymbol64TableEntry(DRI));
+    else
+      handleStabDebugMapEntry(MainBinary.getSymbolTableEntry(DRI));
+  }
+
+  resetParserState();
+  return std::move(Result);
+}
+
+/// Interpret the STAB entries to fill the DebugMap.
+///
+/// Entries that are not STABs are ignored. N_OSO entries switch to a
+/// new object file; N_GSYM, N_FUN and N_STSYM entries add a symbol to
+/// the current object file. \p SectionIndex and \p Flags are not used.
+void MachODebugMapParser::handleStabSymbolTableEntry(uint32_t StringIndex,
+                                                     uint8_t Type,
+                                                     uint8_t SectionIndex,
+                                                     uint16_t Flags,
+                                                     uint64_t Value) {
+  if (!(Type & MachO::N_STAB))
+    return;
+
+  // Do not read past the end of the string table if the entry of a
+  // malformed binary carries an out-of-range name index.
+  if (StringIndex >= MainBinaryStrings.size()) {
+    Warning("ignoring STAB entry with an out-of-range string table index.");
+    // A broken N_OSO must not leave the previous object file current,
+    // otherwise the following symbols would be attributed to it.
+    if (Type == MachO::N_OSO)
+      resetParserState();
+    return;
+  }
+
+  const char *Name = &MainBinaryStrings.data()[StringIndex];
+
+  // An N_OSO entry represents the start of a new object file description.
+  if (Type == MachO::N_OSO)
+    return switchToNewDebugMapObject(Name);
+
+  // If the last N_OSO object file wasn't found,
+  // CurrentDebugMapObject will be null. Do not update anything
+  // until we find the next valid N_OSO entry.
+  if (!CurrentDebugMapObject)
+    return;
+
+  switch (Type) {
+  case MachO::N_GSYM:
+    // This is a global variable. We need to query the main binary
+    // symbol table to find its address as it might not be in the
+    // debug map (for common symbols).
+    Value = getMainBinarySymbolAddress(Name);
+    if (Value == UnknownAddressOrSize)
+      return;
+    break;
+  case MachO::N_FUN:
+    // Functions are scopes in STABS. They have an end marker that we
+    // need to ignore.
+    if (Name[0] == '\0')
+      return;
+    break;
+  case MachO::N_STSYM:
+    break;
+  default:
+    return;
+  }
+
+  auto ObjectSymIt = CurrentObjectAddresses.find(Name);
+  if (ObjectSymIt == CurrentObjectAddresses.end())
+    return Warning("could not find object file symbol for symbol " +
+                   Twine(Name));
+  if (!CurrentDebugMapObject->addSymbol(Name, ObjectSymIt->getValue(), Value))
+    return Warning(Twine("failed to insert symbol '") + Name +
+                   "' in the debug map.");
+}
+
+/// Load the current object file symbols into CurrentObjectAddresses.
+void MachODebugMapParser::loadCurrentObjectFileSymbols() {
+  CurrentObjectAddresses.clear();
+
+  for (auto Sym : CurrentObjectHolder.Get().symbols()) {
+    StringRef Name;
+    uint64_t Addr;
+    // Skip the symbols that have no usable address or name.
+    if (Sym.getAddress(Addr) || Addr == UnknownAddressOrSize ||
+        Sym.getName(Name))
+      continue;
+    CurrentObjectAddresses[Name] = Addr;
+  }
+}
+
+/// Lookup a symbol address in the main binary symbol table. The
+/// parser only needs to query common symbols, thus not every symbol's
+/// address is available through this function.
+/// \returns UnknownAddressOrSize if \p Name is not in the table.
+uint64_t MachODebugMapParser::getMainBinarySymbolAddress(StringRef Name) {
+  auto Sym = MainBinarySymbolAddresses.find(Name);
+  if (Sym == MainBinarySymbolAddresses.end())
+    return UnknownAddressOrSize;
+  return Sym->second;
+}
+
+/// Load the interesting main binary symbols' addresses into
+/// MainBinarySymbolAddresses.
+void MachODebugMapParser::loadMainBinarySymbols() {
+  const MachOObjectFile &MainBinary = MainBinaryHolder.GetAs<MachOObjectFile>();
+  section_iterator Section = MainBinary.section_end();
+  for (const auto &Sym : MainBinary.symbols()) {
+    SymbolRef::Type Type;
+    // Skip undefined and STAB entries.
+    if (Sym.getType(Type) || (Type & SymbolRef::ST_Debug) ||
+        (Type & SymbolRef::ST_Unknown))
+      continue;
+    StringRef Name;
+    uint64_t Addr;
+    // The only symbols of interest are the global variables. These
+    // are the only ones that need to be queried because the address
+    // of common data won't be described in the debug map. All other
+    // addresses should be fetched for the debug map.
+    // A symbol that does not belong to any section leaves Section at
+    // section_end(), which must not be dereferenced.
+    if (Sym.getAddress(Addr) || Addr == UnknownAddressOrSize ||
+        !(Sym.getFlags() & SymbolRef::SF_Global) || Sym.getSection(Section) ||
+        Section == MainBinary.section_end() || Section->isText() ||
+        Sym.getName(Name) || Name.size() == 0 || Name[0] == '\0')
+      continue;
+    MainBinarySymbolAddresses[Name] = Addr;
+  }
+}
+
+namespace llvm {
+namespace dsymutil {
+llvm::ErrorOr<std::unique_ptr<DebugMap>> parseDebugMap(StringRef InputFile,
+                                                       StringRef PrependPath,
+                                                       bool Verbose) {
+  MachODebugMapParser Parser(InputFile, PrependPath, Verbose);
+  return Parser.parse();
+}
+}
+}
diff --git a/tools/dsymutil/Makefile b/tools/dsymutil/Makefile
new file mode 100644
index 0000000..e8dc569
--- /dev/null
+++ b/tools/dsymutil/Makefile
@@ -0,0 +1,17 @@
+##===- tools/dsymutil/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../..
+TOOLNAME := llvm-dsymutil
+LINK_COMPONENTS := DebugInfoDWARF Object Support
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/dsymutil/dsymutil.cpp b/tools/dsymutil/dsymutil.cpp
new file mode 100644
index 0000000..2b4fcfe
--- /dev/null
+++ b/tools/dsymutil/dsymutil.cpp
@@ -0,0 +1,71 @@
+//===-- dsymutil.cpp - Debug info dumping utility for llvm ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is a utility that aims to be a dropin replacement for
+// Darwin's dsymutil.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DebugMap.h"
+#include "dsymutil.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Options.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+using namespace llvm::dsymutil;
+
+namespace {
+using namespace llvm::cl;
+
+/// The binary whose debug map is parsed; defaults to "a.out".
+static opt<std::string> InputFile(Positional, desc("<input file>"),
+                                  init("a.out"));
+
+static opt<std::string> OsoPrependPath("oso-prepend-path",
+                                       desc("Specify a directory to prepend "
+                                            "to the paths of object files."),
+                                       value_desc("path"));
+
+static opt<bool> Verbose("v", desc("Verbosity level"), init(false));
+
+static opt<bool>
+    ParseOnly("parse-only",
+              desc("Only parse the debug map, do not actually link "
+                   "the DWARF."),
+              init(false));
+}
+
+/// Parse the debug map of the input binary, print it when -v is given
+/// and, unless -parse-only is given, link the DWARF into a file named
+/// after the input with a ".dwarf" suffix.
+/// \returns 0 on success, 1 if the debug map cannot be parsed or the
+/// link fails.
+int main(int argc, char **argv) {
+  llvm::sys::PrintStackTraceOnErrorSignal();
+  llvm::PrettyStackTraceProgram StackPrinter(argc, argv);
+  llvm::llvm_shutdown_obj Shutdown;
+
+  llvm::cl::ParseCommandLineOptions(argc, argv, "llvm dsymutil\n");
+  auto DebugMapPtrOrErr = parseDebugMap(InputFile, OsoPrependPath, Verbose);
+
+  if (auto EC = DebugMapPtrOrErr.getError()) {
+    llvm::errs() << "error: cannot parse the debug map for \"" << InputFile
+                 << "\": " << EC.message() << '\n';
+    return 1;
+  }
+
+  if (Verbose)
+    (*DebugMapPtrOrErr)->print(llvm::outs());
+
+  if (ParseOnly)
+    return 0;
+
+  // An input named "-" gets the default "a.out" output basename.
+  std::string OutputBasename(InputFile);
+  if (OutputBasename == "-")
+    OutputBasename = "a.out";
+
+  return !linkDwarf(OutputBasename + ".dwarf", **DebugMapPtrOrErr, Verbose);
+}
diff --git a/tools/dsymutil/dsymutil.h b/tools/dsymutil/dsymutil.h
new file mode 100644
index 0000000..9203bea
--- /dev/null
+++ b/tools/dsymutil/dsymutil.h
@@ -0,0 +1,39 @@
+//===- tools/dsymutil/dsymutil.h - dsymutil high-level functionality ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// This file declares the high-level entry points of dsymutil: parsing the
+/// STABS debug map of a binary and linking the DWARF that it describes.
+///
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TOOLS_DSYMUTIL_DSYMUTIL_H
+#define LLVM_TOOLS_DSYMUTIL_DSYMUTIL_H
+
+#include "DebugMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorOr.h"
+#include <memory>
+
+namespace llvm {
+namespace dsymutil {
+/// \brief Extract the DebugMap from the given file.
+/// The file has to be a MachO object file.
+llvm::ErrorOr<std::unique_ptr<DebugMap>>
+parseDebugMap(StringRef InputFile, StringRef PrependPath = "",
+              bool Verbose = false);
+
+/// \brief Link the Dwarf debuginfo as directed by the passed DebugMap
+/// \p DM into a DwarfFile named \p OutputFilename.
+/// \returns false if the link failed.
+bool linkDwarf(StringRef OutputFilename, const DebugMap &DM,
+               bool Verbose = false);
+}
+}
+#endif // LLVM_TOOLS_DSYMUTIL_DSYMUTIL_H
|