diff options
author | Gregory Szorc <gregory.szorc@gmail.com> | 2012-03-10 04:41:24 +0000 |
---|---|---|
committer | Gregory Szorc <gregory.szorc@gmail.com> | 2012-03-10 04:41:24 +0000 |
commit | 61e22cd85cd4c84fff391da67018c92bf21a8e19 (patch) | |
tree | 9f3f6b06d25b5735247ea51b665ed16a858cab3b /bindings/python/llvm/object.py | |
parent | 51cf8661637c114e4b4f178bd2677a6bb246be0d (diff) | |
download | external_llvm-61e22cd85cd4c84fff391da67018c92bf21a8e19.zip external_llvm-61e22cd85cd4c84fff391da67018c92bf21a8e19.tar.gz external_llvm-61e22cd85cd4c84fff391da67018c92bf21a8e19.tar.bz2 |
[llvm.py] Implement interface to object files
It is now possible to load object files and scan over sections, symbols,
and relocations! Includes test code with partial coverage.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152482 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'bindings/python/llvm/object.py')
-rw-r--r-- | bindings/python/llvm/object.py | 473 |
1 files changed, 374 insertions, 99 deletions
diff --git a/bindings/python/llvm/object.py b/bindings/python/llvm/object.py
index f633f60..473aa3a 100644
--- a/bindings/python/llvm/object.py
+++ b/bindings/python/llvm/object.py
@@ -7,11 +7,82 @@
 #
 #===------------------------------------------------------------------------===#
 
+r"""
+Object File Interface
+=====================
+
+This module provides an interface for reading information from object files
+(e.g. binary executables and libraries).
+
+Using this module, you can obtain information about an object file's sections,
+symbols, and relocations. These are represented by the classes ObjectFile,
+Section, Symbol, and Relocation, respectively.
+
+Usage
+-----
+
+The only way to use this module is to start by creating an ObjectFile. You can
+create an ObjectFile by loading a file (specified by its path) or by creating a
+llvm.core.MemoryBuffer and loading that.
+
+Once you have an object file, you can inspect its sections and symbols directly
+by calling get_sections() and get_symbols() respectively. To inspect
+relocations, call get_relocations() on a Section instance.
+
+Iterator Interface
+------------------
+
+The LLVM bindings expose iteration over sections, symbols, and relocations in a
+way that only allows one instance to be operated on at a single time. This is
+slightly annoying from a Python perspective, as it isn't very Pythonic to have
+objects that "expire" but are still active from a dynamic language.
+
+To aid working around this limitation, each Section, Symbol, and Relocation
+instance caches its properties after first access. So, if the underlying
+iterator is advanced, the properties can still be obtained provided they have
+already been retrieved.
+
+In addition, we also provide a "cache" method on each class to cache all
+available data. You can call this on each obtained instance. Or, you can pass
+cache=True to the appropriate get_XXX() method to have this done for you.
+
+Here are some examples on how to perform iteration:
+
+    obj = ObjectFile(filename='/bin/ls')
+
+    # This is OK. Each Section is only accessed inside its own iteration slot.
+    section_names = []
+    for section in obj.get_sections():
+        section_names.append(section.name)
+
+    # This is NOT OK. You perform a lookup after the object has expired.
+    symbols = list(obj.get_symbols())
+    for symbol in symbols:
+        print symbol.name # This raises because the object has expired.
+
+    # In this example, we mix a working and failing scenario.
+    symbols = []
+    for symbol in obj.get_symbols():
+        symbols.append(symbol)
+        print symbol.name
+
+    for symbol in symbols:
+        print symbol.name # OK
+        print symbol.address # NOT OK. We didn't look up this property before.
+
+    # Cache everything up front.
+    symbols = list(obj.get_symbols(cache=True))
+    for symbol in symbols:
+        print symbol.name # OK
+
+"""
+
 from ctypes import c_char_p
 from ctypes import c_uint64
-from ctypes import c_void_p
 
+from .common import CachedProperty
 from .common import LLVMObject
+from .common import c_object_p
 from .common import get_library
 from .core import MemoryBuffer
 
@@ -23,7 +94,7 @@ __all__ = [
     "Symbol",
 ]
 
-class ObjectFile(object):
+class ObjectFile(LLVMObject):
     """Represents an object/binary file."""
 
     def __init__(self, filename=None, contents=None):
@@ -39,209 +110,413 @@ class ObjectFile(object):
         if filename is not None:
             contents = MemoryBuffer(filename=filename)
 
-        self._memory = contents
-        self._obj = lib.LLVMCreateObjectFile(contents)
-        contents.release_ownership()
-        self._as_parameter_ = self._obj
-
-    def __del__(self):
-        lib.LLVMDisposeObjectFile(self)
+        if contents is None:
+            raise Exception('No input found.')
 
-    def from_param(self):
-        return self._as_parameter_
+        ptr = lib.LLVMCreateObjectFile(contents)
+        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
+        self.take_ownership(contents)
 
-    def get_sections(self):
+    def get_sections(self, cache=False):
         """Obtain the sections in this object file.
 
-        This is an iterator for llvm.object.Section instances.
+        This is a generator for llvm.object.Section instances.
+
+        Sections are exposed as limited-use objects. See the module's
+        documentation on iterators for more.
         """
-        pass
+        sections = lib.LLVMGetSections(self)
+        last = None
+        while True:
+            if lib.LLVMIsSectionIteratorAtEnd(self, sections):
+                break
+
+            last = Section(sections)
+            if cache:
+                last.cache()
 
-    def get_symbols(self):
+            yield last
+
+            lib.LLVMMoveToNextSection(sections)
+            last.expire()
+
+        if last is not None:
+            last.expire()
+
+        lib.LLVMDisposeSectionIterator(sections)
+
+    def get_symbols(self, cache=False):
         """Obtain the symbols in this object file.
 
-        This is an iterator for llvm.object.Symbol instances.
+        This is a generator for llvm.object.Symbol instances.
+
+        Each Symbol instance is a limited-use object. See this module's
+        documentation on iterators for more.
         """
+        symbols = lib.LLVMGetSymbols(self)
+        last = None
+        while True:
+            if lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
+                break
+
+            last = Symbol(symbols, self)
+            if cache:
+                last.cache()
+
+            yield last
+
+            lib.LLVMMoveToNextSymbol(symbols)
+            last.expire()
+
+        if last is not None:
+            last.expire()
+
+        lib.LLVMDisposeSymbolIterator(symbols)
 
-class Section(object):
+class Section(LLVMObject):
     """Represents a section in an object file."""
 
-    def __init__(self, obj=None):
+    def __init__(self, ptr):
         """Construct a new section instance.
 
         Section instances can currently only be created from an ObjectFile
         instance. Therefore, this constructor should not be used outside of
         this module.
         """
-        pass
+        LLVMObject.__init__(self, ptr)
 
-    def __del__(self):
-        pass
+        self.expired = False
 
-    @property
+    @CachedProperty
     def name(self):
-        pass
+        """Obtain the string name of the section.
 
-    @property
+        This is typically something like '.dynsym' or '.rodata'.
+        """
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        return lib.LLVMGetSectionName(self)
+
+    @CachedProperty
     def size(self):
-        pass
+        """The size of the section, in bytes."""
+        if self.expired:
+            raise Exception('Section instance has expired.')
 
-    @property
+        return lib.LLVMGetSectionSize(self)
+
+    @CachedProperty
     def contents(self):
-        pass
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        return lib.LLVMGetSectionContents(self)
 
-    @property
+    @CachedProperty
     def address(self):
-        pass
+        """The address of this section, in bytes."""
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        return lib.LLVMGetSectionAddress(self)
 
-    # TODO consider exposing more Pythonic interface, like __contains__
     def has_symbol(self, symbol):
-        pass
+        """Returns whether a Symbol instance is present in this Section."""
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        assert isinstance(symbol, Symbol)
+        return lib.LLVMGetSectionContainsSymbol(self, symbol)
+
+    def get_relocations(self, cache=False):
+        """Obtain the relocations in this Section.
 
-    def get_relocations(self):
-        pass
+        This is a generator for llvm.object.Relocation instances.
+
+        Each instance is a limited-use object. See this module's documentation
+        on iterators for more.
+        """
+        if self.expired:
+            raise Exception('Section instance has expired.')
 
-class Symbol(object):
-    def __init__(self):
-        pass
+        relocations = lib.LLVMGetRelocations(self)
+        last = None
+        while True:
+            if lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
+                break
 
-    @property
+            last = Relocation(relocations)
+            if cache:
+                last.cache()
+
+            yield last
+
+            lib.LLVMMoveToNextRelocation(relocations)
+            last.expire()
+
+        if last is not None:
+            last.expire()
+
+        lib.LLVMDisposeRelocationIterator(relocations)
+
+    def cache(self):
+        """Cache properties of this Section.
+
+        This can be called as a workaround to the single active Section
+        limitation. When called, the properties of the Section are fetched so
+        they are still available after the Section has been marked inactive.
+        """
+        getattr(self, 'name')
+        getattr(self, 'size')
+        getattr(self, 'contents')
+        getattr(self, 'address')
+
+    def expire(self):
+        """Expire the section.
+
+        This is called internally by the section iterator.
+        """
+        self.expired = True
+
+class Symbol(LLVMObject):
+    """Represents a symbol in an object file."""
+    def __init__(self, ptr, object_file=None):
+        assert isinstance(ptr, c_object_p)
+        assert object_file is None or isinstance(object_file, ObjectFile)
+
+        LLVMObject.__init__(self, ptr)
+
+        self.expired = False
+        self._object_file = object_file
+
+    @CachedProperty
     def name(self):
-        pass
+        """The str name of the symbol.
+
+        This is often a function or variable name. Keep in mind that name
+        mangling could be in effect.
+        """
+        if self.expired:
+            raise Exception('Symbol instance has expired.')
+
+        return lib.LLVMGetSymbolName(self)
 
-    @property
+    @CachedProperty
     def address(self):
-        pass
+        """The address of this symbol, in bytes."""
+        if self.expired:
+            raise Exception('Symbol instance has expired.')
+
+        return lib.LLVMGetSymbolAddress(self)
 
-    @property
+    @CachedProperty
     def file_offset(self):
-        pass
+        """The offset of this symbol in the file, in bytes."""
+        if self.expired:
+            raise Exception('Symbol instance has expired.')
 
-    @property
+        return lib.LLVMGetSymbolFileOffset(self)
+
+    @CachedProperty
     def size(self):
-        pass
+        """The size of the symbol, in bytes."""
+        if self.expired:
+            raise Exception('Symbol instance has expired.')
+
+        return lib.LLVMGetSymbolSize(self)
 
-class Relocation(object):
-    def __init__(self):
-        pass
+    @CachedProperty
+    def section(self):
+        """The Section to which this Symbol belongs.
 
-    @property
+        The returned Section instance does not expire, unlike Sections that are
+        commonly obtained through iteration.
+
+        Because this obtains a new section iterator each time it is accessed,
+        calling this on a number of Symbol instances could be expensive.
+        """
+        sections = lib.LLVMGetSections(self._object_file)
+        lib.LLVMMoveToContainingSection(sections, self)
+
+        return Section(sections)
+
+    def cache(self):
+        """Cache all cacheable properties."""
+        getattr(self, 'name')
+        getattr(self, 'address')
+        getattr(self, 'file_offset')
+        getattr(self, 'size')
+
+    def expire(self):
+        """Mark the object as expired to prevent future API accesses.
+
+        This is called internally by this module and it is unlikely that
+        external callers have a legitimate reason for using it.
+        """
+        self.expired = True
+
+class Relocation(LLVMObject):
+    """Represents a relocation definition."""
+    def __init__(self, ptr):
+        """Create a new relocation instance.
+
+        Relocations are created from objects derived from Section instances.
+        Therefore, this constructor should not be called outside of this
+        module. See Section.get_relocations() for the proper method to obtain
+        a Relocation instance.
+        """
+        assert isinstance(ptr, c_object_p)
+
+        LLVMObject.__init__(self, ptr)
+
+        self.expired = False
+
+    @CachedProperty
     def address(self):
-        pass
+        """The address of this relocation, in bytes."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
 
-    @property
+        return lib.LLVMGetRelocationAddress(self)
+
+    @CachedProperty
     def offset(self):
-        pass
+        """The offset of this relocation, in bytes."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        return lib.LLVMGetRelocationOffset(self)
 
-    @property
+    @CachedProperty
     def symbol(self):
-        pass
+        """The Symbol corresponding to this Relocation."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
 
-    @property
-    def type(self):
-        pass
+        ptr = lib.LLVMGetRelocationSymbol(self)
+        return Symbol(ptr)
 
-    @property
+    @CachedProperty
+    def type_number(self):
+        """The relocation type, as a long."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        return lib.LLVMGetRelocationType(self)
+
+    @CachedProperty
     def type_name(self):
-        pass
+        """The relocation type's name, as a str."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        return lib.LLVMGetRelocationTypeName(self)
 
-    @property
+    @CachedProperty
     def value_string(self):
-        pass
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        return lib.LLVMGetRelocationValueString(self)
+
+    def expire(self):
+        """Expire this instance, making future API accesses fail."""
+        self.expired = True
 
-SectionIteratorRef = c_void_p
-SymbolIteratorRef = c_void_p
-RelocationIteratorRef = c_void_p
+    def cache(self):
+        """Cache all cacheable properties on this instance."""
+        getattr(self, 'address')
+        getattr(self, 'offset')
+        getattr(self, 'symbol')
+        getattr(self, 'type_number')
+        getattr(self, 'type_name')
+        getattr(self, 'value_string')
 
 def register_library(library):
     """Register function prototypes with LLVM library instance."""
 
     # Object.h functions
     library.LLVMCreateObjectFile.argtypes = [MemoryBuffer]
-    library.LLVMCreateObjectFile.restype = LLVMObject
+    library.LLVMCreateObjectFile.restype = c_object_p
 
     library.LLVMDisposeObjectFile.argtypes = [ObjectFile]
 
     library.LLVMGetSections.argtypes = [ObjectFile]
-    library.LLVMGetSections.restype = SectionIteratorRef
+    library.LLVMGetSections.restype = c_object_p
 
-    library.LLVMDisposeSectionIterator.argtypes = [SectionIteratorRef]
+    library.LLVMDisposeSectionIterator.argtypes = [c_object_p]
 
-    library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile,
-            SectionIteratorRef]
+    library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
     library.LLVMIsSectionIteratorAtEnd.restype = bool
 
-    library.LLVMMoveToNextSection.argtypes = [SectionIteratorRef]
+    library.LLVMMoveToNextSection.argtypes = [c_object_p]
 
-    library.LLVMMoveToContainingSection.argtypes = [SectionIteratorRef,
-            SymbolIteratorRef]
+    library.LLVMMoveToContainingSection.argtypes = [c_object_p, c_object_p]
 
     library.LLVMGetSymbols.argtypes = [ObjectFile]
-    library.LLVMGetSymbols.restype = SymbolIteratorRef
+    library.LLVMGetSymbols.restype = c_object_p
 
-    library.LLVMDisposeSymbolIterator.argtypes = [SymbolIteratorRef]
+    library.LLVMDisposeSymbolIterator.argtypes = [c_object_p]
 
-    library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile,
-            SymbolIteratorRef]
+    library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
     library.LLVMIsSymbolIteratorAtEnd.restype = bool
 
-    library.LLVMMoveToNextSymbol.argtypes = [SymbolIteratorRef]
+    library.LLVMMoveToNextSymbol.argtypes = [c_object_p]
 
-    library.LLVMGetSectionName.argtypes = [SectionIteratorRef]
+    library.LLVMGetSectionName.argtypes = [c_object_p]
     library.LLVMGetSectionName.restype = c_char_p
 
-    library.LLVMGetSectionSize.argtypes = [SectionIteratorRef]
+    library.LLVMGetSectionSize.argtypes = [c_object_p]
     library.LLVMGetSectionSize.restype = c_uint64
 
-    library.LLVMGetSectionContents.argtypes = [SectionIteratorRef]
+    library.LLVMGetSectionContents.argtypes = [c_object_p]
     library.LLVMGetSectionContents.restype = c_char_p
 
-    library.LLVMGetSectionAddress.argtypes = [SectionIteratorRef]
+    library.LLVMGetSectionAddress.argtypes = [c_object_p]
     library.LLVMGetSectionAddress.restype = c_uint64
 
-    library.LLVMGetSectionContainsSymbol.argtypes = [SectionIteratorRef,
-            SymbolIteratorRef]
+    library.LLVMGetSectionContainsSymbol.argtypes = [c_object_p, c_object_p]
     library.LLVMGetSectionContainsSymbol.restype = bool
 
-    library.LLVMGetRelocations.argtypes = [SectionIteratorRef]
-    library.LLVMGetRelocations.restype = RelocationIteratorRef
+    library.LLVMGetRelocations.argtypes = [c_object_p]
+    library.LLVMGetRelocations.restype = c_object_p
 
-    library.LLVMDisposeRelocationIterator.argtypes = [RelocationIteratorRef]
+    library.LLVMDisposeRelocationIterator.argtypes = [c_object_p]
 
-    library.LLVMIsRelocationIteratorAtEnd.argtypes = [SectionIteratorRef,
-            RelocationIteratorRef]
+    library.LLVMIsRelocationIteratorAtEnd.argtypes = [c_object_p, c_object_p]
     library.LLVMIsRelocationIteratorAtEnd.restype = bool
 
-    library.LLVMMoveToNextRelocation.argtypes = [RelocationIteratorRef]
+    library.LLVMMoveToNextRelocation.argtypes = [c_object_p]
 
-    library.LLVMGetSymbolName.argtypes = [SymbolIteratorRef]
+    library.LLVMGetSymbolName.argtypes = [Symbol]
     library.LLVMGetSymbolName.restype = c_char_p
 
-    library.LLVMGetSymbolAddress.argtypes = [SymbolIteratorRef]
+    library.LLVMGetSymbolAddress.argtypes = [Symbol]
     library.LLVMGetSymbolAddress.restype = c_uint64
 
-    library.LLVMGetSymbolFileOffset.argtypes = [SymbolIteratorRef]
+    library.LLVMGetSymbolFileOffset.argtypes = [Symbol]
     library.LLVMGetSymbolFileOffset.restype = c_uint64
 
-    library.LLVMGetSymbolSize.argtypes = [SymbolIteratorRef]
+    library.LLVMGetSymbolSize.argtypes = [Symbol]
     library.LLVMGetSymbolSize.restype = c_uint64
 
-    library.LLVMGetRelocationAddress.argtypes = [SymbolIteratorRef]
+    library.LLVMGetRelocationAddress.argtypes = [c_object_p]
     library.LLVMGetRelocationAddress.restype = c_uint64
 
-    library.LLVMGetRelocationOffset.argtypes = [RelocationIteratorRef]
+    library.LLVMGetRelocationOffset.argtypes = [c_object_p]
     library.LLVMGetRelocationOffset.restype = c_uint64
 
-    library.LLVMGetRelocationSymbol.argtypes = [RelocationIteratorRef]
-    library.LLVMGetRelocationSymbol.restype = SymbolIteratorRef
+    library.LLVMGetRelocationSymbol.argtypes = [c_object_p]
+    library.LLVMGetRelocationSymbol.restype = c_object_p
 
-    library.LLVMGetRelocationType.argtypes = [RelocationIteratorRef]
+    library.LLVMGetRelocationType.argtypes = [c_object_p]
     library.LLVMGetRelocationType.restype = c_uint64
 
-    library.LLVMGetRelocationTypeName.argtypes = [RelocationIteratorRef]
+    library.LLVMGetRelocationTypeName.argtypes = [c_object_p]
     library.LLVMGetRelocationTypeName.restype = c_char_p
 
-    library.LLVMGetRelocationValueString.argtypes = [RelocationIteratorRef]
+    library.LLVMGetRelocationValueString.argtypes = [c_object_p]
     library.LLVMGetRelocationValueString.restype = c_char_p
 
 lib = get_library()