about summary refs log tree commit diff stats
path: root/bindings/python/llvm/object.py
diff options
context:
space:
mode:
author Gregory Szorc <gregory.szorc@gmail.com> 2012-03-10 04:41:24 +0000
committer Gregory Szorc <gregory.szorc@gmail.com> 2012-03-10 04:41:24 +0000
commit 61e22cd85cd4c84fff391da67018c92bf21a8e19 (patch)
tree 9f3f6b06d25b5735247ea51b665ed16a858cab3b /bindings/python/llvm/object.py
parent 51cf8661637c114e4b4f178bd2677a6bb246be0d (diff)
download external_llvm-61e22cd85cd4c84fff391da67018c92bf21a8e19.zip
external_llvm-61e22cd85cd4c84fff391da67018c92bf21a8e19.tar.gz
external_llvm-61e22cd85cd4c84fff391da67018c92bf21a8e19.tar.bz2
[llvm.py] Implement interface to object files
It is now possible to load object files and scan over sections, symbols, and relocations! Includes test code with partial coverage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152482 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'bindings/python/llvm/object.py')
-rw-r--r-- bindings/python/llvm/object.py 473
1 files changed, 374 insertions, 99 deletions
diff --git a/bindings/python/llvm/object.py b/bindings/python/llvm/object.py
index f633f60..473aa3a 100644
--- a/bindings/python/llvm/object.py
+++ b/bindings/python/llvm/object.py
@@ -7,11 +7,82 @@
#
#===------------------------------------------------------------------------===#
+r"""
+Object File Interface
+=====================
+
+This module provides an interface for reading information from object files
+(e.g. binary executables and libraries).
+
+Using this module, you can obtain information about an object file's sections,
+symbols, and relocations. These are represented by the classes ObjectFile,
+Section, Symbol, and Relocation, respectively.
+
+Usage
+-----
+
+The only way to use this module is to start by creating an ObjectFile. You can
+create an ObjectFile by loading a file (specified by its path) or by creating an
+llvm.core.MemoryBuffer and loading that.
+
+Once you have an object file, you can inspect its sections and symbols directly
+by calling get_sections() and get_symbols() respectively. To inspect
+relocations, call get_relocations() on a Section instance.
+
+Iterator Interface
+------------------
+
+The LLVM bindings expose iteration over sections, symbols, and relocations in a
+way that only allows one instance to be operated on at a time. This is
+slightly annoying from a Python perspective, as it isn't very Pythonic to have
+objects that "expire" while they are still referenced from Python code.
+
+To aid working around this limitation, each Section, Symbol, and Relocation
+instance caches its properties after first access. So, if the underlying
+iterator is advanced, the properties can still be obtained provided they have
+already been retrieved.
+
+In addition, we also provide a "cache" method on each class to cache all
+available data. You can call this on each obtained instance. Or, you can pass
+cache=True to the appropriate get_XXX() method to have this done for you.
+
+Here are some examples on how to perform iteration:
+
+ obj = ObjectFile(filename='/bin/ls')
+
+ # This is OK. Each Section is only accessed inside its own iteration slot.
+ section_names = []
+ for section in obj.get_sections():
+ section_names.append(section.name)
+
+ # This is NOT OK. You perform a lookup after the object has expired.
+ symbols = list(obj.get_symbols())
+ for symbol in symbols:
+ print symbol.name # This raises because the object has expired.
+
+ # In this example, we mix a working and failing scenario.
+ symbols = []
+ for symbol in obj.get_symbols():
+ symbols.append(symbol)
+ print symbol.name
+
+ for symbol in symbols:
+ print symbol.name # OK
+ print symbol.address # NOT OK. We didn't look up this property before.
+
+ # Cache everything up front.
+ symbols = list(obj.get_symbols(cache=True))
+ for symbol in symbols:
+ print symbol.name # OK
+
+"""
+
from ctypes import c_char_p
from ctypes import c_uint64
-from ctypes import c_void_p
+from .common import CachedProperty
from .common import LLVMObject
+from .common import c_object_p
from .common import get_library
from .core import MemoryBuffer
@@ -23,7 +94,7 @@ __all__ = [
"Symbol",
]
-class ObjectFile(object):
+class ObjectFile(LLVMObject):
"""Represents an object/binary file."""
def __init__(self, filename=None, contents=None):
@@ -39,209 +110,413 @@ class ObjectFile(object):
if filename is not None:
contents = MemoryBuffer(filename=filename)
- self._memory = contents
- self._obj = lib.LLVMCreateObjectFile(contents)
- contents.release_ownership()
- self._as_parameter_ = self._obj
-
- def __del__(self):
- lib.LLVMDisposeObjectFile(self)
+ if contents is None:
+ raise Exception('No input found.')
- def from_param(self):
- return self._as_parameter_
+ ptr = lib.LLVMCreateObjectFile(contents)
+ LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
+ self.take_ownership(contents)
- def get_sections(self):
+ def get_sections(self, cache=False):
"""Obtain the sections in this object file.
- This is an iterator for llvm.object.Section instances.
+ This is a generator for llvm.object.Section instances.
+
+ Sections are exposed as limited-use objects. See the module's
+ documentation on iterators for more.
"""
- pass
+ sections = lib.LLVMGetSections(self)
+ last = None
+ while True:
+ if lib.LLVMIsSectionIteratorAtEnd(self, sections):
+ break
+
+ last = Section(sections)
+ if cache:
+ last.cache()
- def get_symbols(self):
+ yield last
+
+ lib.LLVMMoveToNextSection(sections)
+ last.expire()
+
+ if last is not None:
+ last.expire()
+
+ lib.LLVMDisposeSectionIterator(sections)
+
+ def get_symbols(self, cache=False):
"""Obtain the symbols in this object file.
- This is an iterator for llvm.object.Symbol instances.
+ This is a generator for llvm.object.Symbol instances.
+
+ Each Symbol instance is a limited-use object. See this module's
+ documentation on iterators for more.
"""
+ symbols = lib.LLVMGetSymbols(self)
+ last = None
+ while True:
+ if lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
+ break
+
+ last = Symbol(symbols, self)
+ if cache:
+ last.cache()
+
+ yield last
+
+ lib.LLVMMoveToNextSymbol(symbols)
+ last.expire()
+
+ if last is not None:
+ last.expire()
+
+ lib.LLVMDisposeSymbolIterator(symbols)
-class Section(object):
+class Section(LLVMObject):
"""Represents a section in an object file."""
- def __init__(self, obj=None):
+ def __init__(self, ptr):
"""Construct a new section instance.
Section instances can currently only be created from an ObjectFile
instance. Therefore, this constructor should not be used outside of
this module.
"""
- pass
+ LLVMObject.__init__(self, ptr)
- def __del__(self):
- pass
+ self.expired = False
- @property
+ @CachedProperty
def name(self):
- pass
+ """Obtain the string name of the section.
- @property
+ This is typically something like '.dynsym' or '.rodata'.
+ """
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ return lib.LLVMGetSectionName(self)
+
+ @CachedProperty
def size(self):
- pass
+"""The size of the section in bytes, as a long."""
+ if self.expired:
+ raise Exception('Section instance has expired.')
- @property
+ return lib.LLVMGetSectionSize(self)
+
+ @CachedProperty
def contents(self):
- pass
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ return lib.LLVMGetSectionContents(self)
- @property
+ @CachedProperty
def address(self):
- pass
+"""The address of this section in bytes, as a long."""
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ return lib.LLVMGetSectionAddress(self)
- # TODO consider exposing more Pythonic interface, like __contains__
def has_symbol(self, symbol):
- pass
+ """Returns whether a Symbol instance is present in this Section."""
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ assert isinstance(symbol, Symbol)
+ return lib.LLVMGetSectionContainsSymbol(self, symbol)
+
+ def get_relocations(self, cache=False):
+ """Obtain the relocations in this Section.
- def get_relocations(self):
- pass
+ This is a generator for llvm.object.Relocation instances.
+
+Each instance is a limited-use object. See this module's documentation
+ on iterators for more.
+ """
+ if self.expired:
+ raise Exception('Section instance has expired.')
-class Symbol(object):
- def __init__(self):
- pass
+ relocations = lib.LLVMGetRelocations(self)
+ last = None
+ while True:
+ if lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
+ break
- @property
+ last = Relocation(relocations)
+ if cache:
+ last.cache()
+
+ yield last
+
+ lib.LLVMMoveToNextRelocation(relocations)
+ last.expire()
+
+ if last is not None:
+ last.expire()
+
+ lib.LLVMDisposeRelocationIterator(relocations)
+
+ def cache(self):
+ """Cache properties of this Section.
+
+ This can be called as a workaround to the single active Section
+ limitation. When called, the properties of the Section are fetched so
+ they are still available after the Section has been marked inactive.
+ """
+ getattr(self, 'name')
+ getattr(self, 'size')
+ getattr(self, 'contents')
+ getattr(self, 'address')
+
+ def expire(self):
+ """Expire the section.
+
+ This is called internally by the section iterator.
+ """
+ self.expired = True
+
+class Symbol(LLVMObject):
+ """Represents a symbol in an object file."""
+ def __init__(self, ptr, object_file):
+ assert isinstance(ptr, c_object_p)
+ assert isinstance(object_file, ObjectFile)
+
+ LLVMObject.__init__(self, ptr)
+
+ self.expired = False
+ self._object_file = object_file
+
+ @CachedProperty
def name(self):
- pass
+ """The str name of the symbol.
+
+ This is often a function or variable name. Keep in mind that name
+ mangling could be in effect.
+ """
+ if self.expired:
+ raise Exception('Symbol instance has expired.')
+
+ return lib.LLVMGetSymbolName(self)
- @property
+ @CachedProperty
def address(self):
- pass
+"""The address of this symbol in bytes, as a long."""
+ if self.expired:
+ raise Exception('Symbol instance has expired.')
+
+ return lib.LLVMGetSymbolAddress(self)
- @property
+ @CachedProperty
def file_offset(self):
- pass
+"""The offset of this symbol in the file in bytes, as a long."""
+ if self.expired:
+ raise Exception('Symbol instance has expired.')
- @property
+ return lib.LLVMGetSymbolFileOffset(self)
+
+ @CachedProperty
def size(self):
- pass
+"""The size of the symbol in bytes, as a long."""
+ if self.expired:
+ raise Exception('Symbol instance has expired.')
+
+ return lib.LLVMGetSymbolSize(self)
-class Relocation(object):
- def __init__(self):
- pass
+ @CachedProperty
+ def section(self):
+ """The Section to which this Symbol belongs.
- @property
+ The returned Section instance does not expire, unlike Sections that are
+ commonly obtained through iteration.
+
+ Because this obtains a new section iterator each time it is accessed,
+ calling this on a number of Symbol instances could be expensive.
+ """
+ sections = lib.LLVMGetSections(self._object_file)
+ lib.LLVMMoveToContainingSection(sections, self)
+
+ return Section(sections)
+
+ def cache(self):
+ """Cache all cacheable properties."""
+ getattr(self, 'name')
+ getattr(self, 'address')
+ getattr(self, 'file_offset')
+ getattr(self, 'size')
+
+ def expire(self):
+ """Mark the object as expired to prevent future API accesses.
+
+ This is called internally by this module and it is unlikely that
+ external callers have a legitimate reason for using it.
+ """
+ self.expired = True
+
+class Relocation(LLVMObject):
+ """Represents a relocation definition."""
+ def __init__(self, ptr):
+ """Create a new relocation instance.
+
+ Relocations are created from objects derived from Section instances.
+ Therefore, this constructor should not be called outside of this
+ module. See Section.get_relocations() for the proper method to obtain
+ a Relocation instance.
+ """
+ assert isinstance(ptr, c_object_p)
+
+ LLVMObject.__init__(self, ptr)
+
+ self.expired = False
+
+ @CachedProperty
def address(self):
- pass
+"""The address of this relocation in bytes, as a long."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
- @property
+ return lib.LLVMGetRelocationAddress(self)
+
+ @CachedProperty
def offset(self):
- pass
+"""The offset of this relocation in bytes, as a long."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ return lib.LLVMGetRelocationOffset(self)
- @property
+ @CachedProperty
def symbol(self):
- pass
+ """The Symbol corresponding to this Relocation."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
- @property
- def type(self):
- pass
+ ptr = lib.LLVMGetRelocationSymbol(self)
+ return Symbol(ptr)
- @property
+ @CachedProperty
+ def type_number(self):
+ """The relocation type, as a long."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ return lib.LLVMGetRelocationType(self)
+
+ @CachedProperty
def type_name(self):
- pass
+ """The relocation type's name, as a str."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ return lib.LLVMGetRelocationTypeName(self)
- @property
+ @CachedProperty
def value_string(self):
- pass
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ return lib.LLVMGetRelocationValueString(self)
+
+ def expire(self):
+ """Expire this instance, making future API accesses fail."""
+ self.expired = True
-SectionIteratorRef = c_void_p
-SymbolIteratorRef = c_void_p
-RelocationIteratorRef = c_void_p
+ def cache(self):
+ """Cache all cacheable properties on this instance."""
+ getattr(self, 'address')
+ getattr(self, 'offset')
+ getattr(self, 'symbol')
+ getattr(self, 'type')
+ getattr(self, 'type_name')
+ getattr(self, 'value_string')
def register_library(library):
"""Register function prototypes with LLVM library instance."""
# Object.h functions
library.LLVMCreateObjectFile.argtypes = [MemoryBuffer]
- library.LLVMCreateObjectFile.restype = LLVMObject
+ library.LLVMCreateObjectFile.restype = c_object_p
library.LLVMDisposeObjectFile.argtypes = [ObjectFile]
library.LLVMGetSections.argtypes = [ObjectFile]
- library.LLVMGetSections.restype = SectionIteratorRef
+ library.LLVMGetSections.restype = c_object_p
- library.LLVMDisposeSectionIterator.argtypes = [SectionIteratorRef]
+ library.LLVMDisposeSectionIterator.argtypes = [c_object_p]
- library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile,
- SectionIteratorRef]
+ library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
library.LLVMIsSectionIteratorAtEnd.restype = bool
- library.LLVMMoveToNextSection.argtypes = [SectionIteratorRef]
+ library.LLVMMoveToNextSection.argtypes = [c_object_p]
- library.LLVMMoveToContainingSection.argtypes = [SectionIteratorRef,
- SymbolIteratorRef]
+ library.LLVMMoveToContainingSection.argtypes = [c_object_p, c_object_p]
library.LLVMGetSymbols.argtypes = [ObjectFile]
- library.LLVMGetSymbols.restype = SymbolIteratorRef
+ library.LLVMGetSymbols.restype = c_object_p
- library.LLVMDisposeSymbolIterator.argtypes = [SymbolIteratorRef]
+ library.LLVMDisposeSymbolIterator.argtypes = [c_object_p]
- library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile,
- SymbolIteratorRef]
+ library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
library.LLVMIsSymbolIteratorAtEnd.restype = bool
- library.LLVMMoveToNextSymbol.argtypes = [SymbolIteratorRef]
+ library.LLVMMoveToNextSymbol.argtypes = [c_object_p]
- library.LLVMGetSectionName.argtypes = [SectionIteratorRef]
+ library.LLVMGetSectionName.argtypes = [c_object_p]
library.LLVMGetSectionName.restype = c_char_p
- library.LLVMGetSectionSize.argtypes = [SectionIteratorRef]
+ library.LLVMGetSectionSize.argtypes = [c_object_p]
library.LLVMGetSectionSize.restype = c_uint64
- library.LLVMGetSectionContents.argtypes = [SectionIteratorRef]
+ library.LLVMGetSectionContents.argtypes = [c_object_p]
library.LLVMGetSectionContents.restype = c_char_p
- library.LLVMGetSectionAddress.argtypes = [SectionIteratorRef]
+ library.LLVMGetSectionAddress.argtypes = [c_object_p]
library.LLVMGetSectionAddress.restype = c_uint64
- library.LLVMGetSectionContainsSymbol.argtypes = [SectionIteratorRef,
- SymbolIteratorRef]
+ library.LLVMGetSectionContainsSymbol.argtypes = [c_object_p, c_object_p]
library.LLVMGetSectionContainsSymbol.restype = bool
- library.LLVMGetRelocations.argtypes = [SectionIteratorRef]
- library.LLVMGetRelocations.restype = RelocationIteratorRef
+ library.LLVMGetRelocations.argtypes = [c_object_p]
+ library.LLVMGetRelocations.restype = c_object_p
- library.LLVMDisposeRelocationIterator.argtypes = [RelocationIteratorRef]
+ library.LLVMDisposeRelocationIterator.argtypes = [c_object_p]
- library.LLVMIsRelocationIteratorAtEnd.argtypes = [SectionIteratorRef,
- RelocationIteratorRef]
+ library.LLVMIsRelocationIteratorAtEnd.argtypes = [c_object_p, c_object_p]
library.LLVMIsRelocationIteratorAtEnd.restype = bool
- library.LLVMMoveToNextRelocation.argtypes = [RelocationIteratorRef]
+ library.LLVMMoveToNextRelocation.argtypes = [c_object_p]
- library.LLVMGetSymbolName.argtypes = [SymbolIteratorRef]
+ library.LLVMGetSymbolName.argtypes = [Symbol]
library.LLVMGetSymbolName.restype = c_char_p
- library.LLVMGetSymbolAddress.argtypes = [SymbolIteratorRef]
+ library.LLVMGetSymbolAddress.argtypes = [Symbol]
library.LLVMGetSymbolAddress.restype = c_uint64
- library.LLVMGetSymbolFileOffset.argtypes = [SymbolIteratorRef]
+ library.LLVMGetSymbolFileOffset.argtypes = [Symbol]
library.LLVMGetSymbolFileOffset.restype = c_uint64
- library.LLVMGetSymbolSize.argtypes = [SymbolIteratorRef]
+ library.LLVMGetSymbolSize.argtypes = [Symbol]
library.LLVMGetSymbolSize.restype = c_uint64
- library.LLVMGetRelocationAddress.argtypes = [SymbolIteratorRef]
+ library.LLVMGetRelocationAddress.argtypes = [c_object_p]
library.LLVMGetRelocationAddress.restype = c_uint64
- library.LLVMGetRelocationOffset.argtypes = [RelocationIteratorRef]
+ library.LLVMGetRelocationOffset.argtypes = [c_object_p]
library.LLVMGetRelocationOffset.restype = c_uint64
- library.LLVMGetRelocationSymbol.argtypes = [RelocationIteratorRef]
- library.LLVMGetRelocationSymbol.restype = SymbolIteratorRef
+ library.LLVMGetRelocationSymbol.argtypes = [c_object_p]
+ library.LLVMGetRelocationSymbol.restype = c_object_p
- library.LLVMGetRelocationType.argtypes = [RelocationIteratorRef]
+ library.LLVMGetRelocationType.argtypes = [c_object_p]
library.LLVMGetRelocationType.restype = c_uint64
- library.LLVMGetRelocationTypeName.argtypes = [RelocationIteratorRef]
+ library.LLVMGetRelocationTypeName.argtypes = [c_object_p]
library.LLVMGetRelocationTypeName.restype = c_char_p
- library.LLVMGetRelocationValueString.argtypes = [RelocationIteratorRef]
+ library.LLVMGetRelocationValueString.argtypes = [c_object_p]
library.LLVMGetRelocationValueString.restype = c_char_p
lib = get_library()