diff options
Diffstat (limited to 'bindings/python/llvm')
-rw-r--r-- | bindings/python/llvm/disassembler.py | 564 | ||||
-rw-r--r-- | bindings/python/llvm/tests/test_disassembler.py | 62 |
2 files changed, 0 insertions, 626 deletions
diff --git a/bindings/python/llvm/disassembler.py b/bindings/python/llvm/disassembler.py deleted file mode 100644 index d1fd789..0000000 --- a/bindings/python/llvm/disassembler.py +++ /dev/null @@ -1,564 +0,0 @@ -#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -from abc import ABCMeta -from abc import abstractmethod - -from ctypes import CFUNCTYPE -from ctypes import POINTER -from ctypes import byref -from ctypes import c_char_p -from ctypes import c_int -from ctypes import c_ubyte -from ctypes import c_uint64 -from ctypes import c_uint -from ctypes import c_void_p -from ctypes import memmove - -from .common import CachedProperty -from .common import LLVMObject -from .common import c_object_p -from .common import get_library - -__all__ = [ - 'DisassemblerByteArraySource', - 'DisassemblerFileSource', - 'DisassemblerSource', - 'Disassembler', - 'Instruction', - 'Operand', - 'Token', -] - -callbacks = {} - -class DisassemblerSource: - """Abstract base class for disassembler input. - - This defines the interface to which inputs to the disassembler must - conform. - - Basically, the disassembler input is a read-only sequence of a finite - length. - """ - __metaclass__ = ABCMeta - - @abstractmethod - def __len__(self): - """Returns the number of bytes that are available for input.""" - pass - - @abstractmethod - def get_byte(self, address): - """Returns the byte at the specified address.""" - pass - - @abstractmethod - def start_address(self): - """Returns the address at which to start fetch bytes, as a long.""" - pass - -class DisassemblerByteArraySource(DisassemblerSource): - """A disassembler source for byte arrays.""" - - def __init__(self, b): - self._array = b - - def __len__(self): - return len(self._array) - - def get_byte(self, address): - return self._array[address] - - def start_address(self): - return 0 - -class DisassemblerFileSource(DisassemblerSource): - """A disassembler source for file segments. - - This allows you to feed in segments of a file into a Disassembler. - """ - - def __init__(self, filename, start_offset, length=None, end_offset=None, - start_address=None): - """Create a new source from a file. - - A source begins at a specified byte offset and can be defined in terms - of byte length of the end byte offset. - """ - if length is None and end_offset is None: - raise Exception('One of length or end_offset must be defined.') - - self._start_address = start_address - if self._start_address is None: - self._start_address = 0 - - count = length - if length is None: - count = end_offset - start_offset - - with open(filename, 'rb') as fh: - fh.seek(start_offset) - - # FIXME handle case where read bytes != requested - self._buf = fh.read(count) - - def __len__(self): - return len(self._buf) - - def get_byte(self, address): - return self._buf[address - self._start_address] - - def start_address(self): - return self._start_address - -class Disassembler(LLVMObject): - """Interface to LLVM's enhanced disassembler. - - The API is slightly different from the C API in that we tightly couple a - disassembler instance to an input source. This saves an extra level of - abstraction and makes the Python implementation easier. - """ - - SYNTAX_X86_INTEL = 0 - SYNTAX_X86_ATT = 1 - SYNTAX_ARM_UAL = 2 - - def __init__(self, triple, source, syntax=0): - """Create a new disassembler instance. - - Arguments: - - triple -- str target type (e.g. x86_64-apple-darwin10) - source -- DisassemblerSource instance to be fed into this disassembler. - syntax -- The assembly syntax to use. One of the SYNTAX_* class - constants. e.g. EnhancedDisassembler.SYNTAX_X86_INTEL - """ - assert isinstance(source, DisassemblerSource) - - ptr = c_object_p() - result = lib.EDGetDisassembler(byref(ptr), c_char_p(triple), - c_int(syntax)) - if result != 0: - raise Exception('Non-0 return code.') - - LLVMObject.__init__(self, ptr) - - self._source = source - - def get_instructions(self): - """Obtain the instructions from the input. - - This is a generator for Instruction instances. - - By default, this will return instructions for the entire source which - has been defined. It does this by querying the source's start_address() - method and continues to request instructions until len(source) is - exhausted. - """ - - # We currently obtain 1 instruction at a time because it is easiest. - - # This serves as our EDByteReaderCallback. It is a proxy between C and - # the Python DisassemblerSource. - def byte_reader(dest, address, arg): - try: - byte = self._source.get_byte(address) - memmove(dest, byte, 1) - - return 0 - except: - return -1 - - address = self._source.start_address() - end_address = address + len(self._source) - cb = callbacks['byte_reader'](byte_reader) - while address < end_address: - ptr = c_object_p() - - result = lib.EDCreateInsts(byref(ptr), c_uint(1), self, cb, - address, c_void_p(None)) - - if result != 1: - raise Exception('Error obtaining instruction at address %d' % - address) - - instruction = Instruction(ptr, self) - yield instruction - - address += instruction.byte_size - - -class Instruction(LLVMObject): - """Represents an individual instruction. - - Instruction instances are obtained from Disassembler.get_instructions(). - """ - def __init__(self, ptr, disassembler): - """Create a new instruction. - - Instructions are created from within this module. You should have no - need to call this from outside this module. - """ - assert isinstance(ptr, c_object_p) - assert isinstance(disassembler, Disassembler) - - LLVMObject.__init__(self, ptr, disposer=lib.EDReleaseInst) - self._disassembler = disassembler - - def __str__(self): - s = c_char_p(None) - result = lib.EDGetInstString(byref(s), self) - if result != 0: - raise Exception('Non-0 return code.') - - return s.value - - @CachedProperty - def byte_size(self): - result = lib.EDInstByteSize(self) - if result == -1: - raise Exception('Error code returned.') - - return result - - @CachedProperty - def id(self): - i = c_uint() - result = lib.EDInstID(byref(i), self) - if result != 0: - raise Exception('Non-0 return code.') - - return i.value - - @CachedProperty - def is_branch(self): - result = lib.EDInstIsBranch(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def is_move(self): - result = lib.EDInstIsMove(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def branch_target_id(self): - result = lib.EDBranchTargetID(self) - if result == -1: - raise Exception('Error code returned.') - - return result - - @CachedProperty - def move_source_id(self): - result = lib.EDMoveSourceID(self) - if result == -1: - raise Exception('Error code returned.') - - return result - - def get_tokens(self): - """Obtain the tokens in this instruction. - - This is a generator for Token instances. - """ - count = lib.EDNumTokens(self) - if count == -1: - raise Exception('Error code returned.') - - for i in range(0, count): - ptr = c_object_p() - result = lib.EDGetToken(byref(ptr), self, c_int(i)) - if result != 0: - raise Exception('Non-0 return code.') - - yield Token(ptr, self) - - def get_operands(self): - """Obtain the operands in this instruction. - - This is a generator for Operand instances. - """ - count = lib.EDNumOperands(self) - if count == -1: - raise Exception('Error code returned.') - - for i in range(0, count): - ptr = c_object_p() - result = lib.EDGetOperand(byref(ptr), self, c_int(i)) - if result != 0: - raise Exception('Non-0 return code.') - - yield Operand(ptr, self) - -class Token(LLVMObject): - def __init__(self, ptr, instruction): - assert isinstance(ptr, c_object_p) - assert isinstance(instruction, Instruction) - - LLVMObject.__init__(self, ptr) - - self._instruction = instruction - - def __str__(self): - s = c_char_p(None) - result = lib.EDGetTokenString(byref(s), self) - if result != 0: - raise Exception('Non-0 return code.') - - return s.value - - @CachedProperty - def operand_index(self): - result = lib.EDOperandIndexForToken(self) - if result == -1: - raise Exception('Error code returned.') - - return result - - @CachedProperty - def is_whitespace(self): - result = lib.EDTokenIsWhitespace(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def is_punctuation(self): - result = lib.EDTokenIsPunctuation(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def is_opcode(self): - result = lib.EDTokenIsOpcode(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def is_literal(self): - result = lib.EDTokenIsLiteral(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def is_register(self): - result = lib.EDTokenIsRegister(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def is_negative_literal(self): - result = lib.EDTokenIsNegativeLiteral(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def absolute_value(self): - value = c_uint64() - result = lib.EDLiteralTokenAbsoluteValue(byref(value), self) - if result != 0: - raise Exception('Non-0 return code.') - - return value - - @CachedProperty - def register_value(self): - value = c_uint() - result = lib.EDRegisterTokenValue(byref(value), self) - if result != 0: - raise Exception('Non-0 return code.') - - return value - -class Operand(LLVMObject): - """Represents an operand in an instruction. - - FIXME support register evaluation. - """ - def __init__(self, ptr, instruction): - assert isinstance(ptr, c_object_p) - assert isinstance(instruction, Instruction) - - LLVMObject.__init__(self, ptr) - - self._instruction = instruction - - @CachedProperty - def is_register(self): - result = lib.EDOperandIsRegister(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def is_immediate(self): - result = lib.EDOperandIsImmediate(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def is_memory(self): - result = lib.EDOperandIsMemory(self) - if result == -1: - raise Exception('Error code returned.') - - return result > 0 - - @CachedProperty - def register_value(self): - value = c_uint() - result = lib.EDRegisterOperandValue(byref(value), self) - if result != 0: - raise Exception('Non-0 return code.') - - return value - - @CachedProperty - def immediate_value(self): - value = c_uint64() - result = lib.EDImmediateOperandValue(byref(value), self) - if result != 0: - raise Exception('Non-0 return code.') - - return value - -def register_library(library): - library.EDGetDisassembler.argtypes = [POINTER(c_object_p), c_char_p, c_int] - library.EDGetDisassembler.restype = c_int - - library.EDGetRegisterName.argtypes = [POINTER(c_char_p), Disassembler, - c_uint] - library.EDGetRegisterName.restype = c_int - - library.EDRegisterIsStackPointer.argtypes = [Disassembler, c_uint] - library.EDRegisterIsStackPointer.restype = c_int - - library.EDRegisterIsProgramCounter.argtypes = [Disassembler, c_uint] - library.EDRegisterIsProgramCounter.restype = c_int - - library.EDCreateInsts.argtypes = [POINTER(c_object_p), c_uint, - Disassembler, callbacks['byte_reader'], c_uint64, c_void_p] - library.EDCreateInsts.restype = c_uint - - library.EDReleaseInst.argtypes = [Instruction] - - library.EDInstByteSize.argtypes = [Instruction] - library.EDInstByteSize.restype = c_int - - library.EDGetInstString.argtypes = [POINTER(c_char_p), Instruction] - library.EDGetInstString.restype = c_int - - library.EDInstID.argtypes = [POINTER(c_uint), Instruction] - library.EDInstID.restype = c_int - - library.EDInstIsBranch.argtypes = [Instruction] - library.EDInstIsBranch.restype = c_int - - library.EDInstIsMove.argtypes = [Instruction] - library.EDInstIsMove.restype = c_int - - library.EDBranchTargetID.argtypes = [Instruction] - library.EDBranchTargetID.restype = c_int - - library.EDMoveSourceID.argtypes = [Instruction] - library.EDMoveSourceID.restype = c_int - - library.EDMoveTargetID.argtypes = [Instruction] - library.EDMoveTargetID.restype = c_int - - library.EDNumTokens.argtypes = [Instruction] - library.EDNumTokens.restype = c_int - - library.EDGetToken.argtypes = [POINTER(c_object_p), Instruction, c_int] - library.EDGetToken.restype = c_int - - library.EDGetTokenString.argtypes = [POINTER(c_char_p), Token] - library.EDGetTokenString.restype = c_int - - library.EDOperandIndexForToken.argtypes = [Token] - library.EDOperandIndexForToken.restype = c_int - - library.EDTokenIsWhitespace.argtypes = [Token] - library.EDTokenIsWhitespace.restype = c_int - - library.EDTokenIsPunctuation.argtypes = [Token] - library.EDTokenIsPunctuation.restype = c_int - - library.EDTokenIsOpcode.argtypes = [Token] - library.EDTokenIsOpcode.restype = c_int - - library.EDTokenIsLiteral.argtypes = [Token] - library.EDTokenIsLiteral.restype = c_int - - library.EDTokenIsRegister.argtypes = [Token] - library.EDTokenIsRegister.restype = c_int - - library.EDTokenIsNegativeLiteral.argtypes = [Token] - library.EDTokenIsNegativeLiteral.restype = c_int - - library.EDLiteralTokenAbsoluteValue.argtypes = [POINTER(c_uint64), Token] - library.EDLiteralTokenAbsoluteValue.restype = c_int - - library.EDRegisterTokenValue.argtypes = [POINTER(c_uint), Token] - library.EDRegisterTokenValue.restype = c_int - - library.EDNumOperands.argtypes = [Instruction] - library.EDNumOperands.restype = c_int - - library.EDGetOperand.argtypes = [POINTER(c_object_p), Instruction, c_int] - library.EDGetOperand.restype = c_int - - library.EDOperandIsRegister.argtypes = [Operand] - library.EDOperandIsRegister.restype = c_int - - library.EDOperandIsImmediate.argtypes = [Operand] - library.EDOperandIsImmediate.restype = c_int - - library.EDOperandIsMemory.argtypes = [Operand] - library.EDOperandIsMemory.restype = c_int - - library.EDRegisterOperandValue.argtypes = [POINTER(c_uint), Operand] - library.EDRegisterOperandValue.restype = c_int - - library.EDImmediateOperandValue.argtypes = [POINTER(c_uint64), Operand] - library.EDImmediateOperandValue.restype = c_int - - library.EDEvaluateOperand.argtypes = [c_uint64, Operand, - callbacks['register_reader'], c_void_p] - library.EDEvaluateOperand.restype = c_int - -# Enhanced disassembler. -callbacks['byte_reader'] = CFUNCTYPE(c_int, POINTER(c_ubyte), c_uint64, - c_void_p) -callbacks['register_reader'] = CFUNCTYPE(c_int, POINTER(c_uint64), c_uint, - c_void_p) - -lib = get_library() -register_library(lib) diff --git a/bindings/python/llvm/tests/test_disassembler.py b/bindings/python/llvm/tests/test_disassembler.py deleted file mode 100644 index 6eb11a2..0000000 --- a/bindings/python/llvm/tests/test_disassembler.py +++ /dev/null @@ -1,62 +0,0 @@ -from unittest import expectedFailure -from unittest import skip - -from .base import TestBase -from ..disassembler import DisassemblerByteArraySource -from ..disassembler import DisassemblerFileSource -from ..disassembler import Disassembler -from ..object import ObjectFile - -class TestDisassembler(TestBase): - def test_simple(self): - sequence = '\x67\xe3\x81' # jcxz -127 - triple = 'i686-apple-darwin9' - - source = DisassemblerByteArraySource(sequence) - - disassembler = Disassembler(triple, source) - instructions = list(disassembler.get_instructions()) - - self.assertEqual(len(instructions), 1) - - i = instructions[0] - self.assertEqual(str(i), '\tjcxz\t-127\n') - self.assertEqual(i.byte_size, 3) - self.assertEqual(i.id, 1032) - self.assertTrue(i.is_branch) - self.assertFalse(i.is_move) - self.assertEqual(i.branch_target_id, 0) - - tokens = list(i.get_tokens()) - self.assertEqual(len(tokens), 4) - token = tokens[0] - self.assertEqual(str(token), 'jcxz') - self.assertFalse(token.is_whitespace) - self.assertFalse(token.is_punctuation) - self.assertTrue(token.is_opcode) - self.assertFalse(token.is_literal) - self.assertFalse(token.is_register) - - self.assertTrue(tokens[1].is_whitespace) - - operands = list(i.get_operands()) - self.assertEqual(len(operands), 1) - - # TODO implement operand tests - - @skip('This test is horribly broken and probably not even correct.') - def test_read_instructions(self): - filename = self.get_test_binary() - o = ObjectFile(filename=filename) - - for symbol in o.get_symbols(): - address = symbol.address - offset = symbol.file_offset - size = symbol.size - - source = DisassemblerFileSource(filename, offset, length=size, - start_address=address) - - disassembler = Disassembler('x86-generic-gnu-linux', source) - for instruction in disassembler.get_instructions(): - print instruction |