diff options
| -rw-r--r-- | cmake/modules/LLVMLibDeps.cmake | 14 | ||||
| -rw-r--r-- | lib/Target/X86/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | lib/Target/X86/Disassembler/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.cpp | 438 | ||||
| -rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.h | 150 | ||||
| -rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 1361 | ||||
| -rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 515 | ||||
| -rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 354 | ||||
| -rw-r--r-- | lib/Target/X86/Makefile | 4 | ||||
| -rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 4 | ||||
| -rw-r--r-- | utils/TableGen/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | utils/TableGen/DisassemblerEmitter.cpp | 99 | ||||
| -rw-r--r-- | utils/TableGen/X86DisassemblerShared.h | 37 | ||||
| -rw-r--r-- | utils/TableGen/X86DisassemblerTables.cpp | 603 | ||||
| -rw-r--r-- | utils/TableGen/X86DisassemblerTables.h | 291 | ||||
| -rw-r--r-- | utils/TableGen/X86ModRMFilters.h | 197 | ||||
| -rw-r--r-- | utils/TableGen/X86RecognizableInstr.cpp | 959 | ||||
| -rw-r--r-- | utils/TableGen/X86RecognizableInstr.h | 237 | 
18 files changed, 5255 insertions, 12 deletions
| diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake index 6a35354..4000365 100644 --- a/cmake/modules/LLVMLibDeps.cmake +++ b/cmake/modules/LLVMLibDeps.cmake @@ -2,7 +2,7 @@ set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMInfo LLVMMC)  set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMARMInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport) -set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaCodeGen LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport)  set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget) @@ -11,12 +11,12 @@ set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport LLVMSystem)  set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem)  set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)  set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport)  set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMScalarOpts 
LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)  set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMSupport) -set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)  set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport)  set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) @@ -31,7 +31,7 @@ set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVM  set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem)  set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)  set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport) -set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430CodeGen LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport)  set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget) @@ -40,17 +40,17 @@ set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport)  set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget)  
set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport) -set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport)  set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)  set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)  set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport)  set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem)  set(MSVC_LIB_DEPS_LLVMSystem ) -set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZInfo LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZCodeGen LLVMSystemZInfo LLVMTarget)  set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)  set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport)  set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem) diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 3ad65fb..4186fec 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -3,6 +3,7 @@ 
set(LLVM_TARGET_DEFINITIONS X86.td)  tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)  tablegen(X86GenRegisterNames.inc -gen-register-enums)  tablegen(X86GenRegisterInfo.inc -gen-register-desc) +tablegen(X86GenDisassemblerTables.inc -gen-disassembler)  tablegen(X86GenInstrNames.inc -gen-instr-enums)  tablegen(X86GenInstrInfo.inc -gen-instr-desc)  tablegen(X86GenAsmWriter.inc -gen-asm-writer) diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index b329e89..2a83a9c 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -2,5 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/  add_llvm_library(LLVMX86Disassembler    X86Disassembler.cpp +  X86DisassemblerDecoder.c    )  add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 2ebbc9b..99617e7 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -6,18 +6,450 @@  // License. See LICENSE.TXT for details.  //  //===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. +// It contains code to translate the data produced by the decoder into +//  MCInsts. +// Documentation for the disassembler can be found in X86Disassembler.h. 
+// +//===----------------------------------------------------------------------===// +#include "X86Disassembler.h" +#include "X86DisassemblerDecoder.h" +#include "X86InstrInfo.h" + +#include "llvm/MC/MCDisassembler.h"  #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h"  #include "llvm/Target/TargetRegistry.h" -#include "X86.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h"  using namespace llvm; +using namespace llvm::X86Disassembler; + +namespace llvm {   +   +// Fill-ins to make the compiler happy.  These constants are never actually +//   assigned; they are just filler to make an automatically-generated switch +//   statement work. +namespace X86 { +  enum { +    BX_SI = 500, +    BX_DI = 501, +    BP_SI = 502, +    BP_DI = 503, +    sib   = 504, +    sib64 = 505 +  }; +} + +} + +static void translateInstruction(MCInst &target, +                                 InternalInstruction &source); + +X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) : +    MCDisassembler(), +    fMode(mode) { +} + +X86GenericDisassembler::~X86GenericDisassembler() { +} + +/// regionReader - a callback function that wraps the readByte method from +///   MemoryObject. +/// +/// @param arg      - The generic callback parameter.  In this case, this should +///                   be a pointer to a MemoryObject. +/// @param byte     - A pointer to the byte to be read. +/// @param address  - The address to be read. +static int regionReader(void* arg, uint8_t* byte, uint64_t address) { +  MemoryObject* region = static_cast<MemoryObject*>(arg); +  return region->readByte(address, byte); +} + +/// logger - a callback function that wraps the operator<< method from +///   raw_ostream. +/// +/// @param arg      - The generic callback parameter.  This should be a pointe +///                   to a raw_ostream. +/// @param log      - A string to be logged.  logger() adds a newline. 
+static void logger(void* arg, const char* log) { +  if (!arg) +    return; +   +  raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); +  vStream << log << "\n"; +}   +   +// +// Public interface for the disassembler +// + +bool X86GenericDisassembler::getInstruction(MCInst &instr, +                                            uint64_t &size, +                                            const MemoryObject ®ion, +                                            uint64_t address, +                                            raw_ostream &vStream) const { +  InternalInstruction internalInstr; +   +  int ret = decodeInstruction(&internalInstr, +                              regionReader, +                              (void*)®ion, +                              logger, +                              (void*)&vStream, +                              address, +                              fMode); + +  if(ret) { +    size = internalInstr.readerCursor - address; +    return false; +  } +  else { +    size = internalInstr.length; +    translateInstruction(instr, internalInstr); +    return true; +  } +} + +// +// Private code that translates from struct InternalInstructions to MCInsts. +// + +/// translateRegister - Translates an internal register to the appropriate LLVM +///   register, and appends it as an operand to an MCInst. +/// +/// @param mcInst     - The MCInst to append to. +/// @param reg        - The Reg to append. +static void translateRegister(MCInst &mcInst, Reg reg) { +#define ENTRY(x) X86::x, +  uint8_t llvmRegnums[] = { +    ALL_REGS +    0 +  }; +#undef ENTRY + +  uint8_t llvmRegnum = llvmRegnums[reg]; +  mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); +} + +/// translateImmediate  - Appends an immediate operand to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param immediate    - The immediate value to append. 
+static void translateImmediate(MCInst &mcInst, uint64_t immediate) { +  mcInst.addOperand(MCOperand::CreateImm(immediate)); +} + +/// translateRMRegister - Translates a register stored in the R/M field of the +///   ModR/M byte to its LLVM equivalent and appends it to an MCInst. +/// @param mcInst       - The MCInst to append to. +/// @param insn         - The internal instruction to extract the R/M field +///                       from. +static void translateRMRegister(MCInst &mcInst, +                                InternalInstruction &insn) { +  assert(insn.eaBase != EA_BASE_sib && insn.eaBase != EA_BASE_sib64 &&  +         "A R/M register operand may not have a SIB byte"); +   +  switch (insn.eaBase) { +  case EA_BASE_NONE: +    llvm_unreachable("EA_BASE_NONE for ModR/M base"); +    break; +#define ENTRY(x) case EA_BASE_##x: +  ALL_EA_BASES +#undef ENTRY +    llvm_unreachable("A R/M register operand may not have a base; " +                     "the operand must be a register."); +    break; +#define ENTRY(x)                                                        \ +  case EA_REG_##x:                                                    \ +    mcInst.addOperand(MCOperand::CreateReg(X86::x)); break; +  ALL_REGS +#undef ENTRY +  default: +    llvm_unreachable("Unexpected EA base register"); +  } +} + +/// translateRMMemory - Translates a memory operand stored in the Mod and R/M +///   fields of an internal instruction (and possibly its SIB byte) to a memory +///   operand in LLVM's format, and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param insn         - The instruction to extract Mod, R/M, and SIB fields +///                       from. +static void translateRMMemory(MCInst &mcInst, +                              InternalInstruction &insn) { +  // Addresses in an MCInst are represented as five operands: +  //   1. 
basereg       (register)  The R/M base, or (if there is a SIB) the  +  //                                SIB base +  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified  +  //                                scale amount +  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB) +  //                                the index (which is multiplied by the  +  //                                scale amount) +  //   4. displacement  (immediate) 0, or the displacement if there is one +  //   5. segmentreg    (register)  x86_registerNONE for now, but could be set +  //                                if we have segment overrides +   +  MCOperand baseReg; +  MCOperand scaleAmount; +  MCOperand indexReg; +  MCOperand displacement; +  MCOperand segmentReg; +   +  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { +    if (insn.sibBase != SIB_BASE_NONE) { +      switch (insn.sibBase) { +      default: +        llvm_unreachable("Unexpected sibBase"); +#define ENTRY(x)                                          \ +      case SIB_BASE_##x:                                \ +        baseReg = MCOperand::CreateReg(X86::x); break; +      ALL_SIB_BASES +#undef ENTRY +      } +    } else { +      baseReg = MCOperand::CreateReg(0); +    } +     +    if (insn.sibIndex != SIB_INDEX_NONE) { +      switch (insn.sibIndex) { +      default: +        llvm_unreachable("Unexpected sibIndex"); +#define ENTRY(x)                                            \ +      case SIB_INDEX_##x:                                 \ +        indexReg = MCOperand::CreateReg(X86::x); break; +      EA_BASES_32BIT +      EA_BASES_64BIT +#undef ENTRY +      } +    } else { +      indexReg = MCOperand::CreateReg(0); +    } +     +    scaleAmount = MCOperand::CreateImm(insn.sibScale); +  } else { +    switch (insn.eaBase) { +    case EA_BASE_NONE: +      assert(insn.eaDisplacement != EA_DISP_NONE &&  +             "EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); +     
  +      if (insn.mode == MODE_64BIT) +        baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 +      else +        baseReg = MCOperand::CreateReg(0); +       +      indexReg = MCOperand::CreateReg(0); +      break; +    case EA_BASE_BX_SI: +      baseReg = MCOperand::CreateReg(X86::BX); +      indexReg = MCOperand::CreateReg(X86::SI); +      break; +    case EA_BASE_BX_DI: +      baseReg = MCOperand::CreateReg(X86::BX); +      indexReg = MCOperand::CreateReg(X86::DI); +      break; +    case EA_BASE_BP_SI: +      baseReg = MCOperand::CreateReg(X86::BP); +      indexReg = MCOperand::CreateReg(X86::SI); +      break; +    case EA_BASE_BP_DI: +      baseReg = MCOperand::CreateReg(X86::BP); +      indexReg = MCOperand::CreateReg(X86::DI); +      break; +    default: +      indexReg = MCOperand::CreateReg(0); +      switch (insn.eaBase) { +      default: +        llvm_unreachable("Unexpected eaBase"); +        break; +        // Here, we will use the fill-ins defined above.  However, +        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and +        //   sib and sib64 were handled in the top-level if, so they're only +        //   placeholders to keep the compiler happy. 
+#define ENTRY(x)                                        \ +      case EA_BASE_##x:                                 \ +        baseReg = MCOperand::CreateReg(X86::x); break;  +      ALL_EA_BASES +#undef ENTRY +#define ENTRY(x) case EA_REG_##x: +      ALL_REGS +#undef ENTRY +        llvm_unreachable("A R/M memory operand may not be a register; " +                         "the base field must be a base."); +            break; +      } +    } +  } +   +  displacement = MCOperand::CreateImm(insn.displacement); +   +  static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { +    0,        // SEG_OVERRIDE_NONE +    X86::CS, +    X86::SS, +    X86::DS, +    X86::ES, +    X86::FS, +    X86::GS +  }; +   +  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); +   +  mcInst.addOperand(baseReg); +  mcInst.addOperand(scaleAmount); +  mcInst.addOperand(indexReg); +  mcInst.addOperand(displacement); +  mcInst.addOperand(segmentReg); +} + +/// translateRM - Translates an operand stored in the R/M (and possibly SIB) +///   byte of an instruction to LLVM form, and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param operand      - The operand, as stored in the descriptor table. +/// @param insn         - The instruction to extract Mod, R/M, and SIB fields +///                       from. 
+static void translateRM(MCInst &mcInst, +                        OperandSpecifier &operand, +                        InternalInstruction &insn) { +  switch (operand.type) { +  default: +    llvm_unreachable("Unexpected type for a R/M operand"); +  case TYPE_R8: +  case TYPE_R16: +  case TYPE_R32: +  case TYPE_R64: +  case TYPE_Rv: +  case TYPE_MM: +  case TYPE_MM32: +  case TYPE_MM64: +  case TYPE_XMM: +  case TYPE_XMM32: +  case TYPE_XMM64: +  case TYPE_XMM128: +  case TYPE_DEBUGREG: +  case TYPE_CR32: +  case TYPE_CR64: +    translateRMRegister(mcInst, insn); +    break; +  case TYPE_M: +  case TYPE_M8: +  case TYPE_M16: +  case TYPE_M32: +  case TYPE_M64: +  case TYPE_M128: +  case TYPE_M512: +  case TYPE_Mv: +  case TYPE_M32FP: +  case TYPE_M64FP: +  case TYPE_M80FP: +  case TYPE_M16INT: +  case TYPE_M32INT: +  case TYPE_M64INT: +  case TYPE_M1616: +  case TYPE_M1632: +  case TYPE_M1664: +    translateRMMemory(mcInst, insn); +    break; +  } +} +   +/// translateFPRegister - Translates a stack position on the FPU stack to its +///   LLVM form, and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param stackPos     - The stack position to translate. +static void translateFPRegister(MCInst &mcInst, +                                uint8_t stackPos) { +  assert(stackPos < 8 && "Invalid FP stack position"); +   +  mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); +} + +/// translateOperand - Translates an operand stored in an internal instruction  +///   to LLVM's format and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param operand      - The operand, as stored in the descriptor table. +/// @param insn         - The internal instruction. 
+static void translateOperand(MCInst &mcInst, +                             OperandSpecifier &operand, +                             InternalInstruction &insn) { +  switch (operand.encoding) { +  default: +    llvm_unreachable("Unhandled operand encoding during translation"); +  case ENCODING_REG: +    translateRegister(mcInst, insn.reg); +    break; +  case ENCODING_RM: +    translateRM(mcInst, operand, insn); +    break; +  case ENCODING_CB: +  case ENCODING_CW: +  case ENCODING_CD: +  case ENCODING_CP: +  case ENCODING_CO: +  case ENCODING_CT: +    llvm_unreachable("Translation of code offsets isn't supported."); +  case ENCODING_IB: +  case ENCODING_IW: +  case ENCODING_ID: +  case ENCODING_IO: +  case ENCODING_Iv: +  case ENCODING_Ia: +    translateImmediate(mcInst,  +                       insn.immediates[insn.numImmediatesTranslated++]); +    break; +  case ENCODING_RB: +  case ENCODING_RW: +  case ENCODING_RD: +  case ENCODING_RO: +    translateRegister(mcInst, insn.opcodeRegister); +    break; +  case ENCODING_I: +    translateFPRegister(mcInst, insn.opcodeModifier); +    break; +  case ENCODING_Rv: +    translateRegister(mcInst, insn.opcodeRegister); +    break; +  case ENCODING_DUP: +    translateOperand(mcInst, +                     insn.spec->operands[operand.type - TYPE_DUP0], +                     insn); +    break; +  } +} +   +/// translateInstruction - Translates an internal instruction and all its +///   operands to an MCInst. +/// +/// @param mcInst       - The MCInst to populate with the instruction's data. +/// @param insn         - The internal instruction. 
+static void translateInstruction(MCInst &mcInst, +                                 InternalInstruction &insn) {   +  assert(insn.spec); +   +  mcInst.setOpcode(insn.instructionID); +   +  int index; +   +  insn.numImmediatesTranslated = 0; +   +  for (index = 0; index < X86_MAX_OPERANDS; ++index) { +    if (insn.spec->operands[index].encoding != ENCODING_NONE)                 +      translateOperand(mcInst, insn.spec->operands[index], insn); +  } +}  static const MCDisassembler *createX86_32Disassembler(const Target &T) { -  return 0; +  return new X86Disassembler::X86_32Disassembler;  }  static const MCDisassembler *createX86_64Disassembler(const Target &T) { -  return 0;  +  return new X86Disassembler::X86_64Disassembler;  }  extern "C" void LLVMInitializeX86Disassembler() {  diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h new file mode 100644 index 0000000..0e6e0b0 --- /dev/null +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -0,0 +1,150 @@ +//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and +// 64-bit X86 instruction sets.  The main decode sequence for an assembly +// instruction in this disassembler is: +// +// 1. Read the prefix bytes and determine the attributes of the instruction. +//    These attributes, recorded in enum attributeBits +//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM +//    provides a mapping from bitmasks to contexts, which are represented by +//    enum InstructionContext (ibid.). +// +// 2. Read the opcode, and determine what kind of opcode it is.  
The +//    disassembler distinguishes four kinds of opcodes, which are enumerated in +//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte +//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a  +//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context. +// +// 3. Depending on the opcode type, look in one of four ClassDecision structures +//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which +//    OpcodeDecision (ibid.) to look the opcode in.  Look up the opcode, to get +//    a ModRMDecision (ibid.). +// +// 4. Some instructions, such as escape opcodes or extended opcodes, or even +//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the +//    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from +//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the +//    ModR/M byte is required and how to interpret it. +// +// 5. After resolving the ModRMDecision, the disassembler has a unique ID +//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in +//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and +//    meanings of its operands. +// +// 6. For each operand, its encoding is an entry from OperandEncoding +//    (X86DisassemblerDecoderCommon.h) and its type is an entry from +//    OperandType (ibid.).  The encoding indicates how to read it from the +//    instruction; the type indicates how to interpret the value once it has +//    been read.  For example, a register operand could be stored in the R/M +//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to +//    the main opcode.  This is orthogonal from its meaning (an GPR or an XMM +//    register, for instance).  Given this information, the operands can be +//    extracted and interpreted. +// +// 7. 
As the last step, the disassembler translates the instruction information +//    and operands into a format understandable by the client - in this case, an +//    MCInst for use by the MC infrastructure. +// +// The disassembler is broken broadly into two parts: the table emitter that +// emits the instruction decode tables discussed above during compilation, and +// the disassembler itself.  The table emitter is documented in more detail in +// utils/TableGen/X86DisassemblerEmitter.h. +// +// X86Disassembler.h contains the public interface for the disassembler, +//   adhering to the MCDisassembler interface. +// X86Disassembler.cpp contains the code responsible for step 7, and for +//   invoking the decoder to execute steps 1-6. +// X86DisassemblerDecoderCommon.h contains the definitions needed by both the +//   table emitter and the disassembler. +// X86DisassemblerDecoder.h contains the public interface of the decoder, +//   factored out into C for possible use by other projects. +// X86DisassemblerDecoder.c contains the source code of the decoder, which is +//   responsible for steps 1-6. +// +//===----------------------------------------------------------------------===// + +#ifndef X86DISASSEMBLER_H +#define X86DISASSEMBLER_H + +#define INSTRUCTION_SPECIFIER_FIELDS  \ +  const char*             name; + +#define INSTRUCTION_IDS               \ +  InstrUID*  instructionIDs; + +#include "X86DisassemblerDecoderCommon.h" + +#undef INSTRUCTION_SPECIFIER_FIELDS +#undef INSTRUCTION_IDS + +#include "llvm/MC/MCDisassembler.h" + +struct InternalInstruction; + +namespace llvm { +   +class MCInst; +class MemoryObject; +class raw_ostream; +   +namespace X86Disassembler { + +/// X86GenericDisassembler - Generic disassembler for all X86 platforms. +///   All each platform class should have to do is subclass the constructor, and +///   provide a different disassemblerMode value. 
+class X86GenericDisassembler : public MCDisassembler { +protected: +  /// Constructor     - Initializes the disassembler. +  /// +  /// @param mode     - The X86 architecture mode to decode for. +  X86GenericDisassembler(DisassemblerMode mode); +public: +  ~X86GenericDisassembler(); + +  /// getInstruction - See MCDisassembler. +  bool getInstruction(MCInst &instr, +                      uint64_t &size, +                      const MemoryObject &region, +                      uint64_t address, +                      raw_ostream &vStream) const; +private: +  DisassemblerMode              fMode; +}; + +/// X86_16Disassembler - 16-bit X86 disassembler. +class X86_16Disassembler : public X86GenericDisassembler { +public: +  X86_16Disassembler() : +    X86GenericDisassembler(MODE_16BIT) { +  } +};   + +/// X86_32Disassembler - 32-bit X86 disassembler. +class X86_32Disassembler : public X86GenericDisassembler { +public: +  X86_32Disassembler() : +    X86GenericDisassembler(MODE_32BIT) { +  } +}; + +/// X86_64Disassembler - 64-bit X86 disassembler. +class X86_64Disassembler : public X86GenericDisassembler { +public: +  X86_64Disassembler() : +    X86GenericDisassembler(MODE_64BIT) { +  } +}; + +} // namespace X86Disassembler +   +} // namespace llvm +   +#endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c new file mode 100644 index 0000000..99ae9cd --- /dev/null +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -0,0 +1,1361 @@ +/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==* + * + *                     The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file is part of the X86 Disassembler. + * It contains the implementation of the instruction decoder. 
+ * Documentation for the disassembler can be found in X86Disassembler.h. + * + *===----------------------------------------------------------------------===*/ + +#include <assert.h>   /* for assert()     */ +#include <stdarg.h>   /* for va_*()       */ +#include <stdio.h>    /* for vsnprintf()  */ +#include <stdlib.h>   /* for exit()       */ +#include <string.h>   /* for bzero()      */ + +#include "X86DisassemblerDecoder.h" + +#include "X86GenDisassemblerTables.inc" + +#define TRUE  1 +#define FALSE 0 + +#ifdef __GNUC__ +#define NORETURN __attribute__((noreturn)) +#else +#define NORETURN +#endif + +#define unreachable(s)                                      \ +  do {                                                      \ +    fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s);  \ +    exit(-1);                                               \ +  } while (0); + +/* + * contextForAttrs - Client for the instruction context table.  Takes a set of + *   attributes and returns the appropriate decode context. + * + * @param attrMask  - Attributes, from the enumeration attributeBits. + * @return          - The InstructionContext to use when looking up an + *                    an instruction with these attributes. + */ +static inline InstructionContext contextForAttrs(uint8_t attrMask) { +  return CONTEXTS_SYM[attrMask]; +} + +/* + * modRMRequired - Reads the appropriate instruction table to determine whether + *   the ModR/M byte is required to decode a particular instruction. + * + * @param type        - The opcode type (i.e., how many bytes it has). + * @param insnContext - The context for the instruction, as returned by + *                      contextForAttrs. + * @param opcode      - The last byte of the instruction's opcode, not counting + *                      ModR/M extensions and escapes. + * @return            - TRUE if the ModR/M byte is required, FALSE otherwise. 
+ */ +static inline int modRMRequired(OpcodeType type, +                                InstructionContext insnContext, +                                uint8_t opcode) { +  const struct ContextDecision* decision; +   +  switch (type) { +  case ONEBYTE: +    decision = &ONEBYTE_SYM; +    break; +  case TWOBYTE: +    decision = &TWOBYTE_SYM; +    break; +  case THREEBYTE_38: +    decision = &THREEBYTE38_SYM; +    break; +  case THREEBYTE_3A: +    decision = &THREEBYTE3A_SYM; +    break; +  } +   +  return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. +    modrm_type != MODRM_ONEENTRY; +   +  unreachable("Unknown opcode type"); +  return 0; +} + +/* + * decode - Reads the appropriate instruction table to obtain the unique ID of + *   an instruction. + * + * @param type        - See modRMRequired(). + * @param insnContext - See modRMRequired(). + * @param opcode      - See modRMRequired(). + * @param modRM       - The ModR/M byte if required, or any value if not. + */ +static inline InstrUID decode(OpcodeType type, +                               InstructionContext insnContext, +                               uint8_t opcode, +                               uint8_t modRM) { +  struct ModRMDecision* dec; +   +  switch (type) { +  default: +    unreachable("Unknown opcode type"); +  case ONEBYTE: +    dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case TWOBYTE: +    dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case THREEBYTE_38: +    dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case THREEBYTE_3A: +    dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  } +   +  switch (dec->modrm_type) { +  default: +    unreachable("Corrupt table!  
Unknown modrm_type"); +  case MODRM_ONEENTRY: +    return dec->instructionIDs[0]; +  case MODRM_SPLITRM: +    if (modFromModRM(modRM) == 0x3) +      return dec->instructionIDs[1]; +    else +      return dec->instructionIDs[0]; +  case MODRM_FULL: +    return dec->instructionIDs[modRM]; +  } +   +  return 0; +} + +/* + * specifierForUID - Given a UID, returns the name and operand specification for + *   that instruction. + * + * @param uid - The unique ID for the instruction.  This should be returned by + *              decode(); specifierForUID will not check bounds. + * @return    - A pointer to the specification for that instruction. + */ +static inline struct InstructionSpecifier* specifierForUID(InstrUID uid) { +  return &INSTRUCTIONS_SYM[uid]; +} + +/* + * consumeByte - Uses the reader function provided by the user to consume one + *   byte from the instruction's memory and advance the cursor. + * + * @param insn  - The instruction with the reader function to use.  The cursor + *                for this instruction is advanced. + * @param byte  - A pointer to a pre-allocated memory buffer to be populated + *                with the data read. + * @return      - 0 if the read was successful; nonzero otherwise. + */ +static inline int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { +  int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); +   +  if (!ret) +    ++(insn->readerCursor); +   +  return ret; +} + +/* + * lookAtByte - Like consumeByte, but does not advance the cursor. + * + * @param insn  - See consumeByte(). + * @param byte  - See consumeByte(). + * @return      - See consumeByte(). 
+ */ +static inline int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { +  return insn->reader(insn->readerArg, byte, insn->readerCursor); +} + +static inline void unconsumeByte(struct InternalInstruction* insn) { +  insn->readerCursor--; +} + +#define CONSUME_FUNC(name, type)                                          \ +  static inline int name(struct InternalInstruction* insn, type* ptr) {   \ +    type combined = 0;                                                    \ +    unsigned offset;                                                      \ +    for (offset = 0; offset < sizeof(type); ++offset) {                   \ +      uint8_t byte;                                                       \ +      int ret = insn->reader(insn->readerArg,                             \ +                             &byte,                                       \ +                             insn->readerCursor + offset);                \ +      if (ret)                                                            \ +        return ret;                                                       \ +      combined = combined | ((type)byte << ((type)offset * 8));           \ +    }                                                                     \ +    *ptr = combined;                                                      \ +    insn->readerCursor += sizeof(type);                                   \ +    return 0;                                                             \ +  } + +/* + * consume* - Use the reader function provided by the user to consume data + *   values of various sizes from the instruction's memory and advance the + *   cursor appropriately.  These readers perform endian conversion. + * + * @param insn    - See consumeByte(). + * @param ptr     - A pointer to a pre-allocated memory of appropriate size to + *                  be populated with the data read. + * @return        - See consumeByte(). 
+ */ +CONSUME_FUNC(consumeInt8, int8_t) +CONSUME_FUNC(consumeInt16, int16_t) +CONSUME_FUNC(consumeInt32, int32_t) +CONSUME_FUNC(consumeUInt16, uint16_t) +CONSUME_FUNC(consumeUInt32, uint32_t) +CONSUME_FUNC(consumeUInt64, uint64_t) + +/* + * dprintf - Uses the logging function provided by the user to log a single + *   message, typically without a carriage-return. + * + * @param insn    - The instruction containing the logging function. + * @param format  - See printf(). + * @param ...     - See printf(). + */ +static inline void dprintf(struct InternalInstruction* insn, +                           const char* format, +                           ...) {   +  char buffer[256]; +  va_list ap; +   +  if (!insn->dlog) +    return; +     +  va_start(ap, format); +  (void)vsnprintf(buffer, sizeof(buffer), format, ap); +  va_end(ap); +   +  insn->dlog(insn->dlogArg, buffer); +   +  return; +} + +/* + * setPrefixPresent - Marks that a particular prefix is present at a particular + *   location. + * + * @param insn      - The instruction to be marked as having the prefix. + * @param prefix    - The prefix that is present. + * @param location  - The location where the prefix is located (in the address + *                    space of the instruction's reader). + */ +static inline void setPrefixPresent(struct InternalInstruction* insn, +                                    uint8_t prefix, +                                    uint64_t location) +{ +  insn->prefixPresent[prefix] = 1; +  insn->prefixLocations[prefix] = location; +} + +/* + * isPrefixAtLocation - Queries an instruction to determine whether a prefix is + *   present at a given location. + * + * @param insn      - The instruction to be queried. + * @param prefix    - The prefix. + * @param location  - The location to query. + * @return          - Whether the prefix is at that location. 
+ */ +static inline BOOL isPrefixAtLocation(struct InternalInstruction* insn, +                                      uint8_t prefix, +                                      uint64_t location) +{ +  if (insn->prefixPresent[prefix] == 1 && +     insn->prefixLocations[prefix] == location) +    return TRUE; +  else +    return FALSE; +} + +/* + * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the + *   instruction as having them.  Also sets the instruction's default operand, + *   address, and other relevant data sizes to report operands correctly. + * + * @param insn  - The instruction whose prefixes are to be read. + * @return      - 0 if the instruction could be read until the end of the prefix + *                bytes, and no prefixes conflicted; nonzero otherwise. + */ +static int readPrefixes(struct InternalInstruction* insn) { +  BOOL isPrefix = TRUE; +  BOOL prefixGroups[4] = { FALSE }; +  uint64_t prefixLocation; +  uint8_t byte; +   +  BOOL hasAdSize = FALSE; +  BOOL hasOpSize = FALSE; +   +  dprintf(insn, "readPrefixes()"); +     +  while (isPrefix) { +    prefixLocation = insn->readerCursor; +     +    if (consumeByte(insn, &byte)) +      return -1; +     +    switch (byte) { +    case 0xf0:  /* LOCK */ +    case 0xf2:  /* REPNE/REPNZ */ +    case 0xf3:  /* REP or REPE/REPZ */ +      if (prefixGroups[0]) +        dprintf(insn, "Redundant Group 1 prefix"); +      prefixGroups[0] = TRUE; +      setPrefixPresent(insn, byte, prefixLocation); +      break; +    case 0x2e:  /* CS segment override -OR- Branch not taken */ +    case 0x36:  /* SS segment override -OR- Branch taken */ +    case 0x3e:  /* DS segment override */ +    case 0x26:  /* ES segment override */ +    case 0x64:  /* FS segment override */ +    case 0x65:  /* GS segment override */ +      switch (byte) { +      case 0x2e: +        insn->segmentOverride = SEG_OVERRIDE_CS; +        break; +      case 0x36: +        insn->segmentOverride = SEG_OVERRIDE_SS; +        break; +  
    case 0x3e: +        insn->segmentOverride = SEG_OVERRIDE_DS; +        break; +      case 0x26: +        insn->segmentOverride = SEG_OVERRIDE_ES; +        break; +      case 0x64: +        insn->segmentOverride = SEG_OVERRIDE_FS; +        break; +      case 0x65: +        insn->segmentOverride = SEG_OVERRIDE_GS; +        break; +      default: +        unreachable("Unhandled override"); +      } +      if (prefixGroups[1]) +        dprintf(insn, "Redundant Group 2 prefix"); +      prefixGroups[1] = TRUE; +      setPrefixPresent(insn, byte, prefixLocation); +      break; +    case 0x66:  /* Operand-size override */ +      if (prefixGroups[2]) +        dprintf(insn, "Redundant Group 3 prefix"); +      prefixGroups[2] = TRUE; +      hasOpSize = TRUE; +      setPrefixPresent(insn, byte, prefixLocation); +      break; +    case 0x67:  /* Address-size override */ +      if (prefixGroups[3]) +        dprintf(insn, "Redundant Group 4 prefix"); +      prefixGroups[3] = TRUE; +      hasAdSize = TRUE; +      setPrefixPresent(insn, byte, prefixLocation); +      break; +    default:    /* Not a prefix byte */ +      isPrefix = FALSE; +      break; +    } +     +    if (isPrefix) +      dprintf(insn, "Found prefix 0x%hhx", byte); +  } +   +  if (insn->mode == MODE_64BIT) { +    if ((byte & 0xf0) == 0x40) { +      uint8_t opcodeByte; +       +      if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { +        dprintf(insn, "Redundant REX prefix"); +        return -1; +      } +       +      insn->rexPrefix = byte; +      insn->necessaryPrefixLocation = insn->readerCursor - 2; +       +      dprintf(insn, "Found REX prefix 0x%hhx", byte); +    } else {                 +      unconsumeByte(insn); +      insn->necessaryPrefixLocation = insn->readerCursor - 1; +    } +  } else { +    unconsumeByte(insn); +  } +   +  if (insn->mode == MODE_16BIT) { +    insn->registerSize       = (hasOpSize ? 4 : 2); +    insn->addressSize        = (hasAdSize ? 
4 : 2); +    insn->displacementSize   = (hasAdSize ? 4 : 2); +    insn->immediateSize      = (hasOpSize ? 4 : 2); +  } else if (insn->mode == MODE_32BIT) { +    insn->registerSize       = (hasOpSize ? 2 : 4); +    insn->addressSize        = (hasAdSize ? 2 : 4); +    insn->displacementSize   = (hasAdSize ? 2 : 4); +    insn->immediateSize      = (hasAdSize ? 2 : 4); +  } else if (insn->mode == MODE_64BIT) { +    if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { +      insn->registerSize       = 8; +      insn->addressSize        = (hasAdSize ? 4 : 8); +      insn->displacementSize   = 4; +      insn->immediateSize      = 4; +    } else if (insn->rexPrefix) { +      insn->registerSize       = (hasOpSize ? 2 : 4); +      insn->addressSize        = (hasAdSize ? 4 : 8); +      insn->displacementSize   = (hasOpSize ? 2 : 4); +      insn->immediateSize      = (hasOpSize ? 2 : 4); +    } else { +      insn->registerSize       = (hasOpSize ? 2 : 4); +      insn->addressSize        = (hasAdSize ? 4 : 8); +      insn->displacementSize   = (hasOpSize ? 2 : 4); +      insn->immediateSize      = (hasOpSize ? 2 : 4); +    } +  } +   +  return 0; +} + +/* + * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of + *   extended or escape opcodes). + * + * @param insn  - The instruction whose opcode is to be read. + * @return      - 0 if the opcode could be read successfully; nonzero otherwise. 
+ */ +static int readOpcode(struct InternalInstruction* insn) {   +  /* Determine the length of the primary opcode */ +   +  uint8_t current; +   +  dprintf(insn, "readOpcode()"); +   +  insn->opcodeType = ONEBYTE; +  if (consumeByte(insn, ¤t)) +    return -1; +   +  if (current == 0x0f) { +    dprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); +     +    insn->twoByteEscape = current; +     +    if (consumeByte(insn, ¤t)) +      return -1; +     +    if (current == 0x38) { +      dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); +       +      insn->threeByteEscape = current; +       +      if (consumeByte(insn, ¤t)) +        return -1; +       +      insn->opcodeType = THREEBYTE_38; +    } else if (current == 0x3a) { +      dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); +       +      insn->threeByteEscape = current; +       +      if (consumeByte(insn, ¤t)) +        return -1; +       +      insn->opcodeType = THREEBYTE_3A; +    } else { +      dprintf(insn, "Didn't find a three-byte escape prefix"); +       +      insn->opcodeType = TWOBYTE; +    } +  } +   +  /* +   * At this point we have consumed the full opcode. +   * Anything we consume from here on must be unconsumed. +   */ +   +  insn->opcode = current; +   +  return 0; +} + +static int readModRM(struct InternalInstruction* insn); + +/* + * getIDWithAttrMask - Determines the ID of an instruction, consuming + *   the ModR/M byte as appropriate for extended and escape opcodes, + *   and using a supplied attribute mask. + * + * @param instructionID - A pointer whose target is filled in with the ID of the + *                        instruction. + * @param insn          - The instruction whose ID is to be determined. + * @param attrMask      - The attribute mask to search. + * @return              - 0 if the ModR/M could be read when needed or was not + *                        needed; nonzero otherwise. 
+ */ +static int getIDWithAttrMask(uint16_t* instructionID, +                             struct InternalInstruction* insn, +                             uint8_t attrMask) { +  BOOL hasModRMExtension; +   +  uint8_t instructionClass; + +  instructionClass = contextForAttrs(attrMask); +   +  hasModRMExtension = modRMRequired(insn->opcodeType, +                                    instructionClass, +                                    insn->opcode); +   +  if (hasModRMExtension) { +    readModRM(insn); +     +    *instructionID = decode(insn->opcodeType, +                            instructionClass, +                            insn->opcode, +                            insn->modRM); +  } else { +    *instructionID = decode(insn->opcodeType, +                            instructionClass, +                            insn->opcode, +                            0); +  } +       +  return 0; +} + +/* + * is16BitEquivalent - Determines whether two instruction names refer to + * equivalent instructions but one is 16-bit whereas the other is not. + * + * @param orig  - The instruction that is not 16-bit + * @param equiv - The instruction that is 16-bit + */ +static BOOL is16BitEquvalent(const char* orig, const char* equiv) { +  off_t i; +   +  for(i = 0;; i++) { +    if(orig[i] == '\0' && equiv[i] == '\0') +      return TRUE; +    if(orig[i] == '\0' || equiv[i] == '\0') +      return FALSE; +    if(orig[i] != equiv[i]) { +      if((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') +        continue; +      if((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') +        continue; +      if((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') +        continue; +      return FALSE; +    } +  } +} + +/* + * is64BitEquivalent - Determines whether two instruction names refer to + * equivalent instructions but one is 64-bit whereas the other is not. 
+ * + * @param orig  - The instruction that is not 64-bit + * @param equiv - The instruction that is 64-bit + */ +static BOOL is64BitEquivalent(const char* orig, const char* equiv) { +  off_t i; +   +  for(i = 0;; i++) { +    if(orig[i] == '\0' && equiv[i] == '\0') +      return TRUE; +    if(orig[i] == '\0' || equiv[i] == '\0') +      return FALSE; +    if(orig[i] != equiv[i]) { +      if((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q') +        continue; +      if((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6') +        continue; +      if((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4') +        continue; +      return FALSE; +    } +  } +} + + +/* + * getID - Determines the ID of an instruction, consuming the ModR/M byte as  + *   appropriate for extended and escape opcodes.  Determines the attributes and  + *   context for the instruction before doing so. + * + * @param insn  - The instruction whose ID is to be determined. + * @return      - 0 if the ModR/M could be read when needed or was not needed; + *                nonzero otherwise. + */ +static int getID(struct InternalInstruction* insn) {   +  uint8_t attrMask; +  uint16_t instructionID; +   +  dprintf(insn, "getID()"); +     +  attrMask = ATTR_NONE; +   +  if (insn->mode == MODE_64BIT) +    attrMask |= ATTR_64BIT; +   +  if (insn->rexPrefix & 0x08) +    attrMask |= ATTR_REXW; +   +  if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) +    attrMask |= ATTR_OPSIZE; +  else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) +    attrMask |= ATTR_XS; +  else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) +    attrMask |= ATTR_XD; +   +  if(getIDWithAttrMask(&instructionID, insn, attrMask)) +    return -1; +   +  /* The following clauses compensate for limitations of the tables. 
*/ +   +  if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) { +    /* +     * Although for SSE instructions it is usually necessary to treat REX.W+F2 +     * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is +     * an occasional instruction where F2 is incidental and REX.W is the more +     * significant.  If the decoded instruction is 32-bit and adding REX.W +     * instead of F2 changes a 32 to a 64, we adopt the new encoding. +     */ +     +    struct InstructionSpecifier* spec; +    uint16_t instructionIDWithREXw; +    struct InstructionSpecifier* specWithREXw; +     +    spec = specifierForUID(instructionID); +     +    if (getIDWithAttrMask(&instructionIDWithREXw, +                          insn, +                          attrMask & (~ATTR_XD))) { +      /* +       * Decoding with REX.w would yield nothing; give up and return original +       * decode. +       */ +       +      insn->instructionID = instructionID; +      insn->spec = spec; +      return 0; +    } +     +    specWithREXw = specifierForUID(instructionIDWithREXw); +     +    if (is64BitEquivalent(spec->name, specWithREXw->name)) { +      insn->instructionID = instructionIDWithREXw; +      insn->spec = specWithREXw; +    } else { +      insn->instructionID = instructionID; +      insn->spec = spec; +    } +    return 0; +  } +   +  if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { +    /* +     * The instruction tables make no distinction between instructions that +     * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a +     * particular spot (i.e., many MMX operations).  In general we're +     * conservative, but in the specific case where OpSize is present but not +     * in the right place we check if there's a 16-bit operation. 
+     */ +     +    struct InstructionSpecifier* spec; +    uint16_t instructionIDWithOpsize; +    struct InstructionSpecifier* specWithOpsize; +     +    spec = specifierForUID(instructionID); +     +    if (getIDWithAttrMask(&instructionIDWithOpsize, +                          insn, +                          attrMask | ATTR_OPSIZE)) { +      /*  +       * ModRM required with OpSize but not present; give up and return version +       * without OpSize set +       */ +       +      insn->instructionID = instructionID; +      insn->spec = spec; +      return 0; +    } +     +    specWithOpsize = specifierForUID(instructionIDWithOpsize); +     +    if (is16BitEquvalent(spec->name, specWithOpsize->name)) { +      insn->instructionID = instructionIDWithOpsize; +      insn->spec = specWithOpsize; +    } else { +      insn->instructionID = instructionID; +      insn->spec = spec; +    } +    return 0; +  } +   +  insn->instructionID = instructionID; +  insn->spec = specifierForUID(insn->instructionID); +   +  return 0; +} + +/* + * readSIB - Consumes the SIB byte to determine addressing information for an + *   instruction. + * + * @param insn  - The instruction whose SIB byte is to be read. + * @return      - 0 if the SIB byte was successfully read; nonzero otherwise. 
+ */ +static int readSIB(struct InternalInstruction* insn) { +  SIBIndex sibIndexBase; +  SIBBase sibBaseBase; +  uint8_t index, base; +   +  dprintf(insn, "readSIB()"); +   +  if (insn->consumedSIB) +    return 0; +   +  insn->consumedSIB = TRUE; +   +  switch (insn->addressSize) { +  case 2: +    dprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); +    return -1; +    break; +  case 4: +    sibIndexBase = SIB_INDEX_EAX; +    sibBaseBase = SIB_BASE_EAX; +    break; +  case 8: +    sibIndexBase = SIB_INDEX_RAX; +    sibBaseBase = SIB_BASE_RAX; +    break; +  } + +  if (consumeByte(insn, &insn->sib)) +    return -1; +   +  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); +   +  switch (index) { +  case 0x4: +    insn->sibIndex = SIB_INDEX_NONE; +    break; +  default: +    insn->sibIndex = (EABase)(sibIndexBase + index); +    if (insn->sibIndex == SIB_INDEX_sib || +        insn->sibIndex == SIB_INDEX_sib64) +      insn->sibIndex = SIB_INDEX_NONE; +    break; +  } +   +  switch (scaleFromSIB(insn->sib)) { +  case 0: +    insn->sibScale = 1; +    break; +  case 1: +    insn->sibScale = 2; +    break; +  case 2: +    insn->sibScale = 4; +    break; +  case 3: +    insn->sibScale = 8; +    break; +  } +   +  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); +   +  switch (base) { +  case 0x5: +    switch (modFromModRM(insn->modRM)) { +    case 0x0: +      insn->eaDisplacement = EA_DISP_32; +      insn->sibBase = SIB_BASE_NONE; +      break; +    case 0x1: +      insn->eaDisplacement = EA_DISP_8; +      insn->sibBase = (insn->addressSize == 4 ?  +                       SIB_BASE_EBP : SIB_BASE_RBP); +      break; +    case 0x2: +      insn->eaDisplacement = EA_DISP_32; +      insn->sibBase = (insn->addressSize == 4 ?  
+                       SIB_BASE_EBP : SIB_BASE_RBP); +      break; +    case 0x3: +      unreachable("Cannot have Mod = 0b11 and a SIB byte"); +    } +    break; +  default: +    insn->sibBase = (EABase)(sibBaseBase + base); +    break; +  } +   +  return 0; +} + +/* + * readDisplacement - Consumes the displacement of an instruction. + * + * @param insn  - The instruction whose displacement is to be read. + * @return      - 0 if the displacement byte was successfully read; nonzero  + *                otherwise. + */ +static int readDisplacement(struct InternalInstruction* insn) {   +  int8_t d8; +  int16_t d16; +  int32_t d32; +   +  dprintf(insn, "readDisplacement()"); +   +  if (insn->consumedDisplacement) +    return 0; +   +  insn->consumedDisplacement = TRUE; +   +  switch (insn->eaDisplacement) { +  case EA_DISP_NONE: +    insn->consumedDisplacement = FALSE; +    break; +  case EA_DISP_8: +    if (consumeInt8(insn, &d8)) +      return -1; +    insn->displacement = d8; +    break; +  case EA_DISP_16: +    if (consumeInt16(insn, &d16)) +      return -1; +    insn->displacement = d16; +    break; +  case EA_DISP_32: +    if (consumeInt32(insn, &d32)) +      return -1; +    insn->displacement = d32; +    break; +  } +   +  insn->consumedDisplacement = TRUE; +  return 0; +} + +/* + * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and + *   displacement) for an instruction and interprets it. + * + * @param insn  - The instruction whose addressing information is to be read. + * @return      - 0 if the information was successfully read; nonzero otherwise. 
+ */ +static int readModRM(struct InternalInstruction* insn) {   +  uint8_t mod, rm, reg; +   +  dprintf(insn, "readModRM()"); +   +  if (insn->consumedModRM) +    return 0; +   +  consumeByte(insn, &insn->modRM); +  insn->consumedModRM = TRUE; +   +  mod     = modFromModRM(insn->modRM); +  rm      = rmFromModRM(insn->modRM); +  reg     = regFromModRM(insn->modRM); +   +  /* +   * This goes by insn->registerSize to pick the correct register, which messes +   * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in +   * fixupReg(). +   */ +  switch (insn->registerSize) { +  case 2: +    insn->regBase = REG_AX; +    insn->eaRegBase = EA_REG_AX; +    break; +  case 4: +    insn->regBase = REG_EAX; +    insn->eaRegBase = EA_REG_EAX; +    break; +  case 8: +    insn->regBase = REG_RAX; +    insn->eaRegBase = EA_REG_RAX; +    break; +  } +   +  reg |= rFromREX(insn->rexPrefix) << 3; +  rm  |= bFromREX(insn->rexPrefix) << 3; +   +  insn->reg = (Reg)(insn->regBase + reg); +   +  switch (insn->addressSize) { +  case 2: +    insn->eaBaseBase = EA_BASE_BX_SI; +      +    switch (mod) { +    case 0x0: +      if (rm == 0x6) { +        insn->eaBase = EA_BASE_NONE; +        insn->eaDisplacement = EA_DISP_16; +        if(readDisplacement(insn)) +          return -1; +      } else { +        insn->eaBase = (EABase)(insn->eaBaseBase + rm); +        insn->eaDisplacement = EA_DISP_NONE; +      } +      break; +    case 0x1: +      insn->eaBase = (EABase)(insn->eaBaseBase + rm); +      insn->eaDisplacement = EA_DISP_8; +      if(readDisplacement(insn)) +        return -1; +      break; +    case 0x2: +      insn->eaBase = (EABase)(insn->eaBaseBase + rm); +      insn->eaDisplacement = EA_DISP_16; +      if(readDisplacement(insn)) +        return -1; +      break; +    case 0x3: +      insn->eaBase = (EABase)(insn->eaRegBase + rm); +      if(readDisplacement(insn)) +        return -1; +      break; +    } +    break; +  case 4: +  case 8: +    insn->eaBaseBase = 
(insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); +     +    switch (mod) { +    case 0x0: +      insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ +      switch (rm) { +      case 0x4: +      case 0xc:   /* in case REXW.b is set */ +        insn->eaBase = (insn->addressSize == 4 ?  +                        EA_BASE_sib : EA_BASE_sib64); +        readSIB(insn); +        if(readDisplacement(insn)) +          return -1; +        break; +      case 0x5: +        insn->eaBase = EA_BASE_NONE; +        insn->eaDisplacement = EA_DISP_32; +        if(readDisplacement(insn)) +          return -1; +        break; +      default: +        insn->eaBase = (EABase)(insn->eaBaseBase + rm); +        break; +      } +      break; +    case 0x1: +    case 0x2: +      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); +      switch (rm) { +      case 0x4: +      case 0xc:   /* in case REXW.b is set */ +        insn->eaBase = EA_BASE_sib; +        readSIB(insn); +        if(readDisplacement(insn)) +          return -1; +        break; +      default: +        insn->eaBase = (EABase)(insn->eaBaseBase + rm); +        if(readDisplacement(insn)) +          return -1; +        break; +      } +      break; +    case 0x3: +      insn->eaDisplacement = EA_DISP_NONE; +      insn->eaBase = (EABase)(insn->eaRegBase + rm); +      break; +    } +    break; +  } /* switch (insn->addressSize) */ +   +  return 0; +} + +#define GENERIC_FIXUP_FUNC(name, base, prefix)            \ +  static uint8_t name(struct InternalInstruction *insn,   \ +                      OperandType type,                   \ +                      uint8_t index,                      \ +                      uint8_t *valid) {                   \ +    *valid = 1;                                           \ +    switch (type) {                                       \ +    default:                                              \ +      unreachable("Unhandled register type");             \ +    case 
TYPE_Rv:                                         \ +      return base + index;                                \ +    case TYPE_R8:                                         \ +      if(insn->rexPrefix &&                               \ +         index >= 4 && index <= 7) {                      \ +        return prefix##_SPL + (index - 4);                \ +      } else {                                            \ +        return prefix##_AL + index;                       \ +      }                                                   \ +    case TYPE_R16:                                        \ +      return prefix##_AX + index;                         \ +    case TYPE_R32:                                        \ +      return prefix##_EAX + index;                        \ +    case TYPE_R64:                                        \ +      return prefix##_RAX + index;                        \ +    case TYPE_XMM128:                                     \ +    case TYPE_XMM64:                                      \ +    case TYPE_XMM32:                                      \ +    case TYPE_XMM:                                        \ +      return prefix##_XMM0 + index;                       \ +    case TYPE_MM64:                                       \ +    case TYPE_MM32:                                       \ +    case TYPE_MM:                                         \ +      if(index > 7)                                       \ +        *valid = 0;                                       \ +      return prefix##_MM0 + index;                        \ +    case TYPE_SEGMENTREG:                                 \ +      if(index > 5)                                       \ +        *valid = 0;                                       \ +      return prefix##_ES + index;                         \ +    case TYPE_DEBUGREG:                                   \ +      if(index > 7)                                       \ +        *valid = 0;                                      
 \ +      return prefix##_DR0 + index;                        \ +    case TYPE_CR32:                                       \ +      if(index > 7)                                       \ +        *valid = 0;                                       \ +      return prefix##_ECR0 + index;                       \ +    case TYPE_CR64:                                       \ +      if(index > 8)                                       \ +        *valid = 0;                                       \ +      return prefix##_RCR0 + index;                       \ +    }                                                     \ +  } + +/* + * fixup*Value - Consults an operand type to determine the meaning of the + *   reg or R/M field.  If the operand is an XMM operand, for example, an + *   operand would be XMM0 instead of AX, which readModRM() would otherwise + *   misinterpret it as. + * + * @param insn  - The instruction containing the operand. + * @param type  - The operand type. + * @param index - The existing value of the field as reported by readModRM(). + * @param valid - The address of a uint8_t.  The target is set to 1 if the + *                field is valid for the register class; 0 if not. + */ +GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    REG) +GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG) + +/* + * fixupReg - Consults an operand specifier to determine which of the + *   fixup*Value functions to use in correcting readModRM()'ss interpretation. + * + * @param insn  - See fixup*Value(). + * @param op    - The operand specifier. + * @return      - 0 if fixup was successful; -1 if the register returned was + *                invalid for its class. 
+ */ +static int fixupReg(struct InternalInstruction *insn,  +                    struct OperandSpecifier *op) { +  uint8_t valid; +   +  dprintf(insn, "fixupReg()"); +   +  switch ((OperandEncoding)op->encoding) { +  default: +    unreachable("Expected a REG or R/M encoding in fixupReg"); +  case ENCODING_REG: +    insn->reg = (Reg)fixupRegValue(insn, +                                   (OperandType)op->type, +                                   insn->reg - insn->regBase, +                                   &valid); +    if (!valid) +      return -1; +    break; +  case ENCODING_RM: +    if (insn->eaBase >= insn->eaRegBase) { +      insn->eaBase = (EABase)fixupRMValue(insn, +                                          (OperandType)op->type, +                                          insn->eaBase - insn->eaRegBase, +                                          &valid); +      if (!valid) +        return -1; +    } +    break; +  } +   +  return 0; +} + +/* + * readOpcodeModifier - Reads an operand from the opcode field of an  + *   instruction.  Handles AddRegFrm instructions. + * + * @param insn    - The instruction whose opcode field is to be read. 
+ * @param inModRM - Indicates that the opcode field is to be read from the + *                  ModR/M extension; useful for escape opcodes + */ +static void readOpcodeModifier(struct InternalInstruction* insn) { +  dprintf(insn, "readOpcodeModifier()"); +   +  if (insn->consumedOpcodeModifier) +    return; +   +  insn->consumedOpcodeModifier = TRUE; +   +  switch(insn->spec->modifierType) { +  default: +    unreachable("Unknown modifier type."); +  case MODIFIER_NONE: +    unreachable("No modifier but an operand expects one."); +  case MODIFIER_OPCODE: +    insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; +    break; +  case MODIFIER_MODRM: +    insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; +    break; +  }   +} + +/* + * readOpcodeRegister - Reads an operand from the opcode field of an  + *   instruction and interprets it appropriately given the operand width. + *   Handles AddRegFrm instructions. + * + * @param insn  - See readOpcodeModifier(). + * @param size  - The width (in bytes) of the register being specified. + *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means + *                RAX. 
+ */ +static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { +  dprintf(insn, "readOpcodeRegister()"); + +  readOpcodeModifier(insn); +   +  if (size == 0) +    size = insn->registerSize; +   +  switch (size) { +  case 1: +    insn->opcodeRegister = (Reg)(REG_AL + ((bFromREX(insn->rexPrefix) << 3)  +                                           | insn->opcodeModifier)); +    if(insn->rexPrefix &&  +       insn->opcodeRegister >= REG_AL + 0x4 && +       insn->opcodeRegister < REG_AL + 0x8) { +      insn->opcodeRegister = (Reg)(REG_SPL + (insn->opcodeRegister - REG_AL - 4)); +    } +       +    break; +  case 2: +    insn->opcodeRegister = (Reg)(REG_AX + ((bFromREX(insn->rexPrefix) << 3)  +                                            | insn->opcodeModifier)); +    break; +  case 4: +    insn->opcodeRegister = (Reg)(REG_EAX + ((bFromREX(insn->rexPrefix) << 3)  +                                             | insn->opcodeModifier)); +    break; +  case 8: +    insn->opcodeRegister = (Reg)(REG_RAX + ((bFromREX(insn->rexPrefix) << 3)  +                                             |insn->opcodeModifier)); +    break; +  } +} + +/* + * readImmediate - Consumes an immediate operand from an instruction, given the + *   desired operand size. + * + * @param insn  - The instruction whose operand is to be read. + * @param size  - The width (in bytes) of the operand. + * @return      - 0 if the immediate was successfully consumed; nonzero + *                otherwise. 
+ */ +static int readImmediate(struct InternalInstruction* insn, uint8_t size) { +  uint8_t imm8; +  uint16_t imm16; +  uint32_t imm32; +  uint64_t imm64; +   +  dprintf(insn, "readImmediate()"); +   +  if (insn->numImmediatesConsumed == 2) +    unreachable("Already consumed two immediates"); +   +  if (size == 0) +    size = insn->immediateSize; +  else +    insn->immediateSize = size; +   +  switch (size) { +  case 1: +    if (consumeByte(insn, &imm8)) +      return -1; +    insn->immediates[insn->numImmediatesConsumed] = imm8; +    break; +  case 2: +    if (consumeUInt16(insn, &imm16)) +      return -1; +    insn->immediates[insn->numImmediatesConsumed] = imm16; +    break; +  case 4: +    if (consumeUInt32(insn, &imm32)) +      return -1; +    insn->immediates[insn->numImmediatesConsumed] = imm32; +    break; +  case 8: +    if (consumeUInt64(insn, &imm64)) +      return -1; +    insn->immediates[insn->numImmediatesConsumed] = imm64; +    break; +  } +   +  insn->numImmediatesConsumed++; +   +  return 0; +} + +/* + * readOperands - Consults the specifier for an instruction and consumes all + *   operands for that instruction, interpreting them as it goes. + * + * @param insn  - The instruction whose operands are to be read and interpreted. + * @return      - 0 if all operands could be read; nonzero otherwise. 
+ */ +static int readOperands(struct InternalInstruction* insn) { +  int index; +   +  dprintf(insn, "readOperands()"); +   +  for (index = 0; index < X86_MAX_OPERANDS; ++index) { +    switch (insn->spec->operands[index].encoding) { +    case ENCODING_NONE: +      break; +    case ENCODING_REG: +    case ENCODING_RM: +      if (readModRM(insn)) +        return -1; +      if (fixupReg(insn, &insn->spec->operands[index])) +        return -1; +      break; +    case ENCODING_CB: +    case ENCODING_CW: +    case ENCODING_CD: +    case ENCODING_CP: +    case ENCODING_CO: +    case ENCODING_CT: +      dprintf(insn, "We currently don't hande code-offset encodings"); +      return -1; +    case ENCODING_IB: +      if (readImmediate(insn, 1)) +        return -1; +      break; +    case ENCODING_IW: +      if (readImmediate(insn, 2)) +        return -1; +      break; +    case ENCODING_ID: +      if (readImmediate(insn, 4)) +        return -1; +      break; +    case ENCODING_IO: +      if (readImmediate(insn, 8)) +        return -1; +      break; +    case ENCODING_Iv: +      readImmediate(insn, insn->immediateSize); +      break; +    case ENCODING_Ia: +      readImmediate(insn, insn->addressSize); +      break; +    case ENCODING_RB: +      readOpcodeRegister(insn, 1); +      break; +    case ENCODING_RW: +      readOpcodeRegister(insn, 2); +      break; +    case ENCODING_RD: +      readOpcodeRegister(insn, 4); +      break; +    case ENCODING_RO: +      readOpcodeRegister(insn, 8); +      break; +    case ENCODING_Rv: +      readOpcodeRegister(insn, 0); +      break; +    case ENCODING_I: +      readOpcodeModifier(insn); +      break; +    case ENCODING_DUP: +      break; +    default: +      dprintf(insn, "Encountered an operand with an unknown encoding."); +      return -1; +    } +  } +   +  return 0; +} + +/* + * decodeInstruction - Reads and interprets a full instruction provided by the + *   user. 
+ * + * @param insn      - A pointer to the instruction to be populated.  Must be  + *                    pre-allocated. + * @param reader    - The function to be used to read the instruction's bytes. + * @param readerArg - A generic argument to be passed to the reader to store + *                    any internal state. + * @param logger    - If non-NULL, the function to be used to write log messages + *                    and warnings. + * @param loggerArg - A generic argument to be passed to the logger to store + *                    any internal state. + * @param startLoc  - The address (in the reader's address space) of the first + *                    byte in the instruction. + * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to + *                    decode the instruction in. + * @return          - 0 if the instruction's memory could be read; nonzero if + *                    not. + */ +int decodeInstruction(struct InternalInstruction* insn, +                      byteReader_t reader, +                      void* readerArg, +                      dlog_t logger, +                      void* loggerArg, +                      uint64_t startLoc, +                      DisassemblerMode mode) { +  bzero(insn, sizeof(struct InternalInstruction)); +     +  insn->reader = reader; +  insn->readerArg = readerArg; +  insn->dlog = logger; +  insn->dlogArg = loggerArg; +  insn->startLocation = startLoc; +  insn->readerCursor = startLoc; +  insn->mode = mode; +  insn->numImmediatesConsumed = 0; +   +  if (readPrefixes(insn)       || +      readOpcode(insn)         || +      getID(insn)              || +      insn->instructionID == 0 || +      readOperands(insn)) +    return -1; +   +  insn->length = insn->readerCursor - insn->startLocation; +   +  dprintf(insn, "Read from 0x%llx to 0x%llx: length %llu", +          startLoc, insn->readerCursor, insn->length); +     +  if (insn->length > 15) +    dprintf(insn, "Instruction exceeds 15-byte limit"); 
+   +  return 0; +} diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h new file mode 100644 index 0000000..f548c65 --- /dev/null +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -0,0 +1,515 @@ +/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==* + * + *                     The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file is part of the X86 Disassembler. + * It contains the public interface of the instruction decoder. + * Documentation for the disassembler can be found in X86Disassembler.h. + * + *===----------------------------------------------------------------------===*/ + +#ifndef X86DISASSEMBLERDECODER_H +#define X86DISASSEMBLERDECODER_H + +#ifdef __cplusplus +extern "C" { +#endif +   +#define INSTRUCTION_SPECIFIER_FIELDS  \ +  const char*             name; + +#define INSTRUCTION_IDS     \ +  InstrUID*  instructionIDs; + +#include "X86DisassemblerDecoderCommon.h" +   +#undef INSTRUCTION_SPECIFIER_FIELDS +#undef INSTRUCTION_IDS +   +/* + * Accessor functions for various fields of an Intel instruction + */ +static inline uint8_t modFromModRM(uint8_t modRM){ return (modRM & 0xc0) >> 6; } +static inline uint8_t regFromModRM(uint8_t modRM){ return (modRM & 0x38) >> 3; } +static inline uint8_t rmFromModRM(uint8_t modRM) { return (modRM & 0x7);       } +static inline uint8_t scaleFromSIB(uint8_t sib)  { return (sib & 0xc0) >> 6;   } +static inline uint8_t indexFromSIB(uint8_t sib)  { return (sib & 0x38) >> 3;   } +static inline uint8_t baseFromSIB(uint8_t sib)   { return (sib & 0x7);         } +static inline uint8_t wFromREX(uint8_t rex)      { return (rex & 0x8) >> 3;    } +static inline uint8_t rFromREX(uint8_t rex)      { return (rex & 0x4) >> 2;    } 
+static inline uint8_t xFromREX(uint8_t rex)      { return (rex & 0x2) >> 1;    } +static inline uint8_t bFromREX(uint8_t rex)      { return (rex & 0x1);         } + +/* + * These enums represent Intel registers for use by the decoder. + * + * Every list below is expanded through ENTRY(), so the order of the names is + * the order of the generated enumerators.  In REGS_8BIT, SPL..DIL are listed + * after R15B.  EA_BASES_32BIT and EA_BASES_64BIT list the same names as + * REGS_32BIT and REGS_64BIT except that the fifth entry is sib/sib64 instead + * of ESP/RSP. + */ + +#define REGS_8BIT     \ +  ENTRY(AL)           \ +  ENTRY(CL)           \ +  ENTRY(DL)           \ +  ENTRY(BL)           \ +  ENTRY(AH)           \ +  ENTRY(CH)           \ +  ENTRY(DH)           \ +  ENTRY(BH)           \ +  ENTRY(R8B)          \ +  ENTRY(R9B)          \ +  ENTRY(R10B)         \ +  ENTRY(R11B)         \ +  ENTRY(R12B)         \ +  ENTRY(R13B)         \ +  ENTRY(R14B)         \ +  ENTRY(R15B)         \ +  ENTRY(SPL)          \ +  ENTRY(BPL)          \ +  ENTRY(SIL)          \ +  ENTRY(DIL) + +#define EA_BASES_16BIT  \ +  ENTRY(BX_SI)          \ +  ENTRY(BX_DI)          \ +  ENTRY(BP_SI)          \ +  ENTRY(BP_DI)          \ +  ENTRY(SI)             \ +  ENTRY(DI)             \ +  ENTRY(BP)             \ +  ENTRY(BX)             \ +  ENTRY(R8W)            \ +  ENTRY(R9W)            \ +  ENTRY(R10W)           \ +  ENTRY(R11W)           \ +  ENTRY(R12W)           \ +  ENTRY(R13W)           \ +  ENTRY(R14W)           \ +  ENTRY(R15W) + +#define REGS_16BIT    \ +  ENTRY(AX)           \ +  ENTRY(CX)           \ +  ENTRY(DX)           \ +  ENTRY(BX)           \ +  ENTRY(SP)           \ +  ENTRY(BP)           \ +  ENTRY(SI)           \ +  ENTRY(DI)           \ +  ENTRY(R8W)          \ +  ENTRY(R9W)          \ +  ENTRY(R10W)         \ +  ENTRY(R11W)         \ +  ENTRY(R12W)         \ +  ENTRY(R13W)         \ +  ENTRY(R14W)         \ +  ENTRY(R15W) + +#define EA_BASES_32BIT  \ +  ENTRY(EAX)            \ +  ENTRY(ECX)            \ +  ENTRY(EDX)            \ +  ENTRY(EBX)            \ +  ENTRY(sib)            \ +  ENTRY(EBP)            \ +  ENTRY(ESI)            \ +  ENTRY(EDI)            \ +  ENTRY(R8D)            \ +  ENTRY(R9D)            \ +  ENTRY(R10D)           \ +  ENTRY(R11D)           \ +  ENTRY(R12D)          
 \ +  ENTRY(R13D)           \ +  ENTRY(R14D)           \ +  ENTRY(R15D) + +#define REGS_32BIT  \ +  ENTRY(EAX)        \ +  ENTRY(ECX)        \ +  ENTRY(EDX)        \ +  ENTRY(EBX)        \ +  ENTRY(ESP)        \ +  ENTRY(EBP)        \ +  ENTRY(ESI)        \ +  ENTRY(EDI)        \ +  ENTRY(R8D)        \ +  ENTRY(R9D)        \ +  ENTRY(R10D)       \ +  ENTRY(R11D)       \ +  ENTRY(R12D)       \ +  ENTRY(R13D)       \ +  ENTRY(R14D)       \ +  ENTRY(R15D) + +#define EA_BASES_64BIT  \ +  ENTRY(RAX)            \ +  ENTRY(RCX)            \ +  ENTRY(RDX)            \ +  ENTRY(RBX)            \ +  ENTRY(sib64)          \ +  ENTRY(RBP)            \ +  ENTRY(RSI)            \ +  ENTRY(RDI)            \ +  ENTRY(R8)             \ +  ENTRY(R9)             \ +  ENTRY(R10)            \ +  ENTRY(R11)            \ +  ENTRY(R12)            \ +  ENTRY(R13)            \ +  ENTRY(R14)            \ +  ENTRY(R15) + +#define REGS_64BIT  \ +  ENTRY(RAX)        \ +  ENTRY(RCX)        \ +  ENTRY(RDX)        \ +  ENTRY(RBX)        \ +  ENTRY(RSP)        \ +  ENTRY(RBP)        \ +  ENTRY(RSI)        \ +  ENTRY(RDI)        \ +  ENTRY(R8)         \ +  ENTRY(R9)         \ +  ENTRY(R10)        \ +  ENTRY(R11)        \ +  ENTRY(R12)        \ +  ENTRY(R13)        \ +  ENTRY(R14)        \ +  ENTRY(R15) + +#define REGS_MMX  \ +  ENTRY(MM0)      \ +  ENTRY(MM1)      \ +  ENTRY(MM2)      \ +  ENTRY(MM3)      \ +  ENTRY(MM4)      \ +  ENTRY(MM5)      \ +  ENTRY(MM6)      \ +  ENTRY(MM7) + +#define REGS_XMM  \ +  ENTRY(XMM0)     \ +  ENTRY(XMM1)     \ +  ENTRY(XMM2)     \ +  ENTRY(XMM3)     \ +  ENTRY(XMM4)     \ +  ENTRY(XMM5)     \ +  ENTRY(XMM6)     \ +  ENTRY(XMM7)     \ +  ENTRY(XMM8)     \ +  ENTRY(XMM9)     \ +  ENTRY(XMM10)    \ +  ENTRY(XMM11)    \ +  ENTRY(XMM12)    \ +  ENTRY(XMM13)    \ +  ENTRY(XMM14)    \ +  ENTRY(XMM15) +   +#define REGS_SEGMENT \ +  ENTRY(ES)          \ +  ENTRY(CS)          \ +  ENTRY(SS)          \ +  ENTRY(DS)          \ +  ENTRY(FS)          \ +  ENTRY(GS) +   
+#define REGS_DEBUG  \ +  ENTRY(DR0)        \ +  ENTRY(DR1)        \ +  ENTRY(DR2)        \ +  ENTRY(DR3)        \ +  ENTRY(DR4)        \ +  ENTRY(DR5)        \ +  ENTRY(DR6)        \ +  ENTRY(DR7) + +#define REGS_CONTROL_32BIT  \ +  ENTRY(ECR0)               \ +  ENTRY(ECR1)               \ +  ENTRY(ECR2)               \ +  ENTRY(ECR3)               \ +  ENTRY(ECR4)               \ +  ENTRY(ECR5)               \ +  ENTRY(ECR6)               \ +  ENTRY(ECR7) + +/* Nine entries (RCR0..RCR8); REGS_CONTROL_32BIT has eight. */ +#define REGS_CONTROL_64BIT  \ +  ENTRY(RCR0)               \ +  ENTRY(RCR1)               \ +  ENTRY(RCR2)               \ +  ENTRY(RCR3)               \ +  ENTRY(RCR4)               \ +  ENTRY(RCR5)               \ +  ENTRY(RCR6)               \ +  ENTRY(RCR7)               \ +  ENTRY(RCR8) +   +#define ALL_EA_BASES  \ +  EA_BASES_16BIT      \ +  EA_BASES_32BIT      \ +  EA_BASES_64BIT +   +#define ALL_SIB_BASES \ +  REGS_32BIT          \ +  REGS_64BIT + +#define ALL_REGS      \ +  REGS_8BIT           \ +  REGS_16BIT          \ +  REGS_32BIT          \ +  REGS_64BIT          \ +  REGS_MMX            \ +  REGS_XMM            \ +  REGS_SEGMENT        \ +  REGS_DEBUG          \ +  REGS_CONTROL_32BIT  \ +  REGS_CONTROL_64BIT  \ +  ENTRY(RIP) + +/* + * EABase - All possible values of the base field for effective-address  + *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We + *   distinguish between bases (EA_BASE_*) and registers that just happen to be + *   referred to when Mod == 0b11 (EA_REG_*). + */ +typedef enum { +  EA_BASE_NONE, +#define ENTRY(x) EA_BASE_##x, +  ALL_EA_BASES +#undef ENTRY +#define ENTRY(x) EA_REG_##x, +  ALL_REGS +#undef ENTRY +  EA_max +} EABase; +   +/*  + * SIBIndex - All possible values of the SIB index field. + *   Borrows entries from ALL_EA_BASES with the special case that + *   sib is synonymous with NONE. 
+ */ +typedef enum { +  SIB_INDEX_NONE, +#define ENTRY(x) SIB_INDEX_##x, +  ALL_EA_BASES +#undef ENTRY +  SIB_INDEX_max +} SIBIndex; +   +/* + * SIBBase - All possible values of the SIB base field. + */ +typedef enum { +  SIB_BASE_NONE, +#define ENTRY(x) SIB_BASE_##x, +  ALL_SIB_BASES +#undef ENTRY +  SIB_BASE_max +} SIBBase; + +/* + * EADisplacement - Possible displacement types for effective-address + *   computations. + */ +typedef enum { +  EA_DISP_NONE, +  EA_DISP_8, +  EA_DISP_16, +  EA_DISP_32 +} EADisplacement; + +/* + * Reg - All possible values of the reg field in the ModR/M byte. + */ +typedef enum { +#define ENTRY(x) REG_##x, +  ALL_REGS +#undef ENTRY +  REG_max +} Reg; +   +/* + * SegmentOverride - All possible segment overrides. + */ +typedef enum { +  SEG_OVERRIDE_NONE, +  SEG_OVERRIDE_CS, +  SEG_OVERRIDE_SS, +  SEG_OVERRIDE_DS, +  SEG_OVERRIDE_ES, +  SEG_OVERRIDE_FS, +  SEG_OVERRIDE_GS, +  SEG_OVERRIDE_max +} SegmentOverride; + +/* BOOL - Flag type for the consumed* fields of struct InternalInstruction. */ +typedef uint8_t BOOL; + +/* + * byteReader_t - Type for the byte reader that the consumer must provide to + *   the decoder.  Reads a single byte from the instruction's address space. + * @param arg     - A baton that the consumer can associate with any internal + *                  state that it needs. + * @param byte    - A pointer to a single byte in memory that should be set to + *                  contain the value at address. + * @param address - The address in the instruction's address space that should + *                  be read from. + * @return        - -1 if the byte cannot be read for any reason; 0 otherwise. + */ +typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address); + +/* + * dlog_t - Type for the logging function that the consumer can provide to + *   get debugging output from the decoder. + * @param arg     - A baton that the consumer can associate with any internal + *                  state that it needs. + * @param log     - A string that contains the message.  
Will be reused after + *                  the logger returns. + */ +typedef void (*dlog_t)(void* arg, const char *log); + +/* + * The x86 internal instruction, which is produced by the decoder. + */ +struct InternalInstruction { +  /* Reader interface (C) */ +  byteReader_t reader; +  /* Opaque value passed to the reader */ +  void* readerArg; +  /* The address of the next byte to read via the reader */ +  uint64_t readerCursor; + +  /* Logger interface (C) */ +  dlog_t dlog; +  /* Opaque value passed to the logger */ +  void* dlogArg; + +  /* General instruction information */ +   +  /* The mode to disassemble for (64-bit, protected, real) */ +  DisassemblerMode mode; +  /* The start of the instruction, usable with the reader */ +  uint64_t startLocation; +  /* The length of the instruction, in bytes; decodeInstruction() sets it to +     readerCursor - startLocation after a successful decode */ +  size_t length; +   +  /* Prefix state */ +   +  /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ +  uint8_t prefixPresent[0x100]; +  /* contains the location (for use with the reader) of the prefix byte */ +  uint64_t prefixLocations[0x100]; +  /* The value of the REX prefix, if present */ +  uint8_t rexPrefix; +  /* The location of the REX prefix */ +  uint64_t rexLocation; +  /* The location where a mandatory prefix would have to be (i.e., right before +     the opcode, or right before the REX prefix if one is present) */ +  uint64_t necessaryPrefixLocation; +  /* The segment override type */ +  SegmentOverride segmentOverride; +   +  /* Sizes of various critical pieces of data */ +  uint8_t registerSize; +  uint8_t addressSize; +  uint8_t displacementSize; +  uint8_t immediateSize; +   +  /* opcode state */ +   +  /* The value of the two-byte escape prefix (usually 0x0f) */ +  uint8_t twoByteEscape; +  /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */ +  uint8_t threeByteEscape; +  /* The last byte of the opcode, not counting any ModR/M extension */ +  uint8_t opcode; +  /* The ModR/M byte of the instruction, if 
it is an opcode extension */ +  uint8_t modRMExtension; +   +  /* decode state */ +   +  /* The type of opcode, used for indexing into the array of decode tables */ +  OpcodeType opcodeType; +  /* The instruction ID, extracted from the decode table */ +  uint16_t instructionID; +  /* The specifier for the instruction, from the instruction info table */ +  struct InstructionSpecifier* spec; +   +  /* state for additional bytes, consumed during operand decode.  Pattern: +     consumed___ indicates that the byte was already consumed and does not +     need to be consumed again */ +   +  /* The ModR/M byte, which contains most register operands and some portion of +     all memory operands */ +  BOOL                          consumedModRM; +  uint8_t                       modRM; +   +  /* The SIB byte, used for more complex 32- or 64-bit memory operands */ +  BOOL                          consumedSIB; +  uint8_t                       sib; + +  /* The displacement, used for memory operands */ +  BOOL                          consumedDisplacement; +  int32_t                       displacement; +   +  /* Immediates.  There can be two in some cases; readImmediate() stores each +     one at immediates[numImmediatesConsumed] and then increments that count */ +  uint8_t                       numImmediatesConsumed; +  uint8_t                       numImmediatesTranslated; +  uint64_t                      immediates[2]; +   +  /* A register or immediate operand encoded into the opcode; opcodeModifier is +     written by readOpcodeModifier() and opcodeRegister by +     readOpcodeRegister() */ +  BOOL                          consumedOpcodeModifier; +  uint8_t                       opcodeModifier; +  Reg                           opcodeRegister; +   +  /* Portions of the ModR/M byte */ +   +  /* These fields determine the allowable values for the ModR/M fields, which +     depend on operand and address widths */ +  EABase                        eaBaseBase; +  EABase                        eaRegBase; +  Reg                           regBase; + +  /* The Mod and R/M fields can encode a base for an effective address, or a +     register.  
These are separated into two fields here */ +  EABase                        eaBase; +  EADisplacement                eaDisplacement; +  /* The reg field always encodes a register */ +  Reg                           reg; +   +  /* SIB state */ +  SIBIndex                      sibIndex; +  uint8_t                       sibScale; +  SIBBase                       sibBase; +}; + +/* decodeInstruction - Decode one instruction and store the decoding results in + *   a buffer provided by the consumer. + * @param insn      - The buffer to store the instruction in.  Allocated by the + *                    consumer. + * @param reader    - The byteReader_t for the bytes to be read. + * @param readerArg - An argument to pass to the reader for storing context + *                    specific to the consumer.  May be NULL. + * @param logger    - The dlog_t to be used in printing status messages from the + *                    disassembler.  May be NULL. + * @param loggerArg - An argument to pass to the logger for storing context + *                    specific to the logger.  May be NULL. + * @param startLoc  - The address (in the reader's address space) of the first + *                    byte in the instruction. + * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in. + * @return          - Nonzero if there was an error during decode, 0 otherwise. 
+ */ +int decodeInstruction(struct InternalInstruction* insn, +                      byteReader_t reader, +                      void* readerArg, +                      dlog_t logger, +                      void* loggerArg, +                      uint64_t startLoc, +                      DisassemblerMode mode); + +#ifdef __cplusplus  +} +#endif +   +#endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h new file mode 100644 index 0000000..b226257 --- /dev/null +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -0,0 +1,354 @@ +/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==* + * + *                     The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file is part of the X86 Disassembler. + * It contains common definitions used by both the disassembler and the table + *  generator. + * Documentation for the disassembler can be found in X86Disassembler.h. + * + *===----------------------------------------------------------------------===*/ + +/* + * This header file provides those definitions that need to be shared between + * the decoder and the table generator in a C-friendly manner. 
+ */ + +#ifndef X86DISASSEMBLERDECODERCOMMON_H +#define X86DISASSEMBLERDECODERCOMMON_H + +#include "llvm/System/DataTypes.h" + +#define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers +#define CONTEXTS_SYM      x86DisassemblerContexts +#define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes +#define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes +#define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes +#define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes + +#define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers" +#define CONTEXTS_STR      "x86DisassemblerContexts" +#define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes" +#define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes" +#define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes" +#define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes" + +/* + * Attributes of an instruction that must be known before the opcode can be + * processed correctly.  Most of these indicate the presence of particular + * prefixes, but ATTR_64BIT is simply an attribute of the decoding context. + */ +#define ATTRIBUTE_BITS          \ +  ENUM_ENTRY(ATTR_NONE,   0x00) \ +  ENUM_ENTRY(ATTR_64BIT,  0x01) \ +  ENUM_ENTRY(ATTR_XS,     0x02) \ +  ENUM_ENTRY(ATTR_XD,     0x04) \ +  ENUM_ENTRY(ATTR_REXW,   0x08) \ +  ENUM_ENTRY(ATTR_OPSIZE, 0x10) + +/* + * ATTR_max has no explicit value, so it is one more than the last entry of + * ATTRIBUTE_BITS (ATTR_OPSIZE, 0x10), i.e. 0x11. + */ +#define ENUM_ENTRY(n, v) n = v, +enum attributeBits { +  ATTRIBUTE_BITS +  ATTR_max +}; +#undef ENUM_ENTRY + +/* + * Combinations of the above attributes that are relevant to instruction + * decode.  Although other combinations are possible, they can be reduced to + * these without affecting the ultimately decoded instruction. 
+ */ + +/*           Class name           Rank  Rationale for rank assignment         */ +#define INSTRUCTION_CONTEXTS                                                   \ +  ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \ +  ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \ +                                        "64-bit mode but no more")             \ +  ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \ +                                        "operands change width")               \ +  ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \ +                                        "but not the operands")                \ +  ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \ +                                        "but not the operands")                \ +  ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\ +                                        "change width; overrides IC_OPSIZE")   \ +  ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \ +  ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \ +                                        "secondary")                           \ +  ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \ +  ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \ +                                        "opcode")                              \ +  ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \ +                                        "IC_64BIT_REXW_XS")                    \ +  ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  
Prefer over all "   \ +                                        "else because this changes most "      \ +                                        "operands' meaning") + +/* + * Only the class name is used in this expansion; the rank and rationale + * arguments are dropped. + */ +#define ENUM_ENTRY(n, r, d) n,     +typedef enum { +  INSTRUCTION_CONTEXTS +  IC_max +} InstructionContext; +#undef ENUM_ENTRY + +/* + * Opcode types, which determine which decode table to use, both in the Intel + * manual and also for the decoder. + */ +typedef enum { +  ONEBYTE       = 0, +  TWOBYTE       = 1, +  THREEBYTE_38  = 2, +  THREEBYTE_3A  = 3 +} OpcodeType; + +/* + * The following structs are used for the hierarchical decode table.  After + * determining the instruction's class (i.e., which IC_* constant applies to + * it), the decoder reads the opcode.  Some instructions require specific + * values of the ModR/M byte, so the ModR/M byte indexes into the final table. + * + * If a ModR/M byte is not required, "required" is left unset, and the values + * for each instructionID are identical. + */ +  +typedef uint16_t InstrUID; + +/* + * ModRMDecisionType - describes the type of ModR/M decision, allowing the  + * consumer to determine the number of entries in it. + * + * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded + *                  instruction is the same. + * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode + *                  corresponds to one instruction; otherwise, it corresponds to + *                  a different instruction. + * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond + *                  to a different instruction. 
+ */ + +#define MODRMTYPES            \ +  ENUM_ENTRY(MODRM_ONEENTRY)  \ +  ENUM_ENTRY(MODRM_SPLITRM)   \ +  ENUM_ENTRY(MODRM_FULL) + +#define ENUM_ENTRY(n) n,     +typedef enum { +  MODRMTYPES +  MODRM_max +} ModRMDecisionType; +#undef ENUM_ENTRY + +/* + * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which  + *  instruction each possible value of the ModR/M byte corresponds to.  Once + *  this information is known, we have narrowed down to a single instruction. + */ +struct ModRMDecision { +  uint8_t     modrm_type; +   +  /* The macro below must be defined wherever this file is included. */ +  INSTRUCTION_IDS +}; + +/* + * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at + *   given a particular opcode. + */ +struct OpcodeDecision { +  struct ModRMDecision modRMDecisions[256]; +}; + +/* + * ContextDecision - Specifies which opcode->instruction tables to look at given + *   a particular context (set of attributes).  Since there are many possible + *   contexts, the decoder first uses CONTEXTS_SYM to determine which context + *   applies given a specific set of attributes.  Hence there are only IC_max + *   entries in this table, rather than 2^(ATTR_max). + */ +struct ContextDecision { +  struct OpcodeDecision opcodeDecisions[IC_max]; +}; + +/*  + * Physical encodings of instruction operands. 
+ */ + +#define ENCODINGS                                                              \ +  ENUM_ENTRY(ENCODING_NONE,   "")                                              \ +  ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \ +  ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \ +  ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \ +  ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \ +  ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \ +  ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \ +  ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \ +  ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \ +  ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \ +  ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \ +  ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \ +  ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \ +  ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \ +                              "the opcode byte")                               \ +  ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \ +  ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \ +  ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \ +  ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \ +                              "opcode byte")                                   \ +                                                                               \ +  ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \ +  ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \ +  
ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \ +                              "opcode byte")                                   \ +  ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \ +                              "in type") + +/* + * Only the encoding name is used in this expansion; the description string is + * dropped. + */ +#define ENUM_ENTRY(n, d) n,     +  typedef enum { +    ENCODINGS +    ENCODING_max +  } OperandEncoding; +#undef ENUM_ENTRY + +/*  + * Semantic interpretations of instruction operands. + */ + +#define TYPES                                                                  \ +  ENUM_ENTRY(TYPE_NONE,       "")                                              \ +  ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \ +  ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \ +  ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \ +  ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \ +  ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \ +  ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \ +  ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \ +  ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \ +  ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \ +  ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \ +  ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \ +  ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \ +  ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \ +  ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \ +  ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \ +  ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \ +  ENUM_ENTRY(TYPE_RM16,       "2-byte")    
                                    \ +  ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \ +  ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \ +  ENUM_ENTRY(TYPE_M,          "Memory operand")                                \ +  ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \ +  ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \ +  ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \ +  ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \ +  ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \ +  ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \ +  ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \ +  ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \ +  ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \ +  ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \ +  ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \ +  ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \ +  ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \ +                              "base)")                                         \ +  ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \ +  ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \ +  ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \ +  ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \ +                              "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \ +  ENUM_ENTRY(TYPE_M32FP,      "32-bit IEE754 memory floating-point operand")   \ +  ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                    
    \ +  ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \ +  ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \ +                              "floating-point instructions")                   \ +  ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \ +  ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \ +  ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \ +  ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \ +  ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \ +  ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \ +  ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \ +  ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \ +  ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \ +  ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \ +  ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \ +  ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \ +  ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \ +  ENUM_ENTRY(TYPE_CR32,       "4-byte control register operand")               \ +  ENUM_ENTRY(TYPE_CR64,       "8-byte")                                        \ +                                                                               \ +  ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \ +  ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \ +  ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \ +  ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \ +  ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \ +  ENUM_ENTRY(TYPE_DUP1,  
     "operand 1")                                     \ +  ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \ +  ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \ +  ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \ +  ENUM_ENTRY(TYPE_M512,       "512-bit FPU/MMX/XMM/MXCSR state") + +#define ENUM_ENTRY(n, d) n,     +typedef enum { +  TYPES +  TYPE_max +} OperandType; +#undef ENUM_ENTRY + +/*  + * OperandSpecifier - The specification for how to extract and interpret one + *   operand. + */ +struct OperandSpecifier { +  OperandEncoding  encoding; +  OperandType      type; +}; + +/* + * Indicates where the opcode modifier (if any) is to be found.  Extended + * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte. + */ + +#define MODIFIER_TYPES        \ +  ENUM_ENTRY(MODIFIER_NONE)   \ +  ENUM_ENTRY(MODIFIER_OPCODE) \ +  ENUM_ENTRY(MODIFIER_MODRM) + +#define ENUM_ENTRY(n) n, +typedef enum { +  MODIFIER_TYPES +  MODIFIER_max +} ModifierType; +#undef ENUM_ENTRY + +#define X86_MAX_OPERANDS 5 + +/* + * The specification for how to extract and interpret a full instruction and + * its operands. + */ +struct InstructionSpecifier { +  ModifierType modifierType; +  uint8_t modifierBase; +  struct OperandSpecifier operands[X86_MAX_OPERANDS]; +   +  /* The macro below must be defined wherever this file is included. */ +  INSTRUCTION_SPECIFIER_FIELDS +}; + +/* + * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode + * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode, + * respectively. 
+ */ +typedef enum { +  MODE_16BIT, +  MODE_32BIT, +  MODE_64BIT +} DisassemblerMode; + +#endif diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index b311a6e..6098dbf 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -15,8 +15,8 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \                  X86GenRegisterInfo.inc X86GenInstrNames.inc \                  X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \                  X86GenAsmWriter1.inc X86GenDAGISel.inc  \ -                X86GenFastISel.inc \ -                X86GenCallingConv.inc X86GenSubtarget.inc +                X86GenDisassemblerTables.inc X86GenFastISel.inc \ +                X86GenCallingConv.inc X86GenSubtarget.inc \  DIRS = AsmPrinter AsmParser Disassembler TargetInfo diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 0152121..90d9083 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -38,6 +38,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {    }  } +extern "C" void LLVMInitializeX86Disassembler(); +  extern "C" void LLVMInitializeX86Target() {     // Register the target.    RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); @@ -47,6 +49,8 @@ extern "C" void LLVMInitializeX86Target() {    RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);    RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); +  LLVMInitializeX86Disassembler(); +    // Register the code emitter.    
TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter);    TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter); diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index daf8676..ce9b66f 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -23,6 +23,8 @@ add_executable(tblgen    TGValueTypes.cpp    TableGen.cpp    TableGenBackend.cpp +  X86DisassemblerTables.cpp +  X86RecognizableInstr.cpp    )  target_link_libraries(tblgen LLVMSupport LLVMSystem) diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp index cc13125..61b9b15 100644 --- a/utils/TableGen/DisassemblerEmitter.cpp +++ b/utils/TableGen/DisassemblerEmitter.cpp @@ -10,7 +10,86 @@  #include "DisassemblerEmitter.h"  #include "CodeGenTarget.h"  #include "Record.h" +#include "X86DisassemblerTables.h" +#include "X86RecognizableInstr.h"  using namespace llvm; +using namespace llvm::X86Disassembler; + +/// DisassemblerEmitter - Contains disassembler table emitters for various +/// architectures. + +/// X86 Disassembler Emitter +/// +/// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR +///     THE END OF THIS COMMENT! +/// +/// The X86 disassembler emitter is part of the X86 Disassembler, which is +/// documented in lib/Target/X86/X86Disassembler.h. +/// +/// The emitter produces the tables that the disassembler uses to translate +/// instructions.  The emitter generates the following tables: +/// +/// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to +///   instruction contexts.  Although for each attribute there are cases where +///   that attribute determines decoding, in the majority of cases decoding is +///   the same whether or not an attribute is present.  For example, a 64-bit +///   instruction with an OPSIZE prefix and an XS prefix decodes the same way in +///   all cases as a 64-bit instruction with only OPSIZE set.  
(The XS prefix +///   may have effects on its execution, but does not change the instruction +///   returned.)  This allows considerable space savings in other tables. +/// - Four tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and +///   THREEBYTE3A_SYM) contain the hierarchy that the decoder traverses while +///   decoding an instruction.  At the lowest level of this hierarchy are +///   instruction UIDs, 16-bit integers that can be used to uniquely identify +///   the instruction and correspond exactly to its position in the list of +///   CodeGenInstructions for the target. +/// - One table (INSTRUCTIONS_SYM) contains information about the operands of +///   each instruction and how to decode them. +/// +/// During table generation, there may be conflicts between instructions that +/// occupy the same space in the decode tables.  These conflicts are resolved as +/// follows in setTableFields() (X86DisassemblerTables.cpp) +/// +/// - If the current context is the native context for one of the instructions +///   (that is, the attributes specified for it in the LLVM tables specify +///   precisely the current context), then it has priority. +/// - If the current context isn't native for either of the instructions, then +///   the higher-priority context wins (that is, the one that is more specific). +///   That hierarchy is determined by outranks() (X86DisassemblerTables.cpp) +/// - If the current context is native for both instructions, then the table +///   emitter reports a conflict and dies. +/// +/// *** RESOLUTION FOR "Primary decode conflict"S +/// +/// If two instructions collide, typically the solution is (in order of +/// likelihood): +/// +/// (1) to filter out one of the instructions by editing filter() +///     (X86RecognizableInstr.cpp).  This is the most common resolution, but +///     check the Intel manuals first to make sure that (2) and (3) are not the +///     problem. 
+/// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are +///     accurate.  Sometimes they are not. +/// (3) to fix the tables to reflect the actual context (for example, required +///     prefixes), and possibly to add a new context by editing +///     lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h.  This is +///     unlikely to be the cause. +/// +/// DisassemblerEmitter.cpp contains the implementation for the emitter, +///   which simply pulls out instructions from the CodeGenTarget and pushes them +///   into X86DisassemblerTables. +/// X86DisassemblerTables.h contains the interface for the instruction tables, +///   which manage and emit the structures discussed above. +/// X86DisassemblerTables.cpp contains the implementation for the instruction +///   tables. +/// X86ModRMFilters.h contains filters that can be used to determine which +///   ModR/M values are valid for a particular instruction.  These are used to +///   populate ModRMDecisions. +/// X86RecognizableInstr.h contains the interface for a single instruction, +///   which knows how to translate itself from a CodeGenInstruction and provide +///   the information necessary for integration into the tables. +/// X86RecognizableInstr.cpp contains the implementation for a single +///   instruction.  void DisassemblerEmitter::run(raw_ostream &OS) {    CodeGenTarget Target; @@ -25,6 +104,26 @@ void DisassemblerEmitter::run(raw_ostream &OS) {       << " *===---------------------------------------------------------------"       << "-------===*/\n"; +  // X86 uses a custom disassembler. 
+  if (Target.getName() == "X86") { +    DisassemblerTables Tables; +   +    std::vector<const CodeGenInstruction*> numberedInstructions; +    Target.getInstructionsByEnumValue(numberedInstructions); +     +    for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i) +      RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i); + +    // FIXME: As long as we are using exceptions, might as well drop this to the +    // actual conflict site. +    if (Tables.hasConflicts()) +      throw TGError(Target.getTargetRecord()->getLoc(), +                    "Primary decode conflict"); + +    Tables.emit(OS); +    return; +  } +    throw TGError(Target.getTargetRecord()->getLoc(),                  "Unable to generate disassembler for this target");  } diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h new file mode 100644 index 0000000..9003cbf --- /dev/null +++ b/utils/TableGen/X86DisassemblerShared.h @@ -0,0 +1,37 @@ +//===- X86DisassemblerShared.h - Emitter shared header ----------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef X86DISASSEMBLERSHARED_H +#define X86DISASSEMBLERSHARED_H + +#include <string> + +#define INSTRUCTION_SPECIFIER_FIELDS    \ +  bool                    filtered;     \ +  InstructionContext      insnContext;  \ +  std::string             name;         \ +                                        \ +  InstructionSpecifier() {              \ +    filtered = false;                   \ +    insnContext = IC;                   \ +    name = "";                          \ +    modifierType = MODIFIER_NONE;       \ +    modifierBase = 0;                   \ +    bzero(operands, sizeof(operands));  \ +  } + +#define INSTRUCTION_IDS           \ +  InstrUID   instructionIDs[256]; + +#include "../../lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h" + +#undef INSTRUCTION_SPECIFIER_FIELDS +#undef INSTRUCTION_IDS + +#endif diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp new file mode 100644 index 0000000..83284a7 --- /dev/null +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -0,0 +1,603 @@ +//===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler Emitter. +// It contains the implementation of the disassembler tables. +// Documentation for the disassembler emitter in general can be found in +//  X86DisasemblerEmitter.h. 
+// +//===----------------------------------------------------------------------===// + +#include "X86DisassemblerShared.h" +#include "X86DisassemblerTables.h" + +#include "TableGenBackend.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" + +#include <string> + +using namespace llvm; +using namespace X86Disassembler; +   +/// inheritsFrom - Indicates whether all instructions in one class also belong +///   to another class. +/// +/// @param child  - The class that may be the subset +/// @param parent - The class that may be the superset +/// @return       - True if child is a subset of parent, false otherwise. +static inline bool inheritsFrom(InstructionContext child, +                                InstructionContext parent) { +  if (child == parent) +    return true; +   +  switch (parent) { +  case IC: +    return true; +  case IC_64BIT: +    return(inheritsFrom(child, IC_64BIT_REXW)   || +           inheritsFrom(child, IC_64BIT_OPSIZE) || +           inheritsFrom(child, IC_64BIT_XD)     || +           inheritsFrom(child, IC_64BIT_XS)); +  case IC_OPSIZE: +    return(inheritsFrom(child, IC_64BIT_OPSIZE)); +  case IC_XD: +    return(inheritsFrom(child, IC_64BIT_XD)); +  case IC_XS: +    return(inheritsFrom(child, IC_64BIT_XS)); +  case IC_64BIT_REXW: +    return(inheritsFrom(child, IC_64BIT_REXW_XS) || +           inheritsFrom(child, IC_64BIT_REXW_XD) || +           inheritsFrom(child, IC_64BIT_REXW_OPSIZE)); +  case IC_64BIT_OPSIZE: +    return(inheritsFrom(child, IC_64BIT_REXW_OPSIZE)); +  case IC_64BIT_XD: +    return(inheritsFrom(child, IC_64BIT_REXW_XD)); +  case IC_64BIT_XS: +    return(inheritsFrom(child, IC_64BIT_REXW_XS)); +  case IC_64BIT_REXW_XD: +    return false; +  case IC_64BIT_REXW_XS: +    return false; +  case IC_64BIT_REXW_OPSIZE: +    return false; +  default: +    return false; +  } +} + +/// outranks - Indicates whether, if an instruction has two different applicable +///   classes, which class should be 
preferred when performing decode.  This +///   imposes a total ordering (ties are resolved toward "lower") +/// +/// @param upper  - The class that may be preferable +/// @param lower  - The class that may be less preferable +/// @return       - True if upper is to be preferred, false otherwise. +static inline bool outranks(InstructionContext upper,  +                            InstructionContext lower) { +  assert(upper < IC_max); +  assert(lower < IC_max); +   +#define ENUM_ENTRY(n, r, d) r, +  static int ranks[IC_max] = { +    INSTRUCTION_CONTEXTS +  }; +#undef ENUM_ENTRY +   +  return (ranks[upper] > ranks[lower]); +} + +/// stringForContext - Returns a string containing the name of a particular +///   InstructionContext, usually for diagnostic purposes. +/// +/// @param insnContext  - The instruction class to transform to a string. +/// @return           - A statically-allocated string constant that contains the +///                     name of the instruction class. +static inline const char* stringForContext(InstructionContext insnContext) { +  switch (insnContext) { +  default: +    llvm_unreachable("Unhandled instruction class"); +#define ENUM_ENTRY(n, r, d)   case n: return #n; break; +  INSTRUCTION_CONTEXTS +#undef ENUM_ENTRY +  } +} + +/// stringForOperandType - Like stringForContext, but for OperandTypes. +static inline const char* stringForOperandType(OperandType type) { +  switch (type) { +  default: +    llvm_unreachable("Unhandled type"); +#define ENUM_ENTRY(i, d) case i: return #i; +  TYPES +#undef ENUM_ENTRY +  } +} + +/// stringForOperandEncoding - like stringForContext, but for +///   OperandEncodings. 
+static inline const char* stringForOperandEncoding(OperandEncoding encoding) { +  switch (encoding) { +  default: +    llvm_unreachable("Unhandled encoding"); +#define ENUM_ENTRY(i, d) case i: return #i; +  ENCODINGS +#undef ENUM_ENTRY +  } +} + +void DisassemblerTables::emitOneID(raw_ostream &o, +                                   uint32_t &i, +                                   InstrUID id, +                                   bool addComma) const { +  if (id) +    o.indent(i * 2) << format("0x%hx", id); +  else +    o.indent(i * 2) << 0; +   +  if (addComma) +    o << ", "; +  else +    o << "  "; +   +  o << "/* "; +  o << InstructionSpecifiers[id].name; +  o << "*/"; +   +  o << "\n"; +} + +/// emitEmptyTable - Emits the modRMEmptyTable, which is used as a ID table by +///   all ModR/M decisions for instructions that are invalid for all possible +///   ModR/M byte values. +/// +/// @param o        - The output stream on which to emit the table. +/// @param i        - The indentation level for that output stream. +static void emitEmptyTable(raw_ostream &o, uint32_t &i) +{ +  o.indent(i * 2) << "InstrUID modRMEmptyTable[1] = { 0 };" << "\n"; +  o << "\n"; +} + +/// getDecisionType - Determines whether a ModRM decision with 255 entries can +///   be compacted by eliminating redundant information. +/// +/// @param decision - The decision to be compacted. +/// @return         - The compactest available representation for the decision. 
+static ModRMDecisionType getDecisionType(ModRMDecision &decision) +{ +  bool satisfiesOneEntry = true; +  bool satisfiesSplitRM = true; +   +  uint16_t index; +   +  for (index = 0; index < 256; ++index) { +    if (decision.instructionIDs[index] != decision.instructionIDs[0]) +      satisfiesOneEntry = false; +     +    if (((index & 0xc0) == 0xc0) && +       (decision.instructionIDs[index] != decision.instructionIDs[0xc0])) +      satisfiesSplitRM = false; +     +    if (((index & 0xc0) != 0xc0) && +       (decision.instructionIDs[index] != decision.instructionIDs[0x00])) +      satisfiesSplitRM = false; +  } +   +  if (satisfiesOneEntry) +    return MODRM_ONEENTRY; +   +  if (satisfiesSplitRM) +    return MODRM_SPLITRM; +   +  return MODRM_FULL; +} + +/// stringForDecisionType - Returns a statically-allocated string corresponding +///   to a particular decision type. +/// +/// @param dt - The decision type. +/// @return   - A pointer to the statically-allocated string (e.g.,  +///             "MODRM_ONEENTRY" for MODRM_ONEENTRY). +static const char* stringForDecisionType(ModRMDecisionType dt) +{ +#define ENUM_ENTRY(n) case n: return #n; +  switch (dt) { +    default: +      llvm_unreachable("Unknown decision type");   +    MODRMTYPES +  };   +#undef ENUM_ENTRY +} +   +/// stringForModifierType - Returns a statically-allocated string corresponding +///   to an opcode modifier type. +/// +/// @param mt - The modifier type. +/// @return   - A pointer to the statically-allocated string (e.g., +///             "MODIFIER_NONE" for MODIFIER_NONE). 
+static const char* stringForModifierType(ModifierType mt) +{ +#define ENUM_ENTRY(n) case n: return #n; +  switch(mt) { +    default: +      llvm_unreachable("Unknown modifier type"); +    MODIFIER_TYPES +  }; +#undef ENUM_ENTRY +} +   +DisassemblerTables::DisassemblerTables() { +  unsigned i; +   +  for (i = 0; i < 4; i++) { +    Tables[i] = new ContextDecision; +    bzero(Tables[i], sizeof(ContextDecision)); +  } +   +  HasConflicts = false; +} +   +DisassemblerTables::~DisassemblerTables() { +  unsigned i; +   +  for (i = 0; i < 4; i++) +    delete Tables[i]; +} +   +void DisassemblerTables::emitModRMDecision(raw_ostream &o1, +                                           raw_ostream &o2, +                                           uint32_t &i1, +                                           uint32_t &i2, +                                           ModRMDecision &decision) +  const { +  static uint64_t sTableNumber = 0; +  uint64_t thisTableNumber = sTableNumber; +  ModRMDecisionType dt = getDecisionType(decision); +  uint16_t index; +   +  if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0) +  { +    o2.indent(i2) << "{ /* ModRMDecision */" << "\n"; +    i2++; +     +    o2.indent(i2) << stringForDecisionType(dt) << "," << "\n"; +    o2.indent(i2) << "modRMEmptyTable"; +     +    i2--; +    o2.indent(i2) << "}"; +    return; +  } +     +  o1.indent(i1) << "InstrUID modRMTable" << thisTableNumber; +     +  switch (dt) { +    default: +      llvm_unreachable("Unknown decision type"); +    case MODRM_ONEENTRY: +      o1 << "[1]"; +      break; +    case MODRM_SPLITRM: +      o1 << "[2]"; +      break; +    case MODRM_FULL: +      o1 << "[256]"; +      break;       +  } + +  o1 << " = {" << "\n"; +  i1++; +     +  switch (dt) { +    default: +      llvm_unreachable("Unknown decision type"); +    case MODRM_ONEENTRY: +      emitOneID(o1, i1, decision.instructionIDs[0], false); +      break; +    case MODRM_SPLITRM: +      emitOneID(o1, i1, 
decision.instructionIDs[0x00], true); // mod = 0b00 +      emitOneID(o1, i1, decision.instructionIDs[0xc0], false); // mod = 0b11 +      break; +    case MODRM_FULL: +      for (index = 0; index < 256; ++index) +        emitOneID(o1, i1, decision.instructionIDs[index], index < 255); +      break; +  } +     +  i1--; +  o1.indent(i1) << "};" << "\n"; +  o1 << "\n"; +     +  o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n"; +  i2++; +     +  o2.indent(i2) << stringForDecisionType(dt) << "," << "\n"; +  o2.indent(i2) << "modRMTable" << sTableNumber << "\n"; +     +  i2--; +  o2.indent(i2) << "}"; +     +  ++sTableNumber; +} + +void DisassemblerTables::emitOpcodeDecision( +  raw_ostream &o1, +  raw_ostream &o2, +  uint32_t &i1, +  uint32_t &i2, +  OpcodeDecision &decision) const { +  uint16_t index; + +  o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n"; +  i2++; +  o2.indent(i2) << "{" << "\n"; +  i2++; + +  for (index = 0; index < 256; ++index) { +    o2.indent(i2); + +    o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n"; + +    emitModRMDecision(o1, o2, i1, i2, decision.modRMDecisions[index]); + +    if (index <  255) +      o2 << ","; + +    o2 << "\n"; +  } + +  i2--; +  o2.indent(i2) << "}" << "\n"; +  i2--; +  o2.indent(i2) << "}" << "\n"; +} + +void DisassemblerTables::emitContextDecision( +  raw_ostream &o1, +  raw_ostream &o2, +  uint32_t &i1, +  uint32_t &i2, +  ContextDecision &decision, +  const char* name) const { +  o2.indent(i2) << "struct ContextDecision " << name << " = {" << "\n"; +  i2++; +  o2.indent(i2) << "{ /* opcodeDecisions */" << "\n"; +  i2++; + +  unsigned index; + +  for (index = 0; index < IC_max; ++index) { +    o2.indent(i2) << "/* "; +    o2 << stringForContext((InstructionContext)index); +    o2 << " */"; +    o2 << "\n"; + +    emitOpcodeDecision(o1, o2, i1, i2, decision.opcodeDecisions[index]); + +    if (index + 1 < IC_max) +      o2 << ", "; +  } + +  i2--; +  o2.indent(i2) << "}" << "\n"; +  i2--; +  
o2.indent(i2) << "};" << "\n"; +} + +void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)  +  const { +  o.indent(i * 2) << "struct InstructionSpecifier "; +  o << INSTRUCTIONS_STR << "["; +  o << InstructionSpecifiers.size(); +  o << "] = {" << "\n"; +   +  i++; + +  uint16_t numInstructions = InstructionSpecifiers.size(); +  uint16_t index, operandIndex; + +  for (index = 0; index < numInstructions; ++index) { +    o.indent(i * 2) << "{ /* " << index << " */" << "\n"; +    i++; +     +    o.indent(i * 2) <<  +      stringForModifierType(InstructionSpecifiers[index].modifierType); +    o << "," << "\n"; +     +    o.indent(i * 2) << "0x"; +    o << format("%02hhx", (uint16_t)InstructionSpecifiers[index].modifierBase); +    o << "," << "\n"; + +    o.indent(i * 2) << "{" << "\n"; +    i++; + +    for (operandIndex = 0; operandIndex < X86_MAX_OPERANDS; ++operandIndex) { +      o.indent(i * 2) << "{ "; +      o << stringForOperandEncoding(InstructionSpecifiers[index] +                                    .operands[operandIndex] +                                    .encoding); +      o << ", "; +      o << stringForOperandType(InstructionSpecifiers[index] +                                .operands[operandIndex] +                                .type); +      o << " }"; + +      if (operandIndex < X86_MAX_OPERANDS - 1) +        o << ","; + +      o << "\n"; +    } + +    i--; +    o.indent(i * 2) << "}," << "\n"; +     +    o.indent(i * 2) << "\"" << InstructionSpecifiers[index].name << "\""; +    o << "\n"; + +    i--; +    o.indent(i * 2) << "}"; + +    if (index + 1 < numInstructions) +      o << ","; + +    o << "\n"; +  } + +  i--; +  o.indent(i * 2) << "};" << "\n"; +} + +void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { +  uint16_t index; + +  o.indent(i * 2) << "InstructionContext "; +  o << CONTEXTS_STR << "[256] = {" << "\n"; +  i++; + +  for (index = 0; index < 256; ++index) { +    o.indent(i * 2); + +    if 
((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS)) +      o << "IC_64BIT_REXW_XS"; +    else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD)) +      o << "IC_64BIT_REXW_XD"; +    else if ((index & ATTR_64BIT) && (index & ATTR_REXW) &&  +             (index & ATTR_OPSIZE)) +      o << "IC_64BIT_REXW_OPSIZE"; +    else if ((index & ATTR_64BIT) && (index & ATTR_XS)) +      o << "IC_64BIT_XS"; +    else if ((index & ATTR_64BIT) && (index & ATTR_XD)) +      o << "IC_64BIT_XD"; +    else if ((index & ATTR_64BIT) && (index & ATTR_OPSIZE)) +      o << "IC_64BIT_OPSIZE"; +    else if ((index & ATTR_64BIT) && (index & ATTR_REXW)) +      o << "IC_64BIT_REXW"; +    else if ((index & ATTR_64BIT)) +      o << "IC_64BIT"; +    else if (index & ATTR_XS) +      o << "IC_XS"; +    else if (index & ATTR_XD) +      o << "IC_XD"; +    else if (index & ATTR_OPSIZE) +      o << "IC_OPSIZE"; +    else +      o << "IC"; + +    if (index < 255) +      o << ","; +    else +      o << " "; + +    o << " /* " << index << " */"; + +    o << "\n"; +  } + +  i--; +  o.indent(i * 2) << "};" << "\n"; +} + +void DisassemblerTables::emitContextDecisions(raw_ostream &o1, +                                            raw_ostream &o2, +                                            uint32_t &i1, +                                            uint32_t &i2) +  const { +  emitContextDecision(o1, o2, i1, i2, *Tables[0], ONEBYTE_STR); +  emitContextDecision(o1, o2, i1, i2, *Tables[1], TWOBYTE_STR); +  emitContextDecision(o1, o2, i1, i2, *Tables[2], THREEBYTE38_STR); +  emitContextDecision(o1, o2, i1, i2, *Tables[3], THREEBYTE3A_STR); +} + +void DisassemblerTables::emit(raw_ostream &o) const { +  uint32_t i1 = 0; +  uint32_t i2 = 0; +   +  std::string s1; +  std::string s2; +   +  raw_string_ostream o1(s1); +  raw_string_ostream o2(s2); +   +  emitInstructionInfo(o, i2); +  o << "\n"; + +  emitContextTable(o, i2); +  o << "\n"; +   +  emitEmptyTable(o1, i1); +  
emitContextDecisions(o1, o2, i1, i2); +   +  o << o1.str(); +  o << "\n"; +  o << o2.str(); +  o << "\n"; +  o << "\n"; +} + +void DisassemblerTables::setTableFields(ModRMDecision     &decision, +                                        const ModRMFilter &filter, +                                        InstrUID          uid, +                                        uint8_t           opcode) { +  unsigned index; + +  for (index = 0; index < 256; ++index) { +    if (filter.accepts(index)) { +      if (decision.instructionIDs[index] == uid) +        continue; + +      if (decision.instructionIDs[index] != 0) { +        InstructionSpecifier &newInfo = +          InstructionSpecifiers[uid]; +        InstructionSpecifier &previousInfo = +          InstructionSpecifiers[decision.instructionIDs[index]]; +         +        if(newInfo.filtered) +          continue; // filtered instructions get lowest priority +         +        if(previousInfo.name == "NOOP") +          continue; // special case for XCHG32ar and NOOP + +        if (outranks(previousInfo.insnContext, newInfo.insnContext)) +          continue; +         +        if (previousInfo.insnContext == newInfo.insnContext && +            !previousInfo.filtered) { +          errs() << "Error: Primary decode conflict: "; +          errs() << newInfo.name << " would overwrite " << previousInfo.name; +          errs() << "\n"; +          errs() << "ModRM   " << index << "\n"; +          errs() << "Opcode  " << (uint16_t)opcode << "\n"; +          errs() << "Context " << stringForContext(newInfo.insnContext) << "\n"; +          HasConflicts = true; +        } +      } + +      decision.instructionIDs[index] = uid; +    } +  } +} + +void DisassemblerTables::setTableFields(OpcodeType          type, +                                        InstructionContext  insnContext, +                                        uint8_t             opcode, +                                        const ModRMFilter   &filter, +                  
                      InstrUID            uid) { +  unsigned index; +   +  ContextDecision &decision = *Tables[type]; + +  for (index = 0; index < IC_max; ++index) { +    if (inheritsFrom((InstructionContext)index,  +                     InstructionSpecifiers[uid].insnContext)) +      setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode],  +                     filter, +                     uid, +                     opcode); +  } +} diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h new file mode 100644 index 0000000..08eba01 --- /dev/null +++ b/utils/TableGen/X86DisassemblerTables.h @@ -0,0 +1,291 @@ +//===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler Emitter. +// It contains the interface of the disassembler tables. +// Documentation for the disassembler emitter in general can be found in +//  X86DisasemblerEmitter.h. +// +//===----------------------------------------------------------------------===// + +#ifndef X86DISASSEMBLERTABLES_H +#define X86DISASSEMBLERTABLES_H + +#include "X86DisassemblerShared.h" +#include "X86ModRMFilters.h" + +#include "llvm/Support/raw_ostream.h" + +#include <vector> + +namespace llvm { + +namespace X86Disassembler { + +/// DisassemblerTables - Encapsulates all the decode tables being generated by +///   the table emitter.  Contains functions to populate the tables as well as +///   to emit them as hierarchical C structures suitable for consumption by the +///   runtime. +class DisassemblerTables { +private: +  /// The decoder tables.  
There is one for each opcode type: +  /// [0] one-byte opcodes +  /// [1] two-byte opcodes of the form 0f __ +  /// [2] three-byte opcodes of the form 0f 38 __ +  /// [3] three-byte opcodes of the form 0f 3a __ +  ContextDecision* Tables[4]; +   +  /// The instruction information table +  std::vector<InstructionSpecifier> InstructionSpecifiers; +   +  /// True if there are primary decode conflicts in the instruction set +  bool HasConflicts; +   +  /// emitOneID - Emits a table entry for a single instruction entry, at the +  ///   innermost level of the structure hierarchy.  The entry is printed out +  ///   in the format "nnnn, /* MNEMONIC */" where nnnn is the ID in decimal, +  ///   the comma is printed if addComma is true, and the menonic is the name +  ///   of the instruction as listed in the LLVM tables. +  /// +  /// @param o        - The output stream to print the entry on. +  /// @param i        - The indentation level for o. +  /// @param id       - The unique ID of the instruction to print. +  /// @param addComma - Whether or not to print a comma after the ID.  True if +  ///                    additional items will follow. +  void emitOneID(raw_ostream &o, +                 uint32_t &i, +                 InstrUID id, +                 bool addComma) const; +   +  /// emitModRMDecision - Emits a table of entries corresponding to a single +  ///   ModR/M decision.  Compacts the ModR/M decision if possible.  ModR/M +  ///   decisions are printed as: +  /// +  ///   { /* struct ModRMDecision */ +  ///     TYPE, +  ///     modRMTablennnn +  ///   } +  /// +  ///   where nnnn is a unique ID for the corresponding table of IDs. +  ///   TYPE indicates whether the table has one entry that is the same +  ///   regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one +  ///   for bytes 0xc0-0xff -, or 256 entries, one for each possible byte.   +  ///   nnnn is the number of a table for looking up these values.  
The tables +  ///   are writen separately so that tables consisting entirely of zeros will +  ///   not be duplicated.  (These all have the name modRMEmptyTable.)  A table +  ///   is printed as: +  ///    +  ///   InstrUID modRMTablennnn[k] = { +  ///     nnnn, /* MNEMONIC */ +  ///     ... +  ///     nnnn /* MNEMONIC */ +  ///   }; +  /// +  /// @param o1       - The output stream to print the ID table to. +  /// @param o2       - The output stream to print the decision structure to. +  /// @param i1       - The indentation level to use with stream o1. +  /// @param i2       - The indentation level to use with stream o2. +  /// @param decision - The ModR/M decision to emit.  This decision has 256 +  ///                   entries - emitModRMDecision decides how to compact it. +  void emitModRMDecision(raw_ostream &o1, +                         raw_ostream &o2, +                         uint32_t &i1, +                         uint32_t &i2, +                         ModRMDecision &decision) const; +   +  /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M +  ///   decisions.  An OpcodeDecision is printed as: +  /// +  ///   { /* struct OpcodeDecision */ +  ///     /* 0x00 */ +  ///     { /* struct ModRMDecision */ +  ///       ... +  ///     } +  ///     ... +  ///   } +  /// +  ///   where the ModRMDecision structure is printed as described in the +  ///   documentation for emitModRMDecision().  emitOpcodeDecision() passes on a +  ///   stream and indent level for the UID tables generated by +  ///   emitModRMDecision(), but does not use them itself. +  /// +  /// @param o1       - The output stream to print the ID tables generated by +  ///                   emitModRMDecision() to. +  /// @param o2       - The output stream for the decision structure itself. +  /// @param i1       - The indent level to use with stream o1. +  /// @param i2       - The indent level to use with stream o2. 
+  /// @param decision - The OpcodeDecision to emit along with its subsidiary +  ///                    structures. +  void emitOpcodeDecision(raw_ostream &o1, +                          raw_ostream &o2, +                          uint32_t &i1, +                          uint32_t &i2, +                          OpcodeDecision &decision) const; +   +  /// emitContextDecision - Emits a ContextDecision and all its subsidiary  +  ///   Opcode and ModRMDecisions.  A ContextDecision is printed as: +  /// +  ///   struct ContextDecision NAME = { +  ///     { /* OpcodeDecisions */ +  ///       /* IC */ +  ///       { /* struct OpcodeDecision */ +  ///         ... +  ///       }, +  ///       ... +  ///     } +  ///   } +  /// +  ///   NAME is the name of the ContextDecision (typically one of the four names  +  ///   ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and THREEBYTE3A_SYM from +  ///   X86DisassemblerDecoderCommon.h). +  ///   IC is one of the contexts in InstructionContext.  There is an opcode +  ///   decision for each possible context. +  ///   The OpcodeDecision structures are printed as described in the +  ///   documentation for emitOpcodeDecision. +  /// +  /// @param o1       - The output stream to print the ID tables generated by +  ///                   emitModRMDecision() to. +  /// @param o2       - The output stream to print the decision structure to. +  /// @param i1       - The indent level to use with stream o1. +  /// @param i2       - The indent level to use with stream o2. +  /// @param decision - The ContextDecision to emit along with its subsidiary +  ///                   structures. +  /// @param name     - The name for the ContextDecision. 
+  void emitContextDecision(raw_ostream &o1, +                           raw_ostream &o2, +                           uint32_t &i1, +                           uint32_t &i2,                            +                           ContextDecision &decision, +                           const char* name) const; +   +  /// emitInstructionInfo - Prints the instruction specifier table, which has +  ///   one entry for each instruction, and contains name and operand +  ///   information.  This table is printed as: +  /// +  ///   struct InstructionSpecifier CONTEXTS_SYM[k] = { +  ///     { +  ///       /* nnnn */ +  ///       "MNEMONIC", +  ///       0xnn, +  ///       { +  ///         { +  ///           ENCODING, +  ///           TYPE +  ///         }, +  ///         ... +  ///       } +  ///     }, +  ///   }; +  /// +  ///   k is the total number of instructions. +  ///   nnnn is the ID of the current instruction (0-based).  This table  +  ///   includes entries for non-instructions like PHINODE. +  ///   0xnn is the lowest possible opcode for the current instruction, used for +  ///   AddRegFrm instructions to compute the operand's value. +  ///   ENCODING and TYPE describe the encoding and type for a single operand. +  /// +  /// @param o  - The output stream to which the instruction table should be  +  ///             written. +  /// @param i  - The indent level for use with the stream. +  void emitInstructionInfo(raw_ostream &o, uint32_t &i) const; +   +  /// emitContextTable - Prints the table that is used to translate from an +  ///   instruction attribute mask to an instruction context.  This table is +  ///   printed as: +  /// +  ///   InstructionContext CONTEXTS_STR[256] = { +  ///     IC, /* 0x00 */ +  ///     ... +  ///   }; +  /// +  ///   IC is the context corresponding to the mask 0x00, and there are 256 +  ///   possible masks. +  /// +  /// @param o  - The output stream to which the context table should be written. 
+  /// @param i  - The indent level for use with the stream. +  void emitContextTable(raw_ostream &o, uint32_t &i) const; +   +  /// emitContextDecisions - Prints all four ContextDecision structures using +  ///   emitContextDecision(). +  /// +  /// @param o1 - The output stream to print the ID tables generated by +  ///             emitModRMDecision() to. +  /// @param o2 - The output stream to print the decision structures to. +  /// @param i1 - The indent level to use with stream o1. +  /// @param i2 - The indent level to use with stream o2. +  void emitContextDecisions(raw_ostream &o1, +                            raw_ostream &o2, +                            uint32_t &i1, +                            uint32_t &i2) const;  + +  /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a +  ///   ModRMDecision to refer to a particular instruction ID. +  /// +  /// @param decision - The ModRMDecision to populate. +  /// @param filter   - The filter to use in deciding which entries to populate. +  /// @param uid      - The unique ID to set matching entries to. +  /// @param opcode   - The opcode of the instruction, for error reporting. +  void setTableFields(ModRMDecision &decision, +                      const ModRMFilter &filter, +                      InstrUID uid, +                      uint8_t opcode); +public: +  /// Constructor - Allocates space for the class decisions and clears them. +  DisassemblerTables(); +   +  ~DisassemblerTables(); +   +  /// emit - Emits the instruction table, context table, and class decisions. +  /// +  /// @param o  - The output stream to print the tables to. +  void emit(raw_ostream &o) const; +   +  /// setTableFields - Uses the opcode type, instruction context, opcode, and a +  ///   ModRMFilter as criteria to set a particular set of entries in the +  ///   decode tables to point to a specific uid. +  /// +  /// @param type         - The opcode type (ONEBYTE, TWOBYTE, etc.) 
+  /// @param insnContext  - The context to use (IC, IC_64BIT, etc.) +  /// @param opcode       - The last byte of the opcode (not counting any escape +  ///                       or extended opcodes). +  /// @param filter       - The ModRMFilter that decides which ModR/M byte values +  ///                       correspond to the desired instruction. +  /// @param uid          - The unique ID of the instruction. +  void setTableFields(OpcodeType type, +                      InstructionContext insnContext, +                      uint8_t opcode, +                      const ModRMFilter &filter, +                      InstrUID uid);   +   +  /// specForUID - Returns the instruction specifier for a given unique +  ///   instruction ID.  Used when resolving collisions. +  /// +  /// @param uid  - The unique ID of the instruction. +  /// @return     - A reference to the instruction specifier.  +  InstructionSpecifier& specForUID(InstrUID uid) { +    if (uid >= InstructionSpecifiers.size()) +      InstructionSpecifiers.resize(uid + 1); +     +    return InstructionSpecifiers[uid]; +  } +   +  // hasConflicts - Reports whether there were primary decode conflicts +  //   from any instructions added to the tables. +  // @return  - true if there were; false otherwise. +   +  bool hasConflicts() { +    return HasConflicts; +  } +}; + +} // namespace X86Disassembler + +} // namespace llvm + +#endif diff --git a/utils/TableGen/X86ModRMFilters.h b/utils/TableGen/X86ModRMFilters.h new file mode 100644 index 0000000..4fe4af3 --- /dev/null +++ b/utils/TableGen/X86ModRMFilters.h @@ -0,0 +1,197 @@ +//===- X86ModRMFilters.h - Disassembler ModR/M filterss ---------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler Emitter. +// It contains ModR/M filters that determine which values of the ModR/M byte +//  are valid for a partiuclar instruction. +// Documentation for the disassembler emitter in general can be found in +//  X86DisasemblerEmitter.h. +// +//===----------------------------------------------------------------------===// + +#ifndef X86MODRMFILTERS_H +#define X86MODRMFILTERS_H + +#include "llvm/System/DataTypes.h" + +namespace llvm { + +namespace X86Disassembler { + +/// ModRMFilter - Abstract base class for clases that recognize patterns in +///   ModR/M bytes. +class ModRMFilter { +public: +  /// Destructor    - Override as necessary. +  virtual ~ModRMFilter() { } + +  /// isDumb        - Indicates whether this filter returns the same value for +  ///                 any value of the ModR/M byte. +  /// +  /// @result       - True if the filter returns the same value for any ModR/M +  ///                 byte; false if not. +  virtual bool isDumb() const { return false; } +   +  /// accepts       - Indicates whether the filter accepts a particular ModR/M +  ///                 byte value. +  /// +  /// @result       - True if the filter accepts the ModR/M byte; false if not. +  virtual bool accepts(uint8_t modRM) const = 0; +}; + +/// DumbFilter - Accepts any ModR/M byte.  Used for instructions that do not +///   require a ModR/M byte or instructions where the entire ModR/M byte is used +///   for operands. +class DumbFilter : public ModRMFilter { +public: +  bool isDumb() const { +    return true; +  } +   +  bool accepts(uint8_t modRM) const { +    return true; +  } +}; + +/// ModFilter - Filters based on the mod bits [bits 7-6] of the ModR/M byte. +///   Some instructions are classified based on whether they are 11 or anything +///   else.  This filter performs that classification. 
+class ModFilter : public ModRMFilter { +private: +  bool R; +public: +  /// Constructor +  /// +  /// @r            - True if the mod bits of the ModR/M byte must be 11; false +  ///                 otherwise.  The name r derives from the fact that the mod +  ///                 bits indicate whether the R/M bits [bits 2-0] signify a +  ///                 register or a memory operand. +  ModFilter(bool r) : +    ModRMFilter(), +    R(r) { +  } +     +  bool accepts(uint8_t modRM) const { +    if (R == ((modRM & 0xc0) == 0xc0)) +      return true; +    else +      return false; +  } +}; + +/// EscapeFilter - Filters escape opcodes, which are classified in two ways.  If +///   the ModR/M byte is between 0xc0 and 0xff, then there is one slot for each +///   possible value.  Otherwise, there is one instruction for each value of the +///   nnn field [bits 5-3], known elsewhere as the reg field. +class EscapeFilter : public ModRMFilter { +private: +  bool C0_FF; +  uint8_t NNN_or_ModRM; +public: +  /// Constructor +  /// +  /// @c0_ff        - True if the ModR/M byte must fall between 0xc0 and 0xff; +  ///                 false otherwise. +  /// @nnn_or_modRM - If c0_ff is true, the required value of the entire ModR/M +  ///                 byte.  If c0_ff is false, the required value of the nnn +  ///                 field. +  EscapeFilter(bool c0_ff, uint8_t nnn_or_modRM) : +    ModRMFilter(), +    C0_FF(c0_ff), +    NNN_or_ModRM(nnn_or_modRM) { +  } +     +  bool accepts(uint8_t modRM) const { +    if ((C0_FF && modRM >= 0xc0 && (modRM == NNN_or_ModRM)) || +        (!C0_FF && modRM < 0xc0  && ((modRM & 0x38) >> 3) == NNN_or_ModRM)) +      return true; +    else +      return false; +  } +}; + +/// AddRegEscapeFilter - Some escape opcodes have one of the register operands +///   added to the ModR/M byte, meaning that a range of eight ModR/M values +///   maps to a single instruction.  Such instructions require the ModR/M byte +///   to fall between 0xc0 and 0xff. 
+class AddRegEscapeFilter : public ModRMFilter { +private: +  uint8_t ModRM; +public: +  /// Constructor +  /// +  /// @modRM        - The value of the ModR/M byte when the register operand +  ///                 refers to the first register in the register set. +  AddRegEscapeFilter(uint8_t modRM) : ModRM(modRM) { +  } +   +  bool accepts(uint8_t modRM) const { +    if (modRM >= ModRM && modRM < ModRM + 8) +      return true; +    else +      return false; +  } +}; + +/// ExtendedFilter - Extended opcodes are classified based on the value of the +///   mod field [bits 7-6] and the value of the nnn field [bits 5-3].  +class ExtendedFilter : public ModRMFilter { +private: +  bool R; +  uint8_t NNN; +public: +  /// Constructor +  /// +  /// @r            - True if the mod field must be set to 11; false otherwise. +  ///                 The name is explained at ModFilter. +  /// @nnn          - The required value of the nnn field. +  ExtendedFilter(bool r, uint8_t nnn) :  +    ModRMFilter(), +    R(r), +    NNN(nnn) { +  } +     +  bool accepts(uint8_t modRM) const { +    if (((R  && ((modRM & 0xc0) == 0xc0)) || +        (!R && ((modRM & 0xc0) != 0xc0))) && +        (((modRM & 0x38) >> 3) == NNN)) +      return true; +    else +      return false; +  } +}; + +/// ExactFilter - The occasional extended opcode (such as VMCALL or MONITOR) +///   requires the ModR/M byte to have a specific value. +class ExactFilter : public ModRMFilter +{ +private: +  uint8_t ModRM; +public: +  /// Constructor +  /// +  /// @modRM        - The required value of the full ModR/M byte. +  ExactFilter(uint8_t modRM) : +    ModRMFilter(), +    ModRM(modRM) { +  } +     +  bool accepts(uint8_t modRM) const { +    if (ModRM == modRM) +      return true; +    else +      return false; +  } +}; + +} // namespace X86Disassembler + +} // namespace llvm + +#endif
\ No newline at end of file diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp new file mode 100644 index 0000000..8a21399 --- /dev/null +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -0,0 +1,959 @@ +//===- X86RecognizableInstr.cpp - Disassembler instruction spec --*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler Emitter. +// It contains the implementation of a single recognizable instruction. +// Documentation for the disassembler emitter in general can be found in +//  X86DisasemblerEmitter.h. +// +//===----------------------------------------------------------------------===// + +#include "X86DisassemblerShared.h" +#include "X86RecognizableInstr.h" +#include "X86ModRMFilters.h" + +#include "llvm/Support/ErrorHandling.h" + +#include <string> + +using namespace llvm; + +// A clone of X86 since we can't depend on something that is generated. 
+namespace X86Local { +  enum { +    Pseudo      = 0, +    RawFrm      = 1, +    AddRegFrm   = 2, +    MRMDestReg  = 3, +    MRMDestMem  = 4, +    MRMSrcReg   = 5, +    MRMSrcMem   = 6, +    MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19,  +    MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, +    MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, +    MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, +    MRMInitReg  = 32 +  }; +   +  enum { +    TB  = 1, +    REP = 2, +    D8 = 3, D9 = 4, DA = 5, DB = 6, +    DC = 7, DD = 8, DE = 9, DF = 10, +    XD = 11,  XS = 12, +    T8 = 13,  TA = 14 +  }; +} +   +#define ONE_BYTE_EXTENSION_TABLES \ +  EXTENSION_TABLE(80)             \ +  EXTENSION_TABLE(81)             \ +  EXTENSION_TABLE(82)             \ +  EXTENSION_TABLE(83)             \ +  EXTENSION_TABLE(8f)             \ +  EXTENSION_TABLE(c0)             \ +  EXTENSION_TABLE(c1)             \ +  EXTENSION_TABLE(c6)             \ +  EXTENSION_TABLE(c7)             \ +  EXTENSION_TABLE(d0)             \ +  EXTENSION_TABLE(d1)             \ +  EXTENSION_TABLE(d2)             \ +  EXTENSION_TABLE(d3)             \ +  EXTENSION_TABLE(f6)             \ +  EXTENSION_TABLE(f7)             \ +  EXTENSION_TABLE(fe)             \ +  EXTENSION_TABLE(ff) +   +#define TWO_BYTE_EXTENSION_TABLES \ +  EXTENSION_TABLE(00)             \ +  EXTENSION_TABLE(01)             \ +  EXTENSION_TABLE(18)             \ +  EXTENSION_TABLE(71)             \ +  EXTENSION_TABLE(72)             \ +  EXTENSION_TABLE(73)             \ +  EXTENSION_TABLE(ae)             \ +  EXTENSION_TABLE(b9)             \ +  EXTENSION_TABLE(ba)             \ +  EXTENSION_TABLE(c7) +   +#define TWO_BYTE_FULL_EXTENSION_TABLES \ +  EXTENSION_TABLE(01) +   + +using namespace X86Disassembler; + +/// needsModRMForDecode - Indicates whether a particular instruction requires a +///   ModR/M byte for the instruction to be properly decoded.  
For example, a  +///   MRMDestReg instruction needs the Mod field in the ModR/M byte to be set to +///   0b11. +/// +/// @param form - The form of the instruction. +/// @return     - true if the form implies that a ModR/M byte is required, false +///               otherwise. +static bool needsModRMForDecode(uint8_t form) { +  if (form == X86Local::MRMDestReg    || +     form == X86Local::MRMDestMem    || +     form == X86Local::MRMSrcReg     || +     form == X86Local::MRMSrcMem     || +     (form >= X86Local::MRM0r && form <= X86Local::MRM7r) || +     (form >= X86Local::MRM0m && form <= X86Local::MRM7m)) +    return true; +  else +    return false; +} + +/// isRegFormat - Indicates whether a particular form requires the Mod field of +///   the ModR/M byte to be 0b11. +/// +/// @param form - The form of the instruction. +/// @return     - true if the form implies that Mod must be 0b11, false +///               otherwise. +static bool isRegFormat(uint8_t form) { +  if (form == X86Local::MRMDestReg || +     form == X86Local::MRMSrcReg  || +     (form >= X86Local::MRM0r && form <= X86Local::MRM7r)) +    return true; +  else +    return false; +} + +/// byteFromBitsInit - Extracts a value at most 8 bits in width from a BitsInit. +///   Useful for switch statements and the like. +/// +/// @param init - A reference to the BitsInit to be decoded. +/// @return     - The field, with the first bit in the BitsInit as the lowest +///               order bit. +static uint8_t byteFromBitsInit(BitsInit &init) { +  int width = init.getNumBits(); + +  assert(width <= 8 && "Field is too large for uint8_t!"); + +  int     index; +  uint8_t mask = 0x01; + +  uint8_t ret = 0; + +  for (index = 0; index < width; index++) { +    if (static_cast<BitInit*>(init.getBit(index))->getValue()) +      ret |= mask; + +    mask <<= 1; +  } + +  return ret; +} + +/// byteFromRec - Extract a value at most 8 bits in with from a Record given the +///   name of the field. 
+/// +/// @param rec  - The record from which to extract the value. +/// @param name - The name of the field in the record. +/// @return     - The field, as translated by byteFromBitsInit(). +static uint8_t byteFromRec(const Record* rec, const std::string &name) { +  BitsInit* bits = rec->getValueAsBitsInit(name); +  return byteFromBitsInit(*bits); +} + +RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, +                                     const CodeGenInstruction &insn, +                                     InstrUID uid) { +  UID = uid; + +  Rec = insn.TheDef; +  Name = Rec->getName(); +  Spec = &tables.specForUID(UID); +   +  if (!Rec->isSubClassOf("X86Inst")) { +    ShouldBeEmitted = false; +    return; +  } +   +  Prefix   = byteFromRec(Rec, "Prefix"); +  Opcode   = byteFromRec(Rec, "Opcode"); +  Form     = byteFromRec(Rec, "FormBits"); +  SegOvr   = byteFromRec(Rec, "SegOvrBits"); +   +  HasOpSizePrefix  = Rec->getValueAsBit("hasOpSizePrefix"); +  HasREX_WPrefix   = Rec->getValueAsBit("hasREX_WPrefix"); +  HasLockPrefix    = Rec->getValueAsBit("hasLockPrefix"); +  IsCodeGenOnly    = Rec->getValueAsBit("isCodeGenOnly"); +   +  Name      = Rec->getName(); +  AsmString = Rec->getValueAsString("AsmString"); +   +  Operands = &insn.OperandList; +   +  IsSSE            = HasOpSizePrefix && (Name.find("16") == Name.npos); +  HasFROperands    = false; +   +  ShouldBeEmitted  = true; +} +   +void RecognizableInstr::processInstr(DisassemblerTables &tables, +                                   const CodeGenInstruction &insn, +                                   InstrUID uid) +{ +  RecognizableInstr recogInstr(tables, insn, uid); +   +  recogInstr.emitInstructionSpecifier(tables); +   +  if (recogInstr.shouldBeEmitted()) +    recogInstr.emitDecodePath(tables); +} + +InstructionContext RecognizableInstr::insnContext() const { +  InstructionContext insnContext; + +  if (Name.find("64") != Name.npos || HasREX_WPrefix) { +    if (HasREX_WPrefix && 
HasOpSizePrefix) +      insnContext = IC_64BIT_REXW_OPSIZE; +    else if (HasOpSizePrefix) +      insnContext = IC_64BIT_OPSIZE; +    else if (HasREX_WPrefix && Prefix == X86Local::XS) +      insnContext = IC_64BIT_REXW_XS; +    else if (HasREX_WPrefix && Prefix == X86Local::XD) +      insnContext = IC_64BIT_REXW_XD; +    else if (Prefix == X86Local::XD) +      insnContext = IC_64BIT_XD; +    else if (Prefix == X86Local::XS) +      insnContext = IC_64BIT_XS; +    else if (HasREX_WPrefix) +      insnContext = IC_64BIT_REXW; +    else +      insnContext = IC_64BIT; +  } else { +    if (HasOpSizePrefix) +      insnContext = IC_OPSIZE; +    else if (Prefix == X86Local::XD) +      insnContext = IC_XD; +    else if (Prefix == X86Local::XS) +      insnContext = IC_XS; +    else +      insnContext = IC; +  } + +  return insnContext; +} +   +RecognizableInstr::filter_ret RecognizableInstr::filter() const { +  // Filter out intrinsics +   +  if (!Rec->isSubClassOf("X86Inst")) +    return FILTER_STRONG; +   +  if (Form == X86Local::Pseudo || +      IsCodeGenOnly) +    return FILTER_STRONG; +   +  // Filter out instructions with a LOCK prefix; +  //   prefer forms that do not have the prefix +  if (HasLockPrefix) +    return FILTER_WEAK; +   +  // Filter out artificial instructions + +  if (Name.find("TAILJMP") != Name.npos    || +     Name.find("_Int") != Name.npos       || +     Name.find("_int") != Name.npos       || +     Name.find("Int_") != Name.npos       || +     Name.find("_NOREX") != Name.npos     || +     Name.find("EH_RETURN") != Name.npos  || +     Name.find("V_SET") != Name.npos      || +     Name.find("LOCK_") != Name.npos      || +     Name.find("WIN") != Name.npos) +    return FILTER_STRONG; + +  // Special cases. 
+   +  if (Name.find("PCMPISTRI") != Name.npos && Name != "PCMPISTRI") +    return FILTER_WEAK; +  if (Name.find("PCMPESTRI") != Name.npos && Name != "PCMPESTRI") +    return FILTER_WEAK; + +  if (Name.find("MOV") != Name.npos && Name.find("r0") != Name.npos) +    return FILTER_WEAK; +  if (Name.find("MOVZ") != Name.npos && Name.find("MOVZX") == Name.npos) +    return FILTER_WEAK; +  if (Name.find("Fs") != Name.npos) +    return FILTER_WEAK; +  if (Name == "MOVLPDrr"          || +      Name == "MOVLPSrr"          || +      Name == "PUSHFQ"            || +      Name == "BSF16rr"           || +      Name == "BSF16rm"           || +      Name == "BSR16rr"           || +      Name == "BSR16rm"           || +      Name == "MOVSX16rm8"        || +      Name == "MOVSX16rr8"        || +      Name == "MOVZX16rm8"        || +      Name == "MOVZX16rr8"        || +      Name == "PUSH32i16"         || +      Name == "PUSH64i16"         || +      Name == "MOVPQI2QImr"       || +      Name == "MOVSDmr"           || +      Name == "MOVSDrm"           || +      Name == "MOVSSmr"           || +      Name == "MOVSSrm"           || +      Name == "MMX_MOVD64rrv164"  || +      Name == "CRC32m16"          || +      Name == "MOV64ri64i32"      || +      Name == "CRC32r16") +    return FILTER_WEAK; + +  // Filter out instructions with segment override prefixes. +  // They're too messy to handle now and we'll special case them if needed. + +  if (SegOvr) +    return FILTER_STRONG; +   +  // Filter out instructions that can't be printed. + +  if (AsmString.size() == 0) +    return FILTER_STRONG; +   +  // Filter out instructions with subreg operands. 
+   +  if (AsmString.find("subreg") != AsmString.npos) +    return FILTER_STRONG; + +  assert(Form != X86Local::MRMInitReg && +         "FORMAT_MRMINITREG instruction not skipped"); +   +  if (HasFROperands && Name.find("MOV") != Name.npos && +     ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) ||  +      (Name.find("to") != Name.npos))) +    return FILTER_WEAK; + +  return FILTER_NORMAL; +} +   +void RecognizableInstr::handleOperand( +  bool optional, +  unsigned &operandIndex, +  unsigned &physicalOperandIndex, +  unsigned &numPhysicalOperands, +  unsigned *operandMapping, +  OperandEncoding (*encodingFromString)(const std::string&, bool hasOpSizePrefix)) { +  if (optional) { +    if (physicalOperandIndex >= numPhysicalOperands) +      return; +  } else { +    assert(physicalOperandIndex < numPhysicalOperands); +  } +   +  while (operandMapping[operandIndex] != operandIndex) { +    Spec->operands[operandIndex].encoding = ENCODING_DUP; +    Spec->operands[operandIndex].type = +      (OperandType)(TYPE_DUP0 + operandMapping[operandIndex]); +    ++operandIndex; +  } +   +  const std::string &typeName = (*Operands)[operandIndex].Rec->getName(); +   +  Spec->operands[operandIndex].encoding = encodingFromString(typeName, +                                                              HasOpSizePrefix); +  Spec->operands[operandIndex].type = typeFromString(typeName,  +                                                      IsSSE, +                                                      HasREX_WPrefix, +                                                      HasOpSizePrefix); +   +  ++operandIndex; +  ++physicalOperandIndex; +} + +void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { +  Spec->name       = Name; +     +  if (!Rec->isSubClassOf("X86Inst")) +    return; +   +  switch (filter()) { +  case FILTER_WEAK: +    Spec->filtered = true; +    break; +  case FILTER_STRONG: +    ShouldBeEmitted = false; +    return; +  case 
FILTER_NORMAL: +    break; +  } +   +  Spec->insnContext = insnContext(); +     +  const std::vector<CodeGenInstruction::OperandInfo> &OperandList = *Operands; +   +  unsigned operandIndex; +  unsigned numOperands = OperandList.size(); +  unsigned numPhysicalOperands = 0; +   +  // operandMapping maps from operands in OperandList to their originals. +  // If operandMapping[i] != i, then the entry is a duplicate. +  unsigned operandMapping[X86_MAX_OPERANDS]; +   +  bool hasFROperands = false; +   +  assert(numOperands < X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough"); +   +  for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) { +    if (OperandList[operandIndex].Constraints.size()) { +      const std::string &constraint = OperandList[operandIndex].Constraints[0]; +      std::string::size_type tiedToPos; + +      if ((tiedToPos = constraint.find(" << 16) | (1 << TOI::TIED_TO))")) != +         constraint.npos) { +        tiedToPos--; +        operandMapping[operandIndex] = constraint[tiedToPos] - '0'; +      } else { +        ++numPhysicalOperands; +        operandMapping[operandIndex] = operandIndex; +      } +    } else { +      ++numPhysicalOperands; +      operandMapping[operandIndex] = operandIndex; +    } + +    const std::string &recName = OperandList[operandIndex].Rec->getName(); + +    if (recName.find("FR") != recName.npos) +      hasFROperands = true; +  } +   +  if (hasFROperands && Name.find("MOV") != Name.npos && +     ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || +      (Name.find("to") != Name.npos))) +    ShouldBeEmitted = false; +   +  if (!ShouldBeEmitted) +    return; + +#define HANDLE_OPERAND(class)               \ +  handleOperand(false,                      \ +                operandIndex,               \ +                physicalOperandIndex,       \ +                numPhysicalOperands,        \ +                operandMapping,             \ +                class##EncodingFromString); +   
+#define HANDLE_OPTIONAL(class)              \ +  handleOperand(true,                       \ +                operandIndex,               \ +                physicalOperandIndex,       \ +                numPhysicalOperands,        \ +                operandMapping,             \ +                class##EncodingFromString); +   +  // operandIndex should always be < numOperands +  operandIndex = 0; +  // physicalOperandIndex should always be < numPhysicalOperands +  unsigned physicalOperandIndex = 0; +     +  switch (Form) { +  case X86Local::RawFrm: +    // Operand 1 (optional) is an address or immediate. +    // Operand 2 (optional) is an immediate. +    assert(numPhysicalOperands <= 2 &&  +           "Unexpected number of operands for RawFrm"); +    HANDLE_OPTIONAL(relocation) +    HANDLE_OPTIONAL(immediate) +    break; +  case X86Local::AddRegFrm: +    // Operand 1 is added to the opcode. +    // Operand 2 (optional) is an address. +    assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 && +           "Unexpected number of operands for AddRegFrm"); +    HANDLE_OPERAND(opcodeModifier) +    HANDLE_OPTIONAL(relocation) +    break; +  case X86Local::MRMDestReg: +    // Operand 1 is a register operand in the R/M field. +    // Operand 2 is a register operand in the Reg/Opcode field. +    // Operand 3 (optional) is an immediate. +    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && +           "Unexpected number of operands for MRMDestRegFrm"); +    HANDLE_OPERAND(rmRegister) +    HANDLE_OPERAND(roRegister) +    HANDLE_OPTIONAL(immediate) +    break; +  case X86Local::MRMDestMem: +    // Operand 1 is a memory operand (possibly SIB-extended) +    // Operand 2 is a register operand in the Reg/Opcode field. +    // Operand 3 (optional) is an immediate. 
+    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && +           "Unexpected number of operands for MRMDestMemFrm"); +    HANDLE_OPERAND(memory) +    HANDLE_OPERAND(roRegister) +    HANDLE_OPTIONAL(immediate) +    break; +  case X86Local::MRMSrcReg: +    // Operand 1 is a register operand in the Reg/Opcode field. +    // Operand 2 is a register operand in the R/M field. +    // Operand 3 (optional) is an immediate. +    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && +           "Unexpected number of operands for MRMSrcRegFrm"); +    HANDLE_OPERAND(roRegister) +    HANDLE_OPERAND(rmRegister) +    HANDLE_OPTIONAL(immediate) +    break; +  case X86Local::MRMSrcMem: +    // Operand 1 is a register operand in the Reg/Opcode field. +    // Operand 2 is a memory operand (possibly SIB-extended) +    // Operand 3 (optional) is an immediate. +    assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && +           "Unexpected number of operands for MRMSrcMemFrm"); +    HANDLE_OPERAND(roRegister) +    HANDLE_OPERAND(memory) +    HANDLE_OPTIONAL(immediate) +    break; +  case X86Local::MRM0r: +  case X86Local::MRM1r: +  case X86Local::MRM2r: +  case X86Local::MRM3r: +  case X86Local::MRM4r: +  case X86Local::MRM5r: +  case X86Local::MRM6r: +  case X86Local::MRM7r: +    // Operand 1 is a register operand in the R/M field. +    // Operand 2 (optional) is an immediate or relocation. +    assert(numPhysicalOperands <= 2 && +           "Unexpected number of operands for MRMnRFrm"); +    HANDLE_OPTIONAL(rmRegister) +    HANDLE_OPTIONAL(relocation) +    break; +  case X86Local::MRM0m: +  case X86Local::MRM1m: +  case X86Local::MRM2m: +  case X86Local::MRM3m: +  case X86Local::MRM4m: +  case X86Local::MRM5m: +  case X86Local::MRM6m: +  case X86Local::MRM7m: +    // Operand 1 is a memory operand (possibly SIB-extended) +    // Operand 2 (optional) is an immediate or relocation. 
+    assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 && +           "Unexpected number of operands for MRMnMFrm"); +    HANDLE_OPERAND(memory) +    HANDLE_OPTIONAL(relocation) +    break; +  case X86Local::MRMInitReg: +    // Ignored. +    break; +  } +   +  #undef HANDLE_OPERAND +  #undef HANDLE_OPTIONAL +} + +void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { +  // Special cases where the LLVM tables are not complete + +#define EXACTCASE(class, name, lastbyte)         \ +  if (Name == name) {                           \ +    tables.setTableFields(class,                 \ +                          insnContext(),         \ +                          Opcode,               \ +                          ExactFilter(lastbyte), \ +                          UID);                 \ +    Spec->modifierBase = Opcode;               \ +    return;                                      \ +  }  + +  EXACTCASE(TWOBYTE, "MONITOR",  0xc8) +  EXACTCASE(TWOBYTE, "MWAIT",    0xc9) +  EXACTCASE(TWOBYTE, "SWPGS",    0xf8) +  EXACTCASE(TWOBYTE, "INVEPT",   0x80) +  EXACTCASE(TWOBYTE, "INVVPID",  0x81) +  EXACTCASE(TWOBYTE, "VMCALL",   0xc1) +  EXACTCASE(TWOBYTE, "VMLAUNCH", 0xc2) +  EXACTCASE(TWOBYTE, "VMRESUME", 0xc3) +  EXACTCASE(TWOBYTE, "VMXOFF",   0xc4) + +  if (Name == "INVLPG") { +    tables.setTableFields(TWOBYTE, +                          insnContext(), +                          Opcode, +                          ExtendedFilter(false, 7), +                          UID); +    Spec->modifierBase = Opcode; +    return; +  } + +  OpcodeType    opcodeType  = (OpcodeType)-1; +   +  ModRMFilter*  filter      = NULL;  +  uint8_t       opcodeToSet = 0; + +  switch (Prefix) { +  // Extended two-byte opcodes can start with f2 0f, f3 0f, or 0f +  case X86Local::XD: +  case X86Local::XS: +  case X86Local::TB: +    opcodeType = TWOBYTE; + +    switch (Opcode) { +#define EXTENSION_TABLE(n) case 0x##n: +    TWO_BYTE_EXTENSION_TABLES +#undef EXTENSION_TABLE 
+      switch (Form) { +      default: +        llvm_unreachable("Unhandled two-byte extended opcode"); +      case X86Local::MRM0r: +      case X86Local::MRM1r: +      case X86Local::MRM2r: +      case X86Local::MRM3r: +      case X86Local::MRM4r: +      case X86Local::MRM5r: +      case X86Local::MRM6r: +      case X86Local::MRM7r: +        filter = new ExtendedFilter(true, Form - X86Local::MRM0r); +        break; +      case X86Local::MRM0m: +      case X86Local::MRM1m: +      case X86Local::MRM2m: +      case X86Local::MRM3m: +      case X86Local::MRM4m: +      case X86Local::MRM5m: +      case X86Local::MRM6m: +      case X86Local::MRM7m: +        filter = new ExtendedFilter(false, Form - X86Local::MRM0m); +        break; +      } // switch (Form) +      break; +    default: +      if (needsModRMForDecode(Form)) +        filter = new ModFilter(isRegFormat(Form)); +      else +        filter = new DumbFilter(); +         +      break; +    } // switch (opcode) +    opcodeToSet = Opcode; +    break; +  case X86Local::T8: +    opcodeType = THREEBYTE_38; +    if (needsModRMForDecode(Form)) +      filter = new ModFilter(isRegFormat(Form)); +    else +      filter = new DumbFilter(); +    opcodeToSet = Opcode; +    break; +  case X86Local::TA: +    opcodeType = THREEBYTE_3A; +    if (needsModRMForDecode(Form)) +      filter = new ModFilter(isRegFormat(Form)); +    else +      filter = new DumbFilter(); +    opcodeToSet = Opcode; +    break; +  case X86Local::D8: +  case X86Local::D9: +  case X86Local::DA: +  case X86Local::DB: +  case X86Local::DC: +  case X86Local::DD: +  case X86Local::DE: +  case X86Local::DF: +    assert(Opcode >= 0xc0 && "Unexpected opcode for an escape opcode"); +    opcodeType = ONEBYTE; +    if (Form == X86Local::AddRegFrm) { +      Spec->modifierType = MODIFIER_MODRM; +      Spec->modifierBase = Opcode; +      filter = new AddRegEscapeFilter(Opcode); +    } else { +      filter = new EscapeFilter(true, Opcode); +    } +    opcodeToSet = 
0xd8 + (Prefix - X86Local::D8); +    break; +  default: +    opcodeType = ONEBYTE; +    switch (Opcode) { +#define EXTENSION_TABLE(n) case 0x##n: +    ONE_BYTE_EXTENSION_TABLES +#undef EXTENSION_TABLE +      switch (Form) { +      default: +        llvm_unreachable("Fell through the cracks of a single-byte " +                         "extended opcode"); +      case X86Local::MRM0r: +      case X86Local::MRM1r: +      case X86Local::MRM2r: +      case X86Local::MRM3r: +      case X86Local::MRM4r: +      case X86Local::MRM5r: +      case X86Local::MRM6r: +      case X86Local::MRM7r: +        filter = new ExtendedFilter(true, Form - X86Local::MRM0r); +        break; +      case X86Local::MRM0m: +      case X86Local::MRM1m: +      case X86Local::MRM2m: +      case X86Local::MRM3m: +      case X86Local::MRM4m: +      case X86Local::MRM5m: +      case X86Local::MRM6m: +      case X86Local::MRM7m: +        filter = new ExtendedFilter(false, Form - X86Local::MRM0m); +        break; +      } // switch (Form) +      break; +    case 0xd8: +    case 0xd9: +    case 0xda: +    case 0xdb: +    case 0xdc: +    case 0xdd: +    case 0xde: +    case 0xdf: +      filter = new EscapeFilter(false, Form - X86Local::MRM0m); +      break; +    default: +      if (needsModRMForDecode(Form)) +        filter = new ModFilter(isRegFormat(Form)); +      else +        filter = new DumbFilter(); +      break; +    } // switch (Opcode) +    opcodeToSet = Opcode; +  } // switch (Prefix) + +  assert(opcodeType != (OpcodeType)-1 && +         "Opcode type not set"); +  assert(filter && "Filter not set"); + +  if (Form == X86Local::AddRegFrm) { +    if(Spec->modifierType != MODIFIER_MODRM) { +      assert(opcodeToSet < 0xf9 && +             "Not enough room for all ADDREG_FRM operands"); +     +      uint8_t currentOpcode; + +      for (currentOpcode = opcodeToSet; +           currentOpcode < opcodeToSet + 8; +           ++currentOpcode) +        tables.setTableFields(opcodeType,  +                    
          insnContext(),  +                              currentOpcode,  +                              *filter,  +                              UID); +     +      Spec->modifierType = MODIFIER_OPCODE; +      Spec->modifierBase = opcodeToSet; +    } else { +      // modifierBase was set where MODIFIER_MODRM was set +      tables.setTableFields(opcodeType,  +                            insnContext(),  +                            opcodeToSet,  +                            *filter,  +                            UID); +    } +  } else { +    tables.setTableFields(opcodeType, +                          insnContext(), +                          opcodeToSet, +                          *filter, +                          UID); +     +    Spec->modifierType = MODIFIER_NONE; +    Spec->modifierBase = opcodeToSet; +  } +   +  delete filter; +} + +#define TYPE(str, type) if (s == str) return type; +OperandType RecognizableInstr::typeFromString(const std::string &s, +                                              bool isSSE, +                                              bool hasREX_WPrefix, +                                              bool hasOpSizePrefix) { +  if (isSSE) { +    // For SSE instructions, we ignore the OpSize prefix and force operand  +    // sizes. +    TYPE("GR16",              TYPE_R16) +    TYPE("GR32",              TYPE_R32) +    TYPE("GR64",              TYPE_R64) +  } +  if(hasREX_WPrefix) { +    // For instructions with a REX_W prefix, a declared 32-bit register encoding +    // is special. +    TYPE("GR32",              TYPE_R32) +  } +  if(!hasOpSizePrefix) { +    // For instructions without an OpSize prefix, a declared 16-bit register or +    // immediate encoding is special. 
+    TYPE("GR16",              TYPE_R16) +    TYPE("i16imm",            TYPE_IMM16) +  } +  TYPE("i16mem",              TYPE_Mv) +  TYPE("i16imm",              TYPE_IMMv) +  TYPE("i16i8imm",            TYPE_IMMv) +  TYPE("GR16",                TYPE_Rv) +  TYPE("i32mem",              TYPE_Mv) +  TYPE("i32imm",              TYPE_IMMv) +  TYPE("i32i8imm",            TYPE_IMM32) +  TYPE("GR32",                TYPE_Rv) +  TYPE("i64mem",              TYPE_Mv) +  TYPE("i64i32imm",           TYPE_IMM64) +  TYPE("i64i8imm",            TYPE_IMM64) +  TYPE("GR64",                TYPE_R64) +  TYPE("i8mem",               TYPE_M8) +  TYPE("i8imm",               TYPE_IMM8) +  TYPE("GR8",                 TYPE_R8) +  TYPE("VR128",               TYPE_XMM128) +  TYPE("f128mem",             TYPE_M128) +  TYPE("FR64",                TYPE_XMM64) +  TYPE("f64mem",              TYPE_M64FP) +  TYPE("FR32",                TYPE_XMM32) +  TYPE("f32mem",              TYPE_M32FP) +  TYPE("RST",                 TYPE_ST) +  TYPE("i128mem",             TYPE_M128) +  TYPE("i64i32imm_pcrel",     TYPE_REL64) +  TYPE("i32imm_pcrel",        TYPE_REL32) +  TYPE("SSECC",               TYPE_IMM8) +  TYPE("brtarget",            TYPE_RELv) +  TYPE("brtarget8",           TYPE_REL8) +  TYPE("f80mem",              TYPE_M80FP) +  TYPE("lea32mem",            TYPE_M32) +  TYPE("lea64_32mem",         TYPE_M64) +  TYPE("lea64mem",            TYPE_M64) +  TYPE("VR64",                TYPE_MM64) +  TYPE("i64imm",              TYPE_IMMv) +  TYPE("opaque32mem",         TYPE_M1616) +  TYPE("opaque48mem",         TYPE_M1632) +  TYPE("opaque80mem",         TYPE_M1664) +  TYPE("opaque512mem",        TYPE_M512) +  TYPE("SEGMENT_REG",         TYPE_SEGMENTREG) +  TYPE("DEBUG_REG",           TYPE_DEBUGREG) +  TYPE("CONTROL_REG_32",      TYPE_CR32) +  TYPE("CONTROL_REG_64",      TYPE_CR64) +  TYPE("offset8",             TYPE_MOFFS8) +  TYPE("offset16",            TYPE_MOFFS16) +  TYPE("offset32",            TYPE_MOFFS32) +  
TYPE("offset64",            TYPE_MOFFS64) +  errs() << "Unhandled type string " << s << "\n"; +  llvm_unreachable("Unhandled type string"); +} +#undef TYPE + +#define ENCODING(str, encoding) if (s == str) return encoding; +OperandEncoding RecognizableInstr::immediateEncodingFromString +  (const std::string &s, +   bool hasOpSizePrefix) { +  if(!hasOpSizePrefix) { +    // For instructions without an OpSize prefix, a declared 16-bit register or +    // immediate encoding is special. +    ENCODING("i16imm",        ENCODING_IW) +  } +  ENCODING("i32i8imm",        ENCODING_IB) +  ENCODING("SSECC",           ENCODING_IB) +  ENCODING("i16imm",          ENCODING_Iv) +  ENCODING("i16i8imm",        ENCODING_IB) +  ENCODING("i32imm",          ENCODING_Iv) +  ENCODING("i64i32imm",       ENCODING_ID) +  ENCODING("i64i8imm",        ENCODING_IB) +  ENCODING("i8imm",           ENCODING_IB) +  errs() << "Unhandled immediate encoding " << s << "\n"; +  llvm_unreachable("Unhandled immediate encoding"); +} + +OperandEncoding RecognizableInstr::rmRegisterEncodingFromString +  (const std::string &s, +   bool hasOpSizePrefix) { +  ENCODING("GR16",            ENCODING_RM) +  ENCODING("GR32",            ENCODING_RM) +  ENCODING("GR64",            ENCODING_RM) +  ENCODING("GR8",             ENCODING_RM) +  ENCODING("VR128",           ENCODING_RM) +  ENCODING("FR64",            ENCODING_RM) +  ENCODING("FR32",            ENCODING_RM) +  ENCODING("VR64",            ENCODING_RM) +  errs() << "Unhandled R/M register encoding " << s << "\n"; +  llvm_unreachable("Unhandled R/M register encoding"); +} + +OperandEncoding RecognizableInstr::roRegisterEncodingFromString +  (const std::string &s, +   bool hasOpSizePrefix) { +  ENCODING("GR16",            ENCODING_REG) +  ENCODING("GR32",            ENCODING_REG) +  ENCODING("GR64",            ENCODING_REG) +  ENCODING("GR8",             ENCODING_REG) +  ENCODING("VR128",           ENCODING_REG) +  ENCODING("FR64",            ENCODING_REG) +  
ENCODING("FR32",            ENCODING_REG) +  ENCODING("VR64",            ENCODING_REG) +  ENCODING("SEGMENT_REG",     ENCODING_REG) +  ENCODING("DEBUG_REG",       ENCODING_REG) +  ENCODING("CONTROL_REG_32",  ENCODING_REG) +  ENCODING("CONTROL_REG_64",  ENCODING_REG) +  errs() << "Unhandled reg/opcode register encoding " << s << "\n"; +  llvm_unreachable("Unhandled reg/opcode register encoding"); +} + +OperandEncoding RecognizableInstr::memoryEncodingFromString +  (const std::string &s, +   bool hasOpSizePrefix) { +  ENCODING("i16mem",          ENCODING_RM) +  ENCODING("i32mem",          ENCODING_RM) +  ENCODING("i64mem",          ENCODING_RM) +  ENCODING("i8mem",           ENCODING_RM) +  ENCODING("f128mem",         ENCODING_RM) +  ENCODING("f64mem",          ENCODING_RM) +  ENCODING("f32mem",          ENCODING_RM) +  ENCODING("i128mem",         ENCODING_RM) +  ENCODING("f80mem",          ENCODING_RM) +  ENCODING("lea32mem",        ENCODING_RM) +  ENCODING("lea64_32mem",     ENCODING_RM) +  ENCODING("lea64mem",        ENCODING_RM) +  ENCODING("opaque32mem",     ENCODING_RM) +  ENCODING("opaque48mem",     ENCODING_RM) +  ENCODING("opaque80mem",     ENCODING_RM) +  ENCODING("opaque512mem",    ENCODING_RM) +  errs() << "Unhandled memory encoding " << s << "\n"; +  llvm_unreachable("Unhandled memory encoding"); +} + +OperandEncoding RecognizableInstr::relocationEncodingFromString +  (const std::string &s, +   bool hasOpSizePrefix) { +  if(!hasOpSizePrefix) { +    // For instructions without an OpSize prefix, a declared 16-bit register or +    // immediate encoding is special. 
+    ENCODING("i16imm",        ENCODING_IW) +  } +  ENCODING("i16imm",          ENCODING_Iv) +  ENCODING("i16i8imm",        ENCODING_IB) +  ENCODING("i32imm",          ENCODING_Iv) +  ENCODING("i32i8imm",        ENCODING_IB) +  ENCODING("i64i32imm",       ENCODING_ID) +  ENCODING("i64i8imm",        ENCODING_IB) +  ENCODING("i8imm",           ENCODING_IB) +  ENCODING("i64i32imm_pcrel", ENCODING_ID) +  ENCODING("i32imm_pcrel",    ENCODING_ID) +  ENCODING("brtarget",        ENCODING_Iv) +  ENCODING("brtarget8",       ENCODING_IB) +  ENCODING("i64imm",          ENCODING_IO) +  ENCODING("offset8",         ENCODING_Ia) +  ENCODING("offset16",        ENCODING_Ia) +  ENCODING("offset32",        ENCODING_Ia) +  ENCODING("offset64",        ENCODING_Ia) +  errs() << "Unhandled relocation encoding " << s << "\n"; +  llvm_unreachable("Unhandled relocation encoding"); +} + +OperandEncoding RecognizableInstr::opcodeModifierEncodingFromString +  (const std::string &s, +   bool hasOpSizePrefix) { +  ENCODING("RST",             ENCODING_I) +  ENCODING("GR32",            ENCODING_Rv) +  ENCODING("GR64",            ENCODING_RO) +  ENCODING("GR16",            ENCODING_Rv) +  ENCODING("GR8",             ENCODING_RB) +  errs() << "Unhandled opcode modifier encoding " << s << "\n"; +  llvm_unreachable("Unhandled opcode modifier encoding"); +} +#undef ENCODING
\ No newline at end of file diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h new file mode 100644 index 0000000..84374b0 --- /dev/null +++ b/utils/TableGen/X86RecognizableInstr.h @@ -0,0 +1,237 @@ +//===- X86RecognizableInstr.h - Disassembler instruction spec ----*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler Emitter. +// It contains the interface of a single recognizable instruction. +// Documentation for the disassembler emitter in general can be found in +//  X86DisassemblerEmitter.h. +// +//===----------------------------------------------------------------------===// + +#ifndef X86RECOGNIZABLEINSTR_H +#define X86RECOGNIZABLEINSTR_H + +#include "X86DisassemblerTables.h" + +#include "CodeGenTarget.h" +#include "Record.h" + +#include "llvm/System/DataTypes.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +namespace X86Disassembler { + +/// RecognizableInstr - Encapsulates all information required to decode a single +///   instruction, as extracted from the LLVM instruction tables.  Has methods +///   to interpret the information available in the LLVM tables, and to emit the +///   instruction into DisassemblerTables. 
+class RecognizableInstr { +private: +  /// The opcode of the instruction, as used in an MCInst +  InstrUID UID; +  /// The record from the .td files corresponding to this instruction +  const Record* Rec; +  /// The prefix field from the record +  uint8_t Prefix; +  /// The opcode field from the record; this is the opcode used in the Intel +  /// encoding and therefore distinct from the UID +  uint8_t Opcode; +  /// The form field from the record +  uint8_t Form; +  /// The segment override field from the record +  uint8_t SegOvr; +  /// The hasOpSizePrefix field from the record +  bool HasOpSizePrefix; +  /// The hasREX_WPrefix field from the record +  bool HasREX_WPrefix; +  /// The hasLockPrefix field from the record +  bool HasLockPrefix; +  /// The isCodeGenOnly field from the record +  bool IsCodeGenOnly; +   +  /// The instruction name as listed in the tables +  std::string Name; +  /// The AT&T AsmString for the instruction +  std::string AsmString; +   +  /// Indicates whether the instruction is SSE +  bool IsSSE; +  /// Indicates whether the instruction has FR operands - MOVs with FR operands +  /// are typically ignored +  bool HasFROperands; +  /// Indicates whether the instruction should be emitted into the decode +  /// tables; regardless, it will be emitted into the instruction info table +  bool ShouldBeEmitted; +   +  /// The operands of the instruction, as listed in the CodeGenInstruction. +  /// They are not one-to-one with operands listed in the MCInst; for example, +  /// memory operands expand to 5 operands in the MCInst +  const std::vector<CodeGenInstruction::OperandInfo>* Operands; +  /// The description of the instruction that is emitted into the instruction +  /// info table +  InstructionSpecifier* Spec; + +  /// insnContext - Returns the primary context in which the instruction is +  ///   valid. +  /// +  /// @return - The context in which the instruction is valid. 
+  InstructionContext insnContext() const; +   +  enum filter_ret { +    FILTER_STRONG,    // instruction has no place in the instruction tables +    FILTER_WEAK,      // instruction may conflict, and should be eliminated if +                      // it does +    FILTER_NORMAL     // instruction should have high priority and generate an +                      // error if it conflicts with any other FILTER_NORMAL +                      // instruction +  }; +   +  /// filter - Determines whether the instruction should be decodable.  Some  +  ///   instructions are pure intrinsics and use unencodable operands; many +  ///   synthetic instructions are duplicates of other instructions; other +  ///   instructions only differ in the logical way in which they are used, and +  ///   have the same decoding.  Because these would cause decode conflicts, +  ///   they must be filtered out. +  /// +  /// @return - The degree of filtering to be applied (see filter_ret). +  filter_ret filter() const; +   +  /// typeFromString - Translates an operand type from the string provided in +  ///   the LLVM tables to an OperandType for use in the operand specifier. +  /// +  /// @param s              - The string, as extracted by calling Rec->getName() +  ///                         on a CodeGenInstruction::OperandInfo. +  /// @param isSSE          - Indicates whether the instruction is an SSE  +  ///                         instruction.  For SSE instructions, general-purpose +  ///                         register operands are fixed-size rather than being +  ///                         affected by the mandatory OpSize prefix. +  /// @param hasREX_WPrefix - Indicates whether the instruction has a REX.W +  ///                         prefix.  If it does, 32-bit register operands stay +  ///                         32-bit regardless of the operand size. +  /// @param hasOpSizePrefix- Indicates whether the instruction has an OpSize +  ///                         prefix.  
If it does not, then 16-bit register +  ///                         operands and immediates stay 16-bit. +  /// @return               - The operand's type. +  static OperandType typeFromString(const std::string& s,  +                                    bool isSSE, +                                    bool hasREX_WPrefix, +                                    bool hasOpSizePrefix); +   +  /// immediateEncodingFromString - Translates an immediate encoding from the +  ///   string provided in the LLVM tables to an OperandEncoding for use in +  ///   the operand specifier. +  /// +  /// @param s                - See typeFromString(). +  /// @param hasOpSizePrefix  - Indicates whether the instruction has an OpSize +  ///                           prefix.  If it does not, then 16-bit immediate +  ///                           operands stay 16-bit. +  /// @return                 - The operand's encoding. +  static OperandEncoding immediateEncodingFromString(const std::string &s, +                                                     bool hasOpSizePrefix); +   +  /// rmRegisterEncodingFromString - Like immediateEncodingFromString, but +  ///   handles register operands that are in the R/M field of the ModR/M byte. +  static OperandEncoding rmRegisterEncodingFromString(const std::string &s, +                                                      bool hasOpSizePrefix); +   +  /// roRegisterEncodingFromString - Like immediateEncodingFromString, but +  ///   handles register operands that are in the REG (Reg/Opcode) field of the +  ///   ModR/M byte.  The memory, relocation, and opcodeModifier variants +  ///   declared after it take the same arguments and do the same for their +  ///   respective operand kinds. 
+  static OperandEncoding roRegisterEncodingFromString(const std::string &s, +                                                      bool hasOpSizePrefix); +  static OperandEncoding memoryEncodingFromString(const std::string &s, +                                                  bool hasOpSizePrefix); +  static OperandEncoding relocationEncodingFromString(const std::string &s, +                                                      bool hasOpSizePrefix); +  static OperandEncoding opcodeModifierEncodingFromString(const std::string &s, +                                                          bool hasOpSizePrefix); +   +  /// handleOperand - Converts a single operand from the LLVM table format to +  ///   the emitted table format, handling any duplicate operands it encounters +  ///   and then one non-duplicate. +  /// +  /// @param optional             - If false, assert that a physical operand +  ///                               remains; if true, return without doing +  ///                               anything when none remains. +  /// @param operandIndex         - The index into the generated operand table. +  ///                               Incremented by this function one or more +  ///                               times (to reflect possible duplicate +  ///                               operands). +  /// @param physicalOperandIndex - The index of the current operand into the +  ///                               set of non-duplicate ('physical') operands. +  ///                               Incremented by this function once. +  /// @param numPhysicalOperands  - The number of non-duplicate operands in the +  ///                               instructions. +  /// @param operandMapping       - The operand mapping, which has an entry for +  ///                               each operand that indicates whether it is a +  ///                               duplicate, and of what. 
+  /// @param encodingFromString   - The function used to translate the +  ///                               operand's type name into an encoding; it +  ///                               is also passed HasOpSizePrefix. 
+  void handleOperand(bool optional, +                     unsigned &operandIndex, +                     unsigned &physicalOperandIndex, +                     unsigned &numPhysicalOperands, +                     unsigned *operandMapping, +                     OperandEncoding (*encodingFromString) +                       (const std::string&, +                        bool hasOpSizePrefix)); +   +  /// shouldBeEmitted - Returns the ShouldBeEmitted field.  Although filter() +  ///   filters out many instructions, at various points in decoding we +  ///   determine that the instruction should not actually be decodable.  In +  ///   particular, certain MOV instructions with FR operands aren't emitted, +  ///   but they're only identified during operand parsing. +  /// +  /// @return - true if at this point we believe the instruction should be +  ///   emitted; false if not.  This will return false if filter() returns +  ///   FILTER_STRONG once emitInstructionSpecifier() has been called. +  bool shouldBeEmitted() const { +    return ShouldBeEmitted; +  } +   +  /// emitInstructionSpecifier - Loads the instruction specifier for the current +  ///   instruction into a DisassemblerTables. +  /// +  /// @arg tables - The DisassemblerTables to populate with the specifier for +  ///               the current instruction. +  void emitInstructionSpecifier(DisassemblerTables &tables); +   +  /// emitDecodePath - Populates the proper fields in the decode tables +  ///   corresponding to the decode paths for this instruction. +  /// +  /// @arg tables - The DisassemblerTables to populate with the decode +  ///               information for the current instruction. +  void emitDecodePath(DisassemblerTables &tables) const; + +  /// Constructor - Initializes a RecognizableInstr with the appropriate fields +  ///   from a CodeGenInstruction. +  /// +  /// @arg tables - The DisassemblerTables that the specifier will be added to. +  /// @arg insn   - The CodeGenInstruction to extract information from. 
+  /// @arg uid    - The unique ID of the current instruction. +  RecognizableInstr(DisassemblerTables &tables, +                    const CodeGenInstruction &insn, +                    InstrUID uid); +public: +  /// processInstr - Accepts a CodeGenInstruction and loads decode information +  ///   for it into a DisassemblerTables if appropriate. +  /// +  /// @arg tables - The DisassemblerTables to be populated with decode +  ///               information. +  /// @arg insn   - The CodeGenInstruction to be used as a source for this +  ///               information. +  /// @arg uid    - The unique ID of the instruction. +  static void processInstr(DisassemblerTables &tables, +                           const CodeGenInstruction &insn, +                           InstrUID uid); +}; +   +} // namespace X86Disassembler + +} // namespace llvm + +#endif | 
