aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSean Callanan <scallanan@apple.com>2009-12-19 02:59:52 +0000
committerSean Callanan <scallanan@apple.com>2009-12-19 02:59:52 +0000
commit8ed9f51663bc5533f36ca62e5668ae08e9a1313f (patch)
tree3054645839caee367e9403507d8487538819ed5b
parente9ec6ad1ba5fd9ad70f5d0c059c5a5aa44f501f7 (diff)
downloadexternal_llvm-8ed9f51663bc5533f36ca62e5668ae08e9a1313f.zip
external_llvm-8ed9f51663bc5533f36ca62e5668ae08e9a1313f.tar.gz
external_llvm-8ed9f51663bc5533f36ca62e5668ae08e9a1313f.tar.bz2
Table-driven disassembler for the X86 architecture (16-, 32-, and 64-bit
incarnations), integrated into the MC framework. The disassembler is table-driven, using a custom TableGen backend to generate hierarchical tables optimized for fast decode. The disassembler consumes MemoryObjects and produces arrays of MCInsts, adhering to the abstract base class MCDisassembler (llvm/MC/MCDisassembler.h). The disassembler is documented in detail in - lib/Target/X86/Disassembler/X86Disassembler.cpp (disassembler runtime) - utils/TableGen/DisassemblerEmitter.cpp (table emitter) You can test the disassembler by running llvm-mc -disassemble for i386 or x86_64 targets. Please let me know if you encounter any problems with it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@91749 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--cmake/modules/LLVMLibDeps.cmake14
-rw-r--r--lib/Target/X86/CMakeLists.txt1
-rw-r--r--lib/Target/X86/Disassembler/CMakeLists.txt1
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp438
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.h150
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.c1361
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h515
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h354
-rw-r--r--lib/Target/X86/Makefile4
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp4
-rw-r--r--utils/TableGen/CMakeLists.txt2
-rw-r--r--utils/TableGen/DisassemblerEmitter.cpp99
-rw-r--r--utils/TableGen/X86DisassemblerShared.h37
-rw-r--r--utils/TableGen/X86DisassemblerTables.cpp603
-rw-r--r--utils/TableGen/X86DisassemblerTables.h291
-rw-r--r--utils/TableGen/X86ModRMFilters.h197
-rw-r--r--utils/TableGen/X86RecognizableInstr.cpp959
-rw-r--r--utils/TableGen/X86RecognizableInstr.h237
18 files changed, 5255 insertions, 12 deletions
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake
index 6a35354..4000365 100644
--- a/cmake/modules/LLVMLibDeps.cmake
+++ b/cmake/modules/LLVMLibDeps.cmake
@@ -2,7 +2,7 @@ set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMInfo LLVMMC)
set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMARMInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaCodeGen LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget)
@@ -11,12 +11,12 @@ set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
@@ -31,7 +31,7 @@ set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVM
set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430CodeGen LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport)
set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget)
@@ -40,17 +40,17 @@ set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMSystem )
-set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZInfo LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZCodeGen LLVMSystemZInfo LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem)
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 3ad65fb..4186fec 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS X86.td)
tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
tablegen(X86GenRegisterNames.inc -gen-register-enums)
tablegen(X86GenRegisterInfo.inc -gen-register-desc)
+tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
tablegen(X86GenInstrNames.inc -gen-instr-enums)
tablegen(X86GenInstrInfo.inc -gen-instr-desc)
tablegen(X86GenAsmWriter.inc -gen-asm-writer)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index b329e89..2a83a9c 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -2,5 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMX86Disassembler
X86Disassembler.cpp
+ X86DisassemblerDecoder.c
)
add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 2ebbc9b..99617e7 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -6,18 +6,450 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains code to translate the data produced by the decoder into
+// MCInsts.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+#include "X86Disassembler.h"
+#include "X86DisassemblerDecoder.h"
+#include "X86InstrInfo.h"
+
+#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetRegistry.h"
-#include "X86.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+using namespace llvm::X86Disassembler;
+
+namespace llvm {
+
+// Fill-ins to make the compiler happy. These constants are never actually
+// assigned; they are just filler to make an automatically-generated switch
+// statement work.
+namespace X86 {
+ enum {
+ BX_SI = 500,
+ BX_DI = 501,
+ BP_SI = 502,
+ BP_DI = 503,
+ sib = 504,
+ sib64 = 505
+ };
+}
+
+}
+
+static void translateInstruction(MCInst &target,
+ InternalInstruction &source);
+
+X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
+ MCDisassembler(),
+ fMode(mode) {
+}
+
+X86GenericDisassembler::~X86GenericDisassembler() {
+}
+
+/// regionReader - a callback function that wraps the readByte method from
+/// MemoryObject.
+///
+/// @param arg - The generic callback parameter. In this case, this should
+/// be a pointer to a MemoryObject.
+/// @param byte - A pointer to the byte to be read.
+/// @param address - The address to be read.
+static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
+ MemoryObject* region = static_cast<MemoryObject*>(arg);
+ return region->readByte(address, byte);
+}
+
+/// logger - a callback function that wraps the operator<< method from
+/// raw_ostream.
+///
+/// @param arg - The generic callback parameter. This should be a pointe
+/// to a raw_ostream.
+/// @param log - A string to be logged. logger() adds a newline.
+static void logger(void* arg, const char* log) {
+ if (!arg)
+ return;
+
+ raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
+ vStream << log << "\n";
+}
+
+//
+// Public interface for the disassembler
+//
+
+bool X86GenericDisassembler::getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const {
+ InternalInstruction internalInstr;
+
+ int ret = decodeInstruction(&internalInstr,
+ regionReader,
+ (void*)&region,
+ logger,
+ (void*)&vStream,
+ address,
+ fMode);
+
+ if(ret) {
+ size = internalInstr.readerCursor - address;
+ return false;
+ }
+ else {
+ size = internalInstr.length;
+ translateInstruction(instr, internalInstr);
+ return true;
+ }
+}
+
+//
+// Private code that translates from struct InternalInstructions to MCInsts.
+//
+
+/// translateRegister - Translates an internal register to the appropriate LLVM
+/// register, and appends it as an operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param reg - The Reg to append.
+static void translateRegister(MCInst &mcInst, Reg reg) {
+#define ENTRY(x) X86::x,
+ uint8_t llvmRegnums[] = {
+ ALL_REGS
+ 0
+ };
+#undef ENTRY
+
+ uint8_t llvmRegnum = llvmRegnums[reg];
+ mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
+}
+
+/// translateImmediate - Appends an immediate operand to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param immediate - The immediate value to append.
+static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
+ mcInst.addOperand(MCOperand::CreateImm(immediate));
+}
+
+/// translateRMRegister - Translates a register stored in the R/M field of the
+/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The internal instruction to extract the R/M field
+/// from.
+static void translateRMRegister(MCInst &mcInst,
+ InternalInstruction &insn) {
+ assert(insn.eaBase != EA_BASE_sib && insn.eaBase != EA_BASE_sib64 &&
+ "A R/M register operand may not have a SIB byte");
+
+ switch (insn.eaBase) {
+ case EA_BASE_NONE:
+ llvm_unreachable("EA_BASE_NONE for ModR/M base");
+ break;
+#define ENTRY(x) case EA_BASE_##x:
+ ALL_EA_BASES
+#undef ENTRY
+ llvm_unreachable("A R/M register operand may not have a base; "
+ "the operand must be a register.");
+ break;
+#define ENTRY(x) \
+ case EA_REG_##x: \
+ mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
+ ALL_REGS
+#undef ENTRY
+ default:
+ llvm_unreachable("Unexpected EA base register");
+ }
+}
+
+/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
+/// fields of an internal instruction (and possibly its SIB byte) to a memory
+/// operand in LLVM's format, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+static void translateRMMemory(MCInst &mcInst,
+ InternalInstruction &insn) {
+ // Addresses in an MCInst are represented as five operands:
+ // 1. basereg (register) The R/M base, or (if there is a SIB) the
+ // SIB base
+ // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
+ // scale amount
+ // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
+ // the index (which is multiplied by the
+ // scale amount)
+ // 4. displacement (immediate) 0, or the displacement if there is one
+ // 5. segmentreg (register) x86_registerNONE for now, but could be set
+ // if we have segment overrides
+
+ MCOperand baseReg;
+ MCOperand scaleAmount;
+ MCOperand indexReg;
+ MCOperand displacement;
+ MCOperand segmentReg;
+
+ if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
+ if (insn.sibBase != SIB_BASE_NONE) {
+ switch (insn.sibBase) {
+ default:
+ llvm_unreachable("Unexpected sibBase");
+#define ENTRY(x) \
+ case SIB_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_SIB_BASES
+#undef ENTRY
+ }
+ } else {
+ baseReg = MCOperand::CreateReg(0);
+ }
+
+ if (insn.sibIndex != SIB_INDEX_NONE) {
+ switch (insn.sibIndex) {
+ default:
+ llvm_unreachable("Unexpected sibIndex");
+#define ENTRY(x) \
+ case SIB_INDEX_##x: \
+ indexReg = MCOperand::CreateReg(X86::x); break;
+ EA_BASES_32BIT
+ EA_BASES_64BIT
+#undef ENTRY
+ }
+ } else {
+ indexReg = MCOperand::CreateReg(0);
+ }
+
+ scaleAmount = MCOperand::CreateImm(insn.sibScale);
+ } else {
+ switch (insn.eaBase) {
+ case EA_BASE_NONE:
+ assert(insn.eaDisplacement != EA_DISP_NONE &&
+ "EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
+
+ if (insn.mode == MODE_64BIT)
+ baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
+ else
+ baseReg = MCOperand::CreateReg(0);
+
+ indexReg = MCOperand::CreateReg(0);
+ break;
+ case EA_BASE_BX_SI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BX_DI:
+ baseReg = MCOperand::CreateReg(X86::BX);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ case EA_BASE_BP_SI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::SI);
+ break;
+ case EA_BASE_BP_DI:
+ baseReg = MCOperand::CreateReg(X86::BP);
+ indexReg = MCOperand::CreateReg(X86::DI);
+ break;
+ default:
+ indexReg = MCOperand::CreateReg(0);
+ switch (insn.eaBase) {
+ default:
+ llvm_unreachable("Unexpected eaBase");
+ break;
+ // Here, we will use the fill-ins defined above. However,
+ // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
+ // sib and sib64 were handled in the top-level if, so they're only
+ // placeholders to keep the compiler happy.
+#define ENTRY(x) \
+ case EA_BASE_##x: \
+ baseReg = MCOperand::CreateReg(X86::x); break;
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) case EA_REG_##x:
+ ALL_REGS
+#undef ENTRY
+ llvm_unreachable("A R/M memory operand may not be a register; "
+ "the base field must be a base.");
+ break;
+ }
+ }
+ }
+
+ displacement = MCOperand::CreateImm(insn.displacement);
+
+ static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
+ 0, // SEG_OVERRIDE_NONE
+ X86::CS,
+ X86::SS,
+ X86::DS,
+ X86::ES,
+ X86::FS,
+ X86::GS
+ };
+
+ segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
+
+ mcInst.addOperand(baseReg);
+ mcInst.addOperand(scaleAmount);
+ mcInst.addOperand(indexReg);
+ mcInst.addOperand(displacement);
+ mcInst.addOperand(segmentReg);
+}
+
+/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
+/// byte of an instruction to LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The instruction to extract Mod, R/M, and SIB fields
+/// from.
+static void translateRM(MCInst &mcInst,
+ OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ switch (operand.type) {
+ default:
+ llvm_unreachable("Unexpected type for a R/M operand");
+ case TYPE_R8:
+ case TYPE_R16:
+ case TYPE_R32:
+ case TYPE_R64:
+ case TYPE_Rv:
+ case TYPE_MM:
+ case TYPE_MM32:
+ case TYPE_MM64:
+ case TYPE_XMM:
+ case TYPE_XMM32:
+ case TYPE_XMM64:
+ case TYPE_XMM128:
+ case TYPE_DEBUGREG:
+ case TYPE_CR32:
+ case TYPE_CR64:
+ translateRMRegister(mcInst, insn);
+ break;
+ case TYPE_M:
+ case TYPE_M8:
+ case TYPE_M16:
+ case TYPE_M32:
+ case TYPE_M64:
+ case TYPE_M128:
+ case TYPE_M512:
+ case TYPE_Mv:
+ case TYPE_M32FP:
+ case TYPE_M64FP:
+ case TYPE_M80FP:
+ case TYPE_M16INT:
+ case TYPE_M32INT:
+ case TYPE_M64INT:
+ case TYPE_M1616:
+ case TYPE_M1632:
+ case TYPE_M1664:
+ translateRMMemory(mcInst, insn);
+ break;
+ }
+}
+
+/// translateFPRegister - Translates a stack position on the FPU stack to its
+/// LLVM form, and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param stackPos - The stack position to translate.
+static void translateFPRegister(MCInst &mcInst,
+ uint8_t stackPos) {
+ assert(stackPos < 8 && "Invalid FP stack position");
+
+ mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
+}
+
+/// translateOperand - Translates an operand stored in an internal instruction
+/// to LLVM's format and appends it to an MCInst.
+///
+/// @param mcInst - The MCInst to append to.
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+static void translateOperand(MCInst &mcInst,
+ OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ switch (operand.encoding) {
+ default:
+ llvm_unreachable("Unhandled operand encoding during translation");
+ case ENCODING_REG:
+ translateRegister(mcInst, insn.reg);
+ break;
+ case ENCODING_RM:
+ translateRM(mcInst, operand, insn);
+ break;
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ llvm_unreachable("Translation of code offsets isn't supported.");
+ case ENCODING_IB:
+ case ENCODING_IW:
+ case ENCODING_ID:
+ case ENCODING_IO:
+ case ENCODING_Iv:
+ case ENCODING_Ia:
+ translateImmediate(mcInst,
+ insn.immediates[insn.numImmediatesTranslated++]);
+ break;
+ case ENCODING_RB:
+ case ENCODING_RW:
+ case ENCODING_RD:
+ case ENCODING_RO:
+ translateRegister(mcInst, insn.opcodeRegister);
+ break;
+ case ENCODING_I:
+ translateFPRegister(mcInst, insn.opcodeModifier);
+ break;
+ case ENCODING_Rv:
+ translateRegister(mcInst, insn.opcodeRegister);
+ break;
+ case ENCODING_DUP:
+ translateOperand(mcInst,
+ insn.spec->operands[operand.type - TYPE_DUP0],
+ insn);
+ break;
+ }
+}
+
+/// translateInstruction - Translates an internal instruction and all its
+/// operands to an MCInst.
+///
+/// @param mcInst - The MCInst to populate with the instruction's data.
+/// @param insn - The internal instruction.
+static void translateInstruction(MCInst &mcInst,
+ InternalInstruction &insn) {
+ assert(insn.spec);
+
+ mcInst.setOpcode(insn.instructionID);
+
+ int index;
+
+ insn.numImmediatesTranslated = 0;
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ if (insn.spec->operands[index].encoding != ENCODING_NONE)
+ translateOperand(mcInst, insn.spec->operands[index], insn);
+ }
+}
static const MCDisassembler *createX86_32Disassembler(const Target &T) {
- return 0;
+ return new X86Disassembler::X86_32Disassembler;
}
static const MCDisassembler *createX86_64Disassembler(const Target &T) {
- return 0;
+ return new X86Disassembler::X86_64Disassembler;
}
extern "C" void LLVMInitializeX86Disassembler() {
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
new file mode 100644
index 0000000..0e6e0b0
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -0,0 +1,150 @@
+//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
+// 64-bit X86 instruction sets. The main decode sequence for an assembly
+// instruction in this disassembler is:
+//
+// 1. Read the prefix bytes and determine the attributes of the instruction.
+// These attributes, recorded in enum attributeBits
+// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
+// provides a mapping from bitmasks to contexts, which are represented by
+// enum InstructionContext (ibid.).
+//
+// 2. Read the opcode, and determine what kind of opcode it is. The
+// disassembler distinguishes four kinds of opcodes, which are enumerated in
+// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
+// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
+// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
+//
+// 3. Depending on the opcode type, look in one of four ClassDecision structures
+// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
+// OpcodeDecision (ibid.) to look the opcode in. Look up the opcode, to get
+// a ModRMDecision (ibid.).
+//
+// 4. Some instructions, such as escape opcodes or extended opcodes, or even
+// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
+// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
+// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
+// ModR/M byte is required and how to interpret it.
+//
+// 5. After resolving the ModRMDecision, the disassembler has a unique ID
+// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
+// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
+// meanings of its operands.
+//
+// 6. For each operand, its encoding is an entry from OperandEncoding
+// (X86DisassemblerDecoderCommon.h) and its type is an entry from
+// OperandType (ibid.). The encoding indicates how to read it from the
+// instruction; the type indicates how to interpret the value once it has
+// been read. For example, a register operand could be stored in the R/M
+// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
+// the main opcode. This is orthogonal from its meaning (an GPR or an XMM
+// register, for instance). Given this information, the operands can be
+// extracted and interpreted.
+//
+// 7. As the last step, the disassembler translates the instruction information
+// and operands into a format understandable by the client - in this case, an
+// MCInst for use by the MC infrastructure.
+//
+// The disassembler is broken broadly into two parts: the table emitter that
+// emits the instruction decode tables discussed above during compilation, and
+// the disassembler itself. The table emitter is documented in more detail in
+// utils/TableGen/X86DisassemblerEmitter.h.
+//
+// X86Disassembler.h contains the public interface for the disassembler,
+// adhering to the MCDisassembler interface.
+// X86Disassembler.cpp contains the code responsible for step 7, and for
+// invoking the decoder to execute steps 1-6.
+// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
+// table emitter and the disassembler.
+// X86DisassemblerDecoder.h contains the public interface of the decoder,
+// factored out into C for possible use by other projects.
+// X86DisassemblerDecoder.c contains the source code of the decoder, which is
+// responsible for steps 1-6.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86DISASSEMBLER_H
+#define X86DISASSEMBLER_H
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ const char* name;
+
+#define INSTRUCTION_IDS \
+ InstrUID* instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+namespace X86Disassembler {
+
+/// X86GenericDisassembler - Generic disassembler for all X86 platforms.
+/// All each platform class should have to do is subclass the constructor, and
+/// provide a different disassemblerMode value.
+class X86GenericDisassembler : public MCDisassembler {
+protected:
+ /// Constructor - Initializes the disassembler.
+ ///
+ /// @param mode - The X86 architecture mode to decode for.
+ X86GenericDisassembler(DisassemblerMode mode);
+public:
+ ~X86GenericDisassembler();
+
+ /// getInstruction - See MCDisassembler.
+ bool getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream) const;
+private:
+ DisassemblerMode fMode;
+};
+
+/// X86_16Disassembler - 16-bit X86 disassembler.
+class X86_16Disassembler : public X86GenericDisassembler {
+public:
+ X86_16Disassembler() :
+ X86GenericDisassembler(MODE_16BIT) {
+ }
+};
+
+/// X86_16Disassembler - 32-bit X86 disassembler.
+class X86_32Disassembler : public X86GenericDisassembler {
+public:
+ X86_32Disassembler() :
+ X86GenericDisassembler(MODE_32BIT) {
+ }
+};
+
+/// X86_16Disassembler - 64-bit X86 disassembler.
+class X86_64Disassembler : public X86GenericDisassembler {
+public:
+ X86_64Disassembler() :
+ X86GenericDisassembler(MODE_64BIT) {
+ }
+};
+
+} // namespace X86Disassembler
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
new file mode 100644
index 0000000..99ae9cd
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -0,0 +1,1361 @@
+/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the implementation of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#include <assert.h> /* for assert() */
+#include <stdarg.h> /* for va_*() */
+#include <stdio.h> /* for vsnprintf() */
+#include <stdlib.h> /* for exit() */
+#include <string.h> /* for bzero() */
+
+#include "X86DisassemblerDecoder.h"
+
+#include "X86GenDisassemblerTables.inc"
+
+#define TRUE 1
+#define FALSE 0
+
+#ifdef __GNUC__
+#define NORETURN __attribute__((noreturn))
+#else
+#define NORETURN
+#endif
+
+#define unreachable(s) \
+ do { \
+ fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s); \
+ exit(-1); \
+ } while (0);
+
+/*
+ * contextForAttrs - Client for the instruction context table. Takes a set of
+ * attributes and returns the appropriate decode context.
+ *
+ * @param attrMask - Attributes, from the enumeration attributeBits.
+ * @return - The InstructionContext to use when looking up an
+ * an instruction with these attributes.
+ */
+static inline InstructionContext contextForAttrs(uint8_t attrMask) {
+ return CONTEXTS_SYM[attrMask];
+}
+
+/*
+ * modRMRequired - Reads the appropriate instruction table to determine whether
+ * the ModR/M byte is required to decode a particular instruction.
+ *
+ * @param type - The opcode type (i.e., how many bytes it has).
+ * @param insnContext - The context for the instruction, as returned by
+ * contextForAttrs.
+ * @param opcode - The last byte of the instruction's opcode, not counting
+ * ModR/M extensions and escapes.
+ * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
+ */
+static inline int modRMRequired(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode) {
+ const struct ContextDecision* decision;
+
+ switch (type) {
+ case ONEBYTE:
+ decision = &ONEBYTE_SYM;
+ break;
+ case TWOBYTE:
+ decision = &TWOBYTE_SYM;
+ break;
+ case THREEBYTE_38:
+ decision = &THREEBYTE38_SYM;
+ break;
+ case THREEBYTE_3A:
+ decision = &THREEBYTE3A_SYM;
+ break;
+ }
+
+ return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
+ modrm_type != MODRM_ONEENTRY;
+
+ unreachable("Unknown opcode type");
+ return 0;
+}
+
+/*
+ * decode - Reads the appropriate instruction table to obtain the unique ID of
+ * an instruction.
+ *
+ * @param type - See modRMRequired().
+ * @param insnContext - See modRMRequired().
+ * @param opcode - See modRMRequired().
+ * @param modRM - The ModR/M byte if required, or any value if not.
+ */
+static inline InstrUID decode(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode,
+ uint8_t modRM) {
+ struct ModRMDecision* dec;
+
+ switch (type) {
+ default:
+ unreachable("Unknown opcode type");
+ case ONEBYTE:
+ dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case TWOBYTE:
+ dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_38:
+ dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_3A:
+ dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ }
+
+ switch (dec->modrm_type) {
+ default:
+ unreachable("Corrupt table! Unknown modrm_type");
+ case MODRM_ONEENTRY:
+ return dec->instructionIDs[0];
+ case MODRM_SPLITRM:
+ if (modFromModRM(modRM) == 0x3)
+ return dec->instructionIDs[1];
+ else
+ return dec->instructionIDs[0];
+ case MODRM_FULL:
+ return dec->instructionIDs[modRM];
+ }
+
+ return 0;
+}
+
+/*
+ * specifierForUID - Given a UID, returns the name and operand specification for
+ * that instruction.
+ *
+ * @param uid - The unique ID for the instruction. This should be returned by
+ * decode(); specifierForUID will not check bounds.
+ * @return - A pointer to the specification for that instruction.
+ */
+static inline struct InstructionSpecifier* specifierForUID(InstrUID uid) {
+ return &INSTRUCTIONS_SYM[uid];
+}
+
+/*
+ * consumeByte - Uses the reader function provided by the user to consume one
+ * byte from the instruction's memory and advance the cursor.
+ *
+ * @param insn - The instruction with the reader function to use. The cursor
+ * for this instruction is advanced.
+ * @param byte - A pointer to a pre-allocated memory buffer to be populated
+ * with the data read.
+ * @return - 0 if the read was successful; nonzero otherwise.
+ */
+static inline int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
+ int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
+
+ if (!ret)
+ ++(insn->readerCursor);
+
+ return ret;
+}
+
+/*
+ * lookAtByte - Like consumeByte, but does not advance the cursor.
+ *
+ * @param insn - See consumeByte().
+ * @param byte - See consumeByte().
+ * @return - See consumeByte().
+ */
+static inline int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
+ return insn->reader(insn->readerArg, byte, insn->readerCursor);
+}
+
+static inline void unconsumeByte(struct InternalInstruction* insn) {
+ insn->readerCursor--;
+}
+
+#define CONSUME_FUNC(name, type) \
+ static inline int name(struct InternalInstruction* insn, type* ptr) { \
+ type combined = 0; \
+ unsigned offset; \
+ for (offset = 0; offset < sizeof(type); ++offset) { \
+ uint8_t byte; \
+ int ret = insn->reader(insn->readerArg, \
+ &byte, \
+ insn->readerCursor + offset); \
+ if (ret) \
+ return ret; \
+ combined = combined | ((type)byte << ((type)offset * 8)); \
+ } \
+ *ptr = combined; \
+ insn->readerCursor += sizeof(type); \
+ return 0; \
+ }
+
+/*
+ * consume* - Use the reader function provided by the user to consume data
+ * values of various sizes from the instruction's memory and advance the
+ * cursor appropriately. These readers perform endian conversion.
+ *
+ * @param insn - See consumeByte().
+ * @param ptr - A pointer to a pre-allocated memory of appropriate size to
+ * be populated with the data read.
+ * @return - See consumeByte().
+ */
+CONSUME_FUNC(consumeInt8, int8_t)
+CONSUME_FUNC(consumeInt16, int16_t)
+CONSUME_FUNC(consumeInt32, int32_t)
+CONSUME_FUNC(consumeUInt16, uint16_t)
+CONSUME_FUNC(consumeUInt32, uint32_t)
+CONSUME_FUNC(consumeUInt64, uint64_t)
+
+/*
+ * dprintf - Uses the logging function provided by the user to log a single
+ * message, typically without a carriage-return.
+ *
+ * @param insn - The instruction containing the logging function.
+ * @param format - See printf().
+ * @param ... - See printf().
+ */
+static inline void dprintf(struct InternalInstruction* insn,
+ const char* format,
+ ...) {
+ char buffer[256];
+ va_list ap;
+
+ if (!insn->dlog)
+ return;
+
+ va_start(ap, format);
+ (void)vsnprintf(buffer, sizeof(buffer), format, ap);
+ va_end(ap);
+
+ insn->dlog(insn->dlogArg, buffer);
+
+ return;
+}
+
+/*
+ * setPrefixPresent - Marks that a particular prefix is present at a particular
+ * location.
+ *
+ * @param insn - The instruction to be marked as having the prefix.
+ * @param prefix - The prefix that is present.
+ * @param location - The location where the prefix is located (in the address
+ * space of the instruction's reader).
+ */
+static inline void setPrefixPresent(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ insn->prefixPresent[prefix] = 1;
+ insn->prefixLocations[prefix] = location;
+}
+
+/*
+ * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
+ * present at a given location.
+ *
+ * @param insn - The instruction to be queried.
+ * @param prefix - The prefix.
+ * @param location - The location to query.
+ * @return - Whether the prefix is at that location.
+ */
+static inline BOOL isPrefixAtLocation(struct InternalInstruction* insn,
+ uint8_t prefix,
+ uint64_t location)
+{
+ if (insn->prefixPresent[prefix] == 1 &&
+ insn->prefixLocations[prefix] == location)
+ return TRUE;
+ else
+ return FALSE;
+}
+
+/*
+ * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
+ * instruction as having them. Also sets the instruction's default operand,
+ * address, and other relevant data sizes to report operands correctly.
+ *
+ * @param insn - The instruction whose prefixes are to be read.
+ * @return - 0 if the instruction could be read until the end of the prefix
+ * bytes, and no prefixes conflicted; nonzero otherwise.
+ */
+static int readPrefixes(struct InternalInstruction* insn) {
+ BOOL isPrefix = TRUE;
+ BOOL prefixGroups[4] = { FALSE };
+ uint64_t prefixLocation;
+ uint8_t byte;
+
+ BOOL hasAdSize = FALSE;
+ BOOL hasOpSize = FALSE;
+
+ dprintf(insn, "readPrefixes()");
+
+ while (isPrefix) {
+ prefixLocation = insn->readerCursor;
+
+ if (consumeByte(insn, &byte))
+ return -1;
+
+ switch (byte) {
+ case 0xf0: /* LOCK */
+ case 0xf2: /* REPNE/REPNZ */
+ case 0xf3: /* REP or REPE/REPZ */
+ if (prefixGroups[0])
+ dprintf(insn, "Redundant Group 1 prefix");
+ prefixGroups[0] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x2e: /* CS segment override -OR- Branch not taken */
+ case 0x36: /* SS segment override -OR- Branch taken */
+ case 0x3e: /* DS segment override */
+ case 0x26: /* ES segment override */
+ case 0x64: /* FS segment override */
+ case 0x65: /* GS segment override */
+ switch (byte) {
+ case 0x2e:
+ insn->segmentOverride = SEG_OVERRIDE_CS;
+ break;
+ case 0x36:
+ insn->segmentOverride = SEG_OVERRIDE_SS;
+ break;
+ case 0x3e:
+ insn->segmentOverride = SEG_OVERRIDE_DS;
+ break;
+ case 0x26:
+ insn->segmentOverride = SEG_OVERRIDE_ES;
+ break;
+ case 0x64:
+ insn->segmentOverride = SEG_OVERRIDE_FS;
+ break;
+ case 0x65:
+ insn->segmentOverride = SEG_OVERRIDE_GS;
+ break;
+ default:
+ unreachable("Unhandled override");
+ }
+ if (prefixGroups[1])
+ dprintf(insn, "Redundant Group 2 prefix");
+ prefixGroups[1] = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x66: /* Operand-size override */
+ if (prefixGroups[2])
+ dprintf(insn, "Redundant Group 3 prefix");
+ prefixGroups[2] = TRUE;
+ hasOpSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ case 0x67: /* Address-size override */
+ if (prefixGroups[3])
+ dprintf(insn, "Redundant Group 4 prefix");
+ prefixGroups[3] = TRUE;
+ hasAdSize = TRUE;
+ setPrefixPresent(insn, byte, prefixLocation);
+ break;
+ default: /* Not a prefix byte */
+ isPrefix = FALSE;
+ break;
+ }
+
+ if (isPrefix)
+ dprintf(insn, "Found prefix 0x%hhx", byte);
+ }
+
+ if (insn->mode == MODE_64BIT) {
+ if ((byte & 0xf0) == 0x40) {
+ uint8_t opcodeByte;
+
+ if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ dprintf(insn, "Redundant REX prefix");
+ return -1;
+ }
+
+ insn->rexPrefix = byte;
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+
+ dprintf(insn, "Found REX prefix 0x%hhx", byte);
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ } else {
+ unconsumeByte(insn);
+ }
+
+ if (insn->mode == MODE_16BIT) {
+ insn->registerSize = (hasOpSize ? 4 : 2);
+ insn->addressSize = (hasAdSize ? 4 : 2);
+ insn->displacementSize = (hasAdSize ? 4 : 2);
+ insn->immediateSize = (hasOpSize ? 4 : 2);
+ } else if (insn->mode == MODE_32BIT) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 2 : 4);
+ insn->displacementSize = (hasAdSize ? 2 : 4);
+ insn->immediateSize = (hasAdSize ? 2 : 4);
+ } else if (insn->mode == MODE_64BIT) {
+ if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
+ insn->registerSize = 8;
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = 4;
+ insn->immediateSize = 4;
+ } else if (insn->rexPrefix) {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ } else {
+ insn->registerSize = (hasOpSize ? 2 : 4);
+ insn->addressSize = (hasAdSize ? 4 : 8);
+ insn->displacementSize = (hasOpSize ? 2 : 4);
+ insn->immediateSize = (hasOpSize ? 2 : 4);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
+ * extended or escape opcodes).
+ *
+ * @param insn - The instruction whose opcode is to be read.
+ * @return - 0 if the opcode could be read successfully; nonzero otherwise.
+ */
+static int readOpcode(struct InternalInstruction* insn) {
+ /* Determine the length of the primary opcode */
+
+ uint8_t current;
+
+ dprintf(insn, "readOpcode()");
+
+ insn->opcodeType = ONEBYTE;
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x0f) {
+ dprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
+
+ insn->twoByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ if (current == 0x38) {
+ dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_38;
+ } else if (current == 0x3a) {
+ dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_3A;
+ } else {
+ dprintf(insn, "Didn't find a three-byte escape prefix");
+
+ insn->opcodeType = TWOBYTE;
+ }
+ }
+
+ /*
+ * At this point we have consumed the full opcode.
+ * Anything we consume from here on must be unconsumed.
+ */
+
+ insn->opcode = current;
+
+ return 0;
+}
+
+static int readModRM(struct InternalInstruction* insn);
+
+/*
+ * getIDWithAttrMask - Determines the ID of an instruction, consuming
+ * the ModR/M byte as appropriate for extended and escape opcodes,
+ * and using a supplied attribute mask.
+ *
+ * @param instructionID - A pointer whose target is filled in with the ID of the
+ * instruction.
+ * @param insn - The instruction whose ID is to be determined.
+ * @param attrMask - The attribute mask to search.
+ * @return - 0 if the ModR/M could be read when needed or was not
+ * needed; nonzero otherwise.
+ */
+static int getIDWithAttrMask(uint16_t* instructionID,
+ struct InternalInstruction* insn,
+ uint8_t attrMask) {
+ BOOL hasModRMExtension;
+
+ uint8_t instructionClass;
+
+ instructionClass = contextForAttrs(attrMask);
+
+ hasModRMExtension = modRMRequired(insn->opcodeType,
+ instructionClass,
+ insn->opcode);
+
+ if (hasModRMExtension) {
+ readModRM(insn);
+
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ insn->modRM);
+ } else {
+ *instructionID = decode(insn->opcodeType,
+ instructionClass,
+ insn->opcode,
+ 0);
+ }
+
+ return 0;
+}
+
+/*
+ * is16BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 16-bit whereas the other is not.
+ *
+ * @param orig - The instruction that is not 16-bit
+ * @param equiv - The instruction that is 16-bit
+ */
+static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
+ off_t i;
+
+ for(i = 0;; i++) {
+ if(orig[i] == '\0' && equiv[i] == '\0')
+ return TRUE;
+ if(orig[i] == '\0' || equiv[i] == '\0')
+ return FALSE;
+ if(orig[i] != equiv[i]) {
+ if((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
+ continue;
+ if((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
+ continue;
+ if((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
+ continue;
+ return FALSE;
+ }
+ }
+}
+
+/*
+ * is64BitEquivalent - Determines whether two instruction names refer to
+ * equivalent instructions but one is 64-bit whereas the other is not.
+ *
+ * @param orig - The instruction that is not 64-bit
+ * @param equiv - The instruction that is 64-bit
+ */
+static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
+ off_t i;
+
+ for(i = 0;; i++) {
+ if(orig[i] == '\0' && equiv[i] == '\0')
+ return TRUE;
+ if(orig[i] == '\0' || equiv[i] == '\0')
+ return FALSE;
+ if(orig[i] != equiv[i]) {
+ if((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
+ continue;
+ if((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
+ continue;
+ if((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
+ continue;
+ return FALSE;
+ }
+ }
+}
+
+
+/*
+ * getID - Determines the ID of an instruction, consuming the ModR/M byte as
+ * appropriate for extended and escape opcodes. Determines the attributes and
+ * context for the instruction before doing so.
+ *
+ * @param insn - The instruction whose ID is to be determined.
+ * @return - 0 if the ModR/M could be read when needed or was not needed;
+ * nonzero otherwise.
+ */
+static int getID(struct InternalInstruction* insn) {
+ uint8_t attrMask;
+ uint16_t instructionID;
+
+ dprintf(insn, "getID()");
+
+ attrMask = ATTR_NONE;
+
+ if (insn->mode == MODE_64BIT)
+ attrMask |= ATTR_64BIT;
+
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
+ if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XS;
+ else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XD;
+
+ if(getIDWithAttrMask(&instructionID, insn, attrMask))
+ return -1;
+
+ /* The following clauses compensate for limitations of the tables. */
+
+ if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
+ /*
+ * Although for SSE instructions it is usually necessary to treat REX.W+F2
+ * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
+ * an occasional instruction where F2 is incidental and REX.W is the more
+ * significant. If the decoded instruction is 32-bit and adding REX.W
+ * instead of F2 changes a 32 to a 64, we adopt the new encoding.
+ */
+
+ struct InstructionSpecifier* spec;
+ uint16_t instructionIDWithREXw;
+ struct InstructionSpecifier* specWithREXw;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithREXw,
+ insn,
+ attrMask & (~ATTR_XD))) {
+ /*
+ * Decoding with REX.w would yield nothing; give up and return original
+ * decode.
+ */
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithREXw = specifierForUID(instructionIDWithREXw);
+
+ if (is64BitEquivalent(spec->name, specWithREXw->name)) {
+ insn->instructionID = instructionIDWithREXw;
+ insn->spec = specWithREXw;
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
+ /*
+ * The instruction tables make no distinction between instructions that
+ * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
+ * particular spot (i.e., many MMX operations). In general we're
+ * conservative, but in the specific case where OpSize is present but not
+ * in the right place we check if there's a 16-bit operation.
+ */
+
+ struct InstructionSpecifier* spec;
+ uint16_t instructionIDWithOpsize;
+ struct InstructionSpecifier* specWithOpsize;
+
+ spec = specifierForUID(instructionID);
+
+ if (getIDWithAttrMask(&instructionIDWithOpsize,
+ insn,
+ attrMask | ATTR_OPSIZE)) {
+ /*
+ * ModRM required with OpSize but not present; give up and return version
+ * without OpSize set
+ */
+
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ return 0;
+ }
+
+ specWithOpsize = specifierForUID(instructionIDWithOpsize);
+
+ if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
+ insn->instructionID = instructionIDWithOpsize;
+ insn->spec = specWithOpsize;
+ } else {
+ insn->instructionID = instructionID;
+ insn->spec = spec;
+ }
+ return 0;
+ }
+
+ insn->instructionID = instructionID;
+ insn->spec = specifierForUID(insn->instructionID);
+
+ return 0;
+}
+
+/*
+ * readSIB - Consumes the SIB byte to determine addressing information for an
+ * instruction.
+ *
+ * @param insn - The instruction whose SIB byte is to be read.
+ * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
+ */
+static int readSIB(struct InternalInstruction* insn) {
+ SIBIndex sibIndexBase;
+ SIBBase sibBaseBase;
+ uint8_t index, base;
+
+ dprintf(insn, "readSIB()");
+
+ if (insn->consumedSIB)
+ return 0;
+
+ insn->consumedSIB = TRUE;
+
+ switch (insn->addressSize) {
+ case 2:
+ dprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
+ return -1;
+ break;
+ case 4:
+ sibIndexBase = SIB_INDEX_EAX;
+ sibBaseBase = SIB_BASE_EAX;
+ break;
+ case 8:
+ sibIndexBase = SIB_INDEX_RAX;
+ sibBaseBase = SIB_BASE_RAX;
+ break;
+ }
+
+ if (consumeByte(insn, &insn->sib))
+ return -1;
+
+ index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+
+ switch (index) {
+ case 0x4:
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ default:
+ insn->sibIndex = (EABase)(sibIndexBase + index);
+ if (insn->sibIndex == SIB_INDEX_sib ||
+ insn->sibIndex == SIB_INDEX_sib64)
+ insn->sibIndex = SIB_INDEX_NONE;
+ break;
+ }
+
+ switch (scaleFromSIB(insn->sib)) {
+ case 0:
+ insn->sibScale = 1;
+ break;
+ case 1:
+ insn->sibScale = 2;
+ break;
+ case 2:
+ insn->sibScale = 4;
+ break;
+ case 3:
+ insn->sibScale = 8;
+ break;
+ }
+
+ base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+
+ switch (base) {
+ case 0x5:
+ switch (modFromModRM(insn->modRM)) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = SIB_BASE_NONE;
+ break;
+ case 0x1:
+ insn->eaDisplacement = EA_DISP_8;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x2:
+ insn->eaDisplacement = EA_DISP_32;
+ insn->sibBase = (insn->addressSize == 4 ?
+ SIB_BASE_EBP : SIB_BASE_RBP);
+ break;
+ case 0x3:
+ unreachable("Cannot have Mod = 0b11 and a SIB byte");
+ }
+ break;
+ default:
+ insn->sibBase = (EABase)(sibBaseBase + base);
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readDisplacement - Consumes the displacement of an instruction.
+ *
+ * @param insn - The instruction whose displacement is to be read.
+ * @return - 0 if the displacement byte was successfully read; nonzero
+ * otherwise.
+ */
+static int readDisplacement(struct InternalInstruction* insn) {
+ int8_t d8;
+ int16_t d16;
+ int32_t d32;
+
+ dprintf(insn, "readDisplacement()");
+
+ if (insn->consumedDisplacement)
+ return 0;
+
+ insn->consumedDisplacement = TRUE;
+
+ switch (insn->eaDisplacement) {
+ case EA_DISP_NONE:
+ insn->consumedDisplacement = FALSE;
+ break;
+ case EA_DISP_8:
+ if (consumeInt8(insn, &d8))
+ return -1;
+ insn->displacement = d8;
+ break;
+ case EA_DISP_16:
+ if (consumeInt16(insn, &d16))
+ return -1;
+ insn->displacement = d16;
+ break;
+ case EA_DISP_32:
+ if (consumeInt32(insn, &d32))
+ return -1;
+ insn->displacement = d32;
+ break;
+ }
+
+ insn->consumedDisplacement = TRUE;
+ return 0;
+}
+
+/*
+ * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
+ * displacement) for an instruction and interprets it.
+ *
+ * @param insn - The instruction whose addressing information is to be read.
+ * @return - 0 if the information was successfully read; nonzero otherwise.
+ */
+static int readModRM(struct InternalInstruction* insn) {
+ uint8_t mod, rm, reg;
+
+ dprintf(insn, "readModRM()");
+
+ if (insn->consumedModRM)
+ return 0;
+
+ consumeByte(insn, &insn->modRM);
+ insn->consumedModRM = TRUE;
+
+ mod = modFromModRM(insn->modRM);
+ rm = rmFromModRM(insn->modRM);
+ reg = regFromModRM(insn->modRM);
+
+ /*
+ * This goes by insn->registerSize to pick the correct register, which messes
+ * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
+ * fixupReg().
+ */
+ switch (insn->registerSize) {
+ case 2:
+ insn->regBase = REG_AX;
+ insn->eaRegBase = EA_REG_AX;
+ break;
+ case 4:
+ insn->regBase = REG_EAX;
+ insn->eaRegBase = EA_REG_EAX;
+ break;
+ case 8:
+ insn->regBase = REG_RAX;
+ insn->eaRegBase = EA_REG_RAX;
+ break;
+ }
+
+ reg |= rFromREX(insn->rexPrefix) << 3;
+ rm |= bFromREX(insn->rexPrefix) << 3;
+
+ insn->reg = (Reg)(insn->regBase + reg);
+
+ switch (insn->addressSize) {
+ case 2:
+ insn->eaBaseBase = EA_BASE_BX_SI;
+
+ switch (mod) {
+ case 0x0:
+ if (rm == 0x6) {
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_16;
+ if(readDisplacement(insn))
+ return -1;
+ } else {
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_NONE;
+ }
+ break;
+ case 0x1:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_8;
+ if(readDisplacement(insn))
+ return -1;
+ break;
+ case 0x2:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ insn->eaDisplacement = EA_DISP_16;
+ if(readDisplacement(insn))
+ return -1;
+ break;
+ case 0x3:
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ if(readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 4:
+ case 8:
+ insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
+
+ switch (mod) {
+ case 0x0:
+ insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REXW.b is set */
+ insn->eaBase = (insn->addressSize == 4 ?
+ EA_BASE_sib : EA_BASE_sib64);
+ readSIB(insn);
+ if(readDisplacement(insn))
+ return -1;
+ break;
+ case 0x5:
+ insn->eaBase = EA_BASE_NONE;
+ insn->eaDisplacement = EA_DISP_32;
+ if(readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ break;
+ }
+ break;
+ case 0x1:
+ case 0x2:
+ insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
+ switch (rm) {
+ case 0x4:
+ case 0xc: /* in case REXW.b is set */
+ insn->eaBase = EA_BASE_sib;
+ readSIB(insn);
+ if(readDisplacement(insn))
+ return -1;
+ break;
+ default:
+ insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+ if(readDisplacement(insn))
+ return -1;
+ break;
+ }
+ break;
+ case 0x3:
+ insn->eaDisplacement = EA_DISP_NONE;
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
+ break;
+ }
+ break;
+ } /* switch (insn->addressSize) */
+
+ return 0;
+}
+
+#define GENERIC_FIXUP_FUNC(name, base, prefix) \
+ static uint8_t name(struct InternalInstruction *insn, \
+ OperandType type, \
+ uint8_t index, \
+ uint8_t *valid) { \
+ *valid = 1; \
+ switch (type) { \
+ default: \
+ unreachable("Unhandled register type"); \
+ case TYPE_Rv: \
+ return base + index; \
+ case TYPE_R8: \
+ if(insn->rexPrefix && \
+ index >= 4 && index <= 7) { \
+ return prefix##_SPL + (index - 4); \
+ } else { \
+ return prefix##_AL + index; \
+ } \
+ case TYPE_R16: \
+ return prefix##_AX + index; \
+ case TYPE_R32: \
+ return prefix##_EAX + index; \
+ case TYPE_R64: \
+ return prefix##_RAX + index; \
+ case TYPE_XMM128: \
+ case TYPE_XMM64: \
+ case TYPE_XMM32: \
+ case TYPE_XMM: \
+ return prefix##_XMM0 + index; \
+ case TYPE_MM64: \
+ case TYPE_MM32: \
+ case TYPE_MM: \
+ if(index > 7) \
+ *valid = 0; \
+ return prefix##_MM0 + index; \
+ case TYPE_SEGMENTREG: \
+ if(index > 5) \
+ *valid = 0; \
+ return prefix##_ES + index; \
+ case TYPE_DEBUGREG: \
+ if(index > 7) \
+ *valid = 0; \
+ return prefix##_DR0 + index; \
+ case TYPE_CR32: \
+ if(index > 7) \
+ *valid = 0; \
+ return prefix##_ECR0 + index; \
+ case TYPE_CR64: \
+ if(index > 8) \
+ *valid = 0; \
+ return prefix##_RCR0 + index; \
+ } \
+ }
+
+/*
+ * fixup*Value - Consults an operand type to determine the meaning of the
+ * reg or R/M field. If the operand is an XMM operand, for example, an
+ * operand would be XMM0 instead of AX, which readModRM() would otherwise
+ * misinterpret it as.
+ *
+ * @param insn - The instruction containing the operand.
+ * @param type - The operand type.
+ * @param index - The existing value of the field as reported by readModRM().
+ * @param valid - The address of a uint8_t. The target is set to 1 if the
+ * field is valid for the register class; 0 if not.
+ */
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, REG)
+GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
+
+/*
+ * fixupReg - Consults an operand specifier to determine which of the
+ * fixup*Value functions to use in correcting readModRM()'ss interpretation.
+ *
+ * @param insn - See fixup*Value().
+ * @param op - The operand specifier.
+ * @return - 0 if fixup was successful; -1 if the register returned was
+ * invalid for its class.
+ */
+static int fixupReg(struct InternalInstruction *insn,
+ struct OperandSpecifier *op) {
+ uint8_t valid;
+
+ dprintf(insn, "fixupReg()");
+
+ switch ((OperandEncoding)op->encoding) {
+ default:
+ unreachable("Expected a REG or R/M encoding in fixupReg");
+ case ENCODING_REG:
+ insn->reg = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->reg - insn->regBase,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
+ case ENCODING_RM:
+ if (insn->eaBase >= insn->eaRegBase) {
+ insn->eaBase = (EABase)fixupRMValue(insn,
+ (OperandType)op->type,
+ insn->eaBase - insn->eaRegBase,
+ &valid);
+ if (!valid)
+ return -1;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * readOpcodeModifier - Reads an operand from the opcode field of an
+ * instruction. Handles AddRegFrm instructions.
+ *
+ * @param insn - The instruction whose opcode field is to be read.
+ * @param inModRM - Indicates that the opcode field is to be read from the
+ * ModR/M extension; useful for escape opcodes
+ */
+static void readOpcodeModifier(struct InternalInstruction* insn) {
+ dprintf(insn, "readOpcodeModifier()");
+
+ if (insn->consumedOpcodeModifier)
+ return;
+
+ insn->consumedOpcodeModifier = TRUE;
+
+ switch(insn->spec->modifierType) {
+ default:
+ unreachable("Unknown modifier type.");
+ case MODIFIER_NONE:
+ unreachable("No modifier but an operand expects one.");
+ case MODIFIER_OPCODE:
+ insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
+ break;
+ case MODIFIER_MODRM:
+ insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
+ break;
+ }
+}
+
+/*
+ * readOpcodeRegister - Reads an operand from the opcode field of an
+ * instruction and interprets it appropriately given the operand width.
+ * Handles AddRegFrm instructions.
+ *
+ * @param insn - See readOpcodeModifier().
+ * @param size - The width (in bytes) of the register being specified.
+ * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
+ * RAX.
+ */
+static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
+ dprintf(insn, "readOpcodeRegister()");
+
+ readOpcodeModifier(insn);
+
+ if (size == 0)
+ size = insn->registerSize;
+
+ switch (size) {
+ case 1:
+ insn->opcodeRegister = (Reg)(REG_AL + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ if(insn->rexPrefix &&
+ insn->opcodeRegister >= REG_AL + 0x4 &&
+ insn->opcodeRegister < REG_AL + 0x8) {
+ insn->opcodeRegister = (Reg)(REG_SPL + (insn->opcodeRegister - REG_AL - 4));
+ }
+
+ break;
+ case 2:
+ insn->opcodeRegister = (Reg)(REG_AX + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 4:
+ insn->opcodeRegister = (Reg)(REG_EAX + ((bFromREX(insn->rexPrefix) << 3)
+ | insn->opcodeModifier));
+ break;
+ case 8:
+ insn->opcodeRegister = (Reg)(REG_RAX + ((bFromREX(insn->rexPrefix) << 3)
+ |insn->opcodeModifier));
+ break;
+ }
+}
+
+/*
+ * readImmediate - Consumes an immediate operand from an instruction, given the
+ * desired operand size.
+ *
+ * @param insn - The instruction whose operand is to be read.
+ * @param size - The width (in bytes) of the operand.
+ * @return - 0 if the immediate was successfully consumed; nonzero
+ * otherwise.
+ */
+static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
+ uint8_t imm8;
+ uint16_t imm16;
+ uint32_t imm32;
+ uint64_t imm64;
+
+ dprintf(insn, "readImmediate()");
+
+ if (insn->numImmediatesConsumed == 2)
+ unreachable("Already consumed two immediates");
+
+ if (size == 0)
+ size = insn->immediateSize;
+ else
+ insn->immediateSize = size;
+
+ switch (size) {
+ case 1:
+ if (consumeByte(insn, &imm8))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm8;
+ break;
+ case 2:
+ if (consumeUInt16(insn, &imm16))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm16;
+ break;
+ case 4:
+ if (consumeUInt32(insn, &imm32))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm32;
+ break;
+ case 8:
+ if (consumeUInt64(insn, &imm64))
+ return -1;
+ insn->immediates[insn->numImmediatesConsumed] = imm64;
+ break;
+ }
+
+ insn->numImmediatesConsumed++;
+
+ return 0;
+}
+
+/*
+ * readOperands - Consults the specifier for an instruction and consumes all
+ * operands for that instruction, interpreting them as it goes.
+ *
+ * @param insn - The instruction whose operands are to be read and interpreted.
+ * @return - 0 if all operands could be read; nonzero otherwise.
+ */
+static int readOperands(struct InternalInstruction* insn) {
+ int index;
+
+ dprintf(insn, "readOperands()");
+
+ for (index = 0; index < X86_MAX_OPERANDS; ++index) {
+ switch (insn->spec->operands[index].encoding) {
+ case ENCODING_NONE:
+ break;
+ case ENCODING_REG:
+ case ENCODING_RM:
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
+ break;
+ case ENCODING_CB:
+ case ENCODING_CW:
+ case ENCODING_CD:
+ case ENCODING_CP:
+ case ENCODING_CO:
+ case ENCODING_CT:
+ dprintf(insn, "We currently don't hande code-offset encodings");
+ return -1;
+ case ENCODING_IB:
+ if (readImmediate(insn, 1))
+ return -1;
+ break;
+ case ENCODING_IW:
+ if (readImmediate(insn, 2))
+ return -1;
+ break;
+ case ENCODING_ID:
+ if (readImmediate(insn, 4))
+ return -1;
+ break;
+ case ENCODING_IO:
+ if (readImmediate(insn, 8))
+ return -1;
+ break;
+ case ENCODING_Iv:
+ readImmediate(insn, insn->immediateSize);
+ break;
+ case ENCODING_Ia:
+ readImmediate(insn, insn->addressSize);
+ break;
+ case ENCODING_RB:
+ readOpcodeRegister(insn, 1);
+ break;
+ case ENCODING_RW:
+ readOpcodeRegister(insn, 2);
+ break;
+ case ENCODING_RD:
+ readOpcodeRegister(insn, 4);
+ break;
+ case ENCODING_RO:
+ readOpcodeRegister(insn, 8);
+ break;
+ case ENCODING_Rv:
+ readOpcodeRegister(insn, 0);
+ break;
+ case ENCODING_I:
+ readOpcodeModifier(insn);
+ break;
+ case ENCODING_DUP:
+ break;
+ default:
+ dprintf(insn, "Encountered an operand with an unknown encoding.");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * decodeInstruction - Reads and interprets a full instruction provided by the
+ * user.
+ *
+ * @param insn - A pointer to the instruction to be populated. Must be
+ * pre-allocated.
+ * @param reader - The function to be used to read the instruction's bytes.
+ * @param readerArg - A generic argument to be passed to the reader to store
+ * any internal state.
+ * @param logger - If non-NULL, the function to be used to write log messages
+ * and warnings.
+ * @param loggerArg - A generic argument to be passed to the logger to store
+ * any internal state.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
+ * decode the instruction in.
+ * @return - 0 if the instruction's memory could be read; nonzero if
+ * not.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ uint64_t startLoc,
+ DisassemblerMode mode) {
+ bzero(insn, sizeof(struct InternalInstruction));
+
+ insn->reader = reader;
+ insn->readerArg = readerArg;
+ insn->dlog = logger;
+ insn->dlogArg = loggerArg;
+ insn->startLocation = startLoc;
+ insn->readerCursor = startLoc;
+ insn->mode = mode;
+ insn->numImmediatesConsumed = 0;
+
+ if (readPrefixes(insn) ||
+ readOpcode(insn) ||
+ getID(insn) ||
+ insn->instructionID == 0 ||
+ readOperands(insn))
+ return -1;
+
+ insn->length = insn->readerCursor - insn->startLocation;
+
+ dprintf(insn, "Read from 0x%llx to 0x%llx: length %llu",
+ startLoc, insn->readerCursor, insn->length);
+
+ if (insn->length > 15)
+ dprintf(insn, "Instruction exceeds 15-byte limit");
+
+ return 0;
+}
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
new file mode 100644
index 0000000..f548c65
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -0,0 +1,515 @@
+/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains the public interface of the instruction decoder.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+#ifndef X86DISASSEMBLERDECODER_H
+#define X86DISASSEMBLERDECODER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ const char* name;
+
+#define INSTRUCTION_IDS \
+ InstrUID* instructionIDs;
+
+#include "X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+/*
+ * Accessor functions for various fields of an Intel instruction
+ */
+static inline uint8_t modFromModRM(uint8_t modRM){ return (modRM & 0xc0) >> 6; }
+static inline uint8_t regFromModRM(uint8_t modRM){ return (modRM & 0x38) >> 3; }
+static inline uint8_t rmFromModRM(uint8_t modRM) { return (modRM & 0x7); }
+static inline uint8_t scaleFromSIB(uint8_t sib) { return (sib & 0xc0) >> 6; }
+static inline uint8_t indexFromSIB(uint8_t sib) { return (sib & 0x38) >> 3; }
+static inline uint8_t baseFromSIB(uint8_t sib) { return (sib & 0x7); }
+static inline uint8_t wFromREX(uint8_t rex) { return (rex & 0x8) >> 3; }
+static inline uint8_t rFromREX(uint8_t rex) { return (rex & 0x4) >> 2; }
+static inline uint8_t xFromREX(uint8_t rex) { return (rex & 0x2) >> 1; }
+static inline uint8_t bFromREX(uint8_t rex) { return (rex & 0x1); }
+
+/*
+ * These enums represent Intel registers for use by the decoder.
+ */
+
+#define REGS_8BIT \
+ ENTRY(AL) \
+ ENTRY(CL) \
+ ENTRY(DL) \
+ ENTRY(BL) \
+ ENTRY(AH) \
+ ENTRY(CH) \
+ ENTRY(DH) \
+ ENTRY(BH) \
+ ENTRY(R8B) \
+ ENTRY(R9B) \
+ ENTRY(R10B) \
+ ENTRY(R11B) \
+ ENTRY(R12B) \
+ ENTRY(R13B) \
+ ENTRY(R14B) \
+ ENTRY(R15B) \
+ ENTRY(SPL) \
+ ENTRY(BPL) \
+ ENTRY(SIL) \
+ ENTRY(DIL)
+
+#define EA_BASES_16BIT \
+ ENTRY(BX_SI) \
+ ENTRY(BX_DI) \
+ ENTRY(BP_SI) \
+ ENTRY(BP_DI) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(BP) \
+ ENTRY(BX) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define REGS_16BIT \
+ ENTRY(AX) \
+ ENTRY(CX) \
+ ENTRY(DX) \
+ ENTRY(BX) \
+ ENTRY(SP) \
+ ENTRY(BP) \
+ ENTRY(SI) \
+ ENTRY(DI) \
+ ENTRY(R8W) \
+ ENTRY(R9W) \
+ ENTRY(R10W) \
+ ENTRY(R11W) \
+ ENTRY(R12W) \
+ ENTRY(R13W) \
+ ENTRY(R14W) \
+ ENTRY(R15W)
+
+#define EA_BASES_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(sib) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define REGS_32BIT \
+ ENTRY(EAX) \
+ ENTRY(ECX) \
+ ENTRY(EDX) \
+ ENTRY(EBX) \
+ ENTRY(ESP) \
+ ENTRY(EBP) \
+ ENTRY(ESI) \
+ ENTRY(EDI) \
+ ENTRY(R8D) \
+ ENTRY(R9D) \
+ ENTRY(R10D) \
+ ENTRY(R11D) \
+ ENTRY(R12D) \
+ ENTRY(R13D) \
+ ENTRY(R14D) \
+ ENTRY(R15D)
+
+#define EA_BASES_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(sib64) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_64BIT \
+ ENTRY(RAX) \
+ ENTRY(RCX) \
+ ENTRY(RDX) \
+ ENTRY(RBX) \
+ ENTRY(RSP) \
+ ENTRY(RBP) \
+ ENTRY(RSI) \
+ ENTRY(RDI) \
+ ENTRY(R8) \
+ ENTRY(R9) \
+ ENTRY(R10) \
+ ENTRY(R11) \
+ ENTRY(R12) \
+ ENTRY(R13) \
+ ENTRY(R14) \
+ ENTRY(R15)
+
+#define REGS_MMX \
+ ENTRY(MM0) \
+ ENTRY(MM1) \
+ ENTRY(MM2) \
+ ENTRY(MM3) \
+ ENTRY(MM4) \
+ ENTRY(MM5) \
+ ENTRY(MM6) \
+ ENTRY(MM7)
+
+#define REGS_XMM \
+ ENTRY(XMM0) \
+ ENTRY(XMM1) \
+ ENTRY(XMM2) \
+ ENTRY(XMM3) \
+ ENTRY(XMM4) \
+ ENTRY(XMM5) \
+ ENTRY(XMM6) \
+ ENTRY(XMM7) \
+ ENTRY(XMM8) \
+ ENTRY(XMM9) \
+ ENTRY(XMM10) \
+ ENTRY(XMM11) \
+ ENTRY(XMM12) \
+ ENTRY(XMM13) \
+ ENTRY(XMM14) \
+ ENTRY(XMM15)
+
+#define REGS_SEGMENT \
+ ENTRY(ES) \
+ ENTRY(CS) \
+ ENTRY(SS) \
+ ENTRY(DS) \
+ ENTRY(FS) \
+ ENTRY(GS)
+
+#define REGS_DEBUG \
+ ENTRY(DR0) \
+ ENTRY(DR1) \
+ ENTRY(DR2) \
+ ENTRY(DR3) \
+ ENTRY(DR4) \
+ ENTRY(DR5) \
+ ENTRY(DR6) \
+ ENTRY(DR7)
+
+#define REGS_CONTROL_32BIT \
+ ENTRY(ECR0) \
+ ENTRY(ECR1) \
+ ENTRY(ECR2) \
+ ENTRY(ECR3) \
+ ENTRY(ECR4) \
+ ENTRY(ECR5) \
+ ENTRY(ECR6) \
+ ENTRY(ECR7)
+
+#define REGS_CONTROL_64BIT \
+ ENTRY(RCR0) \
+ ENTRY(RCR1) \
+ ENTRY(RCR2) \
+ ENTRY(RCR3) \
+ ENTRY(RCR4) \
+ ENTRY(RCR5) \
+ ENTRY(RCR6) \
+ ENTRY(RCR7) \
+ ENTRY(RCR8)
+
+#define ALL_EA_BASES \
+ EA_BASES_16BIT \
+ EA_BASES_32BIT \
+ EA_BASES_64BIT
+
+#define ALL_SIB_BASES \
+ REGS_32BIT \
+ REGS_64BIT
+
+#define ALL_REGS \
+ REGS_8BIT \
+ REGS_16BIT \
+ REGS_32BIT \
+ REGS_64BIT \
+ REGS_MMX \
+ REGS_XMM \
+ REGS_SEGMENT \
+ REGS_DEBUG \
+ REGS_CONTROL_32BIT \
+ REGS_CONTROL_64BIT \
+ ENTRY(RIP)
+
+/*
+ * EABase - All possible values of the base field for effective-address
+ * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
+ * distinguish between bases (EA_BASE_*) and registers that just happen to be
+ * referred to when Mod == 0b11 (EA_REG_*).
+ */
+typedef enum {
+ EA_BASE_NONE,
+#define ENTRY(x) EA_BASE_##x,
+ ALL_EA_BASES
+#undef ENTRY
+#define ENTRY(x) EA_REG_##x,
+ ALL_REGS
+#undef ENTRY
+ EA_max
+} EABase;
+
+/*
+ * SIBIndex - All possible values of the SIB index field.
+ * Borrows entries from ALL_EA_BASES with the special case that
+ * sib is synonymous with NONE.
+ */
+typedef enum {
+ SIB_INDEX_NONE,
+#define ENTRY(x) SIB_INDEX_##x,
+ ALL_EA_BASES
+#undef ENTRY
+ SIB_INDEX_max
+} SIBIndex;
+
+/*
+ * SIBBase - All possible values of the SIB base field.
+ */
+typedef enum {
+ SIB_BASE_NONE,
+#define ENTRY(x) SIB_BASE_##x,
+ ALL_SIB_BASES
+#undef ENTRY
+ SIB_BASE_max
+} SIBBase;
+
+/*
+ * EADisplacement - Possible displacement types for effective-address
+ * computations.
+ */
+typedef enum {
+ EA_DISP_NONE,
+ EA_DISP_8,
+ EA_DISP_16,
+ EA_DISP_32
+} EADisplacement;
+
+/*
+ * Reg - All possible values of the reg field in the ModR/M byte.
+ */
+typedef enum {
+#define ENTRY(x) REG_##x,
+ ALL_REGS
+#undef ENTRY
+ REG_max
+} Reg;
+
+/*
+ * SegmentOverride - All possible segment overrides.
+ */
+typedef enum {
+ SEG_OVERRIDE_NONE,
+ SEG_OVERRIDE_CS,
+ SEG_OVERRIDE_SS,
+ SEG_OVERRIDE_DS,
+ SEG_OVERRIDE_ES,
+ SEG_OVERRIDE_FS,
+ SEG_OVERRIDE_GS,
+ SEG_OVERRIDE_max
+} SegmentOverride;
+
+typedef uint8_t BOOL;
+
+/*
+ * byteReader_t - Type for the byte reader that the consumer must provide to
+ * the decoder. Reads a single byte from the instruction's address space.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param byte - A pointer to a single byte in memory that should be set to
+ * contain the value at address.
+ * @param address - The address in the instruction's address space that should
+ * be read from.
+ * @return - -1 if the byte cannot be read for any reason; 0 otherwise.
+ */
+typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
+
+/*
+ * dlog_t - Type for the logging function that the consumer can provide to
+ * get debugging output from the decoder.
+ * @param arg - A baton that the consumer can associate with any internal
+ * state that it needs.
+ * @param log - A string that contains the message. Will be reused after
+ * the logger returns.
+ */
+typedef void (*dlog_t)(void* arg, const char *log);
+
+/*
+ * The x86 internal instruction, which is produced by the decoder.
+ */
+struct InternalInstruction {
+ /* Reader interface (C) */
+ byteReader_t reader;
+ /* Opaque value passed to the reader */
+ void* readerArg;
+ /* The address of the next byte to read via the reader */
+ uint64_t readerCursor;
+
+ /* Logger interface (C) */
+ dlog_t dlog;
+ /* Opaque value passed to the logger */
+ void* dlogArg;
+
+ /* General instruction information */
+
+ /* The mode to disassemble for (64-bit, protected, real) */
+ DisassemblerMode mode;
+ /* The start of the instruction, usable with the reader */
+ uint64_t startLocation;
+ /* The length of the instruction, in bytes */
+ size_t length;
+
+ /* Prefix state */
+
+ /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
+ uint8_t prefixPresent[0x100];
+ /* contains the location (for use with the reader) of the prefix byte */
+ uint64_t prefixLocations[0x100];
+ /* The value of the REX prefix, if present */
+ uint8_t rexPrefix;
+ /* The location of the REX prefix */
+ uint64_t rexLocation;
+ /* The location where a mandatory prefix would have to be (i.e., right before
+ the opcode, or right before the REX prefix if one is present) */
+ uint64_t necessaryPrefixLocation;
+ /* The segment override type */
+ SegmentOverride segmentOverride;
+
+ /* Sizes of various critical pieces of data */
+ uint8_t registerSize;
+ uint8_t addressSize;
+ uint8_t displacementSize;
+ uint8_t immediateSize;
+
+ /* opcode state */
+
+ /* The value of the two-byte escape prefix (usually 0x0f) */
+ uint8_t twoByteEscape;
+ /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
+ uint8_t threeByteEscape;
+ /* The last byte of the opcode, not counting any ModR/M extension */
+ uint8_t opcode;
+ /* The ModR/M byte of the instruction, if it is an opcode extension */
+ uint8_t modRMExtension;
+
+ /* decode state */
+
+ /* The type of opcode, used for indexing into the array of decode tables */
+ OpcodeType opcodeType;
+ /* The instruction ID, extracted from the decode table */
+ uint16_t instructionID;
+ /* The specifier for the instruction, from the instruction info table */
+ struct InstructionSpecifier* spec;
+
+ /* state for additional bytes, consumed during operand decode. Pattern:
+ consumed___ indicates that the byte was already consumed and does not
+ need to be consumed again */
+
+ /* The ModR/M byte, which contains most register operands and some portion of
+ all memory operands */
+ BOOL consumedModRM;
+ uint8_t modRM;
+
+ /* The SIB byte, used for more complex 32- or 64-bit memory operands */
+ BOOL consumedSIB;
+ uint8_t sib;
+
+ /* The displacement, used for memory operands */
+ BOOL consumedDisplacement;
+ int32_t displacement;
+
+ /* Immediates. There can be two in some cases */
+ uint8_t numImmediatesConsumed;
+ uint8_t numImmediatesTranslated;
+ uint64_t immediates[2];
+
+ /* A register or immediate operand encoded into the opcode */
+ BOOL consumedOpcodeModifier;
+ uint8_t opcodeModifier;
+ Reg opcodeRegister;
+
+ /* Portions of the ModR/M byte */
+
+ /* These fields determine the allowable values for the ModR/M fields, which
+ depend on operand and address widths */
+ EABase eaBaseBase;
+ EABase eaRegBase;
+ Reg regBase;
+
+ /* The Mod and R/M fields can encode a base for an effective address, or a
+ register. These are separated into two fields here */
+ EABase eaBase;
+ EADisplacement eaDisplacement;
+ /* The reg field always encodes a register */
+ Reg reg;
+
+ /* SIB state */
+ SIBIndex sibIndex;
+ uint8_t sibScale;
+ SIBBase sibBase;
+};
+
+/* decodeInstruction - Decode one instruction and store the decoding results in
+ * a buffer provided by the consumer.
+ * @param insn - The buffer to store the instruction in. Allocated by the
+ * consumer.
+ * @param reader - The byteReader_t for the bytes to be read.
+ * @param readerArg - An argument to pass to the reader for storing context
+ * specific to the consumer. May be NULL.
+ * @param logger - The dlog_t to be used in printing status messages from the
+ * disassembler. May be NULL.
+ * @param loggerArg - An argument to pass to the logger for storing context
+ * specific to the logger. May be NULL.
+ * @param startLoc - The address (in the reader's address space) of the first
+ * byte in the instruction.
+ * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
+ * @return - Nonzero if there was an error during decode, 0 otherwise.
+ */
+int decodeInstruction(struct InternalInstruction* insn,
+ byteReader_t reader,
+ void* readerArg,
+ dlog_t logger,
+ void* loggerArg,
+ uint64_t startLoc,
+ DisassemblerMode mode);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
new file mode 100644
index 0000000..b226257
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -0,0 +1,354 @@
+/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
+ *
+ * The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*
+ *
+ * This file is part of the X86 Disassembler.
+ * It contains common definitions used by both the disassembler and the table
+ * generator.
+ * Documentation for the disassembler can be found in X86Disassembler.h.
+ *
+ *===----------------------------------------------------------------------===*/
+
+/*
+ * This header file provides those definitions that need to be shared between
+ * the decoder and the table generator in a C-friendly manner.
+ */
+
+#ifndef X86DISASSEMBLERDECODERCOMMON_H
+#define X86DISASSEMBLERDECODERCOMMON_H
+
+#include "llvm/System/DataTypes.h"
+
+#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
+#define CONTEXTS_SYM x86DisassemblerContexts
+#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
+#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
+#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
+#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
+
+#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
+#define CONTEXTS_STR "x86DisassemblerContexts"
+#define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"
+#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
+#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
+#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
+
+/*
+ * Attributes of an instruction that must be known before the opcode can be
+ * processed correctly. Most of these indicate the presence of particular
+ * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
+ */
+#define ATTRIBUTE_BITS \
+ ENUM_ENTRY(ATTR_NONE, 0x00) \
+ ENUM_ENTRY(ATTR_64BIT, 0x01) \
+ ENUM_ENTRY(ATTR_XS, 0x02) \
+ ENUM_ENTRY(ATTR_XD, 0x04) \
+ ENUM_ENTRY(ATTR_REXW, 0x08) \
+ ENUM_ENTRY(ATTR_OPSIZE, 0x10)
+
+#define ENUM_ENTRY(n, v) n = v,
+enum attributeBits {
+ ATTRIBUTE_BITS
+ ATTR_max
+};
+#undef ENUM_ENTRY
+
+/*
+ * Combinations of the above attributes that are relevant to instruction
+ * decode. Although other combinations are possible, they can be reduced to
+ * these without affecting the ultimately decoded instruction.
+ */
+
+/* Class name Rank Rationale for rank assignment */
+#define INSTRUCTION_CONTEXTS \
+ ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
+ ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
+ "64-bit mode but no more") \
+ ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
+ "operands change width") \
+ ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
+ "but not the operands") \
+ ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
+ "change width; overrides IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
+ "secondary") \
+ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
+ ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
+ "opcode") \
+ ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
+ "IC_64BIT_REXW_XS") \
+ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
+ "else because this changes most " \
+ "operands' meaning")
+
+#define ENUM_ENTRY(n, r, d) n,
+typedef enum {
+ INSTRUCTION_CONTEXTS
+ IC_max
+} InstructionContext;
+#undef ENUM_ENTRY
+
+/*
+ * Opcode types, which determine which decode table to use, both in the Intel
+ * manual and also for the decoder.
+ */
+typedef enum {
+ ONEBYTE = 0,
+ TWOBYTE = 1,
+ THREEBYTE_38 = 2,
+ THREEBYTE_3A = 3
+} OpcodeType;
+
+/*
+ * The following structs are used for the hierarchical decode table. After
+ * determining the instruction's class (i.e., which IC_* constant applies to
+ * it), the decoder reads the opcode. Some instructions require specific
+ * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
+ *
+ * If a ModR/M byte is not required, "required" is left unset, and the values
+ * for each instructionID are identical.
+ */
+
+typedef uint16_t InstrUID;
+
+/*
+ * ModRMDecisionType - describes the type of ModR/M decision, allowing the
+ * consumer to determine the number of entries in it.
+ *
+ * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
+ * instruction is the same.
+ * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
+ * corresponds to one instruction; otherwise, it corresponds to
+ * a different instruction.
+ * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
+ * to a different instruction.
+ */
+
+#define MODRMTYPES \
+ ENUM_ENTRY(MODRM_ONEENTRY) \
+ ENUM_ENTRY(MODRM_SPLITRM) \
+ ENUM_ENTRY(MODRM_FULL)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODRMTYPES
+ MODRM_max
+} ModRMDecisionType;
+#undef ENUM_ENTRY
+
+/*
+ * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
+ * instruction each possible value of the ModR/M byte corresponds to. Once
+ * this information is known, we have narrowed down to a single instruction.
+ */
+struct ModRMDecision {
+ uint8_t modrm_type;
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_IDS
+};
+
+/*
+ * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
+ * given a particular opcode.
+ */
+struct OpcodeDecision {
+ struct ModRMDecision modRMDecisions[256];
+};
+
+/*
+ * ContextDecision - Specifies which opcode->instruction tables to look at given
+ * a particular context (set of attributes). Since there are many possible
+ * contexts, the decoder first uses CONTEXTS_SYM to determine which context
+ * applies given a specific set of attributes. Hence there are only IC_max
+ * entries in this table, rather than 2^(ATTR_max).
+ */
+struct ContextDecision {
+ struct OpcodeDecision opcodeDecisions[IC_max];
+};
+
+/*
+ * Physical encodings of instruction operands.
+ */
+
+#define ENCODINGS \
+ ENUM_ENTRY(ENCODING_NONE, "") \
+ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
+ ENUM_ENTRY(ENCODING_CW, "2-byte") \
+ ENUM_ENTRY(ENCODING_CD, "4-byte") \
+ ENUM_ENTRY(ENCODING_CP, "6-byte") \
+ ENUM_ENTRY(ENCODING_CO, "8-byte") \
+ ENUM_ENTRY(ENCODING_CT, "10-byte") \
+ ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
+ ENUM_ENTRY(ENCODING_IW, "2-byte") \
+ ENUM_ENTRY(ENCODING_ID, "4-byte") \
+ ENUM_ENTRY(ENCODING_IO, "8-byte") \
+ ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \
+ "the opcode byte") \
+ ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \
+ ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \
+ ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \
+ ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \
+ "opcode byte") \
+ \
+ ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \
+ ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \
+ ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
+ "opcode byte") \
+ ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
+ "in type")
+
+#define ENUM_ENTRY(n, d) n,
+ typedef enum {
+ ENCODINGS
+ ENCODING_max
+ } OperandEncoding;
+#undef ENUM_ENTRY
+
+/*
+ * Semantic interpretations of instruction operands.
+ */
+
+#define TYPES \
+ ENUM_ENTRY(TYPE_NONE, "") \
+ ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
+ ENUM_ENTRY(TYPE_REL16, "2-byte") \
+ ENUM_ENTRY(TYPE_REL32, "4-byte") \
+ ENUM_ENTRY(TYPE_REL64, "8-byte") \
+ ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
+ ENUM_ENTRY(TYPE_R16, "2-byte") \
+ ENUM_ENTRY(TYPE_R32, "4-byte") \
+ ENUM_ENTRY(TYPE_R64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
+ ENUM_ENTRY(TYPE_IMM16, "2-byte") \
+ ENUM_ENTRY(TYPE_IMM32, "4-byte") \
+ ENUM_ENTRY(TYPE_IMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
+ ENUM_ENTRY(TYPE_RM16, "2-byte") \
+ ENUM_ENTRY(TYPE_RM32, "4-byte") \
+ ENUM_ENTRY(TYPE_RM64, "8-byte") \
+ ENUM_ENTRY(TYPE_M, "Memory operand") \
+ ENUM_ENTRY(TYPE_M8, "1-byte") \
+ ENUM_ENTRY(TYPE_M16, "2-byte") \
+ ENUM_ENTRY(TYPE_M32, "4-byte") \
+ ENUM_ENTRY(TYPE_M64, "8-byte") \
+ ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
+ ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
+ ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
+ ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \
+ ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \
+ ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
+ "base)") \
+ ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
+ ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
+ ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
+ ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \
+ "2 = SS, 3 = DS, 4 = FS, 5 = GS") \
+ ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \
+ ENUM_ENTRY(TYPE_M64FP, "64-bit") \
+ ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
+ ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \
+ "floating-point instructions") \
+ ENUM_ENTRY(TYPE_M32INT, "4-byte") \
+ ENUM_ENTRY(TYPE_M64INT, "8-byte") \
+ ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
+ ENUM_ENTRY(TYPE_MM, "MMX register operand") \
+ ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \
+ ENUM_ENTRY(TYPE_MM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
+ ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
+ ENUM_ENTRY(TYPE_XMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_XMM128, "16-byte") \
+ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
+ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
+ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
+ ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \
+ ENUM_ENTRY(TYPE_CR64, "8-byte") \
+ \
+ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
+ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
+ ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
+ ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
+ ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
+ ENUM_ENTRY(TYPE_DUP1, "operand 1") \
+ ENUM_ENTRY(TYPE_DUP2, "operand 2") \
+ ENUM_ENTRY(TYPE_DUP3, "operand 3") \
+ ENUM_ENTRY(TYPE_DUP4, "operand 4") \
+ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
+
+#define ENUM_ENTRY(n, d) n,
+typedef enum {
+ TYPES
+ TYPE_max
+} OperandType;
+#undef ENUM_ENTRY
+
+/*
+ * OperandSpecifier - The specification for how to extract and interpret one
+ * operand.
+ */
+struct OperandSpecifier {
+ OperandEncoding encoding;
+ OperandType type;
+};
+
+/*
+ * Indicates where the opcode modifier (if any) is to be found. Extended
+ * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
+ */
+
+#define MODIFIER_TYPES \
+ ENUM_ENTRY(MODIFIER_NONE) \
+ ENUM_ENTRY(MODIFIER_OPCODE) \
+ ENUM_ENTRY(MODIFIER_MODRM)
+
+#define ENUM_ENTRY(n) n,
+typedef enum {
+ MODIFIER_TYPES
+ MODIFIER_max
+} ModifierType;
+#undef ENUM_ENTRY
+
+#define X86_MAX_OPERANDS 5
+
+/*
+ * The specification for how to extract and interpret a full instruction and
+ * its operands.
+ */
+struct InstructionSpecifier {
+ ModifierType modifierType;
+ uint8_t modifierBase;
+ struct OperandSpecifier operands[X86_MAX_OPERANDS];
+
+ /* The macro below must be defined wherever this file is included. */
+ INSTRUCTION_SPECIFIER_FIELDS
+};
+
+/*
+ * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
+ * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
+ * respectively.
+ */
+typedef enum {
+ MODE_16BIT,
+ MODE_32BIT,
+ MODE_64BIT
+} DisassemblerMode;
+
+#endif
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index b311a6e..6098dbf 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -15,8 +15,8 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
X86GenRegisterInfo.inc X86GenInstrNames.inc \
X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \
X86GenAsmWriter1.inc X86GenDAGISel.inc \
- X86GenFastISel.inc \
- X86GenCallingConv.inc X86GenSubtarget.inc
+ X86GenDisassemblerTables.inc X86GenFastISel.inc \
+ X86GenCallingConv.inc X86GenSubtarget.inc \
DIRS = AsmPrinter AsmParser Disassembler TargetInfo
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 0152121..90d9083 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -38,6 +38,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
+extern "C" void LLVMInitializeX86Disassembler();
+
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
@@ -47,6 +49,8 @@ extern "C" void LLVMInitializeX86Target() {
RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
+ LLVMInitializeX86Disassembler();
+
// Register the code emitter.
TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter);
TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter);
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index daf8676..ce9b66f 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -23,6 +23,8 @@ add_executable(tblgen
TGValueTypes.cpp
TableGen.cpp
TableGenBackend.cpp
+ X86DisassemblerTables.cpp
+ X86RecognizableInstr.cpp
)
target_link_libraries(tblgen LLVMSupport LLVMSystem)
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index cc13125..61b9b15 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -10,7 +10,86 @@
#include "DisassemblerEmitter.h"
#include "CodeGenTarget.h"
#include "Record.h"
+#include "X86DisassemblerTables.h"
+#include "X86RecognizableInstr.h"
using namespace llvm;
+using namespace llvm::X86Disassembler;
+
+/// DisassemblerEmitter - Contains disassembler table emitters for various
+/// architectures.
+
+/// X86 Disassembler Emitter
+///
+/// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR
+/// THE END OF THIS COMMENT!
+///
+/// The X86 disassembler emitter is part of the X86 Disassembler, which is
+/// documented in lib/Target/X86/X86Disassembler.h.
+///
+/// The emitter produces the tables that the disassembler uses to translate
+/// instructions. The emitter generates the following tables:
+///
+/// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to
+/// instruction contexts. Although for each attribute there are cases where
+/// that attribute determines decoding, in the majority of cases decoding is
+/// the same whether or not an attribute is present. For example, a 64-bit
+/// instruction with an OPSIZE prefix and an XS prefix decodes the same way in
+/// all cases as a 64-bit instruction with only OPSIZE set. (The XS prefix
+/// may have effects on its execution, but does not change the instruction
+/// returned.) This allows considerable space savings in other tables.
+/// - Four tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and
+/// THREEBYTE3A_SYM) contain the hierarchy that the decoder traverses while
+/// decoding an instruction. At the lowest level of this hierarchy are
+/// instruction UIDs, 16-bit integers that can be used to uniquely identify
+/// the instruction and correspond exactly to its position in the list of
+/// CodeGenInstructions for the target.
+/// - One table (INSTRUCTIONS_SYM) contains information about the operands of
+/// each instruction and how to decode them.
+///
+/// During table generation, there may be conflicts between instructions that
+/// occupy the same space in the decode tables. These conflicts are resolved as
+/// follows in setTableFields() (X86DisassemblerTables.cpp)
+///
+/// - If the current context is the native context for one of the instructions
+/// (that is, the attributes specified for it in the LLVM tables specify
+/// precisely the current context), then it has priority.
+/// - If the current context isn't native for either of the instructions, then
+/// the higher-priority context wins (that is, the one that is more specific).
+/// That hierarchy is determined by outranks() (X86DisassemblerTables.cpp)
+/// - If the current context is native for both instructions, then the table
+/// emitter reports a conflict and dies.
+///
+/// *** RESOLUTION FOR "Primary decode conflict"S
+///
+/// If two instructions collide, typically the solution is (in order of
+/// likelihood):
+///
+/// (1) to filter out one of the instructions by editing filter()
+/// (X86RecognizableInstr.cpp). This is the most common resolution, but
+/// check the Intel manuals first to make sure that (2) and (3) are not the
+/// problem.
+/// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are
+/// accurate. Sometimes they are not.
+/// (3) to fix the tables to reflect the actual context (for example, required
+/// prefixes), and possibly to add a new context by editing
+/// lib/Target/X86/X86DisassemblerDecoderCommon.h. This is unlikely to be
+/// the cause.
+///
+/// DisassemblerEmitter.cpp contains the implementation for the emitter,
+/// which simply pulls out instructions from the CodeGenTarget and pushes them
+/// into X86DisassemblerTables.
+/// X86DisassemblerTables.h contains the interface for the instruction tables,
+/// which manage and emit the structures discussed above.
+/// X86DisassemblerTables.cpp contains the implementation for the instruction
+/// tables.
+/// X86ModRMFilters.h contains filters that can be used to determine which
+/// ModR/M values are valid for a particular instruction. These are used to
+/// populate ModRMDecisions.
+/// X86RecognizableInstr.h contains the interface for a single instruction,
+/// which knows how to translate itself from a CodeGenInstruction and provide
+/// the information necessary for integration into the tables.
+/// X86RecognizableInstr.cpp contains the implementation for a single
+/// instruction.
void DisassemblerEmitter::run(raw_ostream &OS) {
CodeGenTarget Target;
@@ -25,6 +104,26 @@ void DisassemblerEmitter::run(raw_ostream &OS) {
<< " *===---------------------------------------------------------------"
<< "-------===*/\n";
+ // X86 uses a custom disassembler.
+ if (Target.getName() == "X86") {
+ DisassemblerTables Tables;
+
+ std::vector<const CodeGenInstruction*> numberedInstructions;
+ Target.getInstructionsByEnumValue(numberedInstructions);
+
+ for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i)
+ RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i);
+
+ // FIXME: As long as we are using exceptions, might as well drop this to the
+ // actual conflict site.
+ if (Tables.hasConflicts())
+ throw TGError(Target.getTargetRecord()->getLoc(),
+ "Primary decode conflict");
+
+ Tables.emit(OS);
+ return;
+ }
+
throw TGError(Target.getTargetRecord()->getLoc(),
"Unable to generate disassembler for this target");
}
diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h
new file mode 100644
index 0000000..9003cbf
--- /dev/null
+++ b/utils/TableGen/X86DisassemblerShared.h
@@ -0,0 +1,37 @@
+//===- X86DisassemblerShared.h - Emitter shared header ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86DISASSEMBLERSHARED_H
+#define X86DISASSEMBLERSHARED_H
+
+#include <string>
+
+#define INSTRUCTION_SPECIFIER_FIELDS \
+ bool filtered; \
+ InstructionContext insnContext; \
+ std::string name; \
+ \
+ InstructionSpecifier() { \
+ filtered = false; \
+ insnContext = IC; \
+ name = ""; \
+ modifierType = MODIFIER_NONE; \
+ modifierBase = 0; \
+ bzero(operands, sizeof(operands)); \
+ }
+
+#define INSTRUCTION_IDS \
+ InstrUID instructionIDs[256];
+
+#include "../../lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h"
+
+#undef INSTRUCTION_SPECIFIER_FIELDS
+#undef INSTRUCTION_IDS
+
+#endif
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
new file mode 100644
index 0000000..83284a7
--- /dev/null
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -0,0 +1,603 @@
+//===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler Emitter.
+// It contains the implementation of the disassembler tables.
+// Documentation for the disassembler emitter in general can be found in
+// X86DisasemblerEmitter.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86DisassemblerShared.h"
+#include "X86DisassemblerTables.h"
+
+#include "TableGenBackend.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+
+#include <string>
+
+using namespace llvm;
+using namespace X86Disassembler;
+
+/// inheritsFrom - Indicates whether all instructions in one class also belong
+/// to another class.
+///
+/// @param child - The class that may be the subset
+/// @param parent - The class that may be the superset
+/// @return - True if child is a subset of parent, false otherwise.
+static inline bool inheritsFrom(InstructionContext child,
+ InstructionContext parent) {
+ if (child == parent)
+ return true;
+
+ switch (parent) {
+ case IC:
+ return true;
+ case IC_64BIT:
+ return(inheritsFrom(child, IC_64BIT_REXW) ||
+ inheritsFrom(child, IC_64BIT_OPSIZE) ||
+ inheritsFrom(child, IC_64BIT_XD) ||
+ inheritsFrom(child, IC_64BIT_XS));
+ case IC_OPSIZE:
+ return(inheritsFrom(child, IC_64BIT_OPSIZE));
+ case IC_XD:
+ return(inheritsFrom(child, IC_64BIT_XD));
+ case IC_XS:
+ return(inheritsFrom(child, IC_64BIT_XS));
+ case IC_64BIT_REXW:
+ return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
+ inheritsFrom(child, IC_64BIT_REXW_XD) ||
+ inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
+ case IC_64BIT_OPSIZE:
+ return(inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
+ case IC_64BIT_XD:
+ return(inheritsFrom(child, IC_64BIT_REXW_XD));
+ case IC_64BIT_XS:
+ return(inheritsFrom(child, IC_64BIT_REXW_XS));
+ case IC_64BIT_REXW_XD:
+ return false;
+ case IC_64BIT_REXW_XS:
+ return false;
+ case IC_64BIT_REXW_OPSIZE:
+ return false;
+ default:
+ return false;
+ }
+}
+
+/// outranks - Indicates whether, if an instruction has two different applicable
+/// classes, which class should be preferred when performing decode. This
+/// imposes a total ordering (ties are resolved toward "lower")
+///
+/// @param upper - The class that may be preferable
+/// @param lower - The class that may be less preferable
+/// @return - True if upper is to be preferred, false otherwise.
+static inline bool outranks(InstructionContext upper,
+ InstructionContext lower) {
+ assert(upper < IC_max);
+ assert(lower < IC_max);
+
+#define ENUM_ENTRY(n, r, d) r,
+ static int ranks[IC_max] = {
+ INSTRUCTION_CONTEXTS
+ };
+#undef ENUM_ENTRY
+
+ return (ranks[upper] > ranks[lower]);
+}
+
+/// stringForContext - Returns a string containing the name of a particular
+/// InstructionContext, usually for diagnostic purposes.
+///
+/// @param insnContext - The instruction class to transform to a string.
+/// @return - A statically-allocated string constant that contains the
+/// name of the instruction class.
+static inline const char* stringForContext(InstructionContext insnContext) {
+ switch (insnContext) {
+ default:
+ llvm_unreachable("Unhandled instruction class");
+#define ENUM_ENTRY(n, r, d) case n: return #n; break;
+ INSTRUCTION_CONTEXTS
+#undef ENUM_ENTRY
+ }
+}
+
+/// stringForOperandType - Like stringForContext, but for OperandTypes.
+static inline const char* stringForOperandType(OperandType type) {
+ switch (type) {
+ default:
+ llvm_unreachable("Unhandled type");
+#define ENUM_ENTRY(i, d) case i: return #i;
+ TYPES
+#undef ENUM_ENTRY
+ }
+}
+
+/// stringForOperandEncoding - like stringForContext, but for
+/// OperandEncodings.
+static inline const char* stringForOperandEncoding(OperandEncoding encoding) {
+ switch (encoding) {
+ default:
+ llvm_unreachable("Unhandled encoding");
+#define ENUM_ENTRY(i, d) case i: return #i;
+ ENCODINGS
+#undef ENUM_ENTRY
+ }
+}
+
+void DisassemblerTables::emitOneID(raw_ostream &o,
+ uint32_t &i,
+ InstrUID id,
+ bool addComma) const {
+ if (id)
+ o.indent(i * 2) << format("0x%hx", id);
+ else
+ o.indent(i * 2) << 0;
+
+ if (addComma)
+ o << ", ";
+ else
+ o << " ";
+
+ o << "/* ";
+ o << InstructionSpecifiers[id].name;
+ o << "*/";
+
+ o << "\n";
+}
+
+/// emitEmptyTable - Emits the modRMEmptyTable, which is used as a ID table by
+/// all ModR/M decisions for instructions that are invalid for all possible
+/// ModR/M byte values.
+///
+/// @param o - The output stream on which to emit the table.
+/// @param i - The indentation level for that output stream.
+static void emitEmptyTable(raw_ostream &o, uint32_t &i)
+{
+ o.indent(i * 2) << "InstrUID modRMEmptyTable[1] = { 0 };" << "\n";
+ o << "\n";
+}
+
+/// getDecisionType - Determines whether a ModRM decision with 255 entries can
+/// be compacted by eliminating redundant information.
+///
+/// @param decision - The decision to be compacted.
+/// @return - The compactest available representation for the decision.
+static ModRMDecisionType getDecisionType(ModRMDecision &decision)
+{
+ bool satisfiesOneEntry = true;
+ bool satisfiesSplitRM = true;
+
+ uint16_t index;
+
+ for (index = 0; index < 256; ++index) {
+ if (decision.instructionIDs[index] != decision.instructionIDs[0])
+ satisfiesOneEntry = false;
+
+ if (((index & 0xc0) == 0xc0) &&
+ (decision.instructionIDs[index] != decision.instructionIDs[0xc0]))
+ satisfiesSplitRM = false;
+
+ if (((index & 0xc0) != 0xc0) &&
+ (decision.instructionIDs[index] != decision.instructionIDs[0x00]))
+ satisfiesSplitRM = false;
+ }
+
+ if (satisfiesOneEntry)
+ return MODRM_ONEENTRY;
+
+ if (satisfiesSplitRM)
+ return MODRM_SPLITRM;
+
+ return MODRM_FULL;
+}
+
+/// stringForDecisionType - Returns a statically-allocated string corresponding
+/// to a particular decision type.
+///
+/// @param dt - The decision type.
+/// @return - A pointer to the statically-allocated string (e.g.,
+/// "MODRM_ONEENTRY" for MODRM_ONEENTRY).
+static const char* stringForDecisionType(ModRMDecisionType dt)
+{
+#define ENUM_ENTRY(n) case n: return #n;
+ switch (dt) {
+ default:
+ llvm_unreachable("Unknown decision type");
+ MODRMTYPES
+ };
+#undef ENUM_ENTRY
+}
+
+/// stringForModifierType - Returns a statically-allocated string corresponding
+/// to an opcode modifier type.
+///
+/// @param mt - The modifier type.
+/// @return - A pointer to the statically-allocated string (e.g.,
+/// "MODIFIER_NONE" for MODIFIER_NONE).
+static const char* stringForModifierType(ModifierType mt)
+{
+#define ENUM_ENTRY(n) case n: return #n;
+ switch(mt) {
+ default:
+ llvm_unreachable("Unknown modifier type");
+ MODIFIER_TYPES
+ };
+#undef ENUM_ENTRY
+}
+
+DisassemblerTables::DisassemblerTables() {
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ Tables[i] = new ContextDecision;
+ bzero(Tables[i], sizeof(ContextDecision));
+ }
+
+ HasConflicts = false;
+}
+
+DisassemblerTables::~DisassemblerTables() {
+ unsigned i;
+
+ for (i = 0; i < 4; i++)
+ delete Tables[i];
+}
+
+void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
+ raw_ostream &o2,
+ uint32_t &i1,
+ uint32_t &i2,
+ ModRMDecision &decision)
+ const {
+ static uint64_t sTableNumber = 0;
+ uint64_t thisTableNumber = sTableNumber;
+ ModRMDecisionType dt = getDecisionType(decision);
+ uint16_t index;
+
+ if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0)
+ {
+ o2.indent(i2) << "{ /* ModRMDecision */" << "\n";
+ i2++;
+
+ o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
+ o2.indent(i2) << "modRMEmptyTable";
+
+ i2--;
+ o2.indent(i2) << "}";
+ return;
+ }
+
+ o1.indent(i1) << "InstrUID modRMTable" << thisTableNumber;
+
+ switch (dt) {
+ default:
+ llvm_unreachable("Unknown decision type");
+ case MODRM_ONEENTRY:
+ o1 << "[1]";
+ break;
+ case MODRM_SPLITRM:
+ o1 << "[2]";
+ break;
+ case MODRM_FULL:
+ o1 << "[256]";
+ break;
+ }
+
+ o1 << " = {" << "\n";
+ i1++;
+
+ switch (dt) {
+ default:
+ llvm_unreachable("Unknown decision type");
+ case MODRM_ONEENTRY:
+ emitOneID(o1, i1, decision.instructionIDs[0], false);
+ break;
+ case MODRM_SPLITRM:
+ emitOneID(o1, i1, decision.instructionIDs[0x00], true); // mod = 0b00
+ emitOneID(o1, i1, decision.instructionIDs[0xc0], false); // mod = 0b11
+ break;
+ case MODRM_FULL:
+ for (index = 0; index < 256; ++index)
+ emitOneID(o1, i1, decision.instructionIDs[index], index < 255);
+ break;
+ }
+
+ i1--;
+ o1.indent(i1) << "};" << "\n";
+ o1 << "\n";
+
+ o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n";
+ i2++;
+
+ o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
+ o2.indent(i2) << "modRMTable" << sTableNumber << "\n";
+
+ i2--;
+ o2.indent(i2) << "}";
+
+ ++sTableNumber;
+}
+
+void DisassemblerTables::emitOpcodeDecision(
+ raw_ostream &o1,
+ raw_ostream &o2,
+ uint32_t &i1,
+ uint32_t &i2,
+ OpcodeDecision &decision) const {
+ uint16_t index;
+
+ o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n";
+ i2++;
+ o2.indent(i2) << "{" << "\n";
+ i2++;
+
+ for (index = 0; index < 256; ++index) {
+ o2.indent(i2);
+
+ o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n";
+
+ emitModRMDecision(o1, o2, i1, i2, decision.modRMDecisions[index]);
+
+ if (index < 255)
+ o2 << ",";
+
+ o2 << "\n";
+ }
+
+ i2--;
+ o2.indent(i2) << "}" << "\n";
+ i2--;
+ o2.indent(i2) << "}" << "\n";
+}
+
+void DisassemblerTables::emitContextDecision(
+ raw_ostream &o1,
+ raw_ostream &o2,
+ uint32_t &i1,
+ uint32_t &i2,
+ ContextDecision &decision,
+ const char* name) const {
+ o2.indent(i2) << "struct ContextDecision " << name << " = {" << "\n";
+ i2++;
+ o2.indent(i2) << "{ /* opcodeDecisions */" << "\n";
+ i2++;
+
+ unsigned index;
+
+ for (index = 0; index < IC_max; ++index) {
+ o2.indent(i2) << "/* ";
+ o2 << stringForContext((InstructionContext)index);
+ o2 << " */";
+ o2 << "\n";
+
+ emitOpcodeDecision(o1, o2, i1, i2, decision.opcodeDecisions[index]);
+
+ if (index + 1 < IC_max)
+ o2 << ", ";
+ }
+
+ i2--;
+ o2.indent(i2) << "}" << "\n";
+ i2--;
+ o2.indent(i2) << "};" << "\n";
+}
+
+void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
+ const {
+ o.indent(i * 2) << "struct InstructionSpecifier ";
+ o << INSTRUCTIONS_STR << "[";
+ o << InstructionSpecifiers.size();
+ o << "] = {" << "\n";
+
+ i++;
+
+ uint16_t numInstructions = InstructionSpecifiers.size();
+ uint16_t index, operandIndex;
+
+ for (index = 0; index < numInstructions; ++index) {
+ o.indent(i * 2) << "{ /* " << index << " */" << "\n";
+ i++;
+
+ o.indent(i * 2) <<
+ stringForModifierType(InstructionSpecifiers[index].modifierType);
+ o << "," << "\n";
+
+ o.indent(i * 2) << "0x";
+ o << format("%02hhx", (uint16_t)InstructionSpecifiers[index].modifierBase);
+ o << "," << "\n";
+
+ o.indent(i * 2) << "{" << "\n";
+ i++;
+
+ for (operandIndex = 0; operandIndex < X86_MAX_OPERANDS; ++operandIndex) {
+ o.indent(i * 2) << "{ ";
+ o << stringForOperandEncoding(InstructionSpecifiers[index]
+ .operands[operandIndex]
+ .encoding);
+ o << ", ";
+ o << stringForOperandType(InstructionSpecifiers[index]
+ .operands[operandIndex]
+ .type);
+ o << " }";
+
+ if (operandIndex < X86_MAX_OPERANDS - 1)
+ o << ",";
+
+ o << "\n";
+ }
+
+ i--;
+ o.indent(i * 2) << "}," << "\n";
+
+ o.indent(i * 2) << "\"" << InstructionSpecifiers[index].name << "\"";
+ o << "\n";
+
+ i--;
+ o.indent(i * 2) << "}";
+
+ if (index + 1 < numInstructions)
+ o << ",";
+
+ o << "\n";
+ }
+
+ i--;
+ o.indent(i * 2) << "};" << "\n";
+}
+
+void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
+ uint16_t index;
+
+ o.indent(i * 2) << "InstructionContext ";
+ o << CONTEXTS_STR << "[256] = {" << "\n";
+ i++;
+
+ for (index = 0; index < 256; ++index) {
+ o.indent(i * 2);
+
+ if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
+ o << "IC_64BIT_REXW_XS";
+ else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
+ o << "IC_64BIT_REXW_XD";
+ else if ((index & ATTR_64BIT) && (index & ATTR_REXW) &&
+ (index & ATTR_OPSIZE))
+ o << "IC_64BIT_REXW_OPSIZE";
+ else if ((index & ATTR_64BIT) && (index & ATTR_XS))
+ o << "IC_64BIT_XS";
+ else if ((index & ATTR_64BIT) && (index & ATTR_XD))
+ o << "IC_64BIT_XD";
+ else if ((index & ATTR_64BIT) && (index & ATTR_OPSIZE))
+ o << "IC_64BIT_OPSIZE";
+ else if ((index & ATTR_64BIT) && (index & ATTR_REXW))
+ o << "IC_64BIT_REXW";
+ else if ((index & ATTR_64BIT))
+ o << "IC_64BIT";
+ else if (index & ATTR_XS)
+ o << "IC_XS";
+ else if (index & ATTR_XD)
+ o << "IC_XD";
+ else if (index & ATTR_OPSIZE)
+ o << "IC_OPSIZE";
+ else
+ o << "IC";
+
+ if (index < 255)
+ o << ",";
+ else
+ o << " ";
+
+ o << " /* " << index << " */";
+
+ o << "\n";
+ }
+
+ i--;
+ o.indent(i * 2) << "};" << "\n";
+}
+
+void DisassemblerTables::emitContextDecisions(raw_ostream &o1,
+ raw_ostream &o2,
+ uint32_t &i1,
+ uint32_t &i2)
+ const {
+ emitContextDecision(o1, o2, i1, i2, *Tables[0], ONEBYTE_STR);
+ emitContextDecision(o1, o2, i1, i2, *Tables[1], TWOBYTE_STR);
+ emitContextDecision(o1, o2, i1, i2, *Tables[2], THREEBYTE38_STR);
+ emitContextDecision(o1, o2, i1, i2, *Tables[3], THREEBYTE3A_STR);
+}
+
+void DisassemblerTables::emit(raw_ostream &o) const {
+ uint32_t i1 = 0;
+ uint32_t i2 = 0;
+
+ std::string s1;
+ std::string s2;
+
+ raw_string_ostream o1(s1);
+ raw_string_ostream o2(s2);
+
+ emitInstructionInfo(o, i2);
+ o << "\n";
+
+ emitContextTable(o, i2);
+ o << "\n";
+
+ emitEmptyTable(o1, i1);
+ emitContextDecisions(o1, o2, i1, i2);
+
+ o << o1.str();
+ o << "\n";
+ o << o2.str();
+ o << "\n";
+ o << "\n";
+}
+
+void DisassemblerTables::setTableFields(ModRMDecision &decision,
+ const ModRMFilter &filter,
+ InstrUID uid,
+ uint8_t opcode) {
+ unsigned index;
+
+ for (index = 0; index < 256; ++index) {
+ if (filter.accepts(index)) {
+ if (decision.instructionIDs[index] == uid)
+ continue;
+
+ if (decision.instructionIDs[index] != 0) {
+ InstructionSpecifier &newInfo =
+ InstructionSpecifiers[uid];
+ InstructionSpecifier &previousInfo =
+ InstructionSpecifiers[decision.instructionIDs[index]];
+
+ if(newInfo.filtered)
+ continue; // filtered instructions get lowest priority
+
+ if(previousInfo.name == "NOOP")
+ continue; // special case for XCHG32ar and NOOP
+
+ if (outranks(previousInfo.insnContext, newInfo.insnContext))
+ continue;
+
+ if (previousInfo.insnContext == newInfo.insnContext &&
+ !previousInfo.filtered) {
+ errs() << "Error: Primary decode conflict: ";
+ errs() << newInfo.name << " would overwrite " << previousInfo.name;
+ errs() << "\n";
+ errs() << "ModRM " << index << "\n";
+ errs() << "Opcode " << (uint16_t)opcode << "\n";
+ errs() << "Context " << stringForContext(newInfo.insnContext) << "\n";
+ HasConflicts = true;
+ }
+ }
+
+ decision.instructionIDs[index] = uid;
+ }
+ }
+}
+
+void DisassemblerTables::setTableFields(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode,
+ const ModRMFilter &filter,
+ InstrUID uid) {
+ unsigned index;
+
+ ContextDecision &decision = *Tables[type];
+
+ for (index = 0; index < IC_max; ++index) {
+ if (inheritsFrom((InstructionContext)index,
+ InstructionSpecifiers[uid].insnContext))
+ setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode],
+ filter,
+ uid,
+ opcode);
+ }
+}
diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h
new file mode 100644
index 0000000..08eba01
--- /dev/null
+++ b/utils/TableGen/X86DisassemblerTables.h
@@ -0,0 +1,291 @@
+//===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler Emitter.
+// It contains the interface of the disassembler tables.
+// Documentation for the disassembler emitter in general can be found in
+// X86DisasemblerEmitter.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86DISASSEMBLERTABLES_H
+#define X86DISASSEMBLERTABLES_H
+
+#include "X86DisassemblerShared.h"
+#include "X86ModRMFilters.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+#include <vector>
+
+namespace llvm {
+
+namespace X86Disassembler {
+
+/// DisassemblerTables - Encapsulates all the decode tables being generated by
+/// the table emitter. Contains functions to populate the tables as well as
+/// to emit them as hierarchical C structures suitable for consumption by the
+/// runtime.
+class DisassemblerTables {
+private:
+ /// The decoder tables. There is one for each opcode type:
+ /// [0] one-byte opcodes
+ /// [1] two-byte opcodes of the form 0f __
+ /// [2] three-byte opcodes of the form 0f 38 __
+ /// [3] three-byte opcodes of the form 0f 3a __
+ ContextDecision* Tables[4];
+
+ /// The instruction information table
+ std::vector<InstructionSpecifier> InstructionSpecifiers;
+
+ /// True if there are primary decode conflicts in the instruction set
+ bool HasConflicts;
+
+ /// emitOneID - Emits a table entry for a single instruction entry, at the
+ /// innermost level of the structure hierarchy. The entry is printed out
+ /// in the format "nnnn, /* MNEMONIC */" where nnnn is the ID in decimal,
+ /// the comma is printed if addComma is true, and the menonic is the name
+ /// of the instruction as listed in the LLVM tables.
+ ///
+ /// @param o - The output stream to print the entry on.
+ /// @param i - The indentation level for o.
+ /// @param id - The unique ID of the instruction to print.
+ /// @param addComma - Whether or not to print a comma after the ID. True if
+ /// additional items will follow.
+ void emitOneID(raw_ostream &o,
+ uint32_t &i,
+ InstrUID id,
+ bool addComma) const;
+
+ /// emitModRMDecision - Emits a table of entries corresponding to a single
+ /// ModR/M decision. Compacts the ModR/M decision if possible. ModR/M
+ /// decisions are printed as:
+ ///
+ /// { /* struct ModRMDecision */
+ /// TYPE,
+ /// modRMTablennnn
+ /// }
+ ///
+ /// where nnnn is a unique ID for the corresponding table of IDs.
+ /// TYPE indicates whether the table has one entry that is the same
+ /// regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one
+ /// for bytes 0xc0-0xff -, or 256 entries, one for each possible byte.
+ /// nnnn is the number of a table for looking up these values. The tables
+ /// are writen separately so that tables consisting entirely of zeros will
+ /// not be duplicated. (These all have the name modRMEmptyTable.) A table
+ /// is printed as:
+ ///
+ /// InstrUID modRMTablennnn[k] = {
+ /// nnnn, /* MNEMONIC */
+ /// ...
+ /// nnnn /* MNEMONIC */
+ /// };
+ ///
+ /// @param o1 - The output stream to print the ID table to.
+ /// @param o2 - The output stream to print the decision structure to.
+ /// @param i1 - The indentation level to use with stream o1.
+ /// @param i2 - The indentation level to use with stream o2.
+ /// @param decision - The ModR/M decision to emit. This decision has 256
+ /// entries - emitModRMDecision decides how to compact it.
+ void emitModRMDecision(raw_ostream &o1,
+ raw_ostream &o2,
+ uint32_t &i1,
+ uint32_t &i2,
+ ModRMDecision &decision) const;
+
+ /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M
+ /// decisions. An OpcodeDecision is printed as:
+ ///
+ /// { /* struct OpcodeDecision */
+ /// /* 0x00 */
+ /// { /* struct ModRMDecision */
+ /// ...
+ /// }
+ /// ...
+ /// }
+ ///
+ /// where the ModRMDecision structure is printed as described in the
+ /// documentation for emitModRMDecision(). emitOpcodeDecision() passes on a
+ /// stream and indent level for the UID tables generated by
+ /// emitModRMDecision(), but does not use them itself.
+ ///
+ /// @param o1 - The output stream to print the ID tables generated by
+ /// emitModRMDecision() to.
+ /// @param o2 - The output stream for the decision structure itself.
+ /// @param i1 - The indent level to use with stream o1.
+ /// @param i2 - The indent level to use with stream o2.
+ /// @param decision - The OpcodeDecision to emit along with its subsidiary
+ /// structures.
+ void emitOpcodeDecision(raw_ostream &o1,
+ raw_ostream &o2,
+ uint32_t &i1,
+ uint32_t &i2,
+ OpcodeDecision &decision) const;
+
+ /// emitContextDecision - Emits a ContextDecision and all its subsidiary
+ /// Opcode and ModRMDecisions. A ContextDecision is printed as:
+ ///
+ /// struct ContextDecision NAME = {
+ /// { /* OpcodeDecisions */
+ /// /* IC */
+ /// { /* struct OpcodeDecision */
+ /// ...
+ /// },
+ /// ...
+ /// }
+ /// }
+ ///
+ /// NAME is the name of the ContextDecision (typically one of the four names
+ /// ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and THREEBYTE3A_SYM from
+ /// X86DisassemblerDecoderCommon.h).
+ /// IC is one of the contexts in InstructionContext. There is an opcode
+ /// decision for each possible context.
+ /// The OpcodeDecision structures are printed as described in the
+ /// documentation for emitOpcodeDecision.
+ ///
+ /// @param o1 - The output stream to print the ID tables generated by
+ /// emitModRMDecision() to.
+ /// @param o2 - The output stream to print the decision structure to.
+ /// @param i1 - The indent level to use with stream o1.
+ /// @param i2 - The indent level to use with stream o2.
+ /// @param decision - The ContextDecision to emit along with its subsidiary
+ /// structures.
+ /// @param name - The name for the ContextDecision.
+ void emitContextDecision(raw_ostream &o1,
+ raw_ostream &o2,
+ uint32_t &i1,
+ uint32_t &i2,
+ ContextDecision &decision,
+ const char* name) const;
+
+ /// emitInstructionInfo - Prints the instruction specifier table, which has
+ /// one entry for each instruction, and contains name and operand
+ /// information. This table is printed as:
+ ///
+ /// struct InstructionSpecifier CONTEXTS_SYM[k] = {
+ /// {
+ /// /* nnnn */
+ /// "MNEMONIC",
+ /// 0xnn,
+ /// {
+ /// {
+ /// ENCODING,
+ /// TYPE
+ /// },
+ /// ...
+ /// }
+ /// },
+ /// };
+ ///
+ /// k is the total number of instructions.
+ /// nnnn is the ID of the current instruction (0-based). This table
+ /// includes entries for non-instructions like PHINODE.
+ /// 0xnn is the lowest possible opcode for the current instruction, used for
+ /// AddRegFrm instructions to compute the operand's value.
+ /// ENCODING and TYPE describe the encoding and type for a single operand.
+ ///
+ /// @param o - The output stream to which the instruction table should be
+ /// written.
+ /// @param i - The indent level for use with the stream.
+ void emitInstructionInfo(raw_ostream &o, uint32_t &i) const;
+
+ /// emitContextTable - Prints the table that is used to translate from an
+ /// instruction attribute mask to an instruction context. This table is
+ /// printed as:
+ ///
+ /// InstructionContext CONTEXTS_STR[256] = {
+ /// IC, /* 0x00 */
+ /// ...
+ /// };
+ ///
+ /// IC is the context corresponding to the mask 0x00, and there are 256
+ /// possible masks.
+ ///
+ /// @param o - The output stream to which the context table should be written.
+ /// @param i - The indent level for use with the stream.
+ void emitContextTable(raw_ostream &o, uint32_t &i) const;
+
+ /// emitContextDecisions - Prints all four ContextDecision structures using
+ /// emitContextDecision().
+ ///
+ /// @param o1 - The output stream to print the ID tables generated by
+ /// emitModRMDecision() to.
+ /// @param o2 - The output stream to print the decision structures to.
+ /// @param i1 - The indent level to use with stream o1.
+ /// @param i2 - The indent level to use with stream o2.
+ void emitContextDecisions(raw_ostream &o1,
+ raw_ostream &o2,
+ uint32_t &i1,
+ uint32_t &i2) const;
+
+ /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a
+ /// ModRMDecision to refer to a particular instruction ID.
+ ///
+ /// @param decision - The ModRMDecision to populate.
+ /// @param filter - The filter to use in deciding which entries to populate.
+ /// @param uid - The unique ID to set matching entries to.
+ /// @param opcode - The opcode of the instruction, for error reporting.
+ void setTableFields(ModRMDecision &decision,
+ const ModRMFilter &filter,
+ InstrUID uid,
+ uint8_t opcode);
+public:
+ /// Constructor - Allocates space for the class decisions and clears them.
+ DisassemblerTables();
+
+ ~DisassemblerTables();
+
+ /// emit - Emits the instruction table, context table, and class decisions.
+ ///
+ /// @param o - The output stream to print the tables to.
+ void emit(raw_ostream &o) const;
+
+ /// setTableFields - Uses the opcode type, instruction context, opcode, and a
+ /// ModRMFilter as criteria to set a particular set of entries in the
+ /// decode tables to point to a specific uid.
+ ///
+ /// @param type - The opcode type (ONEBYTE, TWOBYTE, etc.)
+ /// @param insnContext - The context to use (IC, IC_64BIT, etc.)
+ /// @param opcode - The last byte of the opcode (not counting any escape
+ /// or extended opcodes).
+ /// @param filter - The ModRMFilter that decides which ModR/M byte values
+ /// correspond to the desired instruction.
+ /// @param uid - The unique ID of the instruction.
+ void setTableFields(OpcodeType type,
+ InstructionContext insnContext,
+ uint8_t opcode,
+ const ModRMFilter &filter,
+ InstrUID uid);
+
+ /// specForUID - Returns the instruction specifier for a given unique
+ /// instruction ID. Used when resolving collisions.
+ ///
+ /// @param uid - The unique ID of the instruction.
+ /// @return - A reference to the instruction specifier.
+ InstructionSpecifier& specForUID(InstrUID uid) {
+ if (uid >= InstructionSpecifiers.size())
+ InstructionSpecifiers.resize(uid + 1);
+
+ return InstructionSpecifiers[uid];
+ }
+
+ // hasConflicts - Reports whether there were primary decode conflicts
+ // from any instructions added to the tables.
+ // @return - true if there were; false otherwise.
+
+ bool hasConflicts() {
+ return HasConflicts;
+ }
+};
+
+} // namespace X86Disassembler
+
+} // namespace llvm
+
+#endif
diff --git a/utils/TableGen/X86ModRMFilters.h b/utils/TableGen/X86ModRMFilters.h
new file mode 100644
index 0000000..4fe4af3
--- /dev/null
+++ b/utils/TableGen/X86ModRMFilters.h
@@ -0,0 +1,197 @@
+//===- X86ModRMFilters.h - Disassembler ModR/M filterss ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler Emitter.
+// It contains ModR/M filters that determine which values of the ModR/M byte
+// are valid for a partiuclar instruction.
+// Documentation for the disassembler emitter in general can be found in
+// X86DisasemblerEmitter.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MODRMFILTERS_H
+#define X86MODRMFILTERS_H
+
+#include "llvm/System/DataTypes.h"
+
+namespace llvm {
+
+namespace X86Disassembler {
+
+/// ModRMFilter - Abstract base class for clases that recognize patterns in
+/// ModR/M bytes.
+class ModRMFilter {
+public:
+ /// Destructor - Override as necessary.
+ virtual ~ModRMFilter() { }
+
+ /// isDumb - Indicates whether this filter returns the same value for
+ /// any value of the ModR/M byte.
+ ///
+ /// @result - True if the filter returns the same value for any ModR/M
+ /// byte; false if not.
+ virtual bool isDumb() const { return false; }
+
+ /// accepts - Indicates whether the filter accepts a particular ModR/M
+ /// byte value.
+ ///
+ /// @result - True if the filter accepts the ModR/M byte; false if not.
+ virtual bool accepts(uint8_t modRM) const = 0;
+};
+
+/// DumbFilter - Accepts any ModR/M byte. Used for instructions that do not
+/// require a ModR/M byte or instructions where the entire ModR/M byte is used
+/// for operands.
+class DumbFilter : public ModRMFilter {
+public:
+ bool isDumb() const {
+ return true;
+ }
+
+ bool accepts(uint8_t modRM) const {
+ return true;
+ }
+};
+
+/// ModFilter - Filters based on the mod bits [bits 7-6] of the ModR/M byte.
+/// Some instructions are classified based on whether they are 11 or anything
+/// else. This filter performs that classification.
+class ModFilter : public ModRMFilter {
+private:
+ bool R;
+public:
+ /// Constructor
+ ///
+ /// @r - True if the mod bits of the ModR/M byte must be 11; false
+ /// otherwise. The name r derives from the fact that the mod
+ /// bits indicate whether the R/M bits [bits 2-0] signify a
+ /// register or a memory operand.
+ ModFilter(bool r) :
+ ModRMFilter(),
+ R(r) {
+ }
+
+ bool accepts(uint8_t modRM) const {
+ if (R == ((modRM & 0xc0) == 0xc0))
+ return true;
+ else
+ return false;
+ }
+};
+
+/// EscapeFilter - Filters escape opcodes, which are classified in two ways. If
+/// the ModR/M byte is between 0xc0 and 0xff, then there is one slot for each
+/// possible value. Otherwise, there is one instruction for each value of the
+/// nnn field [bits 5-3], known elsewhere as the reg field.
+class EscapeFilter : public ModRMFilter {
+private:
+ bool C0_FF;
+ uint8_t NNN_or_ModRM;
+public:
+ /// Constructor
+ ///
+ /// @c0_ff - True if the ModR/M byte must fall between 0xc0 and 0xff;
+ /// false otherwise.
+ /// @nnn_or_modRM - If c0_ff is true, the required value of the entire ModR/M
+ /// byte. If c0_ff is false, the required value of the nnn
+ /// field.
+ EscapeFilter(bool c0_ff, uint8_t nnn_or_modRM) :
+ ModRMFilter(),
+ C0_FF(c0_ff),
+ NNN_or_ModRM(nnn_or_modRM) {
+ }
+
+ bool accepts(uint8_t modRM) const {
+ if ((C0_FF && modRM >= 0xc0 && (modRM == NNN_or_ModRM)) ||
+ (!C0_FF && modRM < 0xc0 && ((modRM & 0x38) >> 3) == NNN_or_ModRM))
+ return true;
+ else
+ return false;
+ }
+};
+
+/// AddRegEscapeFilter - Some escape opcodes have one of the register operands
+/// added to the ModR/M byte, meaning that a range of eight ModR/M values
+/// maps to a single instruction. Such instructions require the ModR/M byte
+/// to fall between 0xc0 and 0xff.
+class AddRegEscapeFilter : public ModRMFilter {
+private:
+ uint8_t ModRM;
+public:
+ /// Constructor
+ ///
+ /// @modRM - The value of the ModR/M byte when the register operand
+ /// refers to the first register in the register set.
+ AddRegEscapeFilter(uint8_t modRM) : ModRM(modRM) {
+ }
+
+ bool accepts(uint8_t modRM) const {
+ if (modRM >= ModRM && modRM < ModRM + 8)
+ return true;
+ else
+ return false;
+ }
+};
+
+/// ExtendedFilter - Extended opcodes are classified based on the value of the
+/// mod field [bits 7-6] and the value of the nnn field [bits 5-3].
+class ExtendedFilter : public ModRMFilter {
+private:
+ bool R;
+ uint8_t NNN;
+public:
+ /// Constructor
+ ///
+ /// @r - True if the mod field must be set to 11; false otherwise.
+ /// The name is explained at ModFilter.
+ /// @nnn - The required value of the nnn field.
+ ExtendedFilter(bool r, uint8_t nnn) :
+ ModRMFilter(),
+ R(r),
+ NNN(nnn) {
+ }
+
+ bool accepts(uint8_t modRM) const {
+ if (((R && ((modRM & 0xc0) == 0xc0)) ||
+ (!R && ((modRM & 0xc0) != 0xc0))) &&
+ (((modRM & 0x38) >> 3) == NNN))
+ return true;
+ else
+ return false;
+ }
+};
+
+/// ExactFilter - The occasional extended opcode (such as VMCALL or MONITOR)
+/// requires the ModR/M byte to have a specific value.
+class ExactFilter : public ModRMFilter
+{
+private:
+ uint8_t ModRM;
+public:
+ /// Constructor
+ ///
+ /// @modRM - The required value of the full ModR/M byte.
+ ExactFilter(uint8_t modRM) :
+ ModRMFilter(),
+ ModRM(modRM) {
+ }
+
+ bool accepts(uint8_t modRM) const {
+ if (ModRM == modRM)
+ return true;
+ else
+ return false;
+ }
+};
+
+} // namespace X86Disassembler
+
+} // namespace llvm
+
+#endif \ No newline at end of file
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
new file mode 100644
index 0000000..8a21399
--- /dev/null
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -0,0 +1,959 @@
+//===- X86RecognizableInstr.cpp - Disassembler instruction spec --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler Emitter.
+// It contains the implementation of a single recognizable instruction.
+// Documentation for the disassembler emitter in general can be found in
+// X86DisasemblerEmitter.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86DisassemblerShared.h"
+#include "X86RecognizableInstr.h"
+#include "X86ModRMFilters.h"
+
+#include "llvm/Support/ErrorHandling.h"
+
+#include <string>
+
+using namespace llvm;
+
+// A clone of X86 since we can't depend on something that is generated.
+namespace X86Local {
+ enum {
+ Pseudo = 0,
+ RawFrm = 1,
+ AddRegFrm = 2,
+ MRMDestReg = 3,
+ MRMDestMem = 4,
+ MRMSrcReg = 5,
+ MRMSrcMem = 6,
+ MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19,
+ MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23,
+ MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27,
+ MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31,
+ MRMInitReg = 32
+ };
+
+ enum {
+ TB = 1,
+ REP = 2,
+ D8 = 3, D9 = 4, DA = 5, DB = 6,
+ DC = 7, DD = 8, DE = 9, DF = 10,
+ XD = 11, XS = 12,
+ T8 = 13, TA = 14
+ };
+}
+
+#define ONE_BYTE_EXTENSION_TABLES \
+ EXTENSION_TABLE(80) \
+ EXTENSION_TABLE(81) \
+ EXTENSION_TABLE(82) \
+ EXTENSION_TABLE(83) \
+ EXTENSION_TABLE(8f) \
+ EXTENSION_TABLE(c0) \
+ EXTENSION_TABLE(c1) \
+ EXTENSION_TABLE(c6) \
+ EXTENSION_TABLE(c7) \
+ EXTENSION_TABLE(d0) \
+ EXTENSION_TABLE(d1) \
+ EXTENSION_TABLE(d2) \
+ EXTENSION_TABLE(d3) \
+ EXTENSION_TABLE(f6) \
+ EXTENSION_TABLE(f7) \
+ EXTENSION_TABLE(fe) \
+ EXTENSION_TABLE(ff)
+
+#define TWO_BYTE_EXTENSION_TABLES \
+ EXTENSION_TABLE(00) \
+ EXTENSION_TABLE(01) \
+ EXTENSION_TABLE(18) \
+ EXTENSION_TABLE(71) \
+ EXTENSION_TABLE(72) \
+ EXTENSION_TABLE(73) \
+ EXTENSION_TABLE(ae) \
+ EXTENSION_TABLE(b9) \
+ EXTENSION_TABLE(ba) \
+ EXTENSION_TABLE(c7)
+
+#define TWO_BYTE_FULL_EXTENSION_TABLES \
+ EXTENSION_TABLE(01)
+
+
+using namespace X86Disassembler;
+
+/// needsModRMForDecode - Indicates whether a particular instruction requires a
+/// ModR/M byte for the instruction to be properly decoded. For example, a
+/// MRMDestReg instruction needs the Mod field in the ModR/M byte to be set to
+/// 0b11.
+///
+/// @param form - The form of the instruction.
+/// @return - true if the form implies that a ModR/M byte is required, false
+/// otherwise.
+static bool needsModRMForDecode(uint8_t form) {
+ if (form == X86Local::MRMDestReg ||
+ form == X86Local::MRMDestMem ||
+ form == X86Local::MRMSrcReg ||
+ form == X86Local::MRMSrcMem ||
+ (form >= X86Local::MRM0r && form <= X86Local::MRM7r) ||
+ (form >= X86Local::MRM0m && form <= X86Local::MRM7m))
+ return true;
+ else
+ return false;
+}
+
+/// isRegFormat - Indicates whether a particular form requires the Mod field of
+/// the ModR/M byte to be 0b11.
+///
+/// @param form - The form of the instruction.
+/// @return - true if the form implies that Mod must be 0b11, false
+/// otherwise.
+static bool isRegFormat(uint8_t form) {
+ if (form == X86Local::MRMDestReg ||
+ form == X86Local::MRMSrcReg ||
+ (form >= X86Local::MRM0r && form <= X86Local::MRM7r))
+ return true;
+ else
+ return false;
+}
+
+/// byteFromBitsInit - Extracts a value at most 8 bits in width from a BitsInit.
+/// Useful for switch statements and the like.
+///
+/// @param init - A reference to the BitsInit to be decoded.
+/// @return - The field, with the first bit in the BitsInit as the lowest
+/// order bit.
+static uint8_t byteFromBitsInit(BitsInit &init) {
+ int width = init.getNumBits();
+
+ assert(width <= 8 && "Field is too large for uint8_t!");
+
+ int index;
+ uint8_t mask = 0x01;
+
+ uint8_t ret = 0;
+
+ for (index = 0; index < width; index++) {
+ if (static_cast<BitInit*>(init.getBit(index))->getValue())
+ ret |= mask;
+
+ mask <<= 1;
+ }
+
+ return ret;
+}
+
+/// byteFromRec - Extract a value at most 8 bits in with from a Record given the
+/// name of the field.
+///
+/// @param rec - The record from which to extract the value.
+/// @param name - The name of the field in the record.
+/// @return - The field, as translated by byteFromBitsInit().
+static uint8_t byteFromRec(const Record* rec, const std::string &name) {
+ BitsInit* bits = rec->getValueAsBitsInit(name);
+ return byteFromBitsInit(*bits);
+}
+
+RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
+ const CodeGenInstruction &insn,
+ InstrUID uid) {
+ UID = uid;
+
+ Rec = insn.TheDef;
+ Name = Rec->getName();
+ Spec = &tables.specForUID(UID);
+
+ if (!Rec->isSubClassOf("X86Inst")) {
+ ShouldBeEmitted = false;
+ return;
+ }
+
+ Prefix = byteFromRec(Rec, "Prefix");
+ Opcode = byteFromRec(Rec, "Opcode");
+ Form = byteFromRec(Rec, "FormBits");
+ SegOvr = byteFromRec(Rec, "SegOvrBits");
+
+ HasOpSizePrefix = Rec->getValueAsBit("hasOpSizePrefix");
+ HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix");
+ HasLockPrefix = Rec->getValueAsBit("hasLockPrefix");
+ IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly");
+
+ Name = Rec->getName();
+ AsmString = Rec->getValueAsString("AsmString");
+
+ Operands = &insn.OperandList;
+
+ IsSSE = HasOpSizePrefix && (Name.find("16") == Name.npos);
+ HasFROperands = false;
+
+ ShouldBeEmitted = true;
+}
+
+void RecognizableInstr::processInstr(DisassemblerTables &tables,
+ const CodeGenInstruction &insn,
+ InstrUID uid)
+{
+ RecognizableInstr recogInstr(tables, insn, uid);
+
+ recogInstr.emitInstructionSpecifier(tables);
+
+ if (recogInstr.shouldBeEmitted())
+ recogInstr.emitDecodePath(tables);
+}
+
+InstructionContext RecognizableInstr::insnContext() const {
+ InstructionContext insnContext;
+
+ if (Name.find("64") != Name.npos || HasREX_WPrefix) {
+ if (HasREX_WPrefix && HasOpSizePrefix)
+ insnContext = IC_64BIT_REXW_OPSIZE;
+ else if (HasOpSizePrefix)
+ insnContext = IC_64BIT_OPSIZE;
+ else if (HasREX_WPrefix && Prefix == X86Local::XS)
+ insnContext = IC_64BIT_REXW_XS;
+ else if (HasREX_WPrefix && Prefix == X86Local::XD)
+ insnContext = IC_64BIT_REXW_XD;
+ else if (Prefix == X86Local::XD)
+ insnContext = IC_64BIT_XD;
+ else if (Prefix == X86Local::XS)
+ insnContext = IC_64BIT_XS;
+ else if (HasREX_WPrefix)
+ insnContext = IC_64BIT_REXW;
+ else
+ insnContext = IC_64BIT;
+ } else {
+ if (HasOpSizePrefix)
+ insnContext = IC_OPSIZE;
+ else if (Prefix == X86Local::XD)
+ insnContext = IC_XD;
+ else if (Prefix == X86Local::XS)
+ insnContext = IC_XS;
+ else
+ insnContext = IC;
+ }
+
+ return insnContext;
+}
+
+RecognizableInstr::filter_ret RecognizableInstr::filter() const {
+ // Filter out intrinsics
+
+ if (!Rec->isSubClassOf("X86Inst"))
+ return FILTER_STRONG;
+
+ if (Form == X86Local::Pseudo ||
+ IsCodeGenOnly)
+ return FILTER_STRONG;
+
+ // Filter out instructions with a LOCK prefix;
+ // prefer forms that do not have the prefix
+ if (HasLockPrefix)
+ return FILTER_WEAK;
+
+ // Filter out artificial instructions
+
+ if (Name.find("TAILJMP") != Name.npos ||
+ Name.find("_Int") != Name.npos ||
+ Name.find("_int") != Name.npos ||
+ Name.find("Int_") != Name.npos ||
+ Name.find("_NOREX") != Name.npos ||
+ Name.find("EH_RETURN") != Name.npos ||
+ Name.find("V_SET") != Name.npos ||
+ Name.find("LOCK_") != Name.npos ||
+ Name.find("WIN") != Name.npos)
+ return FILTER_STRONG;
+
+ // Special cases.
+
+ if (Name.find("PCMPISTRI") != Name.npos && Name != "PCMPISTRI")
+ return FILTER_WEAK;
+ if (Name.find("PCMPESTRI") != Name.npos && Name != "PCMPESTRI")
+ return FILTER_WEAK;
+
+ if (Name.find("MOV") != Name.npos && Name.find("r0") != Name.npos)
+ return FILTER_WEAK;
+ if (Name.find("MOVZ") != Name.npos && Name.find("MOVZX") == Name.npos)
+ return FILTER_WEAK;
+ if (Name.find("Fs") != Name.npos)
+ return FILTER_WEAK;
+ if (Name == "MOVLPDrr" ||
+ Name == "MOVLPSrr" ||
+ Name == "PUSHFQ" ||
+ Name == "BSF16rr" ||
+ Name == "BSF16rm" ||
+ Name == "BSR16rr" ||
+ Name == "BSR16rm" ||
+ Name == "MOVSX16rm8" ||
+ Name == "MOVSX16rr8" ||
+ Name == "MOVZX16rm8" ||
+ Name == "MOVZX16rr8" ||
+ Name == "PUSH32i16" ||
+ Name == "PUSH64i16" ||
+ Name == "MOVPQI2QImr" ||
+ Name == "MOVSDmr" ||
+ Name == "MOVSDrm" ||
+ Name == "MOVSSmr" ||
+ Name == "MOVSSrm" ||
+ Name == "MMX_MOVD64rrv164" ||
+ Name == "CRC32m16" ||
+ Name == "MOV64ri64i32" ||
+ Name == "CRC32r16")
+ return FILTER_WEAK;
+
+ // Filter out instructions with segment override prefixes.
+ // They're too messy to handle now and we'll special case them if needed.
+
+ if (SegOvr)
+ return FILTER_STRONG;
+
+ // Filter out instructions that can't be printed.
+
+ if (AsmString.size() == 0)
+ return FILTER_STRONG;
+
+ // Filter out instructions with subreg operands.
+
+ if (AsmString.find("subreg") != AsmString.npos)
+ return FILTER_STRONG;
+
+ assert(Form != X86Local::MRMInitReg &&
+ "FORMAT_MRMINITREG instruction not skipped");
+
+ if (HasFROperands && Name.find("MOV") != Name.npos &&
+ ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) ||
+ (Name.find("to") != Name.npos)))
+ return FILTER_WEAK;
+
+ return FILTER_NORMAL;
+}
+
+void RecognizableInstr::handleOperand(
+ bool optional,
+ unsigned &operandIndex,
+ unsigned &physicalOperandIndex,
+ unsigned &numPhysicalOperands,
+ unsigned *operandMapping,
+ OperandEncoding (*encodingFromString)(const std::string&, bool hasOpSizePrefix)) {
+ if (optional) {
+ if (physicalOperandIndex >= numPhysicalOperands)
+ return;
+ } else {
+ assert(physicalOperandIndex < numPhysicalOperands);
+ }
+
+ while (operandMapping[operandIndex] != operandIndex) {
+ Spec->operands[operandIndex].encoding = ENCODING_DUP;
+ Spec->operands[operandIndex].type =
+ (OperandType)(TYPE_DUP0 + operandMapping[operandIndex]);
+ ++operandIndex;
+ }
+
+ const std::string &typeName = (*Operands)[operandIndex].Rec->getName();
+
+ Spec->operands[operandIndex].encoding = encodingFromString(typeName,
+ HasOpSizePrefix);
+ Spec->operands[operandIndex].type = typeFromString(typeName,
+ IsSSE,
+ HasREX_WPrefix,
+ HasOpSizePrefix);
+
+ ++operandIndex;
+ ++physicalOperandIndex;
+}
+
+void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
+ Spec->name = Name;
+
+ if (!Rec->isSubClassOf("X86Inst"))
+ return;
+
+ switch (filter()) {
+ case FILTER_WEAK:
+ Spec->filtered = true;
+ break;
+ case FILTER_STRONG:
+ ShouldBeEmitted = false;
+ return;
+ case FILTER_NORMAL:
+ break;
+ }
+
+ Spec->insnContext = insnContext();
+
+ const std::vector<CodeGenInstruction::OperandInfo> &OperandList = *Operands;
+
+ unsigned operandIndex;
+ unsigned numOperands = OperandList.size();
+ unsigned numPhysicalOperands = 0;
+
+ // operandMapping maps from operands in OperandList to their originals.
+ // If operandMapping[i] != i, then the entry is a duplicate.
+ unsigned operandMapping[X86_MAX_OPERANDS];
+
+ bool hasFROperands = false;
+
+ assert(numOperands < X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough");
+
+ for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
+ if (OperandList[operandIndex].Constraints.size()) {
+ const std::string &constraint = OperandList[operandIndex].Constraints[0];
+ std::string::size_type tiedToPos;
+
+ if ((tiedToPos = constraint.find(" << 16) | (1 << TOI::TIED_TO))")) !=
+ constraint.npos) {
+ tiedToPos--;
+ operandMapping[operandIndex] = constraint[tiedToPos] - '0';
+ } else {
+ ++numPhysicalOperands;
+ operandMapping[operandIndex] = operandIndex;
+ }
+ } else {
+ ++numPhysicalOperands;
+ operandMapping[operandIndex] = operandIndex;
+ }
+
+ const std::string &recName = OperandList[operandIndex].Rec->getName();
+
+ if (recName.find("FR") != recName.npos)
+ hasFROperands = true;
+ }
+
+ if (hasFROperands && Name.find("MOV") != Name.npos &&
+ ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) ||
+ (Name.find("to") != Name.npos)))
+ ShouldBeEmitted = false;
+
+ if (!ShouldBeEmitted)
+ return;
+
+#define HANDLE_OPERAND(class) \
+ handleOperand(false, \
+ operandIndex, \
+ physicalOperandIndex, \
+ numPhysicalOperands, \
+ operandMapping, \
+ class##EncodingFromString);
+
+#define HANDLE_OPTIONAL(class) \
+ handleOperand(true, \
+ operandIndex, \
+ physicalOperandIndex, \
+ numPhysicalOperands, \
+ operandMapping, \
+ class##EncodingFromString);
+
+ // operandIndex should always be < numOperands
+ operandIndex = 0;
+ // physicalOperandIndex should always be < numPhysicalOperands
+ unsigned physicalOperandIndex = 0;
+
+ switch (Form) {
+ case X86Local::RawFrm:
+ // Operand 1 (optional) is an address or immediate.
+ // Operand 2 (optional) is an immediate.
+ assert(numPhysicalOperands <= 2 &&
+ "Unexpected number of operands for RawFrm");
+ HANDLE_OPTIONAL(relocation)
+ HANDLE_OPTIONAL(immediate)
+ break;
+ case X86Local::AddRegFrm:
+ // Operand 1 is added to the opcode.
+ // Operand 2 (optional) is an address.
+ assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 &&
+ "Unexpected number of operands for AddRegFrm");
+ HANDLE_OPERAND(opcodeModifier)
+ HANDLE_OPTIONAL(relocation)
+ break;
+ case X86Local::MRMDestReg:
+ // Operand 1 is a register operand in the R/M field.
+ // Operand 2 is a register operand in the Reg/Opcode field.
+ // Operand 3 (optional) is an immediate.
+ assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
+ "Unexpected number of operands for MRMDestRegFrm");
+ HANDLE_OPERAND(rmRegister)
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPTIONAL(immediate)
+ break;
+ case X86Local::MRMDestMem:
+ // Operand 1 is a memory operand (possibly SIB-extended)
+ // Operand 2 is a register operand in the Reg/Opcode field.
+ // Operand 3 (optional) is an immediate.
+ assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
+ "Unexpected number of operands for MRMDestMemFrm");
+ HANDLE_OPERAND(memory)
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPTIONAL(immediate)
+ break;
+ case X86Local::MRMSrcReg:
+ // Operand 1 is a register operand in the Reg/Opcode field.
+ // Operand 2 is a register operand in the R/M field.
+ // Operand 3 (optional) is an immediate.
+ assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
+ "Unexpected number of operands for MRMSrcRegFrm");
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPERAND(rmRegister)
+ HANDLE_OPTIONAL(immediate)
+ break;
+ case X86Local::MRMSrcMem:
+ // Operand 1 is a register operand in the Reg/Opcode field.
+ // Operand 2 is a memory operand (possibly SIB-extended)
+ // Operand 3 (optional) is an immediate.
+ assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
+ "Unexpected number of operands for MRMSrcMemFrm");
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPERAND(memory)
+ HANDLE_OPTIONAL(immediate)
+ break;
+ case X86Local::MRM0r:
+ case X86Local::MRM1r:
+ case X86Local::MRM2r:
+ case X86Local::MRM3r:
+ case X86Local::MRM4r:
+ case X86Local::MRM5r:
+ case X86Local::MRM6r:
+ case X86Local::MRM7r:
+ // Operand 1 is a register operand in the R/M field.
+ // Operand 2 (optional) is an immediate or relocation.
+ assert(numPhysicalOperands <= 2 &&
+ "Unexpected number of operands for MRMnRFrm");
+ HANDLE_OPTIONAL(rmRegister)
+ HANDLE_OPTIONAL(relocation)
+ break;
+ case X86Local::MRM0m:
+ case X86Local::MRM1m:
+ case X86Local::MRM2m:
+ case X86Local::MRM3m:
+ case X86Local::MRM4m:
+ case X86Local::MRM5m:
+ case X86Local::MRM6m:
+ case X86Local::MRM7m:
+ // Operand 1 is a memory operand (possibly SIB-extended)
+ // Operand 2 (optional) is an immediate or relocation.
+ assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 &&
+ "Unexpected number of operands for MRMnMFrm");
+ HANDLE_OPERAND(memory)
+ HANDLE_OPTIONAL(relocation)
+ break;
+ case X86Local::MRMInitReg:
+ // Ignored.
+ break;
+ }
+
+ #undef HANDLE_OPERAND
+ #undef HANDLE_OPTIONAL
+}
+
+void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
+ // Special cases where the LLVM tables are not complete
+
+#define EXACTCASE(class, name, lastbyte) \
+ if (Name == name) { \
+ tables.setTableFields(class, \
+ insnContext(), \
+ Opcode, \
+ ExactFilter(lastbyte), \
+ UID); \
+ Spec->modifierBase = Opcode; \
+ return; \
+ }
+
+ EXACTCASE(TWOBYTE, "MONITOR", 0xc8)
+ EXACTCASE(TWOBYTE, "MWAIT", 0xc9)
+ EXACTCASE(TWOBYTE, "SWPGS", 0xf8)
+ EXACTCASE(TWOBYTE, "INVEPT", 0x80)
+ EXACTCASE(TWOBYTE, "INVVPID", 0x81)
+ EXACTCASE(TWOBYTE, "VMCALL", 0xc1)
+ EXACTCASE(TWOBYTE, "VMLAUNCH", 0xc2)
+ EXACTCASE(TWOBYTE, "VMRESUME", 0xc3)
+ EXACTCASE(TWOBYTE, "VMXOFF", 0xc4)
+
+ if (Name == "INVLPG") {
+ tables.setTableFields(TWOBYTE,
+ insnContext(),
+ Opcode,
+ ExtendedFilter(false, 7),
+ UID);
+ Spec->modifierBase = Opcode;
+ return;
+ }
+
+ OpcodeType opcodeType = (OpcodeType)-1;
+
+ ModRMFilter* filter = NULL;
+ uint8_t opcodeToSet = 0;
+
+ switch (Prefix) {
+ // Extended two-byte opcodes can start with f2 0f, f3 0f, or 0f
+ case X86Local::XD:
+ case X86Local::XS:
+ case X86Local::TB:
+ opcodeType = TWOBYTE;
+
+ switch (Opcode) {
+#define EXTENSION_TABLE(n) case 0x##n:
+ TWO_BYTE_EXTENSION_TABLES
+#undef EXTENSION_TABLE
+ switch (Form) {
+ default:
+ llvm_unreachable("Unhandled two-byte extended opcode");
+ case X86Local::MRM0r:
+ case X86Local::MRM1r:
+ case X86Local::MRM2r:
+ case X86Local::MRM3r:
+ case X86Local::MRM4r:
+ case X86Local::MRM5r:
+ case X86Local::MRM6r:
+ case X86Local::MRM7r:
+ filter = new ExtendedFilter(true, Form - X86Local::MRM0r);
+ break;
+ case X86Local::MRM0m:
+ case X86Local::MRM1m:
+ case X86Local::MRM2m:
+ case X86Local::MRM3m:
+ case X86Local::MRM4m:
+ case X86Local::MRM5m:
+ case X86Local::MRM6m:
+ case X86Local::MRM7m:
+ filter = new ExtendedFilter(false, Form - X86Local::MRM0m);
+ break;
+ } // switch (Form)
+ break;
+ default:
+ if (needsModRMForDecode(Form))
+ filter = new ModFilter(isRegFormat(Form));
+ else
+ filter = new DumbFilter();
+
+ break;
+ } // switch (opcode)
+ opcodeToSet = Opcode;
+ break;
+ case X86Local::T8:
+ opcodeType = THREEBYTE_38;
+ if (needsModRMForDecode(Form))
+ filter = new ModFilter(isRegFormat(Form));
+ else
+ filter = new DumbFilter();
+ opcodeToSet = Opcode;
+ break;
+ case X86Local::TA:
+ opcodeType = THREEBYTE_3A;
+ if (needsModRMForDecode(Form))
+ filter = new ModFilter(isRegFormat(Form));
+ else
+ filter = new DumbFilter();
+ opcodeToSet = Opcode;
+ break;
+ case X86Local::D8:
+ case X86Local::D9:
+ case X86Local::DA:
+ case X86Local::DB:
+ case X86Local::DC:
+ case X86Local::DD:
+ case X86Local::DE:
+ case X86Local::DF:
+ assert(Opcode >= 0xc0 && "Unexpected opcode for an escape opcode");
+ opcodeType = ONEBYTE;
+ if (Form == X86Local::AddRegFrm) {
+ Spec->modifierType = MODIFIER_MODRM;
+ Spec->modifierBase = Opcode;
+ filter = new AddRegEscapeFilter(Opcode);
+ } else {
+ filter = new EscapeFilter(true, Opcode);
+ }
+ opcodeToSet = 0xd8 + (Prefix - X86Local::D8);
+ break;
+ default:
+ opcodeType = ONEBYTE;
+ switch (Opcode) {
+#define EXTENSION_TABLE(n) case 0x##n:
+ ONE_BYTE_EXTENSION_TABLES
+#undef EXTENSION_TABLE
+ switch (Form) {
+ default:
+ llvm_unreachable("Fell through the cracks of a single-byte "
+ "extended opcode");
+ case X86Local::MRM0r:
+ case X86Local::MRM1r:
+ case X86Local::MRM2r:
+ case X86Local::MRM3r:
+ case X86Local::MRM4r:
+ case X86Local::MRM5r:
+ case X86Local::MRM6r:
+ case X86Local::MRM7r:
+ filter = new ExtendedFilter(true, Form - X86Local::MRM0r);
+ break;
+ case X86Local::MRM0m:
+ case X86Local::MRM1m:
+ case X86Local::MRM2m:
+ case X86Local::MRM3m:
+ case X86Local::MRM4m:
+ case X86Local::MRM5m:
+ case X86Local::MRM6m:
+ case X86Local::MRM7m:
+ filter = new ExtendedFilter(false, Form - X86Local::MRM0m);
+ break;
+ } // switch (Form)
+ break;
+ case 0xd8:
+ case 0xd9:
+ case 0xda:
+ case 0xdb:
+ case 0xdc:
+ case 0xdd:
+ case 0xde:
+ case 0xdf:
+ filter = new EscapeFilter(false, Form - X86Local::MRM0m);
+ break;
+ default:
+ if (needsModRMForDecode(Form))
+ filter = new ModFilter(isRegFormat(Form));
+ else
+ filter = new DumbFilter();
+ break;
+ } // switch (Opcode)
+ opcodeToSet = Opcode;
+ } // switch (Prefix)
+
+ assert(opcodeType != (OpcodeType)-1 &&
+ "Opcode type not set");
+ assert(filter && "Filter not set");
+
+ if (Form == X86Local::AddRegFrm) {
+ if(Spec->modifierType != MODIFIER_MODRM) {
+ assert(opcodeToSet < 0xf9 &&
+ "Not enough room for all ADDREG_FRM operands");
+
+ uint8_t currentOpcode;
+
+ for (currentOpcode = opcodeToSet;
+ currentOpcode < opcodeToSet + 8;
+ ++currentOpcode)
+ tables.setTableFields(opcodeType,
+ insnContext(),
+ currentOpcode,
+ *filter,
+ UID);
+
+ Spec->modifierType = MODIFIER_OPCODE;
+ Spec->modifierBase = opcodeToSet;
+ } else {
+ // modifierBase was set where MODIFIER_MODRM was set
+ tables.setTableFields(opcodeType,
+ insnContext(),
+ opcodeToSet,
+ *filter,
+ UID);
+ }
+ } else {
+ tables.setTableFields(opcodeType,
+ insnContext(),
+ opcodeToSet,
+ *filter,
+ UID);
+
+ Spec->modifierType = MODIFIER_NONE;
+ Spec->modifierBase = opcodeToSet;
+ }
+
+ delete filter;
+}
+
+#define TYPE(str, type) if (s == str) return type;
+OperandType RecognizableInstr::typeFromString(const std::string &s,
+ bool isSSE,
+ bool hasREX_WPrefix,
+ bool hasOpSizePrefix) {
+ if (isSSE) {
+ // For SSE instructions, we ignore the OpSize prefix and force operand
+ // sizes.
+ TYPE("GR16", TYPE_R16)
+ TYPE("GR32", TYPE_R32)
+ TYPE("GR64", TYPE_R64)
+ }
+ if(hasREX_WPrefix) {
+ // For instructions with a REX_W prefix, a declared 32-bit register encoding
+ // is special.
+ TYPE("GR32", TYPE_R32)
+ }
+ if(!hasOpSizePrefix) {
+ // For instructions without an OpSize prefix, a declared 16-bit register or
+ // immediate encoding is special.
+ TYPE("GR16", TYPE_R16)
+ TYPE("i16imm", TYPE_IMM16)
+ }
+ TYPE("i16mem", TYPE_Mv)
+ TYPE("i16imm", TYPE_IMMv)
+ TYPE("i16i8imm", TYPE_IMMv)
+ TYPE("GR16", TYPE_Rv)
+ TYPE("i32mem", TYPE_Mv)
+ TYPE("i32imm", TYPE_IMMv)
+ TYPE("i32i8imm", TYPE_IMM32)
+ TYPE("GR32", TYPE_Rv)
+ TYPE("i64mem", TYPE_Mv)
+ TYPE("i64i32imm", TYPE_IMM64)
+ TYPE("i64i8imm", TYPE_IMM64)
+ TYPE("GR64", TYPE_R64)
+ TYPE("i8mem", TYPE_M8)
+ TYPE("i8imm", TYPE_IMM8)
+ TYPE("GR8", TYPE_R8)
+ TYPE("VR128", TYPE_XMM128)
+ TYPE("f128mem", TYPE_M128)
+ TYPE("FR64", TYPE_XMM64)
+ TYPE("f64mem", TYPE_M64FP)
+ TYPE("FR32", TYPE_XMM32)
+ TYPE("f32mem", TYPE_M32FP)
+ TYPE("RST", TYPE_ST)
+ TYPE("i128mem", TYPE_M128)
+ TYPE("i64i32imm_pcrel", TYPE_REL64)
+ TYPE("i32imm_pcrel", TYPE_REL32)
+ TYPE("SSECC", TYPE_IMM8)
+ TYPE("brtarget", TYPE_RELv)
+ TYPE("brtarget8", TYPE_REL8)
+ TYPE("f80mem", TYPE_M80FP)
+ TYPE("lea32mem", TYPE_M32)
+ TYPE("lea64_32mem", TYPE_M64)
+ TYPE("lea64mem", TYPE_M64)
+ TYPE("VR64", TYPE_MM64)
+ TYPE("i64imm", TYPE_IMMv)
+ TYPE("opaque32mem", TYPE_M1616)
+ TYPE("opaque48mem", TYPE_M1632)
+ TYPE("opaque80mem", TYPE_M1664)
+ TYPE("opaque512mem", TYPE_M512)
+ TYPE("SEGMENT_REG", TYPE_SEGMENTREG)
+ TYPE("DEBUG_REG", TYPE_DEBUGREG)
+ TYPE("CONTROL_REG_32", TYPE_CR32)
+ TYPE("CONTROL_REG_64", TYPE_CR64)
+ TYPE("offset8", TYPE_MOFFS8)
+ TYPE("offset16", TYPE_MOFFS16)
+ TYPE("offset32", TYPE_MOFFS32)
+ TYPE("offset64", TYPE_MOFFS64)
+ errs() << "Unhandled type string " << s << "\n";
+ llvm_unreachable("Unhandled type string");
+}
+#undef TYPE
+
+#define ENCODING(str, encoding) if (s == str) return encoding;
+OperandEncoding RecognizableInstr::immediateEncodingFromString
+ (const std::string &s,
+ bool hasOpSizePrefix) {
+ if(!hasOpSizePrefix) {
+ // For instructions without an OpSize prefix, a declared 16-bit register or
+ // immediate encoding is special.
+ ENCODING("i16imm", ENCODING_IW)
+ }
+ ENCODING("i32i8imm", ENCODING_IB)
+ ENCODING("SSECC", ENCODING_IB)
+ ENCODING("i16imm", ENCODING_Iv)
+ ENCODING("i16i8imm", ENCODING_IB)
+ ENCODING("i32imm", ENCODING_Iv)
+ ENCODING("i64i32imm", ENCODING_ID)
+ ENCODING("i64i8imm", ENCODING_IB)
+ ENCODING("i8imm", ENCODING_IB)
+ errs() << "Unhandled immediate encoding " << s << "\n";
+ llvm_unreachable("Unhandled immediate encoding");
+}
+
+OperandEncoding RecognizableInstr::rmRegisterEncodingFromString
+ (const std::string &s,
+ bool hasOpSizePrefix) {
+ ENCODING("GR16", ENCODING_RM)
+ ENCODING("GR32", ENCODING_RM)
+ ENCODING("GR64", ENCODING_RM)
+ ENCODING("GR8", ENCODING_RM)
+ ENCODING("VR128", ENCODING_RM)
+ ENCODING("FR64", ENCODING_RM)
+ ENCODING("FR32", ENCODING_RM)
+ ENCODING("VR64", ENCODING_RM)
+ errs() << "Unhandled R/M register encoding " << s << "\n";
+ llvm_unreachable("Unhandled R/M register encoding");
+}
+
+OperandEncoding RecognizableInstr::roRegisterEncodingFromString
+ (const std::string &s,
+ bool hasOpSizePrefix) {
+ ENCODING("GR16", ENCODING_REG)
+ ENCODING("GR32", ENCODING_REG)
+ ENCODING("GR64", ENCODING_REG)
+ ENCODING("GR8", ENCODING_REG)
+ ENCODING("VR128", ENCODING_REG)
+ ENCODING("FR64", ENCODING_REG)
+ ENCODING("FR32", ENCODING_REG)
+ ENCODING("VR64", ENCODING_REG)
+ ENCODING("SEGMENT_REG", ENCODING_REG)
+ ENCODING("DEBUG_REG", ENCODING_REG)
+ ENCODING("CONTROL_REG_32", ENCODING_REG)
+ ENCODING("CONTROL_REG_64", ENCODING_REG)
+ errs() << "Unhandled reg/opcode register encoding " << s << "\n";
+ llvm_unreachable("Unhandled reg/opcode register encoding");
+}
+
+OperandEncoding RecognizableInstr::memoryEncodingFromString
+ (const std::string &s,
+ bool hasOpSizePrefix) {
+ ENCODING("i16mem", ENCODING_RM)
+ ENCODING("i32mem", ENCODING_RM)
+ ENCODING("i64mem", ENCODING_RM)
+ ENCODING("i8mem", ENCODING_RM)
+ ENCODING("f128mem", ENCODING_RM)
+ ENCODING("f64mem", ENCODING_RM)
+ ENCODING("f32mem", ENCODING_RM)
+ ENCODING("i128mem", ENCODING_RM)
+ ENCODING("f80mem", ENCODING_RM)
+ ENCODING("lea32mem", ENCODING_RM)
+ ENCODING("lea64_32mem", ENCODING_RM)
+ ENCODING("lea64mem", ENCODING_RM)
+ ENCODING("opaque32mem", ENCODING_RM)
+ ENCODING("opaque48mem", ENCODING_RM)
+ ENCODING("opaque80mem", ENCODING_RM)
+ ENCODING("opaque512mem", ENCODING_RM)
+ errs() << "Unhandled memory encoding " << s << "\n";
+ llvm_unreachable("Unhandled memory encoding");
+}
+
+OperandEncoding RecognizableInstr::relocationEncodingFromString
+ (const std::string &s,
+ bool hasOpSizePrefix) {
+ if(!hasOpSizePrefix) {
+ // For instructions without an OpSize prefix, a declared 16-bit register or
+ // immediate encoding is special.
+ ENCODING("i16imm", ENCODING_IW)
+ }
+ ENCODING("i16imm", ENCODING_Iv)
+ ENCODING("i16i8imm", ENCODING_IB)
+ ENCODING("i32imm", ENCODING_Iv)
+ ENCODING("i32i8imm", ENCODING_IB)
+ ENCODING("i64i32imm", ENCODING_ID)
+ ENCODING("i64i8imm", ENCODING_IB)
+ ENCODING("i8imm", ENCODING_IB)
+ ENCODING("i64i32imm_pcrel", ENCODING_ID)
+ ENCODING("i32imm_pcrel", ENCODING_ID)
+ ENCODING("brtarget", ENCODING_Iv)
+ ENCODING("brtarget8", ENCODING_IB)
+ ENCODING("i64imm", ENCODING_IO)
+ ENCODING("offset8", ENCODING_Ia)
+ ENCODING("offset16", ENCODING_Ia)
+ ENCODING("offset32", ENCODING_Ia)
+ ENCODING("offset64", ENCODING_Ia)
+ errs() << "Unhandled relocation encoding " << s << "\n";
+ llvm_unreachable("Unhandled relocation encoding");
+}
+
+OperandEncoding RecognizableInstr::opcodeModifierEncodingFromString
+ (const std::string &s,
+ bool hasOpSizePrefix) {
+ ENCODING("RST", ENCODING_I)
+ ENCODING("GR32", ENCODING_Rv)
+ ENCODING("GR64", ENCODING_RO)
+ ENCODING("GR16", ENCODING_Rv)
+ ENCODING("GR8", ENCODING_RB)
+ errs() << "Unhandled opcode modifier encoding " << s << "\n";
+ llvm_unreachable("Unhandled opcode modifier encoding");
+}
+#undef ENCODING \ No newline at end of file
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
new file mode 100644
index 0000000..84374b0
--- /dev/null
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -0,0 +1,237 @@
+//===- X86RecognizableInstr.h - Disassembler instruction spec ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler Emitter.
+// It contains the interface of a single recognizable instruction.
+// Documentation for the disassembler emitter in general can be found in
+// X86DisasemblerEmitter.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86RECOGNIZABLEINSTR_H
+#define X86RECOGNIZABLEINSTR_H
+
+#include "X86DisassemblerTables.h"
+
+#include "CodeGenTarget.h"
+#include "Record.h"
+
+#include "llvm/System/DataTypes.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+namespace X86Disassembler {
+
+/// RecognizableInstr - Encapsulates all information required to decode a single
+/// instruction, as extracted from the LLVM instruction tables. Has methods
+/// to interpret the information available in the LLVM tables, and to emit the
+/// instruction into DisassemblerTables.
+class RecognizableInstr {
+private:
+ /// The opcode of the instruction, as used in an MCInst
+ InstrUID UID;
+ /// The record from the .td files corresponding to this instruction
+ const Record* Rec;
+ /// The prefix field from the record
+ uint8_t Prefix;
+ /// The opcode field from the record; this is the opcode used in the Intel
+ /// encoding and therefore distinct from the UID
+ uint8_t Opcode;
+ /// The form field from the record
+ uint8_t Form;
+ /// The segment override field from the record
+ uint8_t SegOvr;
+ /// The hasOpSizePrefix field from the record
+ bool HasOpSizePrefix;
+ /// The hasREX_WPrefix field from the record
+ bool HasREX_WPrefix;
+ /// The hasLockPrefix field from the record
+ bool HasLockPrefix;
+ /// The isCodeGenOnly filed from the record
+ bool IsCodeGenOnly;
+
+ /// The instruction name as listed in the tables
+ std::string Name;
+ /// The AT&T AsmString for the instruction
+ std::string AsmString;
+
+ /// Indicates whether the instruction is SSE
+ bool IsSSE;
+ /// Indicates whether the instruction has FR operands - MOVs with FR operands
+ /// are typically ignored
+ bool HasFROperands;
+ /// Indicates whether the instruction should be emitted into the decode
+ /// tables; regardless, it will be emitted into the instruction info table
+ bool ShouldBeEmitted;
+
+ /// The operands of the instruction, as listed in the CodeGenInstruction.
+ /// They are not one-to-one with operands listed in the MCInst; for example,
+ /// memory operands expand to 5 operands in the MCInst
+ const std::vector<CodeGenInstruction::OperandInfo>* Operands;
+ /// The description of the instruction that is emitted into the instruction
+ /// info table
+ InstructionSpecifier* Spec;
+
+ /// insnContext - Returns the primary context in which the instruction is
+ /// valid.
+ ///
+ /// @return - The context in which the instruction is valid.
+ InstructionContext insnContext() const;
+
+ enum filter_ret {
+ FILTER_STRONG, // instruction has no place in the instruction tables
+ FILTER_WEAK, // instruction may conflict, and should be eliminated if
+ // it does
+ FILTER_NORMAL // instruction should have high priority and generate an
+ // error if it conflcits with any other FILTER_NORMAL
+ // instruction
+ };
+
+ /// filter - Determines whether the instruction should be decodable. Some
+ /// instructions are pure intrinsics and use unencodable operands; many
+ /// synthetic instructions are duplicates of other instructions; other
+ /// instructions only differ in the logical way in which they are used, and
+ /// have the same decoding. Because these would cause decode conflicts,
+ /// they must be filtered out.
+ ///
+ /// @return - The degree of filtering to be applied (see filter_ret).
+ filter_ret filter() const;
+
+ /// typeFromString - Translates an operand type from the string provided in
+ /// the LLVM tables to an OperandType for use in the operand specifier.
+ ///
+ /// @param s - The string, as extracted by calling Rec->getName()
+ /// on a CodeGenInstruction::OperandInfo.
+ /// @param isSSE - Indicates whether the instruction is an SSE
+ /// instruction. For SSE instructions, immediates are
+ /// fixed-size rather than being affected by the
+ /// mandatory OpSize prefix.
+ /// @param hasREX_WPrefix - Indicates whether the instruction has a REX.W
+ /// prefix. If it does, 32-bit register operands stay
+ /// 32-bit regardless of the operand size.
+ /// @param hasOpSizePrefix- Indicates whether the instruction has an OpSize
+ /// prefix. If it does not, then 16-bit register
+ /// operands stay 16-bit.
+ /// @return - The operand's type.
+ static OperandType typeFromString(const std::string& s,
+ bool isSSE,
+ bool hasREX_WPrefix,
+ bool hasOpSizePrefix);
+
+ /// immediateEncodingFromString - Translates an immediate encoding from the
+ /// string provided in the LLVM tables to an OperandEncoding for use in
+ /// the operand specifier.
+ ///
+ /// @param s - See typeFromString().
+ /// @param hasOpSizePrefix - Indicates whether the instruction has an OpSize
+ /// prefix. If it does not, then 16-bit immediate
+ /// operands stay 16-bit.
+ /// @return - The operand's encoding.
+ static OperandEncoding immediateEncodingFromString(const std::string &s,
+ bool hasOpSizePrefix);
+
+ /// rmRegisterEncodingFromString - Like immediateEncodingFromString, but
+ /// handles operands that are in the REG field of the ModR/M byte.
+ static OperandEncoding rmRegisterEncodingFromString(const std::string &s,
+ bool hasOpSizePrefix);
+
+ /// rmRegisterEncodingFromString - Like immediateEncodingFromString, but
+ /// handles operands that are in the REG field of the ModR/M byte.
+ static OperandEncoding roRegisterEncodingFromString(const std::string &s,
+ bool hasOpSizePrefix);
+ static OperandEncoding memoryEncodingFromString(const std::string &s,
+ bool hasOpSizePrefix);
+ static OperandEncoding relocationEncodingFromString(const std::string &s,
+ bool hasOpSizePrefix);
+ static OperandEncoding opcodeModifierEncodingFromString(const std::string &s,
+ bool hasOpSizePrefix);
+
+ /// handleOperand - Converts a single operand from the LLVM table format to
+ /// the emitted table format, handling any duplicate operands it encounters
+ /// and then one non-duplicate.
+ ///
+ /// @param optional - Determines whether to assert that the
+ /// operand exists.
+ /// @param operandIndex - The index into the generated operand table.
+ /// Incremented by this function one or more
+ /// times to reflect possible duplicate
+ /// operands).
+ /// @param physicalOperandIndex - The index of the current operand into the
+ /// set of non-duplicate ('physical') operands.
+ /// Incremented by this function once.
+ /// @param numPhysicalOperands - The number of non-duplicate operands in the
+ /// instructions.
+ /// @param operandMapping - The operand mapping, which has an entry for
+ /// each operand that indicates whether it is a
+ /// duplicate, and of what.
+ void handleOperand(bool optional,
+ unsigned &operandIndex,
+ unsigned &physicalOperandIndex,
+ unsigned &numPhysicalOperands,
+ unsigned *operandMapping,
+ OperandEncoding (*encodingFromString)
+ (const std::string&,
+ bool hasOpSizePrefix));
+
+ /// shouldBeEmitted - Returns the shouldBeEmitted field. Although filter()
+ /// filters out many instructions, at various points in decoding we
+ /// determine that the instruction should not actually be decodable. In
+ /// particular, MMX MOV instructions aren't emitted, but they're only
+ /// identified during operand parsing.
+ ///
+ /// @return - true if at this point we believe the instruction should be
+ /// emitted; false if not. This will return false if filter() returns false
+ /// once emitInstructionSpecifier() has been called.
+ bool shouldBeEmitted() const {
+ return ShouldBeEmitted;
+ }
+
+ /// emitInstructionSpecifier - Loads the instruction specifier for the current
+ /// instruction into a DisassemblerTables.
+ ///
+ /// @arg tables - The DisassemblerTables to populate with the specifier for
+ /// the current instruction.
+ void emitInstructionSpecifier(DisassemblerTables &tables);
+
+ /// emitDecodePath - Populates the proper fields in the decode tables
+ /// corresponding to the decode paths for this instruction.
+ ///
+ /// @arg tables - The DisassemblerTables to populate with the decode
+ /// decode information for the current instruction.
+ void emitDecodePath(DisassemblerTables &tables) const;
+
+ /// Constructor - Initializes a RecognizableInstr with the appropriate fields
+ /// from a CodeGenInstruction.
+ ///
+ /// @arg tables - The DisassemblerTables that the specifier will be added to.
+ /// @arg insn - The CodeGenInstruction to extract information from.
+ /// @arg uid - The unique ID of the current instruction.
+ RecognizableInstr(DisassemblerTables &tables,
+ const CodeGenInstruction &insn,
+ InstrUID uid);
+public:
+ /// processInstr - Accepts a CodeGenInstruction and loads decode information
+ /// for it into a DisassemblerTables if appropriate.
+ ///
+ /// @arg tables - The DiassemblerTables to be populated with decode
+ /// information.
+ /// @arg insn - The CodeGenInstruction to be used as a source for this
+ /// information.
+ /// @uid - The unique ID of the instruction.
+ static void processInstr(DisassemblerTables &tables,
+ const CodeGenInstruction &insn,
+ InstrUID uid);
+};
+
+} // namespace X86Disassembler
+
+} // namespace llvm
+
+#endif