aboutsummaryrefslogtreecommitdiffstats
path: root/lib/MC
diff options
context:
space:
mode:
author: Chris Lattner <sabre@nondot.org> 2010-07-20 18:25:19 +0000
committer: Chris Lattner <sabre@nondot.org> 2010-07-20 18:25:19 +0000
commit: 847da55716e9c1d39c08ed052bc86d28796cb91f (patch)
tree: e7cc007870c19d31799084136675b1c2a79417ec /lib/MC
parent: c4fd9afaf959309f9e522aae8facf936aaaf9ee4 (diff)
download: external_llvm-847da55716e9c1d39c08ed052bc86d28796cb91f.zip
external_llvm-847da55716e9c1d39c08ed052bc86d28796cb91f.tar.gz
external_llvm-847da55716e9c1d39c08ed052bc86d28796cb91f.tar.bz2
start straightening out libedis's dependencies and make it fit
better in the llvm world. Among other things, this changes: 1. The guts of libedis are now moved into lib/MC/MCDisassembler 2. llvm-mc now depends on lib/MC/MCDisassembler, not tools/edis, so edis and mc don't have to be built in series. 3. lib/MC/MCDisassembler no longer depends on the C api, the C API depends on it. 4. Various code cleanup changes. There is still a lot to be done to make edis fit with the llvm design, but this is an incremental step in the right direction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108869 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/MC')
-rw-r--r-- lib/MC/MCDisassembler/EDDisassembler.cpp 402
-rw-r--r-- lib/MC/MCDisassembler/EDDisassembler.h 271
-rw-r--r-- lib/MC/MCDisassembler/EDInst.cpp 207
-rw-r--r-- lib/MC/MCDisassembler/EDInst.h 181
-rw-r--r-- lib/MC/MCDisassembler/EDOperand.cpp 282
-rw-r--r-- lib/MC/MCDisassembler/EDOperand.h 91
-rw-r--r-- lib/MC/MCDisassembler/EDToken.cpp 206
-rw-r--r-- lib/MC/MCDisassembler/EDToken.h 138
-rw-r--r-- lib/MC/Makefile 2
9 files changed, 1779 insertions, 1 deletions
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
new file mode 100644
index 0000000..697b3d9
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -0,0 +1,402 @@
+//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's disassembler class.
+// The disassembler is responsible for vending individual instructions according
+// to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelect.h"
+using namespace llvm;
+
+bool EDDisassembler::sInitialized = false;
+EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
+
+struct TripleMap { // One row of the architecture-to-triple lookup table
+ Triple::ArchType Arch; // Architecture this row applies to
+ const char *String; // Target triple for Arch; NULL marks the sentinel row
+};
+
+static struct TripleMap triplemap[] = { // Searched linearly by tripleFromArch()
+ { Triple::x86, "i386-unknown-unknown" },
+ { Triple::x86_64, "x86_64-unknown-unknown" },
+ { Triple::arm, "arm-unknown-unknown" },
+ { Triple::thumb, "thumb-unknown-unknown" },
+ { Triple::InvalidArch, NULL, } // Sentinel: a NULL String ends the table
+};
+
+/// tripleFromArch - Returns the target triple string corresponding to a given
+/// architecture, or NULL if the architecture has no entry in triplemap
+///
+/// @arg arch - The Triple::ArchType for the desired architecture
+static const char *tripleFromArch(Triple::ArchType arch) {
+ unsigned int infoIndex;
+
+ for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
+ if (arch == triplemap[infoIndex].Arch)
+ return triplemap[infoIndex].String;
+ }
+
+ return NULL; // Reached the sentinel row without a match
+}
+
+/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
+/// for the desired assembly syntax, suitable for passing to
+/// Target::createMCInstPrinter(); returns -1 if syntax is not valid for arch
+///
+/// @arg arch - The target architecture
+/// @arg syntax - The assembly syntax as an EDDisassembler::AssemblySyntax value
+static int getLLVMSyntaxVariant(Triple::ArchType arch,
+ EDDisassembler::AssemblySyntax syntax) {
+ switch (syntax) {
+ default:
+ return -1;
+ // Mappings below from X86AsmPrinter.cpp
+ case EDDisassembler::kEDAssemblySyntaxX86ATT:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 0;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxX86Intel:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 1;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxARMUAL:
+ if (arch == Triple::arm || arch == Triple::thumb)
+ return 0;
+ else
+ return -1;
+ }
+}
+
+void EDDisassembler::initialize() { // Runs the InitializeAll*() calls once
+ if (sInitialized) // Later calls are no-ops
+ return;
+
+ sInitialized = true;
+
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+ InitializeAllDisassemblers();
+}
+
+#undef BRINGUP_TARGET
+
+EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
+ AssemblySyntax syntax) {
+ CPUKey key; // Lookup key into the sDisassemblers cache
+ key.Arch = arch;
+ key.Syntax = syntax;
+
+ EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
+
+ if (i != sDisassemblers.end()) {
+ return i->second; // Cache hit: reuse the existing disassembler
+ } else {
+ EDDisassembler* sdd = new EDDisassembler(key);
+ if (!sdd->valid()) { // Construction failed; do not cache it
+ delete sdd;
+ return NULL;
+ }
+
+ sDisassemblers[key] = sdd; // Remember it for later requests
+
+ return sdd;
+ }
+
+ return NULL; // Not reached: both branches above return
+}
+
+EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
+ AssemblySyntax syntax) {
+ return getDisassembler(Triple(str).getArch(), syntax); // Only str's arch is used
+}
+
+EDDisassembler::EDDisassembler(CPUKey &key) :
+ Valid(false),
+ HasSemantics(false),
+ ErrorStream(nulls()),
+ Key(key) { // Any early return below leaves Valid == false; see valid()
+ const char *triple = tripleFromArch(key.Arch);
+
+ if (!triple)
+ return;
+
+ LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
+
+ if (LLVMSyntaxVariant < 0)
+ return;
+
+ std::string tripleString(triple);
+ std::string errorString;
+
+ Tgt = TargetRegistry::lookupTarget(tripleString, errorString);
+
+ if (!Tgt)
+ return;
+
+ std::string featureString;
+
+ TargetMachine.reset(Tgt->createTargetMachine(tripleString, featureString));
+ if (!TargetMachine) // Guard the dereference just below
+ return;
+
+ const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
+
+ if (!registerInfo)
+ return;
+
+ initMaps(*registerInfo); // Exactly once: a second call would duplicate RegVec
+
+ AsmInfo.reset(Tgt->createAsmInfo(tripleString));
+
+ if (!AsmInfo)
+ return;
+
+ Disassembler.reset(Tgt->createMCDisassembler());
+
+ if (!Disassembler)
+ return;
+
+ InstInfos = Disassembler->getEDInfo();
+
+ InstString.reset(new std::string);
+ InstStream.reset(new raw_string_ostream(*InstString));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
+
+ if (!InstPrinter)
+ return;
+
+ GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
+ SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
+ if (!SpecificAsmLexer) // No target lexer: stay invalid rather than crash
+ return;
+ SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
+
+ Valid = true;
+}
+
+EDDisassembler::~EDDisassembler() { // Performs no explicit cleanup either way
+ if (!valid())
+ return;
+}
+
+namespace {
+ /// EDMemoryObject - a subclass of MemoryObject that fetches each byte through
+ /// a client-supplied EDByteReaderCallback. See MemoryObject.
+ class EDMemoryObject : public llvm::MemoryObject {
+ private:
+ EDByteReaderCallback Callback; // May be NULL; readByte() then always fails
+ void *Arg; // Opaque value passed back to Callback on every read
+ public:
+ EDMemoryObject(EDByteReaderCallback callback,
+ void *arg) : Callback(callback), Arg(arg) { }
+ ~EDMemoryObject() { }
+ uint64_t getBase() const { return 0x0; }
+ uint64_t getExtent() const { return (uint64_t)-1; } // Largest uint64_t value
+ int readByte(uint64_t address, uint8_t *ptr) const {
+ if (!Callback)
+ return -1;
+
+ if (Callback(ptr, address, Arg)) // Nonzero from the callback means failure
+ return -1;
+
+ return 0;
+ }
+ };
+}
+
+EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg) {
+ EDMemoryObject memoryObject(byteReader, arg); // Wraps the callback as a MemoryObject
+
+ MCInst* inst = new MCInst;
+ uint64_t byteSize;
+
+ if (!Disassembler->getInstruction(*inst,
+ byteSize,
+ memoryObject,
+ address,
+ ErrorStream)) {
+ delete inst; // Decoding failed; nothing else owns inst yet
+ return NULL;
+ } else {
+ const llvm::EDInstInfo *thisInstInfo;
+
+ thisInstInfo = &InstInfos[inst->getOpcode()]; // InstInfos is indexed by opcode
+
+ EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
+ return sdInst; // The EDInst destructor deletes inst
+ }
+}
+
+void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
+ unsigned numRegisters = registerInfo.getNumRegs();
+ unsigned registerIndex;
+
+ for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
+ const char* registerName = registerInfo.get(registerIndex).Name;
+
+ RegVec.push_back(registerName); // Appends, so RegVec index == register ID
+ RegRMap[registerName] = registerIndex; // Reverse map: name -> register ID
+ }
+
+ switch (Key.Arch) { // Record the stack-pointer and program-counter register IDs
+ default:
+ break;
+ case Triple::x86:
+ case Triple::x86_64:
+ stackPointers.insert(registerIDWithName("SP"));
+ stackPointers.insert(registerIDWithName("ESP"));
+ stackPointers.insert(registerIDWithName("RSP"));
+
+ programCounters.insert(registerIDWithName("IP"));
+ programCounters.insert(registerIDWithName("EIP"));
+ programCounters.insert(registerIDWithName("RIP"));
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ stackPointers.insert(registerIDWithName("SP"));
+
+ programCounters.insert(registerIDWithName("PC"));
+ break;
+ }
+}
+
+const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
+ if (registerID >= RegVec.size()) // Unknown ID
+ return NULL;
+ else
+ return RegVec[registerID].c_str(); // Points into RegVec's storage
+}
+
+unsigned EDDisassembler::registerIDWithName(const char *name) const {
+ regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
+ if (iter == RegRMap.end())
+ return 0; // Name not found: the failure value is 0, not (unsigned)-1
+ else
+ return (*iter).second;
+}
+
+bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
+ return (stackPointers.find(registerID) != stackPointers.end()); // Set is filled by initMaps()
+}
+
+bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
+ return (programCounters.find(registerID) != programCounters.end()); // Set is filled by initMaps()
+}
+
+int EDDisassembler::printInst(std::string &str, MCInst &inst) {
+ PrinterMutex.acquire(); // InstPrinter, InstStream and InstString are shared
+
+ InstPrinter->printInst(&inst, *InstStream);
+ InstStream->flush(); // Push the printed text into InstString
+ str = *InstString;
+ InstString->clear(); // Leave the shared buffer empty for the next call
+
+ PrinterMutex.release();
+
+ return 0; // This implementation has no failure path
+}
+
+int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
+ SmallVectorImpl<AsmToken> &tokens,
+ const std::string &str) {
+ int ret = 0;
+
+ switch (Key.Arch) { // Only these four architectures are handled
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ case Triple::arm:
+ case Triple::thumb:
+ break;
+ }
+
+ const char *cStr = str.c_str();
+ MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
+
+ StringRef instName;
+ SMLoc instLoc;
+
+ SourceMgr sourceMgr;
+ sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
+ MCContext context(*AsmInfo);
+ OwningPtr<MCStreamer> streamer(createNullStreamer(context));
+ OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
+ context, *streamer,
+ *AsmInfo));
+ OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser,
+ *TargetMachine));
+
+ AsmToken OpcodeToken = genericParser->Lex();
+ AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
+
+ if (OpcodeToken.is(AsmToken::Identifier)) {
+ instName = OpcodeToken.getString();
+ instLoc = OpcodeToken.getLoc();
+
+ if (NextToken.isNot(AsmToken::Eof) &&
+ TargetParser->ParseInstruction(instName, instLoc, operands))
+ ret = -1; // A nonzero ParseInstruction() result is treated as failure
+ } else {
+ ret = -1; // The first token must be an identifier (the opcode)
+ }
+
+ ParserMutex.acquire(); // The two lexers below are shared members
+
+ if (!ret) {
+ GenericAsmLexer->setBuffer(buf); // Re-lex the same text to collect tokens
+
+ while (SpecificAsmLexer->Lex(),
+ SpecificAsmLexer->isNot(AsmToken::Eof) &&
+ SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
+ if (SpecificAsmLexer->is(AsmToken::Error)) {
+ ret = -1;
+ break;
+ }
+ tokens.push_back(SpecificAsmLexer->getTok());
+ }
+ }
+
+ ParserMutex.release();
+
+ return ret;
+}
+
+int EDDisassembler::llvmSyntaxVariant() const {
+ return LLVMSyntaxVariant; // Value computed by getLLVMSyntaxVariant() in the constructor
+}
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
new file mode 100644
index 0000000..0fb853d
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -0,0 +1,271 @@
+//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// disassembler class. The disassembler is responsible for vending individual
+// instructions according to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDDISASSEMBLER_H
+#define LLVM_EDDISASSEMBLER_H
+
+#include "EDInfo.inc"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+class AsmLexer;
+class AsmToken;
+class MCContext;
+class MCAsmInfo;
+class MCAsmLexer;
+class AsmParser;
+class TargetAsmLexer;
+class TargetAsmParser;
+class MCDisassembler;
+class MCInstPrinter;
+class MCInst;
+class MCParsedAsmOperand;
+class MCStreamer;
+template <typename T> class SmallVectorImpl;
+class SourceMgr;
+class Target;
+class TargetMachine;
+class TargetRegisterInfo;
+
+struct EDInstInfo;
+struct EDInst;
+struct EDOperand;
+struct EDToken;
+
+typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
+
+/// EDDisassembler - Encapsulates a disassembler for a single architecture and
+/// disassembly syntax. Also manages the static disassembler registry.
+struct EDDisassembler {
+ typedef enum {
+ /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86Intel = 0,
+ /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86ATT = 1,
+ kEDAssemblySyntaxARMUAL = 2
+ } AssemblySyntax;
+
+
+ ////////////////////
+ // Static members //
+ ////////////////////
+
+ /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
+ /// pair
+ struct CPUKey {
+ /// The architecture type
+ llvm::Triple::ArchType Arch;
+
+ /// The assembly syntax
+ AssemblySyntax Syntax;
+
+ /// operator== - Equality operator
+ bool operator==(const CPUKey &key) const {
+ return (Arch == key.Arch &&
+ Syntax == key.Syntax);
+ }
+
+ /// operator< - Less-than operator; compares Arch first, then Syntax
+ bool operator<(const CPUKey &key) const {
+ // Lexicographic order: a strict weak ordering, as std::map requires.
+ // The previous test treated some distinct keys as equivalent.
+ if (Arch != key.Arch)
+ return Arch < key.Arch;
+ return Syntax < key.Syntax;
+ }
+ };
+
+ typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
+
+ /// True if the disassembler registry has been initialized; false if not
+ static bool sInitialized;
+ /// A map from disassembler specifications to disassemblers. Populated
+ /// lazily.
+ static DisassemblerMap_t sDisassemblers;
+
+ /// getDisassembler - Returns the specified disassembler, or NULL on failure
+ ///
+ /// @arg arch - The desired architecture
+ /// @arg syntax - The desired disassembly syntax
+ static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
+ AssemblySyntax syntax);
+
+ /// getDisassembler - Returns the disassembler for a given combination of
+ /// CPU type, CPU subtype, and assembly syntax, or NULL on failure
+ ///
+ /// @arg str - The string representation of the architecture triple, e.g.,
+ /// "x86_64-apple-darwin"
+ /// @arg syntax - The disassembly syntax for the required disassembler
+ static EDDisassembler *getDisassembler(llvm::StringRef str,
+ AssemblySyntax syntax);
+
+ /// initialize - Initializes the disassembler registry and the LLVM backend
+ static void initialize();
+
+ ////////////////////////
+ // Per-object members //
+ ////////////////////////
+
+ /// True only if the object has been successfully initialized
+ bool Valid;
+ /// True if the disassembler can provide semantic information
+ bool HasSemantics;
+
+ /// The stream to write errors to
+ llvm::raw_ostream &ErrorStream;
+
+ /// The architecture/syntax pair for the current architecture
+ CPUKey Key;
+ /// The LLVM target corresponding to the disassembler
+ const llvm::Target *Tgt;
+ /// The target machine instance.
+ llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
+ /// The assembly information for the target architecture
+ llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
+ /// The disassembler for the target architecture
+ llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
+ /// The output string for the instruction printer; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<std::string> InstString;
+ /// The output stream for the disassembler; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
+ /// The instruction printer for the target architecture; must be guarded with
+ /// PrinterMutex when printing
+ llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
+ /// The mutex that guards the instruction printer's printing functions, which
+ /// use a shared stream
+ llvm::sys::Mutex PrinterMutex;
+ /// The array of instruction information provided by the TableGen backend for
+ /// the target architecture
+ const llvm::EDInstInfo *InstInfos;
+ /// The target-specific lexer for use in tokenizing strings, in
+ /// target-independent and target-specific portions
+ llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
+ llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
+ /// The guard for the above
+ llvm::sys::Mutex ParserMutex;
+ /// The LLVM number used for the target disassembly syntax variant
+ int LLVMSyntaxVariant;
+
+ typedef std::vector<std::string> regvec_t;
+ typedef std::map<std::string, unsigned> regrmap_t;
+
+ /// A vector of registers for quick mapping from LLVM register IDs to names
+ regvec_t RegVec;
+ /// A map of registers for quick mapping from register names to LLVM IDs
+ regrmap_t RegRMap;
+
+ /// A set of register IDs for aliases of the stack pointer for the current
+ /// architecture
+ std::set<unsigned> stackPointers;
+ /// A set of register IDs for aliases of the program counter for the current
+ /// architecture
+ std::set<unsigned> programCounters;
+
+ /// Constructor - initializes a disassembler with all the necessary objects,
+ /// which come pre-allocated from the registry accessor function
+ ///
+ /// @arg key - the architecture and disassembly syntax for the
+ /// disassembler
+ EDDisassembler(CPUKey& key);
+
+ /// valid - reports whether there was a failure in the constructor.
+ bool valid() {
+ return Valid;
+ }
+
+ /// hasSemantics - reports whether the disassembler can provide operands and
+ /// tokens.
+ bool hasSemantics() {
+ return HasSemantics;
+ }
+
+ ~EDDisassembler();
+
+ /// createInst - creates and returns an instruction given a callback and
+ /// memory address, or NULL on failure
+ ///
+ /// @arg byteReader - A callback function that provides machine code bytes
+ /// @arg address - The address of the first byte of the instruction,
+ /// suitable for passing to byteReader
+ /// @arg arg - An opaque argument for byteReader
+ EDInst *createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg);
+
+ /// initMaps - initializes RegVec, RegRMap, stackPointers and programCounters
+ /// using the provided register info
+ ///
+ /// @arg registerInfo - the register information to use as a source
+ void initMaps(const llvm::TargetRegisterInfo &registerInfo);
+ /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
+ /// register for a given register ID, or NULL on failure
+ ///
+ /// @arg registerID - the ID of the register to be queried
+ const char *nameWithRegisterID(unsigned registerID) const;
+ /// registerIDWithName - Returns the ID of a register for a given register
+ /// name, or 0 on failure
+ ///
+ /// @arg name - The name of the register
+ unsigned registerIDWithName(const char *name) const;
+
+ /// registerIsStackPointer - reports whether a register ID is an alias for the
+ /// stack pointer register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsStackPointer(unsigned registerID);
+ /// registerIsProgramCounter - reports whether a register ID is an alias for
+ /// the program counter register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsProgramCounter(unsigned registerID);
+
+ /// printInst - prints an MCInst to a string, returning 0 on success, or -1
+ /// otherwise
+ ///
+ /// @arg str - A reference to a string which is filled in with the string
+ /// representation of the instruction
+ /// @arg inst - A reference to the MCInst to be printed
+ int printInst(std::string& str,
+ llvm::MCInst& inst);
+
+ /// parseInst - extracts operands and tokens from a string for use in
+ /// tokenizing the string. Returns 0 on success, or -1 otherwise.
+ ///
+ /// @arg operands - A reference to a vector that will be filled in with the
+ /// parsed operands
+ /// @arg tokens - A reference to a vector that will be filled in with the
+ /// tokens
+ /// @arg str - The string representation of the instruction
+ int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
+ llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
+ const std::string &str);
+
+ /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
+ int llvmSyntaxVariant() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp
new file mode 100644
index 0000000..e22408f
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDInst.cpp
@@ -0,0 +1,207 @@
+//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's instruction class.
+// The instruction is responsible for vending the string representation,
+// individual tokens, and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDInst.h"
+#include "EDDisassembler.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+EDInst::EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *info) :
+ Disassembler(disassembler),
+ Inst(inst),
+ ThisInstInfo(info),
+ ByteSize(byteSize),
+ BranchTarget(-1), // -1 here and below: not identified (see parseOperands())
+ MoveSource(-1),
+ MoveTarget(-1) {
+ OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()]; // Dereferences info without a NULL check
+}
+
+EDInst::~EDInst() { // Deletes every operand and token, then the MCInst
+ unsigned int index;
+ unsigned int numOperands = Operands.size();
+
+ for (index = 0; index < numOperands; ++index)
+ delete Operands[index];
+
+ unsigned int numTokens = Tokens.size();
+
+ for (index = 0; index < numTokens; ++index)
+ delete Tokens[index];
+
+ delete Inst;
+}
+
+uint64_t EDInst::byteSize() {
+ return ByteSize; // Value passed to the constructor
+}
+
+int EDInst::stringify() {
+ if (StringifyResult.valid()) // Already attempted; reuse the cached status
+ return StringifyResult.result();
+
+ if (Disassembler.printInst(String, *Inst)) // Nonzero means printing failed
+ return StringifyResult.setResult(-1);
+
+ return StringifyResult.setResult(0);
+}
+
+int EDInst::getString(const char*& str) {
+ if (stringify()) // str is left untouched on failure
+ return -1;
+
+ str = String.c_str(); // Points into this EDInst's String member
+
+ return 0;
+}
+
+unsigned EDInst::instID() {
+ return Inst->getOpcode(); // The instruction ID is the MCInst opcode
+}
+
+bool EDInst::isBranch() {
+ if (ThisInstInfo)
+ return // Calls are reported as branches too
+ ThisInstInfo->instructionType == kInstructionTypeBranch ||
+ ThisInstInfo->instructionType == kInstructionTypeCall;
+ else
+ return false; // No instruction info available
+}
+
+bool EDInst::isMove() {
+ if (ThisInstInfo)
+ return ThisInstInfo->instructionType == kInstructionTypeMove;
+ else
+ return false; // No instruction info available
+}
+
+int EDInst::parseOperands() {
+ if (ParseResult.valid()) // Already attempted; reuse the cached status
+ return ParseResult.result();
+
+ if (!ThisInstInfo)
+ return ParseResult.setResult(-1);
+
+ unsigned int opIndex;
+ unsigned int mcOpIndex = 0; // Passed by reference to each EDOperand; presumably advanced there - confirm in EDOperand.cpp
+
+ for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) {
+ if (isBranch() &&
+ (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) {
+ BranchTarget = opIndex; // If several operands qualify, the last one wins
+ }
+ else if (isMove()) {
+ if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource)
+ MoveSource = opIndex;
+ else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)
+ MoveTarget = opIndex;
+ }
+
+ EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex);
+
+ Operands.push_back(operand); // Deleted by the EDInst destructor
+ }
+
+ return ParseResult.setResult(0);
+}
+
+int EDInst::branchTargetID() {
+ if (parseOperands())
+ return -1;
+ return BranchTarget; // Still -1 if no branch target operand was found
+}
+
+int EDInst::moveSourceID() {
+ if (parseOperands())
+ return -1;
+ return MoveSource; // Still -1 if no move source operand was found
+}
+
+int EDInst::moveTargetID() {
+ if (parseOperands())
+ return -1;
+ return MoveTarget; // Still -1 if no move target operand was found
+}
+
+int EDInst::numOperands() {
+ if (parseOperands()) // Operands is filled lazily by parseOperands()
+ return -1;
+ return Operands.size();
+}
+
+int EDInst::getOperand(EDOperand *&operand, unsigned int index) {
+ if (parseOperands())
+ return -1;
+
+ if (index >= Operands.size()) // Out-of-range index; operand is left untouched
+ return -1;
+
+ operand = Operands[index]; // Still owned by this EDInst
+ return 0;
+}
+
+int EDInst::tokenize() {
+ if (TokenizeResult.valid()) // Already attempted; reuse the cached status
+ return TokenizeResult.result();
+
+ if (stringify()) // Tokenizing works on the printed String
+ return TokenizeResult.setResult(-1);
+
+ return TokenizeResult.setResult(EDToken::tokenize(Tokens,
+ String,
+ OperandOrder,
+ Disassembler));
+
+}
+
+int EDInst::numTokens() {
+ if (tokenize()) // Tokens is filled lazily by tokenize()
+ return -1;
+ return Tokens.size();
+}
+
+int EDInst::getToken(EDToken *&token, unsigned int index) {
+ if (tokenize() || index >= Tokens.size()) // Tokenizing failed or index is out of range
+ return -1; // token is left untouched on failure
+ token = Tokens[index]; // Still owned by this EDInst
+ return 0;
+}
+
+#ifdef __BLOCKS__
+int EDInst::visitTokens(EDTokenVisitor_t visitor) {
+ if (tokenize())
+ return -1;
+
+ tokvec_t::iterator iter;
+
+ for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) {
+ int ret = visitor(*iter);
+ if (ret == 1) // The visitor asked to stop early; not an error
+ return 0;
+ if (ret != 0) // Any other nonzero value is reported as an error
+ return -1;
+ }
+
+ return 0;
+}
+#endif
diff --git a/lib/MC/MCDisassembler/EDInst.h b/lib/MC/MCDisassembler/EDInst.h
new file mode 100644
index 0000000..0a98fea
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDInst.h
@@ -0,0 +1,181 @@
+//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// instruction class. The instruction is responsible for vending the string
+// representation, individual tokens and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDINST_H
+#define LLVM_EDINST_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+ class MCInst;
+ struct EDInstInfo;
+ struct EDToken;
+ struct EDDisassembler;
+ struct EDOperand;
+
+#ifdef __BLOCKS__
+ typedef int (^EDTokenVisitor_t)(EDToken *token);
+#endif
+
+/// CachedResult - Encapsulates the result of a function along with the validity
+/// of that result, so that slow functions don't need to run twice
+struct CachedResult {
+ /// True if the result has been obtained by executing the function
+ bool Valid;
+ /// The result last obtained from the function
+ int Result;
+
+ /// Constructor - Initializes an invalid result; Result is not initialized
+ CachedResult() : Valid(false) { }
+ /// valid - Returns true if the result has been obtained by executing the
+ /// function and false otherwise
+ bool valid() { return Valid; }
+ /// result - Returns the result of the function or an undefined value if
+ /// valid() is false
+ int result() { return Result; }
+ /// setResult - Sets the result of the function and declares it valid
+ /// returning the result (so that setResult() can be called from inside a
+ /// return statement)
+ /// @arg result - The result of the function
+ int setResult(int result) { Result = result; Valid = true; return result; }
+};
+
+/// EDInst - Encapsulates a single instruction, which can be queried for its
+/// string representation, as well as its operands and tokens
+struct EDInst {
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+ /// The containing MCInst
+ llvm::MCInst *Inst;
+ /// The instruction information provided by TableGen for this instruction
+ const llvm::EDInstInfo *ThisInstInfo;
+ /// The number of bytes for the machine code representation of the instruction
+ uint64_t ByteSize;
+
+ /// The result of the stringify() function
+ CachedResult StringifyResult;
+ /// The string representation of the instruction
+ std::string String;
+ /// The order in which operands from the InstInfo's operand information appear
+ /// in String
+ const char* OperandOrder;
+
+ /// The result of the parseOperands() function
+ CachedResult ParseResult;
+ typedef llvm::SmallVector<EDOperand*, 5> opvec_t;
+ /// The instruction's operands
+ opvec_t Operands;
+ /// The operand corresponding to the target, if the instruction is a branch
+ int BranchTarget;
+ /// The operand corresponding to the source, if the instruction is a move
+ int MoveSource;
+ /// The operand corresponding to the target, if the instruction is a move
+ int MoveTarget;
+
+ /// The result of the tokenize() function
+ CachedResult TokenizeResult;
+ typedef std::vector<EDToken*> tokvec_t;
+ /// The instruction's tokens
+ tokvec_t Tokens;
+
+ /// Constructor - initializes an instruction given the output of the LLVM
+ /// C++ disassembler
+ ///
+ /// @arg inst - The MCInst, which will now be owned by this object
+ /// @arg byteSize - The size of the consumed instruction, in bytes
+ /// @arg disassembler - The parent disassembler
+ /// @arg instInfo - The instruction information produced by the table
+ /// generator for this instruction
+ EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *instInfo);
+ ~EDInst();
+
+ /// byteSize - returns the number of bytes consumed by the machine code
+ /// representation of the instruction
+ uint64_t byteSize();
+ /// instID - returns the LLVM instruction ID of the instruction
+ unsigned instID();
+
+ /// stringify - populates the String member of the instruction, caching the
+ /// outcome; returns 0 on success or -1 otherwise
+ int stringify();
+ /// getString - retrieves a pointer to the string representation of the
+ /// instruction, returning 0 on success or -1 otherwise
+ ///
+ /// @arg str - A reference to a pointer that, on success, is set to point to
+ /// the string representation of the instruction; this string is still owned
+ /// by the instruction and will be deleted when it is
+ int getString(const char *&str);
+
+ /// isBranch - Returns true if the instruction is a branch
+ bool isBranch();
+ /// isMove - Returns true if the instruction is a move
+ bool isMove();
+
+ /// parseOperands - populates the Operands member of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int parseOperands();
+ /// branchTargetID - returns the ID (suitable for use with getOperand()) of
+ /// the target operand if the instruction is a branch, or -1 otherwise
+ int branchTargetID();
+ /// moveSourceID - returns the ID of the source operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveSourceID();
+ /// moveTargetID - returns the ID of the target operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveTargetID();
+
+ /// numOperands - returns the number of operands available to retrieve, or -1
+ /// on error
+ int numOperands();
+ /// getOperand - retrieves an operand from the instruction's operand list by
+ /// index, returning 0 on success or -1 on error
+ ///
+ /// @arg operand - A reference whose target is pointed at the operand on
+ /// success, although the operand is still owned by the EDInst
+ /// @arg index - The index of the operand in the instruction
+ int getOperand(EDOperand *&operand, unsigned int index);
+
+ /// tokenize - populates the Tokens member of the instruction, returning 0 on
+ /// success or -1 otherwise
+ int tokenize();
+ /// numTokens - returns the number of tokens in the instruction, or -1 on
+ /// error
+ int numTokens();
+ /// getToken - retrieves a token from the instruction's token list by index,
+ /// returning 0 on success or -1 on error
+ ///
+ /// @arg token - A reference whose target is pointed at the token on success,
+ /// although the token is still owned by the EDInst
+ /// @arg index - The index of the token in the instruction
+ int getToken(EDToken *&token, unsigned int index);
+
+#ifdef __BLOCKS__
+ /// visitTokens - Visits each token in turn and applies a block to it,
+ /// returning 0 if all tokens are visited and/or the block signals
+ /// termination by returning 1; returns -1 on error
+ ///
+ /// @arg visitor - The visitor block to apply to all tokens.
+ int visitTokens(EDTokenVisitor_t visitor);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
new file mode 100644
index 0000000..2aed123
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -0,0 +1,282 @@
+//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's operand class. The
+// operand is responsible for allowing evaluation given a particular register
+// context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDOperand.h"
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+using namespace llvm;
+
+EDOperand::EDOperand(const EDDisassembler &disassembler,
+                     const EDInst &inst,
+                     unsigned int opIndex,
+                     unsigned int &mcOpIndex) :
+  Disassembler(disassembler),
+  Inst(inst),
+  OpIndex(opIndex),
+  MCOpIndex(mcOpIndex) {
+  // MCOpIndex (set above) keeps the caller's running MCInst index as of entry.
+  // The caller's variable is then advanced by the number of MCInst operands
+  // this operand spans, which depends on architecture and operand type.
+  // Unlisted operand types, and any other architecture, span 0.
+  unsigned int span = 0;
+
+  switch (Disassembler.Key.Arch) {
+  default:
+    break;
+  case Triple::x86:
+  case Triple::x86_64:
+    switch (inst.ThisInstInfo->operandTypes[opIndex]) {
+    case kOperandTypeImmediate:
+    case kOperandTypeRegister:
+    case kOperandTypeX86PCRelative:
+      span = 1;
+      break;
+    case kOperandTypeX86EffectiveAddress:
+      span = 4;
+      break;
+    case kOperandTypeX86Memory:
+      span = 5;
+      break;
+    default:
+      break;
+    }
+    break;
+  case Triple::arm:
+  case Triple::thumb:
+    switch (inst.ThisInstInfo->operandTypes[opIndex]) {
+    case kOperandTypeImmediate:
+    case kOperandTypeRegister:
+    case kOperandTypeARMBranchTarget:
+    case kOperandTypeARMSoImm:
+    case kOperandTypeThumb2SoImm:
+    case kOperandTypeARMSoImm2Part:
+    case kOperandTypeARMPredicate:
+    case kOperandTypeThumbITMask:
+    case kOperandTypeThumb2AddrModeImm8Offset:
+    case kOperandTypeARMTBAddrMode:
+    case kOperandTypeThumb2AddrModeImm8s4Offset:
+      span = 1;
+      break;
+    case kOperandTypeThumb2SoReg:
+    case kOperandTypeARMAddrMode2Offset:
+    case kOperandTypeARMAddrMode3Offset:
+    case kOperandTypeARMAddrMode4:
+    case kOperandTypeARMAddrMode5:
+    case kOperandTypeARMAddrModePC:
+    case kOperandTypeThumb2AddrModeImm8:
+    case kOperandTypeThumb2AddrModeImm12:
+    case kOperandTypeThumb2AddrModeImm8s4:
+    case kOperandTypeThumbAddrModeRR:
+    case kOperandTypeThumbAddrModeSP:
+      span = 2;
+      break;
+    case kOperandTypeARMSoReg:
+    case kOperandTypeARMAddrMode2:
+    case kOperandTypeARMAddrMode3:
+    case kOperandTypeThumb2AddrModeSoReg:
+    case kOperandTypeThumbAddrModeS1:
+    case kOperandTypeThumbAddrModeS2:
+    case kOperandTypeThumbAddrModeS4:
+    case kOperandTypeARMAddrMode6Offset:
+      span = 3;
+      break;
+    case kOperandTypeARMAddrMode6:
+      span = 4;
+      break;
+    case kOperandTypeARMRegisterList:
+    default:
+      break;
+    }
+    break;
+  }
+
+  mcOpIndex += span;
+}
+
+EDOperand::~EDOperand() { // empty body: no explicit cleanup is performed here
+}
+
+int EDOperand::evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg) {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+ // Architecture/operand-type combinations not handled below yield -1.
+ switch (Disassembler.Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg); // the callback's status is returned as-is
+ }
+ case kOperandTypeX86PCRelative:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+ // The result is the displacement added to the value the callback reports for RIP.
+ uint64_t ripVal;
+
+ // TODO fix how we do this
+
+ if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
+ return -1;
+
+ result = ripVal + displacement;
+ return 0;
+ }
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86EffectiveAddress:
+ {
+ unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
+ unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
+ //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
+ // The segment register (MCOpIndex+4) is not factored into the address.
+ uint64_t addr = 0;
+ // addr = base + scale * index + displacement; a register ID of 0 is skipped.
+ if (baseReg) {
+ uint64_t baseVal;
+ if (callback(&baseVal, baseReg, arg))
+ return -1;
+ addr += baseVal;
+ }
+
+ if (indexReg) {
+ uint64_t indexVal;
+ if (callback(&indexVal, indexReg, arg))
+ return -1;
+ addr += (scaleAmount * indexVal);
+ }
+
+ addr += displacement;
+
+ result = addr;
+ return 0;
+ }
+ }
+ break; // not reached: every case of the inner switch above returns
+ case Triple::arm:
+ case Triple::thumb:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg); // the callback's status is returned as-is
+ }
+ case kOperandTypeARMBranchTarget:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+ // The result is the displacement added to the value the callback reports for PC.
+ uint64_t pcVal;
+
+ if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg))
+ return -1;
+
+ result = pcVal + displacement;
+ return 0;
+ }
+ }
+ }
+ // Fallback for any path that leaves the switches above without returning.
+ return -1;
+}
+
+int EDOperand::isRegister() { // 1 if the operand's type is kOperandTypeRegister, else 0
+  return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeRegister); // types, not flags
+}
+
+unsigned EDOperand::regVal() { // getReg() of the MCInst operand at MCOpIndex; operand kind is not checked here
+ return Inst.Inst->getOperand(MCOpIndex).getReg();
+}
+
+int EDOperand::isImmediate() { // 1 if the operand's type is kOperandTypeImmediate, else 0
+  return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeImmediate); // types, not flags
+}
+
+uint64_t EDOperand::immediateVal() { // getImm() of the MCInst operand at MCOpIndex, converted to uint64_t
+ return Inst.Inst->getOperand(MCOpIndex).getImm();
+}
+
+int EDOperand::isMemory() { // 1 if the operand's type is in the list below, else 0
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+ // Note the list also covers PC-relative, branch-target and So* operand types.
+ switch (operandType) {
+ default:
+ return 0;
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86PCRelative:
+ case kOperandTypeX86EffectiveAddress:
+ case kOperandTypeARMSoReg:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrMode6:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeThumbAddrModeS1:
+ case kOperandTypeThumbAddrModeS2:
+ case kOperandTypeThumbAddrModeS4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ return 1;
+ }
+}
+
+#ifdef __BLOCKS__
+struct RegisterReaderWrapper { // carries a register-reading block through the callback's void *arg
+ EDOperand::EDRegisterBlock_t regBlock;
+};
+
+int readerWrapperCallback(uint64_t *value,
+                          unsigned regID,
+                          void *arg) { // arg must point at a RegisterReaderWrapper
+  RegisterReaderWrapper *boxed = static_cast<RegisterReaderWrapper *>(arg);
+  return boxed->regBlock(value, regID);
+}
+
+int EDOperand::evaluate(uint64_t &result,
+                        EDRegisterBlock_t regBlock) {
+  // Box the block so it can travel through the callback's opaque argument;
+  // readerWrapperCallback unboxes it and invokes the block.
+  RegisterReaderWrapper boxed;
+  boxed.regBlock = regBlock;
+  return evaluate(result, readerWrapperCallback, static_cast<void *>(&boxed));
+}
+#endif
diff --git a/lib/MC/MCDisassembler/EDOperand.h b/lib/MC/MCDisassembler/EDOperand.h
new file mode 100644
index 0000000..50260ec
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDOperand.h
@@ -0,0 +1,91 @@
+//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// operand class. The operand is responsible for allowing evaluation given a
+// particular register context.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDOPERAND_H
+#define LLVM_EDOPERAND_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+struct EDDisassembler;
+struct EDInst;
+
+typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
+ void* arg);
+
+
+/// EDOperand - Encapsulates a single operand, which can be evaluated by the
+/// client
+struct EDOperand {
+ /// The parent disassembler
+ const EDDisassembler &Disassembler;
+ /// The parent instruction
+ const EDInst &Inst;
+
+ /// The index of the operand in the EDInst
+ unsigned int OpIndex;
+ /// The index of the first component of the operand in the MCInst
+ unsigned int MCOpIndex;
+
+ /// Constructor - Initializes an EDOperand
+ ///
+ /// @arg disassembler - The disassembler responsible for the operand
+ /// @arg inst - The instruction containing this operand
+ /// @arg opIndex - The index of the operand in inst
+ /// @arg mcOpIndex - The operand's first index in the MCInst; advanced past it
+ EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex);
+ ~EDOperand();
+
+ /// evaluate - Returns the numeric value of an operand to the extent possible,
+ /// returning 0 on success or -1 if there was some problem (such as a
+ /// register not being readable)
+ ///
+ /// @arg result - A reference whose target is filled in with the value of
+ /// the operand (the address if it is a memory operand)
+ /// @arg callback - A function to call to obtain register values
+ /// @arg arg - An opaque argument to pass to callback
+ int evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg);
+
+ /// isRegister - Returns 1 if the operand is a register or 0 otherwise
+ int isRegister();
+ /// regVal - Returns the register stored in the operand's MCInst operand.
+ unsigned regVal();
+
+ /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise
+ int isImmediate();
+ /// immediateVal - Returns the immediate stored in the operand's MCInst operand.
+ uint64_t immediateVal();
+
+ /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise
+ int isMemory();
+
+#ifdef __BLOCKS__
+ typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
+
+ /// evaluate - Like evaluate for a callback, but uses a block instead
+ int evaluate(uint64_t &result,
+ EDRegisterBlock_t regBlock);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp
new file mode 100644
index 0000000..400e164
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDToken.cpp
@@ -0,0 +1,206 @@
+//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembler library's token class. The
+// token is responsible for vending information about the token, such as its
+// type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDToken.h"
+#include "EDDisassembler.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+EDToken::EDToken(StringRef str,
+                 enum tokenType type,
+                 uint64_t localType,
+                 EDDisassembler &disassembler) :
+  Disassembler(disassembler),
+  Str(str),
+  Type(type),
+  LocalType(localType),
+  OperandID(-1), LiteralSign(false), LiteralAbsoluteValue(0), RegisterID(0) {
+} // literal/register fields start at inert values until makeLiteral/makeRegister set them
+
+EDToken::~EDToken() { // empty body: no explicit cleanup is performed here
+}
+
+void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) { // retags the token as a literal
+ Type = kTokenLiteral;
+ LiteralSign = sign;
+ LiteralAbsoluteValue = absoluteValue;
+}
+
+void EDToken::makeRegister(unsigned registerID) { // retags the token as a register
+ Type = kTokenRegister;
+ RegisterID = registerID;
+}
+
+void EDToken::setOperandID(int operandID) { // used by tokenize() for tokens lying inside an operand
+ OperandID = operandID;
+}
+
+enum EDToken::tokenType EDToken::type() const { // reflects any retagging by makeLiteral/makeRegister
+ return Type;
+}
+
+uint64_t EDToken::localType() const { // the value given at construction; tokenize() passes the AsmToken kind
+ return LocalType;
+}
+
+StringRef EDToken::string() const { // the StringRef given at construction, returned as-is
+ return Str;
+}
+
+int EDToken::operandID() const { // -1 (the constructor's value) unless setOperandID() was called
+ return OperandID;
+}
+
+int EDToken::literalSign() const {
+  // -1 marks a non-literal token; for literals, 1 means negative and 0 not.
+  const bool isLiteral = (Type == kTokenLiteral);
+  return isLiteral ? (LiteralSign ? 1 : 0) : -1;
+}
+
+int EDToken::literalAbsoluteValue(uint64_t &value) const { // value is written only for literals
+  const bool isLiteral = (Type == kTokenLiteral);
+  if (isLiteral)
+    value = LiteralAbsoluteValue;
+  return isLiteral ? 0 : -1;
+}
+
+int EDToken::registerID(unsigned &registerID) const { // out-param written only for register tokens
+  const bool isRegister = (Type == kTokenRegister);
+  if (isRegister)
+    registerID = RegisterID;
+  return isRegister ? 0 : -1;
+}
+
+int EDToken::tokenize(std::vector<EDToken*> &tokens,
+                      std::string &str,
+                      const char *operandOrder,
+                      EDDisassembler &disassembler) {
+  SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
+  SmallVector<AsmToken, 10> asmTokens;
+  // parseInst is handed both vectors and str; a nonzero result means failure.
+  if (disassembler.parseInst(parsedOperands, asmTokens, str))
+    return -1;
+
+  SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
+  unsigned int operandIndex;
+  SmallVectorImpl<AsmToken>::iterator tokenIterator;
+
+  operandIterator = parsedOperands.begin();
+  operandIndex = 0;
+
+  bool readOpcode = false;
+  // End of the previous token; with no tokens, begin() must not be dereferenced.
+  const char *wsPointer =
+    asmTokens.empty() ? 0 : asmTokens.begin()->getLoc().getPointer();
+  for (tokenIterator = asmTokens.begin();
+       tokenIterator != asmTokens.end();
+       ++tokenIterator) {
+    SMLoc tokenLoc = tokenIterator->getLoc();
+
+    const char *tokenPointer = tokenLoc.getPointer();
+    // Text between the previous token's end and this token becomes whitespace.
+    if (tokenPointer > wsPointer) {
+      unsigned long wsLength = tokenPointer - wsPointer;
+
+      EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength),
+                                             EDToken::kTokenWhitespace,
+                                             0,
+                                             disassembler);
+
+      tokens.push_back(whitespaceToken);
+    }
+
+    wsPointer = tokenPointer + tokenIterator->getString().size();
+    // Skip parsed operands that end before this token starts.
+    while (operandIterator != parsedOperands.end() &&
+           tokenLoc.getPointer() >
+           (*operandIterator)->getEndLoc().getPointer()) {
+      ++operandIterator;
+      ++operandIndex;
+    }
+
+    EDToken *token;
+
+    switch (tokenIterator->getKind()) {
+    case AsmToken::Identifier:
+      if (!readOpcode) {
+        token = new EDToken(tokenIterator->getString(),
+                            EDToken::kTokenOpcode,
+                            (uint64_t)tokenIterator->getKind(),
+                            disassembler);
+        readOpcode = true;
+        break;
+      }
+      // any identifier that isn't an opcode is mere punctuation; so we fall
+      // through
+    default:
+      token = new EDToken(tokenIterator->getString(),
+                          EDToken::kTokenPunctuation,
+                          (uint64_t)tokenIterator->getKind(),
+                          disassembler);
+      break;
+    case AsmToken::Integer:
+      {
+        token = new EDToken(tokenIterator->getString(),
+                            EDToken::kTokenLiteral,
+                            (uint64_t)tokenIterator->getKind(),
+                            disassembler);
+
+        int64_t intVal = tokenIterator->getIntVal();
+        // Negate in unsigned arithmetic: -intVal overflows for the minimum int64_t.
+        if (intVal < 0)
+          token->makeLiteral(true, 0 - (uint64_t)intVal);
+        else
+          token->makeLiteral(false, intVal);
+        break;
+      }
+    case AsmToken::Register:
+      {
+        token = new EDToken(tokenIterator->getString(),
+                            EDToken::kTokenRegister,
+                            (uint64_t)tokenIterator->getKind(),
+                            disassembler);
+
+        token->makeRegister((unsigned)tokenIterator->getRegVal());
+        break;
+      }
+    }
+
+    if (operandIterator != parsedOperands.end() &&
+        tokenLoc.getPointer() >=
+        (*operandIterator)->getStartLoc().getPointer()) {
+      /// operandIndex == 0 means the operand is the instruction (which the
+      /// AsmParser treats as an operand but edis does not).  We therefore skip
+      /// operandIndex == 0 and subtract 1 from all other operand indices.
+
+      if (operandIndex > 0)
+        token->setOperandID(operandOrder[operandIndex - 1]);
+    }
+
+    tokens.push_back(token);
+  }
+
+  return 0;
+}
+
+int EDToken::getString(const char*& buf) {
+  // Build the owned copy of Str on demand (redone while it is empty), then expose it.
+  if (PermStr.empty())
+    PermStr = Str.str();
+  buf = PermStr.c_str();
+  return 0;
+}
diff --git a/lib/MC/MCDisassembler/EDToken.h b/lib/MC/MCDisassembler/EDToken.h
new file mode 100644
index 0000000..ab596f5
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDToken.h
@@ -0,0 +1,138 @@
+//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's token
+// class. The token is responsible for vending information about the token,
+// such as its type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDTOKEN_H
+#define LLVM_EDTOKEN_H
+
+#include "llvm/ADT/StringRef.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+struct EDDisassembler;
+
+/// EDToken - Encapsulates a single token, which can provide a string
+/// representation of itself or interpret itself in various ways, depending
+/// on the token type.
+struct EDToken {
+ enum tokenType {
+ kTokenWhitespace,
+ kTokenOpcode,
+ kTokenLiteral,
+ kTokenRegister,
+ kTokenPunctuation
+ };
+
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+
+ /// The token's string representation
+ llvm::StringRef Str;
+ /// The token's string representation, but in a form suitable for export
+ std::string PermStr;
+ /// The type of the token, as exposed through the external API
+ enum tokenType Type;
+ /// The type of the token, as recorded by the syntax-specific tokenizer
+ uint64_t LocalType;
+ /// The operand corresponding to the token, or -1 if the token is not
+ /// part of an operand.
+ int OperandID;
+
+ /// The sign if the token is a literal (true if negative); set by makeLiteral
+ bool LiteralSign;
+ /// The absolute value if the token is a literal; set by makeLiteral
+ uint64_t LiteralAbsoluteValue;
+ /// The LLVM register ID if the token is a register name; set by makeRegister
+ unsigned RegisterID;
+
+ /// Constructor - Initializes an EDToken with the information common to all
+ /// tokens
+ ///
+ /// @arg str - The string corresponding to the token
+ /// @arg type - The token's type as exposed through the public API
+ /// @arg localType - The token's type as recorded by the tokenizer
+ /// @arg disassembler - The disassembler responsible for the token
+ EDToken(llvm::StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler);
+
+ /// makeLiteral - Adds the information specific to a literal
+ ///
+ /// @arg sign - The sign of the literal (1 if negative, 0
+ /// otherwise)
+ /// @arg absoluteValue - The absolute value of the literal
+ void makeLiteral(bool sign, uint64_t absoluteValue);
+ /// makeRegister - Adds the information specific to a register
+ ///
+ /// @arg registerID - The LLVM register ID
+ void makeRegister(unsigned registerID);
+
+ /// setOperandID - Links the token to a numbered operand
+ ///
+ /// @arg operandID - The operand ID to link to
+ void setOperandID(int operandID);
+
+ ~EDToken();
+
+ /// type - Returns the public type of the token
+ enum tokenType type() const;
+ /// localType - Returns the tokenizer-specific type of the token
+ uint64_t localType() const;
+ /// string - Returns the string representation of the token
+ llvm::StringRef string() const;
+ /// operandID - Returns the operand ID of the token
+ int operandID() const;
+
+ /// literalSign - Returns the sign of the token
+ /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal)
+ int literalSign() const;
+ /// literalAbsoluteValue - Retrieves the absolute value of the token, and
+ /// returns -1 if the token is not a literal
+ /// @arg value - A reference to a value that is filled in with the absolute
+ /// value, if it is valid
+ int literalAbsoluteValue(uint64_t &value) const;
+ /// registerID - Retrieves the register ID of the token, and returns -1 if the
+ /// token is not a register
+ ///
+ /// @arg registerID - A reference to a value that is filled in with the
+ /// register ID, if it is valid
+ int registerID(unsigned &registerID) const;
+
+ /// tokenize - Tokenizes a string using the platform- and syntax-specific
+ /// tokenizer, and returns 0 on success (-1 on failure)
+ ///
+ /// @arg tokens - A vector that will be filled in with pointers to
+ /// allocated tokens
+ /// @arg str - The string, as output by the AsmPrinter
+ /// @arg operandOrder - The order of the operands from the operandFlags array
+ /// as they appear in str
+ /// @arg disassembler - The disassembler for the desired target and
+ /// assembly syntax
+ static int tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler);
+
+ /// getString - Directs a character pointer to the string, returning 0 on
+ /// success (-1 on failure)
+ /// @arg buf - A reference to a pointer that is set to point to the string.
+ /// The string is still owned by the token.
+ int getString(const char*& buf);
+};
+
+} // end namespace llvm
+#endif
diff --git a/lib/MC/Makefile b/lib/MC/Makefile
index a661fa6..bf8b7c0 100644
--- a/lib/MC/Makefile
+++ b/lib/MC/Makefile
@@ -10,7 +10,7 @@
LEVEL = ../..
LIBRARYNAME = LLVMMC
BUILD_ARCHIVE := 1
-PARALLEL_DIRS := MCParser
+PARALLEL_DIRS := MCParser MCDisassembler
include $(LEVEL)/Makefile.common