diff options
| author | Stephen Hines <srhines@google.com> | 2013-06-12 13:32:42 -0700 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2013-06-12 13:32:42 -0700 |
| commit | 1878f9a7874b1ff569d745c0269f49d3daf7203d (patch) | |
| tree | 19a8dbaaedf6a056c617e87596b32d3f452af137 /include/llvm/MC | |
| parent | 7a57f27b857ec4b243d83d392a399f02fc196c0a (diff) | |
| parent | 100fbdd06be7590b23c4707a98cd605bdb519498 (diff) | |
| download | external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.zip external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.tar.gz external_llvm-1878f9a7874b1ff569d745c0269f49d3daf7203d.tar.bz2 | |
Merge commit '100fbdd06be7590b23c4707a98cd605bdb519498' into merge_20130612
Diffstat (limited to 'include/llvm/MC')
| -rw-r--r-- | include/llvm/MC/MCAsmInfo.h | 18 | ||||
| -rw-r--r-- | include/llvm/MC/MCAtom.h | 174 | ||||
| -rw-r--r-- | include/llvm/MC/MCDisassembler.h | 37 | ||||
| -rw-r--r-- | include/llvm/MC/MCDwarf.h | 11 | ||||
| -rw-r--r-- | include/llvm/MC/MCELFObjectWriter.h | 7 | ||||
| -rw-r--r-- | include/llvm/MC/MCExpr.h | 6 | ||||
| -rw-r--r-- | include/llvm/MC/MCExternalSymbolizer.h | 58 | ||||
| -rw-r--r-- | include/llvm/MC/MCFunction.h | 122 | ||||
| -rw-r--r-- | include/llvm/MC/MCInstrAnalysis.h | 11 | ||||
| -rw-r--r-- | include/llvm/MC/MCModule.h | 93 | ||||
| -rw-r--r-- | include/llvm/MC/MCObjectDisassembler.h | 69 | ||||
| -rw-r--r-- | include/llvm/MC/MCObjectSymbolizer.h | 75 | ||||
| -rw-r--r-- | include/llvm/MC/MCParser/MCAsmParser.h | 12 | ||||
| -rw-r--r-- | include/llvm/MC/MCRegisterInfo.h | 130 | ||||
| -rw-r--r-- | include/llvm/MC/MCRelocationInfo.h | 55 | ||||
| -rw-r--r-- | include/llvm/MC/MCSymbolizer.h | 81 | ||||
| -rw-r--r-- | include/llvm/MC/MachineLocation.h | 29 |
17 files changed, 841 insertions, 147 deletions
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index d020de3..781a9d0 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -17,6 +17,7 @@ #define LLVM_MC_MCASMINFO_H #include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MachineLocation.h" #include <cassert> #include <vector> @@ -88,6 +89,10 @@ namespace llvm { /// which is needed to compute the size of an inline asm. unsigned MaxInstLength; // Defaults to 4. + /// MinInstAlignment - Every possible instruction length is a multiple of + /// this value. Factored out in .debug_frame and .debug_line. + unsigned MinInstAlignment; // Defaults to 1. + /// PCSymbol - The symbol used to represent the current PC. Used in PC /// relative expressions. const char *PCSymbol; // Defaults to "$". @@ -332,7 +337,7 @@ namespace llvm { //===--- Prologue State ----------------------------------------------===// - std::vector<MachineMove> InitialFrameState; + std::vector<MCCFIInstruction> InitialFrameState; public: explicit MCAsmInfo(); @@ -428,6 +433,9 @@ namespace llvm { unsigned getMaxInstLength() const { return MaxInstLength; } + unsigned getMinInstAlignment() const { + return MinInstAlignment; + } const char *getPCSymbol() const { return PCSymbol; } @@ -567,11 +575,11 @@ namespace llvm { return DwarfRegNumForCFI; } - void addInitialFrameState(MCSymbol *label, const MachineLocation &D, - const MachineLocation &S) { - InitialFrameState.push_back(MachineMove(label, D, S)); + void addInitialFrameState(const MCCFIInstruction &Inst) { + InitialFrameState.push_back(Inst); } - const std::vector<MachineMove> &getInitialFrameState() const { + + const std::vector<MCCFIInstruction> &getInitialFrameState() const { return InitialFrameState; } }; diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h index ae5bf0b..6a93798 100644 --- a/include/llvm/MC/MCAtom.h +++ b/include/llvm/MC/MCAtom.h @@ -1,4 +1,4 @@ -//===-- llvm/MC/MCAtom.h - MCAtom class ---------------------*- C++ -*-===// +//===-- llvm/MC/MCAtom.h ----------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -9,7 +9,7 @@ // // This file contains the declaration of the MCAtom class, which is used to // represent a contiguous region in a decoded object that is uniformly data or -// instructions; +// instructions. // //===----------------------------------------------------------------------===// @@ -24,45 +24,169 @@ namespace llvm { class MCModule; -/// MCData - An entry in a data MCAtom. -// NOTE: This may change to a more complex type in the future. -typedef uint8_t MCData; +class MCAtom; +class MCTextAtom; +class MCDataAtom; /// MCAtom - Represents a contiguous range of either instructions (a TextAtom) /// or data (a DataAtom). Address ranges are expressed as _closed_ intervals. class MCAtom { - friend class MCModule; - typedef enum { TextAtom, DataAtom } AtomType; - - AtomType Type; +public: + virtual ~MCAtom() {} + + enum AtomKind { TextAtom, DataAtom }; + AtomKind getKind() const { return Kind; } + + /// \brief Get the start address of the atom. + uint64_t getBeginAddr() const { return Begin; } + /// \brief Get the end address, i.e. the last one inside the atom. + uint64_t getEndAddr() const { return End; } + + /// \name Atom modification methods: + /// When modifying a TextAtom, keep instruction boundaries in mind. + /// For instance, split must me given the start address of an instruction. + /// @{ + + /// \brief Splits the atom in two at a given address. + /// \param SplitPt Address at which to start a new atom, splitting this one. + /// \returns The newly created atom starting at \p SplitPt. + virtual MCAtom *split(uint64_t SplitPt) = 0; + + /// \brief Truncates an atom, discarding everything after \p TruncPt. + /// \param TruncPt Last byte address to be contained in this atom. + virtual void truncate(uint64_t TruncPt) = 0; + /// @} + + /// \name Naming: + /// + /// This is mostly for display purposes, and may contain anything that hints + /// at what the atom contains: section or symbol name, BB start address, .. + /// @{ + StringRef getName() const { return Name; } + void setName(StringRef NewName) { Name = NewName.str(); } + /// @} + +protected: + const AtomKind Kind; + std::string Name; MCModule *Parent; uint64_t Begin, End; - std::vector<std::pair<uint64_t, MCInst> > Text; - std::vector<MCData> Data; + friend class MCModule; + MCAtom(AtomKind K, MCModule *P, uint64_t B, uint64_t E) + : Kind(K), Name("(unknown)"), Parent(P), Begin(B), End(E) { } + + /// \name Atom remapping helpers + /// @{ + + /// \brief Remap the atom, using the given range, updating Begin/End. + /// One or both of the bounds can remain the same, but overlapping with other + /// atoms in the module is still forbidden. + void remap(uint64_t NewBegin, uint64_t NewEnd); + + /// \brief Remap the atom to prepare for a truncation at TruncPt. + /// Equivalent to: + /// \code + /// // Bound checks + /// remap(Begin, TruncPt); + /// \endcode + void remapForTruncate(uint64_t TruncPt); + + /// \brief Remap the atom to prepare for a split at SplitPt. + /// The bounds for the resulting atoms are returned in {L,R}{Begin,End}. + /// The current atom is truncated to \p LEnd. + void remapForSplit(uint64_t SplitPt, + uint64_t &LBegin, uint64_t &LEnd, + uint64_t &RBegin, uint64_t &REnd); + /// @} +}; - // Private constructor - only callable by MCModule - MCAtom(AtomType T, MCModule *P, uint64_t B, uint64_t E) - : Type(T), Parent(P), Begin(B), End(E) { } +/// \name Text atom +/// @{ + +/// \brief An entry in an MCTextAtom: a disassembled instruction. +/// NOTE: Both the Address and Size field are actually redundant when taken in +/// the context of the text atom, and may better be exposed in an iterator +/// instead of stored in the atom, which would replace this class. +class MCDecodedInst { +public: + MCInst Inst; + uint64_t Address; + uint64_t Size; + MCDecodedInst(const MCInst &Inst, uint64_t Address, uint64_t Size) + : Inst(Inst), Address(Address), Size(Size) {} +}; + +/// \brief An atom consisting of disassembled instructions. +class MCTextAtom : public MCAtom { +private: + typedef std::vector<MCDecodedInst> InstListTy; + InstListTy Insts; + /// \brief The address of the next appended instruction, i.e., the + /// address immediately after the last instruction in the atom. + uint64_t NextInstAddress; public: - bool isTextAtom() const { return Type == TextAtom; } - bool isDataAtom() const { return Type == DataAtom; } + /// Append an instruction, expanding the atom if necessary. + void addInst(const MCInst &Inst, uint64_t Size); + + /// \name Instruction list access + /// @{ + typedef InstListTy::const_iterator const_iterator; + const_iterator begin() const { return Insts.begin(); } + const_iterator end() const { return Insts.end(); } + + const MCDecodedInst &back() const { return Insts.back(); } + const MCDecodedInst &at(size_t n) const { return Insts.at(n); } + uint64_t size() const { return Insts.size(); } + /// @} + + /// \name Atom type specific split/truncate logic. + /// @{ + MCTextAtom *split(uint64_t SplitPt) LLVM_OVERRIDE; + void truncate(uint64_t TruncPt) LLVM_OVERRIDE; + /// @} + + // Class hierarchy. + static bool classof(const MCAtom *A) { return A->getKind() == TextAtom; } +private: + friend class MCModule; + // Private constructor - only callable by MCModule + MCTextAtom(MCModule *P, uint64_t Begin, uint64_t End) + : MCAtom(TextAtom, P, Begin, End), NextInstAddress(Begin) {} +}; +/// @} + +/// \name Data atom +/// @{ + +/// \brief An entry in an MCDataAtom. +// NOTE: This may change to a more complex type in the future. +typedef uint8_t MCData; - void addInst(const MCInst &I, uint64_t Address, unsigned Size); +/// \brief An atom consising of a sequence of bytes. +class MCDataAtom : public MCAtom { + std::vector<MCData> Data; + +public: + /// Append a data entry, expanding the atom if necessary. void addData(const MCData &D); - /// split - Splits the atom in two at a given address, which must align with - /// and instruction boundary if this is a TextAtom. Returns the newly created - /// atom representing the high part of the split. - MCAtom *split(uint64_t SplitPt); + /// \name Atom type specific split/truncate logic. + /// @{ + MCDataAtom *split(uint64_t SplitPt) LLVM_OVERRIDE; + void truncate(uint64_t TruncPt) LLVM_OVERRIDE; + /// @} - /// truncate - Truncates an atom so that TruncPt is the last byte address - /// contained in the atom. - void truncate(uint64_t TruncPt); + // Class hierarchy. + static bool classof(const MCAtom *A) { return A->getKind() == DataAtom; } +private: + friend class MCModule; + // Private constructor - only callable by MCModule + MCDataAtom(MCModule *P, uint64_t Begin, uint64_t End) + : MCAtom(DataAtom, P, Begin, End), Data(End - Begin) {} }; } #endif - diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h index 36fbcb0..e82b0c1 100644 --- a/include/llvm/MC/MCDisassembler.h +++ b/include/llvm/MC/MCDisassembler.h @@ -10,6 +10,9 @@ #define LLVM_MC_MCDISASSEMBLER_H #include "llvm-c/Disassembler.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/MC/MCSymbolizer.h" +#include "llvm/MC/MCRelocationInfo.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -55,7 +58,8 @@ public: /// Constructor - Performs initial setup for the disassembler. MCDisassembler(const MCSubtargetInfo &STI) : GetOpInfo(0), SymbolLookUp(0), DisInfo(0), Ctx(0), - STI(STI), CommentStream(0) {} + STI(STI), Symbolizer(0), + CommentStream(0) {} virtual ~MCDisassembler(); @@ -81,7 +85,6 @@ public: uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const = 0; - private: // // Hooks for symbolic disassembly via the public 'C' interface. @@ -95,20 +98,32 @@ private: // The assembly context for creating symbols and MCExprs in place of // immediate operands when there is symbolic information. MCContext *Ctx; + protected: // Subtarget information, for instruction decoding predicates if required. const MCSubtargetInfo &STI; + OwningPtr<MCSymbolizer> Symbolizer; public: - void setupForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, - LLVMSymbolLookupCallback symbolLookUp, - void *disInfo, - MCContext *ctx) { - GetOpInfo = getOpInfo; - SymbolLookUp = symbolLookUp; - DisInfo = disInfo; - Ctx = ctx; - } + // Helpers around MCSymbolizer + bool tryAddingSymbolicOperand(MCInst &Inst, + int64_t Value, + uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t InstSize) const; + + void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; + + /// Set \p Symzer as the current symbolizer. + /// This takes ownership of \p Symzer, and deletes the previously set one. + void setSymbolizer(OwningPtr<MCSymbolizer> &Symzer); + + /// Sets up an external symbolizer that uses the C API callbacks. + void setupForSymbolicDisassembly(LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, + void *DisInfo, + MCContext *Ctx, + OwningPtr<MCRelocationInfo> &RelInfo); + LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; } LLVMSymbolLookupCallback getLLVMSymbolLookupCallback() const { return SymbolLookUp; diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index 1a392e8..3ece262 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -24,7 +24,6 @@ namespace llvm { class MCContext; - class MCObjectWriter; class MCSection; class MCStreamer; class MCSymbol; @@ -229,15 +228,12 @@ namespace llvm { class MCDwarfLineAddr { public: /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas. - static void Encode(int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS); + static void Encode(MCContext &Context, int64_t LineDelta, + uint64_t AddrDelta, raw_ostream &OS); /// Utility function to emit the encoding to a streamer. static void Emit(MCStreamer *MCOS, int64_t LineDelta,uint64_t AddrDelta); - - /// Utility function to write the encoding to an object writer. - static void Write(MCObjectWriter *OW, - int64_t LineDelta, uint64_t AddrDelta); }; class MCGenDwarfInfo { @@ -418,7 +414,8 @@ namespace llvm { static void Emit(MCStreamer &streamer, bool usingCFI, bool isEH); static void EmitAdvanceLoc(MCStreamer &Streamer, uint64_t AddrDelta); - static void EncodeAdvanceLoc(uint64_t AddrDelta, raw_ostream &OS); + static void EncodeAdvanceLoc(MCContext &Context, uint64_t AddrDelta, + raw_ostream &OS); }; } // end namespace llvm diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index a59776d..92ad1b1 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -42,11 +42,6 @@ struct ELFRelocationEntry { const MCSymbol *Sym, uint64_t Addend, const MCFixup &Fixup) : r_offset(RelocOffset), Index(Idx), Type(RelType), Symbol(Sym), r_addend(Addend), Fixup(&Fixup) {} - - // Support lexicographic sorting. - bool operator<(const ELFRelocationEntry &RE) const { - return RE.r_offset < r_offset; - } }; class MCELFObjectTargetWriter { @@ -87,8 +82,6 @@ public: virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const; - virtual void adjustFixupOffset(const MCFixup &Fixup, - uint64_t &RelocOffset); virtual void sortRelocs(const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs); diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index a2c5bd3..d851f64 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -173,10 +173,8 @@ public: VK_PPC_TOC, // TOC base VK_PPC_TOC_ENTRY, // TOC entry - VK_PPC_DARWIN_HA16, // ha16(symbol) - VK_PPC_DARWIN_LO16, // lo16(symbol) - VK_PPC_GAS_HA16, // symbol@ha - VK_PPC_GAS_LO16, // symbol@l + VK_PPC_ADDR16_HA, // symbol@ha + VK_PPC_ADDR16_LO, // symbol@l VK_PPC_TPREL16_HA, // symbol@tprel@ha VK_PPC_TPREL16_LO, // symbol@tprel@l VK_PPC_DTPREL16_HA, // symbol@dtprel@ha diff --git a/include/llvm/MC/MCExternalSymbolizer.h b/include/llvm/MC/MCExternalSymbolizer.h new file mode 100644 index 0000000..c942adc --- /dev/null +++ b/include/llvm/MC/MCExternalSymbolizer.h @@ -0,0 +1,58 @@ +//===-- llvm/MC/MCExternalSymbolizer.h - ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCExternalSymbolizer class, which +// enables library users to provide callbacks (through the C API) to do the +// symbolization externally. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCEXTERNALSYMBOLIZER_H +#define LLVM_MC_MCEXTERNALSYMBOLIZER_H + +#include "llvm-c/Disassembler.h" +#include "llvm/MC/MCSymbolizer.h" + +namespace llvm { + +/// \brief Symbolize using user-provided, C API, callbacks. +/// +/// See llvm-c/Disassembler.h. +class MCExternalSymbolizer : public MCSymbolizer { + + /// \name Hooks for symbolic disassembly via the public 'C' interface. + /// @{ + /// The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + /// The function to lookup a symbol name. + LLVMSymbolLookupCallback SymbolLookUp; + /// The pointer to the block of symbolic information for above call back. + void *DisInfo; + /// @} + +public: + MCExternalSymbolizer(MCContext &Ctx, + OwningPtr<MCRelocationInfo> &RelInfo, + LLVMOpInfoCallback getOpInfo, + LLVMSymbolLookupCallback symbolLookUp, + void *disInfo) + : MCSymbolizer(Ctx, RelInfo), + GetOpInfo(getOpInfo), SymbolLookUp(symbolLookUp), DisInfo(disInfo) {} + + bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, + int64_t Value, + uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t InstSize); + void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream, + int64_t Value, uint64_t Address); +}; + +} + +#endif diff --git a/include/llvm/MC/MCFunction.h b/include/llvm/MC/MCFunction.h new file mode 100644 index 0000000..b85011e --- /dev/null +++ b/include/llvm/MC/MCFunction.h @@ -0,0 +1,122 @@ +//===-- llvm/MC/MCFunction.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the data structures to hold a CFG reconstructed from +// machine code. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCFUNCTION_H +#define LLVM_MC_MCFUNCTION_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include <string> +#include <vector> + +namespace llvm { + +class MCFunction; +class MCModule; +class MCTextAtom; + +/// \brief Basic block containing a sequence of disassembled instructions. +/// The basic block is backed by an MCTextAtom, which holds the instructions, +/// and the address range it covers. +/// Create a basic block using MCFunction::createBlock. +class MCBasicBlock { + const MCTextAtom *Insts; + + // MCFunction owns the basic block. + MCFunction *Parent; + friend class MCFunction; + MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent); + + /// \name Predecessors/Successors, to represent the CFG. + /// @{ + typedef std::vector<const MCBasicBlock *> BasicBlockListTy; + BasicBlockListTy Successors; + BasicBlockListTy Predecessors; + /// @} +public: + + /// \brief Get the backing MCTextAtom, containing the instruction sequence. + const MCTextAtom *getInsts() const { return Insts; } + + /// \name Get the owning MCFunction. + /// @{ + const MCFunction *getParent() const { return Parent; } + MCFunction *getParent() { return Parent; } + /// @} + + /// MC CFG access: Predecessors/Successors. + /// @{ + typedef BasicBlockListTy::const_iterator succ_const_iterator; + succ_const_iterator succ_begin() const { return Successors.begin(); } + succ_const_iterator succ_end() const { return Successors.end(); } + + typedef BasicBlockListTy::const_iterator pred_const_iterator; + pred_const_iterator pred_begin() const { return Predecessors.begin(); } + pred_const_iterator pred_end() const { return Predecessors.end(); } + + void addSuccessor(const MCBasicBlock *MCBB); + bool isSuccessor(const MCBasicBlock *MCBB) const; + + void addPredecessor(const MCBasicBlock *MCBB); + bool isPredecessor(const MCBasicBlock *MCBB) const; + /// @} +}; + +/// \brief Represents a function in machine code, containing MCBasicBlocks. +/// MCFunctions are created using MCModule::createFunction. +class MCFunction { + MCFunction (const MCFunction&) LLVM_DELETED_FUNCTION; + MCFunction& operator=(const MCFunction&) LLVM_DELETED_FUNCTION; + + std::string Name; + typedef std::vector<MCBasicBlock*> BasicBlockListTy; + BasicBlockListTy Blocks; + + // MCModule owns the function. + friend class MCModule; + MCFunction(StringRef Name); +public: + ~MCFunction(); + + /// \brief Create an MCBasicBlock backed by Insts and add it to this function. + /// \param Insts Sequence of straight-line code backing the basic block. + /// \returns The newly created basic block. + MCBasicBlock &createBlock(const MCTextAtom &Insts); + + StringRef getName() const { return Name; } + + /// \name Access to the function's basic blocks. No ordering is enforced. + /// @{ + /// \brief Get the entry point basic block. + const MCBasicBlock *getEntryBlock() const { return front(); } + MCBasicBlock *getEntryBlock() { return front(); } + + // NOTE: Dereferencing iterators gives pointers, so maybe a list is best here. + typedef BasicBlockListTy::const_iterator const_iterator; + typedef BasicBlockListTy:: iterator iterator; + const_iterator begin() const { return Blocks.begin(); } + iterator begin() { return Blocks.begin(); } + const_iterator end() const { return Blocks.end(); } + iterator end() { return Blocks.end(); } + + const MCBasicBlock* front() const { return Blocks.front(); } + MCBasicBlock* front() { return Blocks.front(); } + const MCBasicBlock* back() const { return Blocks.back(); } + MCBasicBlock* back() { return Blocks.back(); } + /// @} +}; + +} + +#endif diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h index acad633..17bfd15 100644 --- a/include/llvm/MC/MCInstrAnalysis.h +++ b/include/llvm/MC/MCInstrAnalysis.h @@ -52,10 +52,15 @@ public: return Info->get(Inst.getOpcode()).isReturn(); } + virtual bool isTerminator(const MCInst &Inst) const { + return Info->get(Inst.getOpcode()).isTerminator(); + } + /// evaluateBranch - Given a branch instruction try to get the address the - /// branch targets. Otherwise return -1. - virtual uint64_t - evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size) const; + /// branch targets. Return true on success, and the address in Target. + virtual bool + evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const; }; } diff --git a/include/llvm/MC/MCModule.h b/include/llvm/MC/MCModule.h index 755fa02..02f8ca0 100644 --- a/include/llvm/MC/MCModule.h +++ b/include/llvm/MC/MCModule.h @@ -15,44 +15,93 @@ #ifndef LLVM_MC_MCMODULE_H #define LLVM_MC_MCMODULE_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IntervalMap.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" +#include <vector> namespace llvm { class MCAtom; +class MCDataAtom; +class MCFunction; +class MCObjectDisassembler; +class MCTextAtom; -/// MCModule - This class represent a completely disassembled object file or -/// executable. It comprises a list of MCAtom's, and a branch target table. -/// Each atom represents a contiguous range of either instructions or data. +/// \brief A completely disassembled object file or executable. +/// It comprises a list of MCAtom's, each representing a contiguous range of +/// either instructions or data. +/// An MCModule is created using MCObjectDisassembler::buildModule. class MCModule { - /// AtomAllocationTracker - An MCModule owns its component MCAtom's, so it - /// must track them in order to ensure they are properly freed as atoms are - /// merged or otherwise manipulated. - SmallPtrSet<MCAtom*, 8> AtomAllocationTracker; + /// \name Atom tracking + /// @{ - /// OffsetMap - Efficiently maps offset ranges to MCAtom's. - IntervalMap<uint64_t, MCAtom*> OffsetMap; - - /// BranchTargetMap - Maps offsets that are determined to be branches and - /// can be statically resolved to their target offsets. - DenseMap<uint64_t, MCAtom*> BranchTargetMap; + /// \brief Atoms in this module, sorted by begin address. + /// FIXME: This doesn't handle overlapping atoms (which happen when a basic + /// block starts in the middle of an instruction of another basic block.) + typedef std::vector<MCAtom*> AtomListTy; + AtomListTy Atoms; friend class MCAtom; - - /// remap - Update the interval mapping for an MCAtom. + /// \brief Remap \p Atom to the given range, and update its Begin/End fields. + /// \param Atom An atom belonging to this module. + /// An atom should always use this method to update its bounds, because this + /// enables the owning MCModule to keep track of its atoms. void remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd); + /// \brief Insert an atom in the module, using its Begin and End addresses. + void map(MCAtom *NewAtom); + /// @} + + /// \name Function tracking + /// @{ + typedef std::vector<MCFunction*> FunctionListTy; + FunctionListTy Functions; + /// @} + + MCModule (const MCModule &) LLVM_DELETED_FUNCTION; + MCModule& operator=(const MCModule &) LLVM_DELETED_FUNCTION; + + // MCObjectDisassembler creates MCModules. + friend class MCObjectDisassembler; + MCModule() : Atoms() { } + public: - MCModule(IntervalMap<uint64_t, MCAtom*>::Allocator &A) : OffsetMap(A) { } + ~MCModule(); - /// createAtom - Creates a new MCAtom covering the specified offset range. - MCAtom *createAtom(MCAtom::AtomType Type, uint64_t Begin, uint64_t End); + /// \name Create a new MCAtom covering the specified offset range. + /// @{ + MCTextAtom *createTextAtom(uint64_t Begin, uint64_t End); + MCDataAtom *createDataAtom(uint64_t Begin, uint64_t End); + /// @} + + /// \name Access to the owned atom list, ordered by begin address. + /// @{ + const MCAtom *findAtomContaining(uint64_t Addr) const; + MCAtom *findAtomContaining(uint64_t Addr); + + typedef AtomListTy::const_iterator const_atom_iterator; + typedef AtomListTy:: iterator atom_iterator; + const_atom_iterator atom_begin() const { return Atoms.begin(); } + atom_iterator atom_begin() { return Atoms.begin(); } + const_atom_iterator atom_end() const { return Atoms.end(); } + atom_iterator atom_end() { return Atoms.end(); } + /// @} + + /// \name Create a new MCFunction. + MCFunction *createFunction(const StringRef &Name); + + /// \name Access to the owned function list. + /// @{ + typedef FunctionListTy::const_iterator const_func_iterator; + typedef FunctionListTy:: iterator func_iterator; + const_func_iterator func_begin() const { return Functions.begin(); } + func_iterator func_begin() { return Functions.begin(); } + const_func_iterator func_end() const { return Functions.end(); } + func_iterator func_end() { return Functions.end(); } + /// @} }; } #endif - diff --git a/include/llvm/MC/MCObjectDisassembler.h b/include/llvm/MC/MCObjectDisassembler.h new file mode 100644 index 0000000..749a54e --- /dev/null +++ b/include/llvm/MC/MCObjectDisassembler.h @@ -0,0 +1,69 @@ +//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCObjectDisassembler class, which +// can be used to construct an MCModule and an MC CFG from an ObjectFile. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H +#define LLVM_MC_MCOBJECTDISASSEMBLER_H + +namespace llvm { + +namespace object { + class ObjectFile; +} + +class MCBasicBlock; +class MCDisassembler; +class MCFunction; +class MCInstrAnalysis; +class MCModule; + +/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions. +/// This class builds on MCDisassembler to disassemble whole sections, creating +/// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data). +/// It can also be used to create a control flow graph consisting of MCFunctions +/// and MCBasicBlocks. +class MCObjectDisassembler { + const object::ObjectFile &Obj; + const MCDisassembler &Dis; + const MCInstrAnalysis &MIA; + +public: + MCObjectDisassembler(const object::ObjectFile &Obj, + const MCDisassembler &Dis, + const MCInstrAnalysis &MIA); + + /// \brief Build an MCModule, creating atoms and optionally functions. + /// \param withCFG Also build a CFG by adding MCFunctions to the Module. + /// If withCFG is false, the MCModule built only contains atoms, representing + /// what was found in the object file. If withCFG is true, MCFunctions are + /// created, containing MCBasicBlocks. All text atoms are split to form basic + /// block atoms, which then each back an MCBasicBlock. + MCModule *buildModule(bool withCFG = false); + +private: + /// \brief Fill \p Module by creating an atom for each section. + /// This could be made much smarter, using information like symbols, but also + /// format-specific features, like mach-o function_start or data_in_code LCs. + void buildSectionAtoms(MCModule *Module); + + /// \brief Enrich \p Module with a CFG consisting of MCFunctions. + /// \param Module An MCModule returned by buildModule, with no CFG. + /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom. + /// When the CFG is built, contiguous instructions that were previously in a + /// single MCTextAtom will be split in multiple basic block atoms. + void buildCFG(MCModule *Module); +}; + +} + +#endif diff --git a/include/llvm/MC/MCObjectSymbolizer.h b/include/llvm/MC/MCObjectSymbolizer.h new file mode 100644 index 0000000..555cf51 --- /dev/null +++ b/include/llvm/MC/MCObjectSymbolizer.h @@ -0,0 +1,75 @@ +//===-- llvm/MC/MCObjectSymbolizer.h --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MCObjectSymbolizer class, an MCSymbolizer that is +// backed by an object::ObjectFile. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCOBJECTSYMBOLIZER_H +#define LLVM_MC_MCOBJECTSYMBOLIZER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSymbolizer.h" +#include "llvm/Object/ObjectFile.h" +#include <vector> + +namespace llvm { + +class MCExpr; +class MCInst; +class MCRelocationInfo; +class raw_ostream; + +/// \brief An ObjectFile-backed symbolizer. +class MCObjectSymbolizer : public MCSymbolizer { +protected: + const object::ObjectFile *Obj; + + typedef DenseMap<uint64_t, object::RelocationRef> AddrToRelocMap; + typedef std::vector<object::SectionRef> SortedSectionList; + SortedSectionList SortedSections; + + // Map a load address to the first relocation that applies there. As far as I + // know, if there are several relocations at the exact same address, they are + // related and the others can be determined from the first that was found in + // the relocation table. For instance, on x86-64 mach-o, a SUBTRACTOR + // relocation (referencing the minuend symbol) is followed by an UNSIGNED + // relocation (referencing the subtrahend symbol). + AddrToRelocMap AddrToReloc; + + // Helpers around SortedSections. + SortedSectionList::const_iterator findSectionContaining(uint64_t Addr) const; + void insertSection(object::SectionRef Section); + + + MCObjectSymbolizer(MCContext &Ctx, OwningPtr<MCRelocationInfo> &RelInfo, + const object::ObjectFile *Obj); + +public: + /// \name Overridden MCSymbolizer methods: + /// @{ + bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream, + int64_t Value, + uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t InstSize); + + void tryAddingPcLoadReferenceComment(raw_ostream &cStream, + int64_t Value, uint64_t Address); + /// @} + + /// \brief Create an object symbolizer for \p Obj. + static MCObjectSymbolizer * + createObjectSymbolizer(MCContext &Ctx, OwningPtr<MCRelocationInfo> &RelInfo, + const object::ObjectFile *Obj); +}; + +} + +#endif diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index 4ca5b9a..1d15534 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -47,9 +47,10 @@ public: } } InlineAsmIdentifierInfo; - virtual ~MCAsmParserSemaCallback(); + virtual ~MCAsmParserSemaCallback(); virtual void *LookupInlineAsmIdentifier(StringRef &LineBuf, - InlineAsmIdentifierInfo &Info) = 0; + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedContext) = 0; virtual bool LookupInlineAsmField(StringRef Base, StringRef Member, unsigned &Offset) = 0; @@ -121,14 +122,14 @@ public: /// /// \return The return value is true, if warnings are fatal. virtual bool Warning(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) = 0; + ArrayRef<SMRange> Ranges = None) = 0; /// Error - Emit an error at the location \p L, with the message \p Msg. /// /// \return The return value is always true, as an idiomatic convenience to /// clients. virtual bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) = 0; + ArrayRef<SMRange> Ranges = None) = 0; /// Lex - Get the next AsmToken in the stream, possibly handling file /// inclusion first. @@ -138,8 +139,7 @@ public: const AsmToken &getTok(); /// \brief Report an error at the current lexer location. - bool TokError(const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()); + bool TokError(const Twine &Msg, ArrayRef<SMRange> Ranges = None); /// parseIdentifier - Parse an identifier or string (as a quoted identifier) /// and set \p Res to the identifier contents. diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index f5b4ddd..3fa89c1 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -99,19 +99,15 @@ public: bool isAllocatable() const { return Allocatable; } }; -/// MCRegisterDesc - This record contains all of the information known about -/// a particular register. The Overlaps field contains a pointer to a zero -/// terminated array of registers that this register aliases, starting with -/// itself. This is needed for architectures like X86 which have AL alias AX -/// alias EAX. The SubRegs field is a zero terminated array of registers that -/// are sub-registers of the specific register, e.g. AL, AH are sub-registers of -/// AX. The SuperRegs field is a zero terminated array of registers that are -/// super-registers of the specific register, e.g. RAX, EAX, are super-registers -/// of AX. +/// MCRegisterDesc - This record contains information about a particular +/// register. The SubRegs field is a zero terminated array of registers that +/// are sub-registers of the specific register, e.g. AL, AH are sub-registers +/// of AX. The SuperRegs field is a zero terminated array of registers that are +/// super-registers of the specific register, e.g. RAX, EAX, are +/// super-registers of AX. /// struct MCRegisterDesc { uint32_t Name; // Printable name for the reg (for debugging) - uint32_t Overlaps; // Overlapping registers, described above uint32_t SubRegs; // Sub-register set, described above uint32_t SuperRegs; // Super-register set, described above @@ -148,6 +144,13 @@ public: bool operator<(DwarfLLVMRegPair RHS) const { return FromReg < RHS.FromReg; } }; + + /// SubRegCoveredBits - Emitted by tablegen: bit range covered by a subreg + /// index, -1 in any being invalid. + struct SubRegCoveredBits { + uint16_t Offset; + uint16_t Size; + }; private: const MCRegisterDesc *Desc; // Pointer to the descriptor array unsigned NumRegs; // Number of entries in the array @@ -161,6 +164,8 @@ private: const char *RegStrings; // Pointer to the string table. const uint16_t *SubRegIndices; // Pointer to the subreg lookup // array. + const SubRegCoveredBits *SubRegIdxRanges; // Pointer to the subreg covered + // bit ranges array. unsigned NumSubRegIndices; // Number of subreg indices. const uint16_t *RegEncodingTable; // Pointer to array of register // encodings. @@ -226,7 +231,6 @@ public: // internal list pointers. friend class MCSubRegIterator; friend class MCSuperRegIterator; - friend class MCRegAliasIterator; friend class MCRegUnitIterator; friend class MCRegUnitRootIterator; @@ -241,6 +245,7 @@ public: const char *Strings, const uint16_t *SubIndices, unsigned NumIndices, + const SubRegCoveredBits *SubIdxRanges, const uint16_t *RET) { Desc = D; NumRegs = NR; @@ -254,6 +259,7 @@ public: NumRegUnits = NRU; SubRegIndices = SubIndices; NumSubRegIndices = NumIndices; + SubRegIdxRanges = SubIdxRanges; RegEncodingTable = RET; } @@ -332,6 +338,16 @@ public: /// otherwise. unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const; + /// \brief Get the size of the bit range covered by a sub-register index. + /// If the index isn't continuous, return the sum of the sizes of its parts. + /// If the index is used to access subregisters of different sizes, return -1. + unsigned getSubRegIdxSize(unsigned Idx) const; + + /// \brief Get the offset of the bit range covered by a sub-register index. + /// If an Offset doesn't make sense (the index isn't continuous, or is used to + /// access sub-registers at different offsets), return -1. + unsigned getSubRegIdxOffset(unsigned Idx) const; + /// \brief Return the human-readable symbolic target-specific name for the /// specified physical register. const char *getName(unsigned RegNo) const { @@ -421,30 +437,26 @@ public: // aliasing registers. Use these iterator classes to traverse the lists. /// MCSubRegIterator enumerates all sub-registers of Reg. +/// If IncludeSelf is set, Reg itself is included in the list. class MCSubRegIterator : public MCRegisterInfo::DiffListIterator { public: - MCSubRegIterator(unsigned Reg, const MCRegisterInfo *MCRI) { + MCSubRegIterator(unsigned Reg, const MCRegisterInfo *MCRI, + bool IncludeSelf = false) { init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs); - ++*this; + // Initially, the iterator points to Reg itself. + if (!IncludeSelf) + ++*this; } }; /// MCSuperRegIterator enumerates all super-registers of Reg. +/// If IncludeSelf is set, Reg itself is included in the list. class MCSuperRegIterator : public MCRegisterInfo::DiffListIterator { public: - MCSuperRegIterator(unsigned Reg, const MCRegisterInfo *MCRI) { + MCSuperRegIterator() {} + MCSuperRegIterator(unsigned Reg, const MCRegisterInfo *MCRI, + bool IncludeSelf = false) { init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs); - ++*this; - } -}; - -/// MCRegAliasIterator enumerates all registers aliasing Reg. -/// If IncludeSelf is set, Reg itself is included in the list. -class MCRegAliasIterator : public MCRegisterInfo::DiffListIterator { -public: - MCRegAliasIterator(unsigned Reg, const MCRegisterInfo *MCRI, - bool IncludeSelf) { - init(Reg, MCRI->DiffLists + MCRI->get(Reg).Overlaps); // Initially, the iterator points to Reg itself. if (!IncludeSelf) ++*this; @@ -478,6 +490,7 @@ class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator { public: /// MCRegUnitIterator - Create an iterator that traverses the register units /// in Reg. + MCRegUnitIterator() {} MCRegUnitIterator(unsigned Reg, const MCRegisterInfo *MCRI) { assert(Reg && "Null register has no regunits"); // Decode the RegUnits MCRegisterDesc field. @@ -502,9 +515,7 @@ public: // super-registers. All registers aliasing Unit can be visited like this: // // for (MCRegUnitRootIterator RI(Unit, MCRI); RI.isValid(); ++RI) { -// unsigned Root = *RI; -// visit(Root); -// for (MCSuperRegIterator SI(Root, MCRI); SI.isValid(); ++SI) +// for (MCSuperRegIterator SI(*RI, MCRI, true); SI.isValid(); ++SI) // visit(*SI); // } @@ -513,6 +524,7 @@ class MCRegUnitRootIterator { uint16_t Reg0; uint16_t Reg1; public: + MCRegUnitRootIterator() : Reg0(0), Reg1(0) {} MCRegUnitRootIterator(unsigned RegUnit, const MCRegisterInfo *MCRI) { assert(RegUnit < MCRI->getNumRegUnits() && "Invalid register unit"); Reg0 = MCRI->RegUnitRoots[RegUnit][0]; @@ -537,6 +549,68 @@ public: } }; +/// MCRegAliasIterator enumerates all registers aliasing Reg. If IncludeSelf is +/// set, Reg itself is included in the list. This iterator does not guarantee +/// any ordering or that entries are unique. +class MCRegAliasIterator { +private: + unsigned Reg; + const MCRegisterInfo *MCRI; + bool IncludeSelf; + + MCRegUnitIterator RI; + MCRegUnitRootIterator RRI; + MCSuperRegIterator SI; +public: + MCRegAliasIterator(unsigned Reg, const MCRegisterInfo *MCRI, + bool IncludeSelf) + : Reg(Reg), MCRI(MCRI), IncludeSelf(IncludeSelf) { + + // Initialize the iterators. + for (RI = MCRegUnitIterator(Reg, MCRI); RI.isValid(); ++RI) { + for (RRI = MCRegUnitRootIterator(*RI, MCRI); RRI.isValid(); ++RRI) { + for (SI = MCSuperRegIterator(*RRI, MCRI, true); SI.isValid(); ++SI) { + if (!(!IncludeSelf && Reg == *SI)) + return; + } + } + } + } + + bool isValid() const { + return RI.isValid(); + } + + unsigned operator*() const { + assert (SI.isValid() && "Cannot dereference an invalid iterator."); + return *SI; + } + + void advance() { + // Assuming SI is valid. + ++SI; + if (SI.isValid()) return; + + ++RRI; + if (RRI.isValid()) { + SI = MCSuperRegIterator(*RRI, MCRI, true); + return; + } + + ++RI; + if (RI.isValid()) { + RRI = MCRegUnitRootIterator(*RI, MCRI); + SI = MCSuperRegIterator(*RRI, MCRI, true); + } + } + + void operator++() { + assert(isValid() && "Cannot move off the end of the list."); + do advance(); + while (!IncludeSelf && isValid() && *SI == Reg); + } +}; + } // End llvm namespace #endif diff --git a/include/llvm/MC/MCRelocationInfo.h b/include/llvm/MC/MCRelocationInfo.h new file mode 100644 index 0000000..9dab900 --- /dev/null +++ b/include/llvm/MC/MCRelocationInfo.h @@ -0,0 +1,55 @@ +//==-- llvm/MC/MCRelocationInfo.h --------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MCRelocationInfo class, which provides methods to +// create MCExprs from relocations, either found in an object::ObjectFile +// (object::RelocationRef), or provided through the C API. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCRELOCATIONINFO_H +#define LLVM_MC_MCRELOCATIONINFO_H + +#include "llvm/Support/Compiler.h" + +namespace llvm { + +namespace object { +class RelocationRef; +} +class MCExpr; +class MCContext; + +/// \brief Create MCExprs from relocations found in an object file. +class MCRelocationInfo { + MCRelocationInfo(const MCRelocationInfo &) LLVM_DELETED_FUNCTION; + void operator=(const MCRelocationInfo &) LLVM_DELETED_FUNCTION; + +protected: + MCContext &Ctx; + +public: + MCRelocationInfo(MCContext &Ctx); + virtual ~MCRelocationInfo(); + + /// \brief Create an MCExpr for the relocation \p Rel. + /// \returns If possible, an MCExpr corresponding to Rel, else 0. + virtual const MCExpr *createExprForRelocation(object::RelocationRef Rel); + + /// \brief Create an MCExpr for the target-specific \p VariantKind. + /// The VariantKinds are defined in llvm-c/Disassembler.h. + /// Used by MCExternalSymbolizer. + /// \returns If possible, an MCExpr corresponding to VariantKind, else 0. + virtual const MCExpr *createExprForCAPIVariantKind(const MCExpr *SubExpr, + unsigned VariantKind); +}; + +} + +#endif diff --git a/include/llvm/MC/MCSymbolizer.h b/include/llvm/MC/MCSymbolizer.h new file mode 100644 index 0000000..e42a214 --- /dev/null +++ b/include/llvm/MC/MCSymbolizer.h @@ -0,0 +1,81 @@ +//===-- llvm/MC/MCSymbolizer.h - MCSymbolizer class -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCSymbolizer class, which is used +// to symbolize instructions decoded from an object, that is, transform their +// immediate operands to MCExprs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCSYMBOLIZER_H +#define LLVM_MC_MCSYMBOLIZER_H + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/MC/MCRelocationInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MCContext; +class MCInst; +class raw_ostream; + +/// \brief Symbolize and annotate disassembled instructions. +/// +/// For now this mimics the old symbolization logic (from both ARM and x86), that +/// relied on user-provided (C API) callbacks to do the actual symbol lookup in +/// the object file. This was moved to MCExternalSymbolizer. +/// A better API would not rely on actually calling the two methods here from +/// inside each disassembler, but would use the instr info to determine what +/// operands are actually symbolizable, and in what way. I don't think this +/// information exists right now. +class MCSymbolizer { + MCSymbolizer(const MCSymbolizer &) LLVM_DELETED_FUNCTION; + void operator=(const MCSymbolizer &) LLVM_DELETED_FUNCTION; + +protected: + MCContext &Ctx; + OwningPtr<MCRelocationInfo> RelInfo; + +public: + /// \brief Construct an MCSymbolizer, taking ownership of \p RelInfo. + MCSymbolizer(MCContext &Ctx, OwningPtr<MCRelocationInfo> &RelInfo); + virtual ~MCSymbolizer(); + + /// \brief Try to add a symbolic operand instead of \p Value to the MCInst. + /// + /// Instead of having a difficult to read immediate, a symbolic operand would + /// represent this immediate in a more understandable way, for instance as a + /// symbol or an offset from a symbol. Relocations can also be used to enrich + /// the symbolic expression. + /// @param Inst - The MCInst where to insert the symbolic operand. + /// @param cStream - Stream to print comments and annotations on. + /// @param Value - Operand value, pc-adjusted by the caller if necessary. + /// @param Address - Load address of the instruction. + /// @param IsBranch - Is the instruction a branch? + /// @param Offset - Byte offset of the operand inside the inst. + /// @param InstSize - Size of the instruction in bytes. + /// @return Whether a symbolic operand was added. + virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, + int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, + uint64_t InstSize) = 0; + + /// \brief Try to add a comment on the PC-relative load. + /// For instance, in Mach-O, this is used to add annotations to instructions + /// that use C string literals, as found in __cstring. + virtual void tryAddingPcLoadReferenceComment(raw_ostream &cStream, + int64_t Value, + uint64_t Address) = 0; +}; + +} + +#endif diff --git a/include/llvm/MC/MachineLocation.h b/include/llvm/MC/MachineLocation.h index 83c8b72..c4a9660 100644 --- a/include/llvm/MC/MachineLocation.h +++ b/include/llvm/MC/MachineLocation.h @@ -10,11 +10,6 @@ // frame. Locations will be one of two forms; a register or an address formed // from a base address plus an offset. Register indirection can be specified by // explicitly passing an offset to the constructor. -// -// The MachineMove class is used to represent abstract move operations in the -// prolog/epilog of a compiled function. A collection of these objects can be -// used by a debug consumer to track the location of values when unwinding stack -// frames. //===----------------------------------------------------------------------===// @@ -74,30 +69,6 @@ public: void dump(); #endif }; - -/// MachineMove - This class represents the save or restore of a callee saved -/// register that exception or debug info needs to know about. -class MachineMove { -private: - /// Label - Symbol for post-instruction address when result of move takes - /// effect. - MCSymbol *Label; - - // Move to & from location. - MachineLocation Destination, Source; -public: - MachineMove() : Label(0) {} - - MachineMove(MCSymbol *label, const MachineLocation &D, - const MachineLocation &S) - : Label(label), Destination(D), Source(S) {} - - // Accessors - MCSymbol *getLabel() const { return Label; } - const MachineLocation &getDestination() const { return Destination; } - const MachineLocation &getSource() const { return Source; } -}; - } // End llvm namespace #endif |
