aboutsummaryrefslogtreecommitdiffstats
path: root/lib/MC/MCAnalysis
diff options
context:
space:
mode:
Diffstat (limited to 'lib/MC/MCAnalysis')
-rw-r--r--lib/MC/MCAnalysis/Android.mk37
-rw-r--r--lib/MC/MCAnalysis/CMakeLists.txt8
-rw-r--r--lib/MC/MCAnalysis/LLVMBuild.txt5
-rw-r--r--lib/MC/MCAnalysis/MCAtom.cpp114
-rw-r--r--lib/MC/MCAnalysis/MCFunction.cpp76
-rw-r--r--lib/MC/MCAnalysis/MCModule.cpp142
-rw-r--r--lib/MC/MCAnalysis/MCModuleYAML.cpp464
-rw-r--r--lib/MC/MCAnalysis/MCObjectDisassembler.cpp574
-rw-r--r--lib/MC/MCAnalysis/MCObjectSymbolizer.cpp268
-rw-r--r--lib/MC/MCAnalysis/Makefile14
10 files changed, 1702 insertions, 0 deletions
diff --git a/lib/MC/MCAnalysis/Android.mk b/lib/MC/MCAnalysis/Android.mk
new file mode 100644
index 0000000..27f848a
--- /dev/null
+++ b/lib/MC/MCAnalysis/Android.mk
@@ -0,0 +1,37 @@
+LOCAL_PATH:= $(call my-dir)
+
+mc_analysis_SRC_FILES := \
+ MCAtom.cpp \
+ MCFunction.cpp \
+ MCModule.cpp \
+ MCModuleYAML.cpp \
+ MCObjectDisassembler.cpp \
+ MCObjectSymbolizer.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMMCAnalysis
+
+LOCAL_MODULE_TAGS := optional
+
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
+
+LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMMCAnalysis
+
+LOCAL_MODULE_TAGS := optional
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/MC/MCAnalysis/CMakeLists.txt b/lib/MC/MCAnalysis/CMakeLists.txt
new file mode 100644
index 0000000..81eae2d
--- /dev/null
+++ b/lib/MC/MCAnalysis/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_library(LLVMMCAnalysis
+ MCAtom.cpp
+ MCFunction.cpp
+ MCModule.cpp
+ MCModuleYAML.cpp
+ MCObjectDisassembler.cpp
+ MCObjectSymbolizer.cpp
+)
diff --git a/lib/MC/MCAnalysis/LLVMBuild.txt b/lib/MC/MCAnalysis/LLVMBuild.txt
new file mode 100644
index 0000000..1b58fec
--- /dev/null
+++ b/lib/MC/MCAnalysis/LLVMBuild.txt
@@ -0,0 +1,5 @@
+[component_0]
+type = Library
+name = MCAnalysis
+parent = Libraries
+required_libraries = MC Object Support
diff --git a/lib/MC/MCAnalysis/MCAtom.cpp b/lib/MC/MCAnalysis/MCAtom.cpp
new file mode 100644
index 0000000..82056ee
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCAtom.cpp
@@ -0,0 +1,114 @@
+//===- lib/MC/MCAnalysis/MCAtom.cpp - MCAtom implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCModule.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <iterator>
+
+using namespace llvm;
+
+// Pin the vtable to this file by defining anchor() out of line here.
+void MCAtom::anchor() {}
+
+/// Move this atom to the inclusive address range [NewBegin, NewEnd].
+///
+/// The work is delegated to the owning module, which keeps its atom list and
+/// this atom's bounds in sync.
+void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
+  MCModule *const Owner = Parent;
+  Owner->remap(this, NewBegin, NewEnd);
+}
+
+/// Shrink this atom so that it ends at \p TruncPt while keeping its begin
+/// address. \p TruncPt must satisfy Begin <= TruncPt < End.
+void MCAtom::remapForTruncate(uint64_t TruncPt) {
+  assert((TruncPt < End && TruncPt >= Begin) &&
+         "Truncation point not contained in atom!");
+  const uint64_t KeptBegin = Begin;
+  remap(KeptBegin, TruncPt);
+}
+
+/// Compute the bounds of the two atoms produced by splitting this atom at
+/// \p SplitPt and remap this atom to the lower one.
+///
+/// \param SplitPt first address of the upper half; Begin < SplitPt <= End.
+/// \param [out] LBegin,LEnd inclusive bounds of the lower half (this atom).
+/// \param [out] RBegin,REnd inclusive bounds of the upper half, which the
+///        caller is expected to create.
+void MCAtom::remapForSplit(uint64_t SplitPt,
+                           uint64_t &LBegin, uint64_t &LEnd,
+                           uint64_t &RBegin, uint64_t &REnd) {
+  assert((Begin < SplitPt && End >= SplitPt) &&
+         "Splitting at point not contained in atom!");
+
+  // Record both halves before remapping, since remap() rewrites Begin/End.
+  RBegin = SplitPt;
+  REnd = End;
+  LBegin = Begin;
+  LEnd = SplitPt - 1;
+
+  // This atom keeps the lower range.
+  remap(LBegin, LEnd);
+}
+
+// MCDataAtom
+
+/// Append one unit of data to the atom, growing the atom's address range by
+/// one when the stored data no longer fits in [Begin, End].
+void MCDataAtom::addData(const MCData &D) {
+  Data.push_back(D);
+
+  const uint64_t Capacity = End + 1 - Begin;
+  if (Data.size() <= Capacity)
+    return;
+  remap(Begin, End + 1);
+}
+
+/// Shrink the atom so that it ends at \p TruncPt and resize the stored data
+/// to the new length of the range.
+void MCDataAtom::truncate(uint64_t TruncPt) {
+  remapForTruncate(TruncPt);
+
+  const uint64_t NewLength = TruncPt - Begin + 1;
+  Data.resize(NewLength);
+}
+
+/// Split this data atom at \p SplitPt.
+///
+/// This atom keeps the lower range; a new atom carrying the same name is
+/// created in the parent module for the upper range and receives the data
+/// from the split offset onwards.
+///
+/// \returns the newly created upper atom.
+MCDataAtom *MCDataAtom::split(uint64_t SplitPt) {
+  uint64_t LowBegin, LowEnd, HighBegin, HighEnd;
+  remapForSplit(SplitPt, LowBegin, LowEnd, HighBegin, HighEnd);
+
+  MCDataAtom *Upper = Parent->createDataAtom(HighBegin, HighEnd);
+  Upper->setName(getName());
+
+  std::vector<MCData>::iterator Cut = Data.begin() + (HighBegin - LowBegin);
+  assert(Cut != Data.end() && "Split point not found in range!");
+
+  // Hand the tail over to the new atom, then drop it from this one.
+  Upper->Data.insert(Upper->Data.end(), Cut, Data.end());
+  Data.erase(Cut, Data.end());
+  return Upper;
+}
+
+// MCTextAtom
+
+/// Append the decoded instruction \p I, occupying \p Size address units, at
+/// the atom's next free instruction address. The atom's range is extended
+/// when the instruction ends past the current end address.
+void MCTextAtom::addInst(const MCInst &I, uint64_t Size) {
+  const uint64_t LastAddr = NextInstAddress + Size - 1;
+  if (LastAddr > End)
+    remap(Begin, LastAddr);
+
+  Insts.push_back(MCDecodedInst(I, NextInstAddress, Size));
+  NextInstAddress += Size;
+}
+
+/// Shrink the atom so that it ends at \p TruncPt and drop every instruction
+/// that starts after it. An instruction must start exactly at TruncPt + 1.
+void MCTextAtom::truncate(uint64_t TruncPt) {
+  remapForTruncate(TruncPt);
+
+  // Locate the first instruction that starts past the truncation point.
+  InstListTy::iterator Cut = Insts.begin();
+  for (InstListTy::iterator E = Insts.end(); Cut != E; ++Cut)
+    if (Cut->Address > TruncPt)
+      break;
+
+  assert(Cut != Insts.end() && "Truncation point not found in disassembly!");
+  assert(Cut->Address == TruncPt + 1 &&
+         "Truncation point does not fall on instruction boundary");
+
+  Insts.erase(Cut, Insts.end());
+}
+
+/// Split this text atom at \p SplitPt, which must be the address of one of
+/// its instructions.
+///
+/// This atom keeps the instructions below \p SplitPt. A new atom with the
+/// same name is created in the parent module for the rest, and the module is
+/// asked to split the basic blocks backed by this atom accordingly.
+///
+/// \returns the newly created upper atom.
+MCTextAtom *MCTextAtom::split(uint64_t SplitPt) {
+  uint64_t LowBegin, LowEnd, HighBegin, HighEnd;
+  remapForSplit(SplitPt, LowBegin, LowEnd, HighBegin, HighEnd);
+
+  MCTextAtom *Upper = Parent->createTextAtom(HighBegin, HighEnd);
+  Upper->setName(getName());
+
+  // Find the instruction that starts the upper half.
+  InstListTy::iterator Cut = Insts.begin();
+  for (InstListTy::iterator E = Insts.end(); Cut != E; ++Cut)
+    if (Cut->Address >= SplitPt)
+      break;
+  assert(Cut != Insts.end() && "Split point not found in disassembly!");
+  assert(Cut->Address == SplitPt &&
+         "Split point does not fall on instruction boundary!");
+
+  std::copy(Cut, Insts.end(), std::back_inserter(Upper->Insts));
+  Insts.erase(Cut, Insts.end());
+
+  Parent->splitBasicBlocksForAtom(this, Upper);
+  return Upper;
+}
diff --git a/lib/MC/MCAnalysis/MCFunction.cpp b/lib/MC/MCAnalysis/MCFunction.cpp
new file mode 100644
index 0000000..4e09d1a
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCFunction.cpp
@@ -0,0 +1,76 @@
+//===-- lib/MC/MCAnalysis/MCFunction.cpp ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCModule.h"
+#include <algorithm>
+
+using namespace llvm;
+
+// MCFunction
+
+/// Create an empty function called \p Name that belongs to module \p Parent.
+MCFunction::MCFunction(StringRef Name, MCModule *Parent)
+    : Name(Name), ParentModule(Parent) {}
+
+/// Create a basic block backed by the text atom \p TA, append it to this
+/// function's block list and return a reference to it. The function owns the
+/// new block.
+MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
+  Blocks.push_back(std::unique_ptr<MCBasicBlock>(new MCBasicBlock(TA, this)));
+  MCBasicBlock &Created = *Blocks.back();
+  return Created;
+}
+
+/// Look up the basic block whose backing text atom begins at \p StartAddr.
+/// \returns the first matching block, or nullptr when there is none.
+MCBasicBlock *MCFunction::find(uint64_t StartAddr) {
+  const_iterator It = begin();
+  const const_iterator Last = end();
+  while (It != Last) {
+    if ((*It)->getInsts()->getBeginAddr() == StartAddr)
+      return It->get();
+    ++It;
+  }
+  return nullptr;
+}
+
+/// Const overload: forwards to the non-const find() and returns the result as
+/// a pointer to const.
+const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const {
+  MCFunction *Self = const_cast<MCFunction *>(this);
+  return Self->find(StartAddr);
+}
+
+// MCBasicBlock
+
+/// Create a basic block of \p Parent backed by the text atom \p Insts and
+/// register it with the module that owns the function, so the module can find
+/// the blocks backed by a given atom.
+MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent)
+    : Insts(&Insts), Parent(Parent) {
+  auto *OwningModule = getParent()->getParent();
+  OwningModule->trackBBForAtom(&Insts, this);
+}
+
+/// Record \p MCBB as a successor of this block; duplicates are ignored.
+void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) {
+  if (isSuccessor(MCBB))
+    return;
+  Successors.push_back(MCBB);
+}
+
+/// \returns true if \p MCBB is already in this block's successor list.
+bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
+  const auto Last = Successors.end();
+  return std::find(Successors.begin(), Last, MCBB) != Last;
+}
+
+/// Record \p MCBB as a predecessor of this block; duplicates are ignored.
+void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) {
+  if (isPredecessor(MCBB))
+    return;
+  Predecessors.push_back(MCBB);
+}
+
+/// \returns true if \p MCBB is already in this block's predecessor list.
+bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const {
+  const auto Last = Predecessors.end();
+  return std::find(Predecessors.begin(), Last, MCBB) != Last;
+}
+
+/// Update the CFG edges after this block's atom has been split and
+/// \p SplitBB has been created for the atom that directly follows it.
+///
+/// \p SplitBB takes over this block's successors and gets this block as a
+/// predecessor; this block is left with \p SplitBB as its only successor.
+void MCBasicBlock::splitBasicBlock(MCBasicBlock *SplitBB) {
+  assert(Insts->getEndAddr() + 1 == SplitBB->Insts->getBeginAddr() &&
+         "Splitting unrelated basic blocks!");
+  assert(SplitBB->Successors.empty() &&
+         "Split basic block shouldn't already have successors!");
+
+  SplitBB->addPredecessor(this);
+
+  // Move the outgoing edges to the new block, then fall through into it.
+  SplitBB->Successors = Successors;
+  Successors.clear();
+  addSuccessor(SplitBB);
+}
diff --git a/lib/MC/MCAnalysis/MCModule.cpp b/lib/MC/MCAnalysis/MCModule.cpp
new file mode 100644
index 0000000..7512299
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCModule.cpp
@@ -0,0 +1,142 @@
+//===- lib/MC/MCAnalysis/MCModule.cpp - MCModule implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAnalysis/MCModule.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include <algorithm>
+
+using namespace llvm;
+
+/// Ordering predicate for std::lower_bound over the atom list: true when
+/// atom \p L ends strictly before \p Addr.
+static bool AtomComp(const MCAtom *L, uint64_t Addr) {
+  return Addr > L->getEndAddr();
+}
+
+/// Ordering predicate for std::upper_bound over the atom list: true when
+/// \p Addr lies strictly before the end address of atom \p R.
+static bool AtomCompInv(uint64_t Addr, const MCAtom *R) {
+  return R->getEndAddr() > Addr;
+}
+
+/// Insert \p NewAtom into the module's atom list at the position given by its
+/// address range. The range must not overlap an atom already in the list.
+void MCModule::map(MCAtom *NewAtom) {
+  assert(NewAtom->Begin <= NewAtom->End &&
+         "Creating MCAtom with endpoints reversed?");
+
+  // The first atom ending at or after the new begin address is the only one
+  // that could overlap the new range.
+  AtomListTy::iterator Pos =
+      std::lower_bound(atom_begin(), atom_end(), NewAtom->Begin, AtomComp);
+  assert((Pos == atom_end() || (*Pos)->getBeginAddr() > NewAtom->End)
+         && "Offset range already occupied!");
+
+  Atoms.insert(Pos, NewAtom);
+}
+
+/// Allocate a text atom covering [Begin, End], add it to the module's atom
+/// list and return it. The module deletes its atoms in its destructor.
+MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) {
+  MCTextAtom *Created = new MCTextAtom(this, Begin, End);
+  map(Created);
+  return Created;
+}
+
+/// Allocate a data atom covering [Begin, End], add it to the module's atom
+/// list and return it. The module deletes its atoms in its destructor.
+MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) {
+  MCDataAtom *Created = new MCDataAtom(this, Begin, End);
+  map(Created);
+  return Created;
+}
+
+// remap - Update the interval mapping for an atom.
+//
+// Removes \p Atom from the atom list, re-inserts it at the position matching
+// [NewBegin, NewEnd] and then updates the atom's own bounds. The new range
+// must not overlap any other atom in the module.
+void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
+  // Find and erase the old mapping.
+  AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
+                                            Atom->Begin, AtomComp);
+  assert(I != atom_end() && "Atom offset not found in module!");
+  assert(*I == Atom && "Previous atom mapping was invalid!");
+  Atoms.erase(I);
+
+  // FIXME: special case NewBegin == Atom->Begin
+
+  // Insert the new mapping. The neighbour has to start after the *new* end of
+  // the atom; checking against the old end would miss overlaps introduced by
+  // growing or moving the atom.
+  AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(),
+                                               NewBegin, AtomComp);
+  assert((NewI == atom_end() || (*NewI)->getBeginAddr() > NewEnd)
+         && "Offset range already occupied!");
+  Atoms.insert(NewI, Atom);
+
+  // Update the atom internal bounds.
+  Atom->Begin = NewBegin;
+  Atom->End = NewEnd;
+}
+
+/// \returns the atom whose [begin, end] range contains \p Addr, or nullptr if
+/// no atom covers that address.
+const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
+  // First atom that ends at or after Addr; it contains Addr only if it also
+  // begins at or before it.
+  AtomListTy::const_iterator Pos =
+      std::lower_bound(atom_begin(), atom_end(), Addr, AtomComp);
+  if (Pos == atom_end())
+    return nullptr;
+
+  const MCAtom *Candidate = *Pos;
+  return Candidate->getBeginAddr() <= Addr ? Candidate : nullptr;
+}
+
+/// Non-const overload: forwards to the const lookup and strips constness from
+/// the result.
+MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
+  const MCModule *ConstThis = this;
+  return const_cast<MCAtom *>(ConstThis->findAtomContaining(Addr));
+}
+
+/// \returns the first atom in the list whose end address is greater than
+/// \p Addr, or nullptr when there is no such atom.
+const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const {
+  AtomListTy::const_iterator Pos =
+      std::upper_bound(atom_begin(), atom_end(), Addr, AtomCompInv);
+  return Pos != atom_end() ? *Pos : nullptr;
+}
+
+/// Non-const overload: forwards to the const lookup and strips constness from
+/// the result.
+MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) {
+  const MCModule *ConstThis = this;
+  return const_cast<MCAtom *>(ConstThis->findFirstAtomAfter(Addr));
+}
+
+/// Create a function called \p Name, append it to the module's function list
+/// (which owns it) and return a pointer to it.
+MCFunction *MCModule::createFunction(StringRef Name) {
+  Functions.push_back(std::unique_ptr<MCFunction>(new MCFunction(Name, this)));
+  MCFunction *Created = Functions.back().get();
+  return Created;
+}
+
+/// Ordering predicate for std::lower_bound over BBsByAtom, which is kept
+/// sorted by the address of the backing text atom object.
+static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) {
+  const MCTextAtom *Backing = BB->getInsts();
+  return Backing < Atom;
+}
+
+/// Split every basic block backed by \p TA after that atom has been split,
+/// with \p NewTA being the atom that now holds the upper half.
+///
+/// For each affected block a new block backed by \p NewTA is created in the
+/// same function and the CFG edges are updated via splitBasicBlock().
+void MCModule::splitBasicBlocksForAtom(const MCTextAtom *TA,
+                                       const MCTextAtom *NewTA) {
+  // Find the run of blocks backed by TA.
+  BBsByAtomTy::iterator First =
+      std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(), TA, CompBBToAtom);
+  BBsByAtomTy::iterator Last = First;
+  while (Last != BBsByAtom.end() && (*Last)->getInsts() == TA)
+    ++Last;
+
+  // Work on a snapshot: createBlock() constructs an MCBasicBlock, whose
+  // constructor calls trackBBForAtom() and thereby inserts into BBsByAtom.
+  // Iterating BBsByAtom directly while that happens may invalidate the
+  // iterator.
+  const BBsByAtomTy ToSplit(First, Last);
+  for (BBsByAtomTy::const_iterator I = ToSplit.begin(), E = ToSplit.end();
+       I != E; ++I) {
+    MCBasicBlock *BB = *I;
+    MCBasicBlock *NewBB = &BB->getParent()->createBlock(*NewTA);
+    BB->splitBasicBlock(NewBB);
+  }
+}
+
+/// Remember that basic block \p BB is backed by text atom \p Atom.
+///
+/// BBsByAtom is kept sorted by backing atom; \p BB is inserted after the
+/// blocks already recorded for \p Atom, unless it is already among them.
+void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) {
+  assert(Atom == BB->getInsts() && "Text atom doesn't back the basic block!");
+
+  BBsByAtomTy::iterator Pos =
+      std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(), Atom, CompBBToAtom);
+  while (Pos != BBsByAtom.end() && (*Pos)->getInsts() == Atom) {
+    if (*Pos == BB)
+      return; // Already tracked.
+    ++Pos;
+  }
+  BBsByAtom.insert(Pos, BB);
+}
+
+/// Create an empty module whose entrypoint address is 0.
+MCModule::MCModule()
+    : Entrypoint(0) {}
+
+/// Delete every atom in the module's atom list; atoms are allocated with new
+/// by createTextAtom() / createDataAtom().
+MCModule::~MCModule() {
+  AtomListTy::iterator It = atom_begin();
+  const AtomListTy::iterator Last = atom_end();
+  while (It != Last) {
+    delete *It;
+    ++It;
+  }
+}
diff --git a/lib/MC/MCAnalysis/MCModuleYAML.cpp b/lib/MC/MCAnalysis/MCModuleYAML.cpp
new file mode 100644
index 0000000..876b06d
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCModuleYAML.cpp
@@ -0,0 +1,464 @@
+//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of MCModule.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAnalysis/MCModuleYAML.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/YAML.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <vector>
+
+namespace llvm {
+
+namespace {
+
+// This class is used to map opcode and register names to enum values.
+//
+// There are at least 3 obvious ways to do this:
+// 1- Generate an MII/MRI method using a tablegen StringMatcher
+// 2- Write an MII/MRI method using std::lower_bound and the assumption that
+// the enums are sorted (starting at a fixed value).
+// 3- Do the matching manually as is done here.
+//
+// Why 3?
+// 1- A StringMatcher function for thousands of entries would incur
+// a non-negligible binary size overhead.
+// 2- The lower_bound comparators would be somewhat involved and aren't
+// obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h)
+// 3- This isn't actually something useful outside tests (but the same argument
+// can be made against having {MII,MRI}::getName).
+//
+// If this becomes useful outside this specific situation, feel free to do
+// the Right Thing (tm) and move the functionality to MII/MRI.
+//
+class InstrRegInfoHolder {
+  typedef StringMap<unsigned, BumpPtrAllocator> EnumValByNameTy;
+  // Name -> enum value tables, filled once in the constructor.
+  EnumValByNameTy InstEnumValueByName;
+  EnumValByNameTy RegEnumValueByName;
+
+  // Look \p Name up in \p Table; on success store the enum value in \p Out
+  // and return true, otherwise leave \p Out untouched and return false.
+  static bool lookup(const EnumValByNameTy &Table, StringRef Name,
+                     unsigned &Out) {
+    EnumValByNameTy::const_iterator Found = Table.find(Name);
+    if (Found == Table.end())
+      return false;
+    Out = Found->getValue();
+    return true;
+  }
+
+public:
+  const MCInstrInfo &MII;
+  const MCRegisterInfo &MRI;
+
+  // Build both tables by asking MII/MRI for the name of every opcode and
+  // register number.
+  InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI)
+      : InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())),
+        RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) {
+    for (int Opc = 0, NumOpcs = MII.getNumOpcodes(); Opc != NumOpcs; ++Opc)
+      InstEnumValueByName[MII.getName(Opc)] = Opc;
+    for (int Reg = 0, NumRegs = MRI.getNumRegs(); Reg != NumRegs; ++Reg)
+      RegEnumValueByName[MRI.getName(Reg)] = Reg;
+  }
+
+  // Translate a register name into its enum value; false if unknown.
+  bool matchRegister(StringRef Name, unsigned &Reg) {
+    return lookup(RegEnumValueByName, Name, Reg);
+  }
+
+  // Translate an opcode name into its enum value; false if unknown.
+  bool matchOpcode(StringRef Name, unsigned &Opc) {
+    return lookup(InstEnumValueByName, Name, Opc);
+  }
+};
+
+} // end unnamed namespace
+
+namespace MCModuleYAML { // Plain structs mirroring the YAML form of an MCModule.
+
+LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum)
+
+struct Operand {
+ MCOperand MCOp; // Written as "I<immediate>" or "R<register name>".
+};
+
+struct Inst {
+ OpcodeEnum Opcode; // Written by name under the "Inst" key.
+ std::vector<Operand> Operands; // "Ops" key.
+ uint64_t Size; // "Size" key; passed to MCTextAtom::addInst when parsing.
+};
+
+struct Atom {
+ MCAtom::AtomKind Type; // "Text" or "Data".
+ yaml::Hex64 StartAddress;
+ uint64_t Size; // Dumped as end address - begin address + 1.
+
+ std::vector<Inst> Insts; // "Content" of a text atom.
+ yaml::BinaryRef Data; // "Content" of a data atom.
+};
+
+struct BasicBlock {
+ yaml::Hex64 Address; // Begin address of the backing text atom.
+ std::vector<yaml::Hex64> Preds; // Begin addresses of the predecessors.
+ std::vector<yaml::Hex64> Succs; // Begin addresses of the successors.
+};
+
+struct Function {
+ StringRef Name;
+ std::vector<BasicBlock> BasicBlocks;
+};
+
+struct Module {
+ std::vector<Atom> Atoms; // Required key "Atoms".
+ std::vector<Function> Functions; // Optional key "Functions".
+};
+
+} // end namespace MCModuleYAML
+} // end namespace llvm
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function)
+
+namespace llvm {
+
+namespace yaml {
+
+template <> struct ScalarEnumerationTraits<MCAtom::AtomKind> { // "Text"/"Data"
+ static void enumeration(IO &IO, MCAtom::AtomKind &Kind);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Atom> { // StartAddress/Size/Type/Content
+ static void mapping(IO &IO, MCModuleYAML::Atom &A);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Inst> { // Inst/Size/Ops
+ static void mapping(IO &IO, MCModuleYAML::Inst &I);
+};
+
+template <> struct MappingTraits<MCModuleYAML::BasicBlock> { // Address/Preds/Succs
+ static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Function> { // Name/BasicBlocks
+ static void mapping(IO &IO, MCModuleYAML::Function &Fn);
+};
+
+template <> struct MappingTraits<MCModuleYAML::Module> { // Atoms/Functions
+ static void mapping(IO &IO, MCModuleYAML::Module &M);
+};
+
+template <> struct ScalarTraits<MCModuleYAML::Operand> { // void* context is an InstrRegInfoHolder
+ static void output(const MCModuleYAML::Operand &, void *,
+ llvm::raw_ostream &);
+ static StringRef input(StringRef, void *, MCModuleYAML::Operand &);
+ static bool mustQuote(StringRef) { return false; }
+};
+
+template <> struct ScalarTraits<MCModuleYAML::OpcodeEnum> { // void* context is an InstrRegInfoHolder
+ static void output(const MCModuleYAML::OpcodeEnum &, void *,
+ llvm::raw_ostream &);
+ static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &);
+ static bool mustQuote(StringRef) { return false; }
+};
+
+/// Map MCAtom::AtomKind to and from the YAML scalars "Text" and "Data".
+void ScalarEnumerationTraits<MCAtom::AtomKind>::enumeration(
+    IO &IO, MCAtom::AtomKind &Value) {
+  IO.enumCase(Value, "Text", MCAtom::TextAtom);
+  IO.enumCase(Value, "Data", MCAtom::DataAtom);
+}
+
+/// Map an atom: "StartAddress", "Size" and "Type" first, then "Content",
+/// whose form depends on the type (instructions for text, bytes for data).
+void MappingTraits<MCModuleYAML::Atom>::mapping(IO &IO, MCModuleYAML::Atom &A) {
+  IO.mapRequired("StartAddress", A.StartAddress);
+  IO.mapRequired("Size", A.Size);
+  IO.mapRequired("Type", A.Type);
+
+  switch (A.Type) {
+  case MCAtom::TextAtom:
+    IO.mapRequired("Content", A.Insts);
+    break;
+  case MCAtom::DataAtom:
+    IO.mapRequired("Content", A.Data);
+    break;
+  }
+}
+
+/// Map an instruction through the required keys "Inst" (opcode), "Size" and
+/// "Ops" (operands), in that order.
+void MappingTraits<MCModuleYAML::Inst>::mapping(IO &IO,
+                                                MCModuleYAML::Inst &I) {
+  IO.mapRequired("Inst", I.Opcode);
+  IO.mapRequired("Size", I.Size);
+  IO.mapRequired("Ops", I.Operands);
+}
+
+/// Map a basic block through the required keys "Address", "Preds" and
+/// "Succs", in that order.
+void MappingTraits<MCModuleYAML::BasicBlock>::mapping(
+    IO &IO, MCModuleYAML::BasicBlock &BB) {
+  IO.mapRequired("Address", BB.Address);
+  IO.mapRequired("Preds", BB.Preds);
+  IO.mapRequired("Succs", BB.Succs);
+}
+
+/// Map a function through the required keys "Name" and "BasicBlocks".
+void MappingTraits<MCModuleYAML::Function>::mapping(
+    IO &IO, MCModuleYAML::Function &F) {
+  IO.mapRequired("Name", F.Name);
+  IO.mapRequired("BasicBlocks", F.BasicBlocks);
+}
+
+/// Map a module: "Atoms" is required, "Functions" is optional.
+void MappingTraits<MCModuleYAML::Module>::mapping(
+    IO &IO, MCModuleYAML::Module &M) {
+  IO.mapRequired("Atoms", M.Atoms);
+  IO.mapOptional("Functions", M.Functions);
+}
+
+/// Write an operand as "I<immediate>" or "R<register name>".
+///
+/// \p Ctx must point to the InstrRegInfoHolder used to resolve register
+/// names. Any other operand kind is treated as unreachable.
+void
+ScalarTraits<MCModuleYAML::Operand>::output(const MCModuleYAML::Operand &Val,
+                                            void *Ctx, raw_ostream &Out) {
+  InstrRegInfoHolder *IRI = static_cast<InstrRegInfoHolder *>(Ctx);
+  const MCOperand &Op = Val.MCOp;
+
+  // FIXME: Doesn't support FPImm and expr/inst, but do these make sense?
+  if (Op.isImm()) {
+    Out << "I" << Op.getImm();
+    return;
+  }
+  if (Op.isReg()) {
+    Out << "R" << IRI->MRI.getName(Op.getReg());
+    return;
+  }
+  llvm_unreachable("Trying to output invalid MCOperand!");
+}
+
+/// Parse an operand scalar of the form "R<register name>" or "I<decimal
+/// immediate>" into \p Val.
+///
+/// \p Ctx must point to the InstrRegInfoHolder used to resolve register
+/// names.
+/// \returns an empty StringRef on success, otherwise an error message.
+StringRef
+ScalarTraits<MCModuleYAML::Operand>::input(StringRef Scalar, void *Ctx,
+                                           MCModuleYAML::Operand &Val) {
+  InstrRegInfoHolder *IRI = static_cast<InstrRegInfoHolder *>(Ctx);
+
+  // The first character selects the operand kind; an empty scalar has none.
+  const char Type = Scalar.empty() ? 0 : Scalar.front();
+
+  if (Type == 'R') {
+    unsigned Reg;
+    if (!IRI->matchRegister(Scalar.substr(1), Reg))
+      return "Invalid register name.";
+    Val.MCOp = MCOperand::CreateReg(Reg);
+    return StringRef();
+  }
+
+  if (Type == 'I') {
+    int64_t RIVal;
+    if (Scalar.substr(1).getAsInteger(10, RIVal))
+      return "Invalid immediate value.";
+    Val.MCOp = MCOperand::CreateImm(RIVal);
+    return StringRef();
+  }
+
+  return "Operand must start with 'R' (register) or 'I' (immediate).";
+}
+
+/// Write an opcode as its name, looked up through the InstrRegInfoHolder that
+/// \p Ctx points to.
+void ScalarTraits<MCModuleYAML::OpcodeEnum>::output(
+    const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) {
+  InstrRegInfoHolder *IRI = static_cast<InstrRegInfoHolder *>(Ctx);
+  Out << IRI->MII.getName(Val);
+}
+
+/// Parse an opcode name into \p Val using the InstrRegInfoHolder that \p Ctx
+/// points to.
+/// \returns an empty string on success, otherwise an error message.
+StringRef
+ScalarTraits<MCModuleYAML::OpcodeEnum>::input(StringRef Scalar, void *Ctx,
+                                              MCModuleYAML::OpcodeEnum &Val) {
+  InstrRegInfoHolder *IRI = static_cast<InstrRegInfoHolder *>(Ctx);
+  unsigned Opc;
+  if (IRI->matchOpcode(Scalar, Opc)) {
+    Val = Opc;
+    return "";
+  }
+  return "Invalid instruction opcode.";
+}
+
+} // end namespace yaml
+
+namespace {
+
+class MCModule2YAML { // Converts an MCModule into its MCModuleYAML::Module form.
+ const MCModule &MCM;
+ MCModuleYAML::Module YAMLModule; // Filled by the constructor.
+ void dumpAtom(const MCAtom *MCA);
+ void dumpFunction(const MCFunction &MCF);
+ void dumpBasicBlock(const MCBasicBlock *MCBB);
+
+public:
+ MCModule2YAML(const MCModule &MCM);
+ MCModuleYAML::Module &getYAMLModule();
+};
+
+class YAML2MCModule { // Populates an MCModule from an MCModuleYAML::Module.
+ MCModule &MCM;
+
+public:
+ YAML2MCModule(MCModule &MCM);
+ StringRef parse(const MCModuleYAML::Module &YAMLModule); // Empty string on success.
+};
+
+} // end unnamed namespace
+
+/// Convert \p MCM eagerly: every atom is dumped first, then every function.
+MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() {
+  MCModule::const_atom_iterator AtomIt = MCM.atom_begin();
+  const MCModule::const_atom_iterator AtomEnd = MCM.atom_end();
+  while (AtomIt != AtomEnd) {
+    dumpAtom(*AtomIt);
+    ++AtomIt;
+  }
+
+  MCModule::const_func_iterator FuncIt = MCM.func_begin();
+  const MCModule::const_func_iterator FuncEnd = MCM.func_end();
+  while (FuncIt != FuncEnd) {
+    dumpFunction(**FuncIt);
+    ++FuncIt;
+  }
+}
+
+/// Append the YAML form of \p MCA to YAMLModule.Atoms.
+///
+/// Kind, start address and size are always recorded. A text atom additionally
+/// gets one entry per instruction (opcode, size, operands); a data atom gets
+/// its data.
+void MCModule2YAML::dumpAtom(const MCAtom *MCA) {
+  YAMLModule.Atoms.push_back(MCModuleYAML::Atom());
+  MCModuleYAML::Atom &A = YAMLModule.Atoms.back();
+
+  A.Type = MCA->getKind();
+  A.StartAddress = MCA->getBeginAddr();
+  A.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1;
+
+  if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(MCA)) {
+    const size_t NumInsts = TA->size();
+    A.Insts.resize(NumInsts);
+    for (size_t Idx = 0; Idx != NumInsts; ++Idx) {
+      const MCDecodedInst &Decoded = TA->at(Idx);
+      MCModuleYAML::Inst &YI = A.Insts[Idx];
+      YI.Opcode = Decoded.Inst.getOpcode();
+      YI.Size = Decoded.Size;
+
+      const unsigned NumOps = Decoded.Inst.getNumOperands();
+      YI.Operands.resize(NumOps);
+      for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx)
+        YI.Operands[OpIdx].MCOp = Decoded.Inst.getOperand(OpIdx);
+    }
+    return;
+  }
+
+  if (const MCDataAtom *DA = dyn_cast<MCDataAtom>(MCA)) {
+    A.Data = DA->getData();
+    return;
+  }
+
+  llvm_unreachable("Unknown atom type.");
+}
+
+/// Append the YAML form of \p MCF to YAMLModule.Functions: its name and, for
+/// every basic block, the begin address of the backing text atom together
+/// with the begin addresses of its predecessors and successors.
+void MCModule2YAML::dumpFunction(const MCFunction &MCF) {
+  YAMLModule.Functions.push_back(MCModuleYAML::Function());
+  MCModuleYAML::Function &F = YAMLModule.Functions.back();
+  F.Name = MCF.getName();
+
+  for (MCFunction::const_iterator BlockIt = MCF.begin(), BlockEnd = MCF.end();
+       BlockIt != BlockEnd; ++BlockIt) {
+    const MCBasicBlock &MCBB = **BlockIt;
+
+    F.BasicBlocks.push_back(MCModuleYAML::BasicBlock());
+    MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back();
+    BB.Address = MCBB.getInsts()->getBeginAddr();
+
+    MCBasicBlock::pred_const_iterator PredIt = MCBB.pred_begin();
+    const MCBasicBlock::pred_const_iterator PredEnd = MCBB.pred_end();
+    for (; PredIt != PredEnd; ++PredIt)
+      BB.Preds.push_back((*PredIt)->getInsts()->getBeginAddr());
+
+    MCBasicBlock::succ_const_iterator SuccIt = MCBB.succ_begin();
+    const MCBasicBlock::succ_const_iterator SuccEnd = MCBB.succ_end();
+    for (; SuccIt != SuccEnd; ++SuccIt)
+      BB.Succs.push_back((*SuccIt)->getInsts()->getBeginAddr());
+  }
+}
+
+/// \returns the YAML representation built by the constructor.
+MCModuleYAML::Module &MCModule2YAML::getYAMLModule() {
+  return YAMLModule;
+}
+
+/// Bind the parser to the module \p MCM that parse() will populate.
+YAML2MCModule::YAML2MCModule(MCModule &MCM)
+    : MCM(MCM) {}
+
+/// Populate the wrapped MCModule from \p YAMLModule.
+///
+/// Atoms are created first: text atoms receive their instructions, data atoms
+/// their bytes. Functions are created next; each basic block is attached to
+/// the text atom that starts at the block's address, and the blocks are then
+/// linked through their predecessor and successor address lists.
+///
+/// \returns an empty string on success, or a message describing the first
+/// problem found. The module may already be partially populated when an error
+/// is returned.
+StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) {
+  typedef std::vector<MCModuleYAML::Atom>::const_iterator AtomIt;
+  typedef std::vector<MCModuleYAML::Inst>::const_iterator InstIt;
+  typedef std::vector<MCModuleYAML::Operand>::const_iterator OpIt;
+
+  // Text atoms by start address, used to resolve basic block addresses below.
+  typedef DenseMap<uint64_t, MCTextAtom *> AddrToTextAtomTy;
+  AddrToTextAtomTy TAByAddr;
+
+  for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end();
+       AI != AE; ++AI) {
+    uint64_t StartAddress = AI->StartAddress;
+    if (AI->Size == 0)
+      return "Atoms can't be empty!";
+    uint64_t EndAddress = StartAddress + AI->Size - 1;
+    switch (AI->Type) {
+    case MCAtom::TextAtom: {
+      MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress);
+      TAByAddr[StartAddress] = TA;
+      for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE;
+           ++II) {
+        MCInst MI;
+        MI.setOpcode(II->Opcode);
+        for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE;
+             ++OI)
+          MI.addOperand(OI->MCOp);
+        TA->addInst(MI, II->Size);
+      }
+      break;
+    }
+    case MCAtom::DataAtom: {
+      MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress);
+      // Decode the hex content into raw bytes before adding them one by one.
+      SmallVector<char, 64> Data;
+      raw_svector_ostream OS(Data);
+      AI->Data.writeAsBinary(OS);
+      OS.flush();
+      for (size_t i = 0, e = Data.size(); i != e; ++i)
+        DA->addData((uint8_t)Data[i]);
+      break;
+    }
+    }
+  }
+
+  typedef std::vector<MCModuleYAML::Function>::const_iterator FuncIt;
+  typedef std::vector<MCModuleYAML::BasicBlock>::const_iterator BBIt;
+  typedef std::vector<yaml::Hex64>::const_iterator AddrIt;
+  for (FuncIt FI = YAMLModule.Functions.begin(),
+              FE = YAMLModule.Functions.end();
+       FI != FE; ++FI) {
+    MCFunction *MCFN = MCM.createFunction(FI->Name);
+
+    // First pass: create every block, so the second pass can resolve edges
+    // regardless of the order in which blocks are listed.
+    for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
+         BBI != BBE; ++BBI) {
+      AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address);
+      if (It == TAByAddr.end())
+        return "Basic block start address doesn't match any text atom!";
+      MCFN->createBlock(*It->second);
+    }
+
+    // Second pass: wire up predecessors and successors.
+    for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
+         BBI != BBE; ++BBI) {
+      MCBasicBlock *MCBB = MCFN->find(BBI->Address);
+      if (!MCBB)
+        return "Couldn't find matching basic block in function.";
+      for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE;
+           ++PI) {
+        MCBasicBlock *Pred = MCFN->find(*PI);
+        if (!Pred)
+          return "Couldn't find predecessor basic block.";
+        MCBB->addPredecessor(Pred);
+      }
+      for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE;
+           ++SI) {
+        MCBasicBlock *Succ = MCFN->find(*SI);
+        if (!Succ)
+          return "Couldn't find successor basic block.";
+        MCBB->addSuccessor(Succ);
+      }
+    }
+  }
+  return "";
+}
+
+/// Write \p MCM to \p OS as YAML, using \p MII and \p MRI to turn opcodes and
+/// registers into names.
+/// \returns an empty string; this function reports no errors.
+StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM,
+                        const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
+  MCModule2YAML Dumper(MCM);
+  InstrRegInfoHolder IRI(MII, MRI);
+
+  // The holder travels as the YAML I/O context so the scalar traits can
+  // resolve names.
+  yaml::Output YOut(OS, static_cast<void *>(&IRI));
+  MCModuleYAML::Module &Converted = Dumper.getYAMLModule();
+  YOut << Converted;
+  return "";
+}
+
+/// Parse \p YamlContent into a freshly allocated MCModule stored in \p MCM,
+/// using \p MII and \p MRI to resolve opcode and register names.
+///
+/// \p MCM is reset to a new module before parsing starts, so it is replaced
+/// even when an error is returned.
+///
+/// \returns an empty string on success, otherwise an error message. Every
+/// returned message is a string literal, so the StringRef stays valid after
+/// the call.
+StringRef yaml2mcmodule(std::unique_ptr<MCModule> &MCM, StringRef YamlContent,
+                        const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
+  MCM.reset(new MCModule);
+  YAML2MCModule Parser(*MCM);
+  MCModuleYAML::Module YAMLModule;
+  InstrRegInfoHolder IRI(MII, MRI);
+  yaml::Input YIn(YamlContent, (void *)&IRI);
+  YIn >> YAMLModule;
+
+  // Do not return ec.message() here: it yields a temporary std::string, and a
+  // StringRef to it would dangle as soon as this function returns.
+  // NOTE(review): yaml::Input is expected to have printed the detailed
+  // diagnostic itself - confirm against the YAML I/O documentation.
+  if (YIn.error())
+    return "Failed to parse YAML input.";
+
+  return Parser.parse(YAMLModule);
+}
+
+} // end namespace llvm
diff --git a/lib/MC/MCAnalysis/MCObjectDisassembler.cpp b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp
new file mode 100644
index 0000000..0f789ff
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp
@@ -0,0 +1,574 @@
+//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectDisassembler.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include "llvm/MC/MCAnalysis/MCModule.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCObjectSymbolizer.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MachO.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/StringRefMemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+using namespace object;
+
+#define DEBUG_TYPE "mc"
+
+/// Bind the disassembler to the object file \p Obj, the instruction decoder
+/// \p Dis and the instruction analysis \p MIA. MOS starts out null.
+MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
+                                           const MCDisassembler &Dis,
+                                           const MCInstrAnalysis &MIA)
+    : Obj(Obj),
+      Dis(Dis),
+      MIA(MIA),
+      MOS(nullptr) {
+}
+
+/// Locate the entry point by scanning the object's symbols for one named
+/// "main" or "_main".
+///
+/// \returns the effective load address of the first such symbol, or 0 when no
+/// symbol has either name.
+uint64_t MCObjectDisassembler::getEntrypoint() {
+  for (const SymbolRef &Sym : Obj.symbols()) {
+    StringRef SymName;
+    Sym.getName(SymName);
+    const bool IsMain = SymName == "main" || SymName == "_main";
+    if (!IsMain)
+      continue;
+
+    uint64_t SymAddr;
+    Sym.getAddress(SymAddr);
+    return getEffectiveLoadAddr(SymAddr);
+  }
+  return 0;
+}
+
+/// This implementation reports no static initializer functions.
+ArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() {
+  ArrayRef<uint64_t> NoFunctions;
+  return NoFunctions;
+}
+
+/// This implementation reports no static exit functions.
+ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() {
+  ArrayRef<uint64_t> NoFunctions;
+  return NoFunctions;
+}
+
+/// \returns the memory region to disassemble \p Addr from. \p Addr is
+/// currently ignored: the fallback region is returned for every address.
+MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) {
+  // FIXME: Keep track of object sections.
+  MemoryObject *Region = FallbackRegion.get();
+  return Region;
+}
+
+/// Map an address as recorded in the object to the address it is loaded at.
+/// This implementation applies no translation and returns \p Addr unchanged.
+uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
+  const uint64_t EffectiveAddr = Addr;
+  return EffectiveAddr;
+}
+
+/// Map an effective load address back to the address recorded in the object.
+/// This implementation applies no translation and returns \p Addr unchanged.
+uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) {
+  const uint64_t OriginalAddr = Addr;
+  return OriginalAddr;
+}
+
+/// Allocate a new MCModule with only its Entrypoint filled in (from
+/// getEntrypoint()).
+///
+/// \returns the heap-allocated module; nothing in this function releases it.
+MCModule *MCObjectDisassembler::buildEmptyModule() {
+  MCModule *NewModule = new MCModule;
+  NewModule->Entrypoint = getEntrypoint();
+  return NewModule;
+}
+
+/// Build a module holding the atoms of every section, and additionally its
+/// control flow graph when \p withCFG is true.
+///
+/// \returns the module created by buildEmptyModule().
+MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
+  MCModule *Result = buildEmptyModule();
+  buildSectionAtoms(Result);
+  if (!withCFG)
+    return Result;
+
+  buildCFG(Result);
+  return Result;
+}
+
+/// Populate \p Module with one or more atoms per text/data section of the
+/// object.
+///
+/// A data section becomes a single data atom. A text section is disassembled
+/// linearly: decoded instructions are appended to text atoms, and byte ranges
+/// that fail to decode are stored in data atoms.
+void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
+ for (const SectionRef &Section : Obj.sections()) {
+ bool isText;
+ Section.isText(isText);
+ bool isData;
+ Section.isData(isData);
+ // Sections that are neither text nor data produce no atoms.
+ if (!isData && !isText)
+ continue;
+
+ uint64_t StartAddr;
+ Section.getAddress(StartAddr);
+ uint64_t SecSize;
+ Section.getSize(SecSize);
+ if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
+ continue;
+ // From here on StartAddr is the effective load address, not the raw one.
+ StartAddr = getEffectiveLoadAddr(StartAddr);
+
+ StringRef Contents;
+ Section.getContents(Contents);
+ StringRefMemoryObject memoryObject(Contents, StartAddr);
+
+ // We don't care about things like non-file-backed sections yet.
+ // Empty sections are skipped as well.
+ if (Contents.size() != SecSize || !SecSize)
+ continue;
+ // EndAddr is inclusive: the address of the section's last byte.
+ uint64_t EndAddr = StartAddr + SecSize - 1;
+
+ StringRef SecName;
+ Section.getName(SecName);
+
+ if (isText) {
+ // At most one of Text / InvalidData is the atom currently being extended.
+ MCTextAtom *Text = nullptr;
+ MCDataAtom *InvalidData = nullptr;
+
+ uint64_t InstSize;
+ // InstSize is set by getInstruction() on every iteration and is what
+ // advances Index.
+ for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
+ const uint64_t CurAddr = StartAddr + Index;
+ MCInst Inst;
+ if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(),
+ nulls())) {
+ // Decoded fine: start a new text atom if the previous bytes were
+ // invalid (or this is the first instruction), then append.
+ if (!Text) {
+ Text = Module->createTextAtom(CurAddr, CurAddr);
+ Text->setName(SecName);
+ }
+ Text->addInst(Inst, InstSize);
+ InvalidData = nullptr;
+ } else {
+ // Only checked in assert-enabled builds; a zero InstSize would
+ // otherwise keep Index from advancing.
+ assert(InstSize && "getInstruction() consumed no bytes");
+ // Undecodable bytes: close the current text atom and open a data
+ // atom, unless one is already being filled.
+ if (!InvalidData) {
+ Text = nullptr;
+ InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1);
+ }
+ for (uint64_t I = 0; I < InstSize; ++I)
+ InvalidData->addData(Contents[Index+I]);
+ }
+ }
+ } else {
+ // Data section: a single atom spanning [StartAddr, EndAddr], filled with
+ // the section contents byte by byte.
+ MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
+ Data->setName(SecName);
+ for (uint64_t Index = 0; Index < SecSize; ++Index)
+ Data->addData(Contents[Index]);
+ }
+ }
+}
+
+namespace {
+ struct BBInfo;
+ // Set of basic block records, used for the successor/predecessor links.
+ typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy;
+
+ // Bookkeeping record for one basic block while a CFG is being built.
+ struct BBInfo {
+ // Text atom backing the block; null until one has been associated.
+ MCTextAtom *Atom;
+ // The MCBasicBlock created for this record; null until created.
+ MCBasicBlock *BB;
+ // Successor and predecessor records, kept in sync by addSucc().
+ BBInfoSetTy Succs;
+ BBInfoSetTy Preds;
+ // Successor begin addresses (used by getBBAt instead of Succs/Preds).
+ MCObjectDisassembler::AddressSetTy SuccAddrs;
+
+ BBInfo() : Atom(nullptr), BB(nullptr) {}
+
+ // Record the edge this -> Succ in both directions.
+ void addSucc(BBInfo &Succ) {
+ Succs.insert(&Succ);
+ Succ.Preds.insert(this);
+ }
+ };
+}
+
+/// Sort \p V in ascending order and drop repeated addresses, in place.
+static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) {
+  std::sort(V.begin(), V.end());
+  // After sorting, equal addresses are adjacent; unique() moves the
+  // survivors to the front and returns where the leftovers begin.
+  auto FirstLeftover = std::unique(V.begin(), V.end());
+  V.erase(FirstLeftover, V.end());
+}
+
+/// Build functions and basic blocks for \p Module from its existing text
+/// atoms.
+///
+/// Works in four passes: collect split points and call targets, split text
+/// atoms at the split points, compute successor/predecessor links between the
+/// resulting atoms, then create one MCFunction per call target that has an
+/// atom. \p Module must not contain any function yet (asserted).
+void MCObjectDisassembler::buildCFG(MCModule *Module) {
+ typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
+ // Keyed by basic block begin address. operator[] is used throughout, so
+ // looking up an unknown address inserts a record with a null Atom.
+ BBInfoByAddrTy BBInfos;
+ // Addresses at which a text atom must start.
+ AddressSetTy Splits;
+ // Addresses at which a function is created.
+ AddressSetTy Calls;
+
+ // Every function symbol is both a function start and a block boundary.
+ for (const SymbolRef &Symbol : Obj.symbols()) {
+ SymbolRef::Type SymType;
+ Symbol.getType(SymType);
+ if (SymType == SymbolRef::ST_Function) {
+ uint64_t SymAddr;
+ Symbol.getAddress(SymAddr);
+ SymAddr = getEffectiveLoadAddr(SymAddr);
+ Calls.push_back(SymAddr);
+ Splits.push_back(SymAddr);
+ }
+ }
+
+ assert(Module->func_begin() == Module->func_end()
+ && "Module already has a CFG!");
+
+ // First, determine the basic block boundaries and call targets.
+ for (MCModule::atom_iterator AI = Module->atom_begin(),
+ AE = Module->atom_end();
+ AI != AE; ++AI) {
+ MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+ if (!TA) continue;
+ // The start of every pre-existing text atom is treated as a function start.
+ Calls.push_back(TA->getBeginAddr());
+ BBInfos[TA->getBeginAddr()].Atom = TA;
+ for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
+ II != IE; ++II) {
+ // A terminator ends a block, so the next address starts a new one.
+ if (MIA.isTerminator(II->Inst))
+ Splits.push_back(II->Address + II->Size);
+ uint64_t Target;
+ // A branch with a computable target: the target starts a block, and
+ // also a function when the instruction is a call.
+ if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
+ if (MIA.isCall(II->Inst))
+ Calls.push_back(Target);
+ Splits.push_back(Target);
+ }
+ }
+ }
+
+ RemoveDupsFromAddressVector(Splits);
+ RemoveDupsFromAddressVector(Calls);
+
+ // Split text atoms into basic block atoms.
+ for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
+ SI != SE; ++SI) {
+ MCAtom *A = Module->findAtomContaining(*SI);
+ if (!A) continue;
+ // NOTE(review): cast<> assumes a split address never falls inside a data
+ // atom -- confirm, or switch to dyn_cast and skip.
+ MCTextAtom *TA = cast<MCTextAtom>(A);
+ // Already a block boundary, nothing to split.
+ if (TA->getBeginAddr() == *SI)
+ continue;
+ MCTextAtom *NewAtom = TA->split(*SI);
+ BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
+ // Name the new atom "<prefix>:<hex address>", where <prefix> is TA's name
+ // up to its last ':' (or the whole name when it has no ':').
+ StringRef BBName = TA->getName();
+ BBName = BBName.substr(0, BBName.find_last_of(':'));
+ NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
+ }
+
+ // Compute succs/preds.
+ for (MCModule::atom_iterator AI = Module->atom_begin(),
+ AE = Module->atom_end();
+ AI != AE; ++AI) {
+ MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
+ if (!TA) continue;
+ BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
+ // Only the last instruction of the atom decides its successors.
+ const MCDecodedInst &LI = TA->back();
+ if (MIA.isBranch(LI.Inst)) {
+ uint64_t Target;
+ if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
+ CurBB.addSucc(BBInfos[Target]);
+ // A conditional branch can also fall through to the next address.
+ if (MIA.isConditionalBranch(LI.Inst))
+ CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+ } else if (!MIA.isTerminator(LI.Inst))
+ CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
+ }
+
+
+ // Create functions and basic blocks.
+ for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
+ CI != CE; ++CI) {
+ BBInfo &BBI = BBInfos[*CI];
+ // Call targets without an atom get no function.
+ if (!BBI.Atom) continue;
+
+ MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());
+
+ // Create MCBBs.
+ // The worklist grows while it is being walked; it ends up holding every
+ // record connected to the entry through successor or predecessor links.
+ SmallSetVector<BBInfo*, 16> Worklist;
+ Worklist.insert(&BBI);
+ for (size_t wi = 0; wi < Worklist.size(); ++wi) {
+ // This pointer shadows the outer reference BBI.
+ BBInfo *BBI = Worklist[wi];
+ if (!BBI->Atom)
+ continue;
+ BBI->BB = &MCFN.createBlock(*BBI->Atom);
+ // Add all predecessors and successors to the worklist.
+ for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
+ SI != SE; ++SI)
+ Worklist.insert(*SI);
+ for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
+ PI != PE; ++PI)
+ Worklist.insert(*PI);
+ }
+
+ // Set preds/succs.
+ // Links to records that never got a block (null BB) are dropped.
+ for (size_t wi = 0; wi < Worklist.size(); ++wi) {
+ BBInfo *BBI = Worklist[wi];
+ MCBasicBlock *MCBB = BBI->BB;
+ if (!MCBB)
+ continue;
+ for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
+ SI != SE; ++SI)
+ if ((*SI)->BB)
+ MCBB->addSuccessor((*SI)->BB);
+ for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
+ PI != PE; ++PI)
+ if ((*PI)->BB)
+ MCBB->addPredecessor((*PI)->BB);
+ }
+ }
+}
+
+// Basic idea of the disassembly + discovery:
+//
+// start with the wanted address, insert it in the worklist
+// while worklist not empty, take next address in the worklist:
+// - check if atom exists there
+//   - if middle of atom:
+//     - split basic blocks referencing the atom
+//     - look for an already encountered BBInfo (using a map<atom, bbinfo>)
+//       - if there is, split it (new one, fallthrough, move succs, etc..)
+//   - if start of atom: nothing else to do
+//   - if no atom: create new atom and new bbinfo
+// - look at the last instruction in the atom, add succs to worklist
+// for all elements in the worklist:
+// - create basic block, update preds/succs, etc..
+//
+// Parameters:
+// - Module: where atoms are looked up, split and created.
+// - MCFN: the function receiving the basic blocks.
+// - BBBeginAddr: address of the first block to discover.
+// - CallTargets: receives the targets of calls found while disassembling new
+//   atoms, and of branches that resolve to an external function.
+// - TailCallTargets: receives the targets of branches that resolve to an
+//   external function.
+//
+// Returns the basic block of MCFN starting at BBBeginAddr.
+//
+MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN,
+                                            uint64_t BBBeginAddr,
+                                            AddressSetTy &CallTargets,
+                                            AddressSetTy &TailCallTargets) {
+  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
+  typedef SmallSetVector<uint64_t, 16> AddrWorklistTy;
+  BBInfoByAddrTy BBInfos;
+  AddrWorklistTy Worklist;
+
+  // Pass 1: find or create the atom of every reachable block. The worklist
+  // grows while it is being walked.
+  Worklist.insert(BBBeginAddr);
+  for (size_t wi = 0; wi < Worklist.size(); ++wi) {
+    const uint64_t BeginAddr = Worklist[wi];
+    BBInfo *BBI = &BBInfos[BeginAddr];
+
+    MCTextAtom *&TA = BBI->Atom;
+    assert(!TA && "Discovered basic block already has an associated atom!");
+
+    // Look for an atom at BeginAddr.
+    if (MCAtom *A = Module->findAtomContaining(BeginAddr)) {
+      // FIXME: We don't care about mixed atoms, see above.
+      TA = cast<MCTextAtom>(A);
+
+      // The found atom doesn't begin at BeginAddr, we have to split it.
+      if (TA->getBeginAddr() != BeginAddr) {
+        // FIXME: Handle overlapping atoms: middle-starting instructions, etc..
+        MCTextAtom *NewTA = TA->split(BeginAddr);
+
+        // Look for an already encountered basic block that needs splitting
+        BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr());
+        if (It != BBInfos.end() && It->second.Atom) {
+          // The tail inherits the old block's successors; the head now only
+          // falls through to the tail.
+          BBI->SuccAddrs = It->second.SuccAddrs;
+          It->second.SuccAddrs.clear();
+          It->second.SuccAddrs.push_back(BeginAddr);
+        }
+        TA = NewTA;
+      }
+      BBI->Atom = TA;
+    } else {
+      // If we didn't find an atom, then we have to disassemble to create one!
+
+      MemoryObject *Region = getRegionFor(BeginAddr);
+      if (!Region)
+        llvm_unreachable(("Couldn't find suitable region for disassembly at " +
+                          utostr(BeginAddr)).c_str());
+
+      uint64_t InstSize;
+      uint64_t EndAddr = Region->getBase() + Region->getExtent();
+
+      // We want to stop before the next atom and have a fallthrough to it.
+      if (MCTextAtom *NextAtom =
+              cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr)))
+        EndAddr = std::min(EndAddr, NextAtom->getBeginAddr());
+
+      for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
+        MCInst Inst;
+        if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(),
+                               nulls())) {
+          if (!TA)
+            TA = Module->createTextAtom(Addr, Addr);
+          TA->addInst(Inst, InstSize);
+        } else {
+          // We don't care about splitting mixed atoms either.
+          llvm_unreachable("Couldn't disassemble instruction in atom.");
+        }
+
+        uint64_t BranchTarget;
+        if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) {
+          if (MIA.isCall(Inst))
+            CallTargets.push_back(BranchTarget);
+        }
+
+        if (MIA.isTerminator(Inst))
+          break;
+      }
+      BBI->Atom = TA;
+    }
+
+    assert(TA && "Couldn't disassemble atom, none was created!");
+    assert(TA->begin() != TA->end() && "Empty atom!");
+
+    MemoryObject *Region = getRegionFor(TA->getBeginAddr());
+    assert(Region && "Couldn't find region for already disassembled code!");
+    uint64_t EndRegion = Region->getBase() + Region->getExtent();
+
+    // Now we have a basic block atom, add successors.
+    // Add the fallthrough block.
+    if ((MIA.isConditionalBranch(TA->back().Inst) ||
+         !MIA.isTerminator(TA->back().Inst)) &&
+        (TA->getEndAddr() + 1 < EndRegion)) {
+      BBI->SuccAddrs.push_back(TA->getEndAddr() + 1);
+      Worklist.insert(TA->getEndAddr() + 1);
+    }
+
+    // If the terminator is a branch, add the target block.
+    if (MIA.isBranch(TA->back().Inst)) {
+      uint64_t BranchTarget;
+      if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address,
+                             TA->back().Size, BranchTarget)) {
+        StringRef ExtFnName;
+        if (MOS)
+          ExtFnName =
+              MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget));
+        if (!ExtFnName.empty()) {
+          // A branch to an external function is a tail call, not a successor.
+          TailCallTargets.push_back(BranchTarget);
+          CallTargets.push_back(BranchTarget);
+        } else {
+          BBI->SuccAddrs.push_back(BranchTarget);
+          Worklist.insert(BranchTarget);
+        }
+      }
+    }
+  }
+
+  // Pass 2: make sure every discovered address has a basic block in MCFN.
+  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
+    const uint64_t BeginAddr = Worklist[wi];
+    BBInfo *BBI = &BBInfos[BeginAddr];
+
+    assert(BBI->Atom && "Found a basic block without an associated atom!");
+
+    // Look for a basic block at BeginAddr.
+    BBI->BB = MCFN->find(BeginAddr);
+    if (BBI->BB) {
+      // FIXME: check that the succs/preds are the same
+      continue;
+    }
+    // If there was none, we have to create one from the atom.
+    BBI->BB = &MCFN->createBlock(*BBI->Atom);
+  }
+
+  // Pass 3: wire up successor/predecessor edges.
+  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
+    const uint64_t BeginAddr = Worklist[wi];
+    BBInfo *BBI = &BBInfos[BeginAddr];
+    MCBasicBlock *BB = BBI->BB;
+
+    RemoveDupsFromAddressVector(BBI->SuccAddrs);
+    // The loop condition used to read "SE != SE", which is always false, so
+    // no edge was ever recorded. Compare the moving iterator instead.
+    for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(),
+                                      SE = BBI->SuccAddrs.end();
+         SI != SE; ++SI) {
+      // Every successor address was put on the worklist when it was recorded,
+      // so pass 2 has given it a block.
+      MCBasicBlock *Succ = BBInfos[*SI].BB;
+      assert(Succ && "Successor address has no basic block!");
+      BB->addSuccessor(Succ);
+      Succ->addPredecessor(BB);
+    }
+  }
+
+  assert(BBInfos[Worklist[0]].BB &&
+         "No basic block created at requested address?");
+
+  return BBInfos[Worklist[0]].BB;
+}
+
+/// Return the function of \p Module that starts at \p BeginAddr, creating it
+/// if needed.
+///
+/// Tried in order: an external function known to the symbolizer (created by
+/// name only), an existing non-empty function whose entry block begins at
+/// \p BeginAddr, and finally a new unnamed function whose blocks are
+/// discovered with getBBAt(). \p CallTargets and \p TailCallTargets are only
+/// forwarded to getBBAt().
+MCFunction *
+MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr,
+                                     AddressSetTy &CallTargets,
+                                     AddressSetTy &TailCallTargets) {
+  // First, check if this is an external function.
+  if (MOS) {
+    StringRef ExternalName =
+        MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr));
+    if (!ExternalName.empty())
+      return Module->createFunction(ExternalName);
+  }
+
+  // If it's not, look for an existing function.
+  // FIXME: MCModule should provide a findFunctionByAddr()
+  MCModule::func_iterator Cur = Module->func_begin();
+  MCModule::func_iterator Last = Module->func_end();
+  for (; Cur != Last; ++Cur) {
+    if (!(*Cur)->empty() &&
+        (*Cur)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr)
+      return Cur->get();
+  }
+
+  // Finally, just create a new one.
+  MCFunction *NewFn = Module->createFunction("");
+  getBBAt(Module, NewFn, BeginAddr, CallTargets, TailCallTargets);
+  return NewFn;
+}
+
+// MachO MCObjectDisassembler implementation.
+
+/// Set up a Mach-O disassembler and remember the contents of the
+/// "__mod_init_func" and "__mod_exit_func" sections, if present.
+///
+/// \p VMAddrSlide and \p HeaderLoadAddress are stored for the address
+/// translation and entry point computations.
+MCMachOObjectDisassembler::MCMachOObjectDisassembler(
+    const MachOObjectFile &MOOF, const MCDisassembler &Dis,
+    const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
+    uint64_t HeaderLoadAddress)
+    : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF),
+      VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) {
+
+  for (const SectionRef &Sec : MOOF.sections()) {
+    StringRef SecName;
+    Sec.getName(SecName);
+    // FIXME: We should use the S_ section type instead of the name.
+    if (SecName == "__mod_init_func") {
+      DEBUG(dbgs() << "Found __mod_init_func section!\n");
+      Sec.getContents(ModInitContents);
+      continue;
+    }
+    if (SecName == "__mod_exit_func") {
+      DEBUG(dbgs() << "Found __mod_exit_func section!\n");
+      Sec.getContents(ModExitContents);
+    }
+  }
+}
+
+/// Translate an address recorded in the object into its effective load
+/// address by adding the VM address slide.
+// FIXME: Only do the translations for addresses actually inside the object.
+uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
+  const uint64_t Slid = Addr + VMAddrSlide;
+  return Slid;
+}
+
+/// Undo getEffectiveLoadAddr(): subtract the VM address slide from
+/// \p EffectiveAddr.
+uint64_t
+MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) {
+  const uint64_t Unslid = EffectiveAddr - VMAddrSlide;
+  return Unslid;
+}
+
+/// Compute the entry point of the Mach-O object.
+///
+/// Scans the load commands for LC_MAIN. When one with a non-zero entry offset
+/// is found, the result is that offset plus HeaderLoadAddress; otherwise the
+/// symbol-based lookup of MCObjectDisassembler::getEntrypoint() is used.
+uint64_t MCMachOObjectDisassembler::getEntrypoint() {
+  uint64_t EntryFileOffset = 0;
+
+  // Look for LC_MAIN.
+  // A header without load commands has nothing to scan. Skipping it also
+  // keeps "LoadCommandCount - 1" below from wrapping around.
+  const uint32_t LoadCommandCount = MOOF.getHeader().ncmds;
+  if (LoadCommandCount != 0) {
+    MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo();
+    for (uint32_t I = 0;; ++I) {
+      if (Load.C.cmd == MachO::LC_MAIN) {
+        EntryFileOffset =
+            ((const MachO::entry_point_command *)Load.Ptr)->entryoff;
+        break;
+      }
+
+      // Never ask for the command following the last one.
+      if (I == LoadCommandCount - 1)
+        break;
+      Load = MOOF.getNextLoadCommandInfo(Load);
+    }
+  }
+
+  // If we didn't find anything, default to the common implementation.
+  // (The test used to be inverted: a found LC_MAIN was discarded, and a
+  // missing one yielded the bare header load address.)
+  // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends?
+  if (!EntryFileOffset)
+    return MCObjectDisassembler::getEntrypoint();
+
+  return EntryFileOffset + HeaderLoadAddress;
+}
+
+/// View the saved "__mod_init_func" contents as an array of 8-byte entries.
+///
+/// \returns a non-owning array over ModInitContents; trailing bytes that do
+/// not fill a whole entry are ignored.
+ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() {
+  // FIXME: We only handle 64bit mach-o
+  assert(MOOF.is64Bit());
+
+  const size_t PointerSize = 8;
+  const uint64_t *FirstEntry =
+      reinterpret_cast<const uint64_t *>(ModInitContents.data());
+  return ArrayRef<uint64_t>(FirstEntry, ModInitContents.size() / PointerSize);
+}
+
+/// View the saved "__mod_exit_func" contents as an array of 8-byte entries.
+///
+/// \returns a non-owning array over ModExitContents; trailing bytes that do
+/// not fill a whole entry are ignored.
+ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() {
+  // FIXME: We only handle 64bit mach-o
+  assert(MOOF.is64Bit());
+
+  const size_t PointerSize = 8;
+  const uint64_t *FirstEntry =
+      reinterpret_cast<const uint64_t *>(ModExitContents.data());
+  return ArrayRef<uint64_t>(FirstEntry, ModExitContents.size() / PointerSize);
+}
diff --git a/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp
new file mode 100644
index 0000000..b149596
--- /dev/null
+++ b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp
@@ -0,0 +1,268 @@
+//===-- lib/MC/MCObjectSymbolizer.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectSymbolizer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRelocationInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+using namespace object;
+
+//===- MCMachObjectSymbolizer ---------------------------------------------===//
+
+namespace {
+/// Mach-O flavour of MCObjectSymbolizer: resolves addresses inside the
+/// "__stubs" section to external function names and annotates loads from
+/// "__cstring".
+class MCMachObjectSymbolizer : public MCObjectSymbolizer {
+ const MachOObjectFile *MOOF;
+ // __TEXT;__stubs support.
+ // Address of the first stub.
+ uint64_t StubsStart;
+ // Number of stubs in the section (section size / StubSize).
+ uint64_t StubsCount;
+ // Size of one stub, read from the section's reserved2 field.
+ uint64_t StubSize;
+ // Value of the section's reserved1 field.
+ uint64_t StubsIndSymIndex;
+
+public:
+ MCMachObjectSymbolizer(MCContext &Ctx,
+ std::unique_ptr<MCRelocationInfo> RelInfo,
+ const MachOObjectFile *MOOF);
+
+ /// \returns the name of the external function whose stub covers \p Addr, or
+ /// an empty string if \p Addr is not inside the stubs section.
+ StringRef findExternalFunctionAt(uint64_t Addr) override;
+
+ /// Append a "literal pool" comment to \p cStream when the referenced
+ /// address lies in the "__cstring" section.
+ void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+ uint64_t Address) override;
+};
+} // End unnamed namespace
+
+/// Build a Mach-O symbolizer for \p MOOF and record where its "__stubs"
+/// section lives, how large each stub is and how many stubs there are.
+///
+/// All stub fields stay zero when the object has no "__stubs" section.
+MCMachObjectSymbolizer::MCMachObjectSymbolizer(
+    MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
+    const MachOObjectFile *MOOF)
+    : MCObjectSymbolizer(Ctx, std::move(RelInfo), MOOF), MOOF(MOOF),
+      StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) {
+
+  for (const SectionRef &Section : MOOF->sections()) {
+    StringRef Name;
+    Section.getName(Name);
+    if (Name != "__stubs")
+      continue;
+
+    SectionRef StubsSec = Section;
+    // reserved1 and reserved2 are read from the 32- or 64-bit section header.
+    if (MOOF->is64Bit()) {
+      MachO::section_64 S = MOOF->getSection64(StubsSec.getRawDataRefImpl());
+      StubsIndSymIndex = S.reserved1;
+      StubSize = S.reserved2;
+    } else {
+      MachO::section S = MOOF->getSection(StubsSec.getRawDataRefImpl());
+      StubsIndSymIndex = S.reserved1;
+      StubSize = S.reserved2;
+    }
+    assert(StubSize && "Mach-O stub entry size can't be zero!");
+    StubsSec.getAddress(StubsStart);
+    StubsSec.getSize(StubsCount);
+    // With assertions compiled out, a zero stub size would divide by zero
+    // here. Treat such a section as having no stubs;
+    // findExternalFunctionAt() already bails out when StubSize is zero.
+    if (StubSize)
+      StubsCount /= StubSize;
+    else
+      StubsCount = 0;
+  }
+}
+
+/// Map an address inside the "__stubs" section to the name of the external
+/// function its stub stands for.
+///
+/// \returns the symbol name without its first character (the leading '_'),
+/// or an empty string when \p Addr is outside the stubs section or the
+/// stub's symbol table index is past the end of the symbol table.
+StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
+  // FIXME: also, this can all be done at the very beginning, by iterating over
+  // all stubs and creating the calls to outside functions. Is it worth it
+  // though?
+  // Addresses below the section are rejected explicitly instead of relying
+  // on the unsigned subtraction below wrapping to a huge index.
+  if (!StubSize || Addr < StubsStart)
+    return StringRef();
+  uint64_t StubIdx = (Addr - StubsStart) / StubSize;
+  if (StubIdx >= StubsCount)
+    return StringRef();
+
+  uint32_t SymtabIdx =
+      MOOF->getIndirectSymbolTableEntry(MOOF->getDysymtabLoadCommand(), StubIdx);
+
+  // Walk to the SymtabIdx-th symbol, but never past the end of the table.
+  symbol_iterator SI = MOOF->symbol_begin();
+  symbol_iterator SE = MOOF->symbol_end();
+  for (uint32_t i = 0; i != SymtabIdx && SI != SE; ++i)
+    ++SI;
+  assert(SI != SE && "Stub wasn't found in the symbol table!");
+  // The iterator used to be dereferenced before it was checked. Now it is
+  // checked first, and no-assert builds fail softly.
+  if (SI == SE)
+    return StringRef();
+
+  StringRef SymName;
+  SI->getName(SymName);
+  assert(!SymName.empty() && SymName.front() == '_' &&
+         "Mach-O symbol doesn't start with '_'!");
+  return SymName.substr(1);
+}
+
+/// Emit a " ## literal pool for: <string>" comment on \p cStream when the
+/// referenced address lies inside the "__cstring" section.
+///
+/// If there is a relocation at \p Address, \p Value is replaced by the
+/// absolute value of the relocation expression; nothing is emitted when that
+/// expression cannot be created or evaluated.
+void MCMachObjectSymbolizer::
+tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+                                uint64_t Address) {
+  if (const RelocationRef *Reloc = findRelocationAt(Address)) {
+    const MCExpr *RelocExpr = RelInfo->createExprForRelocation(*Reloc);
+    if (!RelocExpr)
+      return;
+    if (!RelocExpr->EvaluateAsAbsolute(Value))
+      return;
+  }
+
+  const uint64_t Addr = Value;
+  const SectionRef *Sec = findSectionContaining(Addr);
+  if (!Sec)
+    return;
+
+  StringRef SecName;
+  Sec->getName(SecName);
+  uint64_t SecStart;
+  Sec->getAddress(SecStart);
+  if (SecName != "__cstring")
+    return;
+
+  // Print the string that starts at Addr, up to its terminating NUL.
+  StringRef Literal;
+  Sec->getContents(Literal);
+  Literal = Literal.substr(Addr - SecStart);
+  cStream << " ## literal pool for: "
+          << Literal.substr(0, Literal.find_first_of(0));
+}
+
+//===- MCObjectSymbolizer -------------------------------------------------===//
+
+/// Create a symbolizer for \p Obj. The sorted section list and the
+/// address-to-relocation map start out empty.
+MCObjectSymbolizer::MCObjectSymbolizer(
+    MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
+    const ObjectFile *Obj)
+    : MCSymbolizer(Ctx, std::move(RelInfo)),
+      Obj(Obj),
+      SortedSections(),
+      AddrToReloc() {
+}
+
+/// Try to append a symbolic expression operand to \p MI for the operand at
+/// \p Address + \p Offset.
+///
+/// Sources are tried in order: an external function at \p Value (branches
+/// only), a relocation at \p Address + \p Offset, and, for branches only, a
+/// function symbol whose range covers \p Value (as symbol or symbol+offset).
+///
+/// \returns true if an operand was added to \p MI.
+bool MCObjectSymbolizer::
+tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream,
+                         int64_t Value, uint64_t Address, bool IsBranch,
+                         uint64_t Offset, uint64_t InstSize) {
+  if (IsBranch) {
+    StringRef ExternalName = findExternalFunctionAt((uint64_t)Value);
+    if (!ExternalName.empty()) {
+      MCSymbol *ExternalSym = Ctx.GetOrCreateSymbol(ExternalName);
+      const MCExpr *ExternalExpr = MCSymbolRefExpr::Create(ExternalSym, Ctx);
+      MI.addOperand(MCOperand::CreateExpr(ExternalExpr));
+      return true;
+    }
+  }
+
+  if (const RelocationRef *Reloc = findRelocationAt(Address + Offset)) {
+    const MCExpr *RelocExpr = RelInfo->createExprForRelocation(*Reloc);
+    // Only try to create a symbol+offset expression if there is no relocation.
+    if (!RelocExpr)
+      return false;
+    MI.addOperand(MCOperand::CreateExpr(RelocExpr));
+    return true;
+  }
+
+  // Interpret Value as a branch target.
+  if (!IsBranch)
+    return false;
+  const uint64_t Target = Value;
+  // FIXME: map instead of looping each time?
+  for (const SymbolRef &Symbol : Obj->symbols()) {
+    uint64_t SymAddr;
+    Symbol.getAddress(SymAddr);
+    uint64_t SymSize;
+    Symbol.getSize(SymSize);
+    StringRef SymName;
+    Symbol.getName(SymName);
+    SymbolRef::Type SymType;
+    Symbol.getType(SymType);
+
+    // Only named function symbols with a known address and size qualify.
+    const bool Unusable = SymAddr == UnknownAddressOrSize ||
+                          SymSize == UnknownAddressOrSize ||
+                          SymName.empty() ||
+                          SymType != SymbolRef::ST_Function;
+    if (Unusable)
+      continue;
+
+    const bool AtStart = SymAddr == Target;
+    const bool Inside = SymAddr <= Target && SymAddr + SymSize > Target;
+    if (!AtStart && !Inside)
+      continue;
+
+    MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
+    const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+    if (!AtStart) {
+      // The target is inside the function: express it as symbol + offset.
+      const MCExpr *Off = MCConstantExpr::Create(Target - SymAddr, Ctx);
+      Expr = MCBinaryExpr::CreateAdd(Expr, Off, Ctx);
+    }
+    MI.addOperand(MCOperand::CreateExpr(Expr));
+    return true;
+  }
+  return false;
+}
+
+/// This implementation adds no comment: the body is intentionally empty.
+void MCObjectSymbolizer::
+tryAddingPcLoadReferenceComment(raw_ostream &cStream,
+                                int64_t Value, uint64_t Address) {}
+
+/// This implementation knows no external functions and returns an empty name
+/// for every \p Addr.
+StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
+  StringRef NoName;
+  return NoName;
+}
+
+/// Factory: allocate the symbolizer matching the format of \p Obj.
+///
+/// \returns a new MCMachObjectSymbolizer for Mach-O objects and a plain
+/// MCObjectSymbolizer for everything else. \p RelInfo is moved into the
+/// created symbolizer.
+MCObjectSymbolizer *MCObjectSymbolizer::createObjectSymbolizer(
+    MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
+    const ObjectFile *Obj) {
+  const MachOObjectFile *MOOF = dyn_cast<MachOObjectFile>(Obj);
+  if (!MOOF)
+    return new MCObjectSymbolizer(Ctx, std::move(RelInfo), Obj);
+  return new MCMachObjectSymbolizer(Ctx, std::move(RelInfo), MOOF);
+}
+
+// SortedSections implementation.
+
+/// Ordering predicate for std::lower_bound over the sorted section list:
+/// true when section \p S starts strictly below \p Addr.
+static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) {
+  uint64_t SectionStart;
+  S.getAddress(SectionStart);
+  return Addr > SectionStart;
+}
+
+/// Find the section whose address range [start, start + size) contains
+/// \p Addr.
+///
+/// The sorted section list is built on first use.
+///
+/// \returns a pointer into the sorted section list, or nullptr when no listed
+/// section contains \p Addr.
+const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) {
+  if (SortedSections.empty())
+    buildSectionList();
+
+  SortedSectionList::iterator BeginIt = SortedSections.begin(),
+                              EndIt = SortedSections.end();
+  // lower_bound yields the first section starting at or after Addr. Unless
+  // that section starts exactly at Addr, the only section that can contain
+  // Addr is the one before it. The previous code always inspected the found
+  // section, so an address in the middle of a section matched the following
+  // section or nothing at all.
+  SortedSectionList::iterator It =
+      std::lower_bound(BeginIt, EndIt, Addr, SectionStartsBefore);
+  bool StartsAtAddr = false;
+  if (It != EndIt) {
+    uint64_t FoundAddr;
+    It->getAddress(FoundAddr);
+    StartsAtAddr = FoundAddr == Addr;
+  }
+  if (!StartsAtAddr) {
+    if (It == BeginIt)
+      return nullptr;
+    --It;
+  }
+
+  uint64_t SAddr;
+  It->getAddress(SAddr);
+  uint64_t SSize;
+  It->getSize(SSize);
+  if (Addr < SAddr || Addr >= SAddr + SSize)
+    return nullptr;
+  return &*It;
+}
+
+/// Look up the relocation recorded for exactly \p Addr.
+///
+/// The address-to-relocation map is built on first use.
+///
+/// \returns a pointer to the relocation stored in the map, or nullptr when
+/// there is none at \p Addr.
+const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) {
+  if (AddrToReloc.empty())
+    buildRelocationByAddrMap();
+
+  AddrToRelocMap::const_iterator Found = AddrToReloc.find(Addr);
+  if (Found != AddrToReloc.end())
+    return &Found->second;
+  return nullptr;
+}
+
+/// Fill SortedSections with every section required for execution, kept
+/// ordered by start address.
+///
+/// Hits llvm_unreachable when a section being inserted extends past the start
+/// of the section that follows it in the list.
+void MCObjectSymbolizer::buildSectionList() {
+  for (const SectionRef &Section : Obj->sections()) {
+    bool Needed;
+    Section.isRequiredForExecution(Needed);
+    if (!Needed)
+      continue;
+
+    uint64_t Start;
+    Section.getAddress(Start);
+    uint64_t Size;
+    Section.getSize(Size);
+
+    // Insertion point: the first listed section starting at or after Start.
+    SortedSectionList::iterator Pos =
+        std::lower_bound(SortedSections.begin(), SortedSections.end(), Start,
+                         SectionStartsBefore);
+    if (Pos != SortedSections.end()) {
+      uint64_t NextStart;
+      Pos->getAddress(NextStart);
+      if (NextStart < Start + Size)
+        llvm_unreachable("Inserting overlapping sections");
+    }
+    SortedSections.insert(Pos, Section);
+  }
+}
+
+/// Fill AddrToReloc with the relocations of every section, keyed by
+/// relocation address.
+void MCObjectSymbolizer::buildRelocationByAddrMap() {
+  for (const SectionRef &Section : Obj->sections()) {
+    for (const RelocationRef &Reloc : Section.relocations()) {
+      uint64_t RelocAddr;
+      Reloc.getAddress(RelocAddr);
+      // At a specific address, only keep the first relocation.
+      if (AddrToReloc.find(RelocAddr) != AddrToReloc.end())
+        continue;
+      AddrToReloc[RelocAddr] = Reloc;
+    }
+  }
+}
diff --git a/lib/MC/MCAnalysis/Makefile b/lib/MC/MCAnalysis/Makefile
new file mode 100644
index 0000000..add2dbd
--- /dev/null
+++ b/lib/MC/MCAnalysis/Makefile
@@ -0,0 +1,14 @@
+##===- lib/MC/MCAnalysis/Makefile --------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMCAnalysis
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common