diff options
Diffstat (limited to 'lib/MC/MCAnalysis')
-rw-r--r-- | lib/MC/MCAnalysis/Android.mk | 37 | ||||
-rw-r--r-- | lib/MC/MCAnalysis/CMakeLists.txt | 8 | ||||
-rw-r--r-- | lib/MC/MCAnalysis/LLVMBuild.txt | 5 | ||||
-rw-r--r-- | lib/MC/MCAnalysis/MCAtom.cpp | 114 | ||||
-rw-r--r-- | lib/MC/MCAnalysis/MCFunction.cpp | 76 | ||||
-rw-r--r-- | lib/MC/MCAnalysis/MCModule.cpp | 142 | ||||
-rw-r--r-- | lib/MC/MCAnalysis/MCModuleYAML.cpp | 464 | ||||
-rw-r--r-- | lib/MC/MCAnalysis/MCObjectDisassembler.cpp | 574 | ||||
-rw-r--r-- | lib/MC/MCAnalysis/MCObjectSymbolizer.cpp | 268 | ||||
-rw-r--r-- | lib/MC/MCAnalysis/Makefile | 14 |
10 files changed, 1702 insertions, 0 deletions
diff --git a/lib/MC/MCAnalysis/Android.mk b/lib/MC/MCAnalysis/Android.mk new file mode 100644 index 0000000..27f848a --- /dev/null +++ b/lib/MC/MCAnalysis/Android.mk @@ -0,0 +1,37 @@ +LOCAL_PATH:= $(call my-dir) + +mc_analysis_SRC_FILES := \ + MCAtom.cpp \ + MCFunction.cpp \ + MCModule.cpp \ + MCModuleYAML.cpp \ + MCObjectDisassembler.cpp \ + MCObjectSymbolizer.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES) + +LOCAL_MODULE:= libLLVMMCAnalysis + +LOCAL_MODULE_TAGS := optional + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) +ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS)) + +LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES) + +LOCAL_MODULE:= libLLVMMCAnalysis + +LOCAL_MODULE_TAGS := optional + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) +endif diff --git a/lib/MC/MCAnalysis/CMakeLists.txt b/lib/MC/MCAnalysis/CMakeLists.txt new file mode 100644 index 0000000..81eae2d --- /dev/null +++ b/lib/MC/MCAnalysis/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_library(LLVMMCAnalysis + MCAtom.cpp + MCFunction.cpp + MCModule.cpp + MCModuleYAML.cpp + MCObjectDisassembler.cpp + MCObjectSymbolizer.cpp +) diff --git a/lib/MC/MCAnalysis/LLVMBuild.txt b/lib/MC/MCAnalysis/LLVMBuild.txt new file mode 100644 index 0000000..1b58fec --- /dev/null +++ b/lib/MC/MCAnalysis/LLVMBuild.txt @@ -0,0 +1,5 @@ +[component_0] +type = Library +name = MCAnalysis +parent = Libraries +required_libraries = MC Object Support diff --git a/lib/MC/MCAnalysis/MCAtom.cpp b/lib/MC/MCAnalysis/MCAtom.cpp new file mode 100644 index 0000000..82056ee --- /dev/null +++ b/lib/MC/MCAnalysis/MCAtom.cpp @@ -0,0 +1,114 @@ +//===- lib/MC/MCAtom.cpp - MCAtom implementation --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCModule.h" +#include "llvm/Support/ErrorHandling.h" +#include <iterator> + +using namespace llvm; + +// Pin the vtable to this file. +void MCAtom::anchor() {} + +void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) { + Parent->remap(this, NewBegin, NewEnd); +} + +void MCAtom::remapForTruncate(uint64_t TruncPt) { + assert((TruncPt >= Begin && TruncPt < End) && + "Truncation point not contained in atom!"); + remap(Begin, TruncPt); +} + +void MCAtom::remapForSplit(uint64_t SplitPt, + uint64_t &LBegin, uint64_t &LEnd, + uint64_t &RBegin, uint64_t &REnd) { + assert((SplitPt > Begin && SplitPt <= End) && + "Splitting at point not contained in atom!"); + + // Compute the new begin/end points. + LBegin = Begin; + LEnd = SplitPt - 1; + RBegin = SplitPt; + REnd = End; + + // Remap this atom to become the lower of the two new ones. + remap(LBegin, LEnd); +} + +// MCDataAtom + +void MCDataAtom::addData(const MCData &D) { + Data.push_back(D); + if (Data.size() > End + 1 - Begin) + remap(Begin, End + 1); +} + +void MCDataAtom::truncate(uint64_t TruncPt) { + remapForTruncate(TruncPt); + + Data.resize(TruncPt - Begin + 1); +} + +MCDataAtom *MCDataAtom::split(uint64_t SplitPt) { + uint64_t LBegin, LEnd, RBegin, REnd; + remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); + + MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd); + RightAtom->setName(getName()); + + std::vector<MCData>::iterator I = Data.begin() + (RBegin - LBegin); + assert(I != Data.end() && "Split point not found in range!"); + + std::copy(I, Data.end(), std::back_inserter(RightAtom->Data)); + Data.erase(I, Data.end()); + return RightAtom; +} + +// MCTextAtom + +void MCTextAtom::addInst(const MCInst &I, uint64_t Size) { + if (NextInstAddress + Size - 1 > End) + remap(Begin, NextInstAddress + Size - 1); + Insts.push_back(MCDecodedInst(I, NextInstAddress, Size)); + NextInstAddress += Size; +} + +void MCTextAtom::truncate(uint64_t TruncPt) { + remapForTruncate(TruncPt); + + InstListTy::iterator I = Insts.begin(); + while (I != Insts.end() && I->Address <= TruncPt) ++I; + + assert(I != Insts.end() && "Truncation point not found in disassembly!"); + assert(I->Address == TruncPt + 1 && + "Truncation point does not fall on instruction boundary"); + + Insts.erase(I, Insts.end()); +} + +MCTextAtom *MCTextAtom::split(uint64_t SplitPt) { + uint64_t LBegin, LEnd, RBegin, REnd; + remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); + + MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd); + RightAtom->setName(getName()); + + InstListTy::iterator I = Insts.begin(); + while (I != Insts.end() && I->Address < SplitPt) ++I; + assert(I != Insts.end() && "Split point not found in disassembly!"); + assert(I->Address == SplitPt && + "Split point does not fall on instruction boundary!"); + + std::copy(I, Insts.end(), std::back_inserter(RightAtom->Insts)); + Insts.erase(I, Insts.end()); + Parent->splitBasicBlocksForAtom(this, RightAtom); + return RightAtom; +} diff --git a/lib/MC/MCAnalysis/MCFunction.cpp b/lib/MC/MCAnalysis/MCFunction.cpp new file mode 100644 index 0000000..4e09d1a --- /dev/null +++ b/lib/MC/MCAnalysis/MCFunction.cpp @@ -0,0 +1,76 @@ +//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCModule.h" +#include <algorithm> + +using namespace llvm; + +// MCFunction + +MCFunction::MCFunction(StringRef Name, MCModule *Parent) + : Name(Name), ParentModule(Parent) +{} + +MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) { + std::unique_ptr<MCBasicBlock> MCBB(new MCBasicBlock(TA, this)); + Blocks.push_back(std::move(MCBB)); + return *Blocks.back(); +} + +MCBasicBlock *MCFunction::find(uint64_t StartAddr) { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if ((*I)->getInsts()->getBeginAddr() == StartAddr) + return I->get(); + return nullptr; +} + +const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const { + return const_cast<MCFunction *>(this)->find(StartAddr); +} + +// MCBasicBlock + +MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent) + : Insts(&Insts), Parent(Parent) { + getParent()->getParent()->trackBBForAtom(&Insts, this); +} + +void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) { + if (!isSuccessor(MCBB)) + Successors.push_back(MCBB); +} + +bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const { + return std::find(Successors.begin(), Successors.end(), + MCBB) != Successors.end(); +} + +void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) { + if (!isPredecessor(MCBB)) + Predecessors.push_back(MCBB); +} + +bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const { + return std::find(Predecessors.begin(), Predecessors.end(), + MCBB) != Predecessors.end(); +} + +void MCBasicBlock::splitBasicBlock(MCBasicBlock *SplitBB) { + assert(Insts->getEndAddr() + 1 == SplitBB->Insts->getBeginAddr() && + "Splitting unrelated basic blocks!"); + SplitBB->addPredecessor(this); + assert(SplitBB->Successors.empty() && + "Split basic block shouldn't already have successors!"); + SplitBB->Successors = Successors; + Successors.clear(); + addSuccessor(SplitBB); +} diff --git a/lib/MC/MCAnalysis/MCModule.cpp b/lib/MC/MCAnalysis/MCModule.cpp new file mode 100644 index 0000000..7512299 --- /dev/null +++ b/lib/MC/MCAnalysis/MCModule.cpp @@ -0,0 +1,142 @@ +//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAnalysis/MCModule.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include <algorithm> + +using namespace llvm; + +static bool AtomComp(const MCAtom *L, uint64_t Addr) { + return L->getEndAddr() < Addr; +} + +static bool AtomCompInv(uint64_t Addr, const MCAtom *R) { + return Addr < R->getEndAddr(); +} + +void MCModule::map(MCAtom *NewAtom) { + uint64_t Begin = NewAtom->Begin; + + assert(Begin <= NewAtom->End && "Creating MCAtom with endpoints reversed?"); + + // Check for atoms already covering this range. + AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), + Begin, AtomComp); + assert((I == atom_end() || (*I)->getBeginAddr() > NewAtom->End) + && "Offset range already occupied!"); + + // Insert the new atom to the list. + Atoms.insert(I, NewAtom); +} + +MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) { + MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End); + map(NewAtom); + return NewAtom; +} + +MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) { + MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End); + map(NewAtom); + return NewAtom; +} + +// remap - Update the interval mapping for an atom. +void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) { + // Find and erase the old mapping. + AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), + Atom->Begin, AtomComp); + assert(I != atom_end() && "Atom offset not found in module!"); + assert(*I == Atom && "Previous atom mapping was invalid!"); + Atoms.erase(I); + + // FIXME: special case NewBegin == Atom->Begin + + // Insert the new mapping. + AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(), + NewBegin, AtomComp); + assert((NewI == atom_end() || (*NewI)->getBeginAddr() > Atom->End) + && "Offset range already occupied!"); + Atoms.insert(NewI, Atom); + + // Update the atom internal bounds. + Atom->Begin = NewBegin; + Atom->End = NewEnd; +} + +const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const { + AtomListTy::const_iterator I = std::lower_bound(atom_begin(), atom_end(), + Addr, AtomComp); + if (I != atom_end() && (*I)->getBeginAddr() <= Addr) + return *I; + return nullptr; +} + +MCAtom *MCModule::findAtomContaining(uint64_t Addr) { + return const_cast<MCAtom*>( + const_cast<const MCModule *>(this)->findAtomContaining(Addr)); +} + +const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const { + AtomListTy::const_iterator I = std::upper_bound(atom_begin(), atom_end(), + Addr, AtomCompInv); + if (I != atom_end()) + return *I; + return nullptr; +} + +MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) { + return const_cast<MCAtom*>( + const_cast<const MCModule *>(this)->findFirstAtomAfter(Addr)); +} + +MCFunction *MCModule::createFunction(StringRef Name) { + std::unique_ptr<MCFunction> MCF(new MCFunction(Name, this)); + Functions.push_back(std::move(MCF)); + return Functions.back().get(); +} + +static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) { + return BB->getInsts() < Atom; +} + +void MCModule::splitBasicBlocksForAtom(const MCTextAtom *TA, + const MCTextAtom *NewTA) { + BBsByAtomTy::iterator + I = std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(), + TA, CompBBToAtom); + for (; I != BBsByAtom.end() && (*I)->getInsts() == TA; ++I) { + MCBasicBlock *BB = *I; + MCBasicBlock *NewBB = &BB->getParent()->createBlock(*NewTA); + BB->splitBasicBlock(NewBB); + } +} + +void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) { + assert(Atom == BB->getInsts() && "Text atom doesn't back the basic block!"); + BBsByAtomTy::iterator I = std::lower_bound(BBsByAtom.begin(), + BBsByAtom.end(), + Atom, CompBBToAtom); + for (; I != BBsByAtom.end() && (*I)->getInsts() == Atom; ++I) + if (*I == BB) + return; + BBsByAtom.insert(I, BB); +} + +MCModule::MCModule() : Entrypoint(0) { } + +MCModule::~MCModule() { + for (AtomListTy::iterator AI = atom_begin(), + AE = atom_end(); + AI != AE; ++AI) + delete *AI; +} diff --git a/lib/MC/MCAnalysis/MCModuleYAML.cpp b/lib/MC/MCAnalysis/MCModuleYAML.cpp new file mode 100644 index 0000000..876b06d --- /dev/null +++ b/lib/MC/MCAnalysis/MCModuleYAML.cpp @@ -0,0 +1,464 @@ +//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of MCModule. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAnalysis/MCModuleYAML.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/YAML.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/YAMLTraits.h" +#include <vector> + +namespace llvm { + +namespace { + +// This class is used to map opcode and register names to enum values. +// +// There are at least 3 obvious ways to do this: +// 1- Generate an MII/MRI method using a tablegen StringMatcher +// 2- Write an MII/MRI method using std::lower_bound and the assumption that +// the enums are sorted (starting at a fixed value). +// 3- Do the matching manually as is done here. +// +// Why 3? +// 1- A StringMatcher function for thousands of entries would incur +// a non-negligible binary size overhead. +// 2- The lower_bound comparators would be somewhat involved and aren't +// obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h) +// 3- This isn't actually something useful outside tests (but the same argument +// can be made against having {MII,MRI}::getName). +// +// If this becomes useful outside this specific situation, feel free to do +// the Right Thing (tm) and move the functionality to MII/MRI. +// +class InstrRegInfoHolder { + typedef StringMap<unsigned, BumpPtrAllocator> EnumValByNameTy; + EnumValByNameTy InstEnumValueByName; + EnumValByNameTy RegEnumValueByName; + +public: + const MCInstrInfo &MII; + const MCRegisterInfo &MRI; + InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI) + : InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())), + RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) { + for (int i = 0, e = MII.getNumOpcodes(); i != e; ++i) + InstEnumValueByName[MII.getName(i)] = i; + for (int i = 0, e = MRI.getNumRegs(); i != e; ++i) + RegEnumValueByName[MRI.getName(i)] = i; + } + + bool matchRegister(StringRef Name, unsigned &Reg) { + EnumValByNameTy::const_iterator It = RegEnumValueByName.find(Name); + if (It == RegEnumValueByName.end()) + return false; + Reg = It->getValue(); + return true; + } + bool matchOpcode(StringRef Name, unsigned &Opc) { + EnumValByNameTy::const_iterator It = InstEnumValueByName.find(Name); + if (It == InstEnumValueByName.end()) + return false; + Opc = It->getValue(); + return true; + } +}; + +} // end unnamed namespace + +namespace MCModuleYAML { + +LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum) + +struct Operand { + MCOperand MCOp; +}; + +struct Inst { + OpcodeEnum Opcode; + std::vector<Operand> Operands; + uint64_t Size; +}; + +struct Atom { + MCAtom::AtomKind Type; + yaml::Hex64 StartAddress; + uint64_t Size; + + std::vector<Inst> Insts; + yaml::BinaryRef Data; +}; + +struct BasicBlock { + yaml::Hex64 Address; + std::vector<yaml::Hex64> Preds; + std::vector<yaml::Hex64> Succs; +}; + +struct Function { + StringRef Name; + std::vector<BasicBlock> BasicBlocks; +}; + +struct Module { + std::vector<Atom> Atoms; + std::vector<Function> Functions; +}; + +} // end namespace MCModuleYAML +} // end namespace llvm + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function) + +namespace llvm { + +namespace yaml { + +template <> struct ScalarEnumerationTraits<MCAtom::AtomKind> { + static void enumeration(IO &IO, MCAtom::AtomKind &Kind); +}; + +template <> struct MappingTraits<MCModuleYAML::Atom> { + static void mapping(IO &IO, MCModuleYAML::Atom &A); +}; + +template <> struct MappingTraits<MCModuleYAML::Inst> { + static void mapping(IO &IO, MCModuleYAML::Inst &I); +}; + +template <> struct MappingTraits<MCModuleYAML::BasicBlock> { + static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB); +}; + +template <> struct MappingTraits<MCModuleYAML::Function> { + static void mapping(IO &IO, MCModuleYAML::Function &Fn); +}; + +template <> struct MappingTraits<MCModuleYAML::Module> { + static void mapping(IO &IO, MCModuleYAML::Module &M); +}; + +template <> struct ScalarTraits<MCModuleYAML::Operand> { + static void output(const MCModuleYAML::Operand &, void *, + llvm::raw_ostream &); + static StringRef input(StringRef, void *, MCModuleYAML::Operand &); + static bool mustQuote(StringRef) { return false; } +}; + +template <> struct ScalarTraits<MCModuleYAML::OpcodeEnum> { + static void output(const MCModuleYAML::OpcodeEnum &, void *, + llvm::raw_ostream &); + static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &); + static bool mustQuote(StringRef) { return false; } +}; + +void ScalarEnumerationTraits<MCAtom::AtomKind>::enumeration( + IO &IO, MCAtom::AtomKind &Value) { + IO.enumCase(Value, "Text", MCAtom::TextAtom); + IO.enumCase(Value, "Data", MCAtom::DataAtom); +} + +void MappingTraits<MCModuleYAML::Atom>::mapping(IO &IO, MCModuleYAML::Atom &A) { + IO.mapRequired("StartAddress", A.StartAddress); + IO.mapRequired("Size", A.Size); + IO.mapRequired("Type", A.Type); + if (A.Type == MCAtom::TextAtom) + IO.mapRequired("Content", A.Insts); + else if (A.Type == MCAtom::DataAtom) + IO.mapRequired("Content", A.Data); +} + +void MappingTraits<MCModuleYAML::Inst>::mapping(IO &IO, MCModuleYAML::Inst &I) { + IO.mapRequired("Inst", I.Opcode); + IO.mapRequired("Size", I.Size); + IO.mapRequired("Ops", I.Operands); +} + +void +MappingTraits<MCModuleYAML::BasicBlock>::mapping(IO &IO, + MCModuleYAML::BasicBlock &BB) { + IO.mapRequired("Address", BB.Address); + IO.mapRequired("Preds", BB.Preds); + IO.mapRequired("Succs", BB.Succs); +} + +void MappingTraits<MCModuleYAML::Function>::mapping(IO &IO, + MCModuleYAML::Function &F) { + IO.mapRequired("Name", F.Name); + IO.mapRequired("BasicBlocks", F.BasicBlocks); +} + +void MappingTraits<MCModuleYAML::Module>::mapping(IO &IO, + MCModuleYAML::Module &M) { + IO.mapRequired("Atoms", M.Atoms); + IO.mapOptional("Functions", M.Functions); +} + +void +ScalarTraits<MCModuleYAML::Operand>::output(const MCModuleYAML::Operand &Val, + void *Ctx, raw_ostream &Out) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + + // FIXME: Doesn't support FPImm and expr/inst, but do these make sense? + if (Val.MCOp.isImm()) + Out << "I" << Val.MCOp.getImm(); + else if (Val.MCOp.isReg()) + Out << "R" << IRI->MRI.getName(Val.MCOp.getReg()); + else + llvm_unreachable("Trying to output invalid MCOperand!"); +} + +StringRef +ScalarTraits<MCModuleYAML::Operand>::input(StringRef Scalar, void *Ctx, + MCModuleYAML::Operand &Val) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + char Type = 0; + if (Scalar.size() >= 1) + Type = Scalar.front(); + if (Type != 'R' && Type != 'I') + return "Operand must start with 'R' (register) or 'I' (immediate)."; + if (Type == 'R') { + unsigned Reg; + if (!IRI->matchRegister(Scalar.substr(1), Reg)) + return "Invalid register name."; + Val.MCOp = MCOperand::CreateReg(Reg); + } else if (Type == 'I') { + int64_t RIVal; + if (Scalar.substr(1).getAsInteger(10, RIVal)) + return "Invalid immediate value."; + Val.MCOp = MCOperand::CreateImm(RIVal); + } else { + Val.MCOp = MCOperand(); + } + return StringRef(); +} + +void ScalarTraits<MCModuleYAML::OpcodeEnum>::output( + const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + Out << IRI->MII.getName(Val); +} + +StringRef +ScalarTraits<MCModuleYAML::OpcodeEnum>::input(StringRef Scalar, void *Ctx, + MCModuleYAML::OpcodeEnum &Val) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + unsigned Opc; + if (!IRI->matchOpcode(Scalar, Opc)) + return "Invalid instruction opcode."; + Val = Opc; + return ""; +} + +} // end namespace yaml + +namespace { + +class MCModule2YAML { + const MCModule &MCM; + MCModuleYAML::Module YAMLModule; + void dumpAtom(const MCAtom *MCA); + void dumpFunction(const MCFunction &MCF); + void dumpBasicBlock(const MCBasicBlock *MCBB); + +public: + MCModule2YAML(const MCModule &MCM); + MCModuleYAML::Module &getYAMLModule(); +}; + +class YAML2MCModule { + MCModule &MCM; + +public: + YAML2MCModule(MCModule &MCM); + StringRef parse(const MCModuleYAML::Module &YAMLModule); +}; + +} // end unnamed namespace + +MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() { + for (MCModule::const_atom_iterator AI = MCM.atom_begin(), AE = MCM.atom_end(); + AI != AE; ++AI) + dumpAtom(*AI); + for (MCModule::const_func_iterator FI = MCM.func_begin(), FE = MCM.func_end(); + FI != FE; ++FI) + dumpFunction(**FI); +} + +void MCModule2YAML::dumpAtom(const MCAtom *MCA) { + YAMLModule.Atoms.resize(YAMLModule.Atoms.size() + 1); + MCModuleYAML::Atom &A = YAMLModule.Atoms.back(); + A.Type = MCA->getKind(); + A.StartAddress = MCA->getBeginAddr(); + A.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1; + if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(MCA)) { + const size_t InstCount = TA->size(); + A.Insts.resize(InstCount); + for (size_t i = 0; i != InstCount; ++i) { + const MCDecodedInst &MCDI = TA->at(i); + A.Insts[i].Opcode = MCDI.Inst.getOpcode(); + A.Insts[i].Size = MCDI.Size; + const unsigned OpCount = MCDI.Inst.getNumOperands(); + A.Insts[i].Operands.resize(OpCount); + for (unsigned oi = 0; oi != OpCount; ++oi) + A.Insts[i].Operands[oi].MCOp = MCDI.Inst.getOperand(oi); + } + } else if (const MCDataAtom *DA = dyn_cast<MCDataAtom>(MCA)) { + A.Data = DA->getData(); + } else { + llvm_unreachable("Unknown atom type."); + } +} + +void MCModule2YAML::dumpFunction(const MCFunction &MCF) { + YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1); + MCModuleYAML::Function &F = YAMLModule.Functions.back(); + F.Name = MCF.getName(); + for (MCFunction::const_iterator BBI = MCF.begin(), BBE = MCF.end(); + BBI != BBE; ++BBI) { + const MCBasicBlock &MCBB = **BBI; + F.BasicBlocks.resize(F.BasicBlocks.size() + 1); + MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back(); + BB.Address = MCBB.getInsts()->getBeginAddr(); + for (MCBasicBlock::pred_const_iterator PI = MCBB.pred_begin(), + PE = MCBB.pred_end(); + PI != PE; ++PI) + BB.Preds.push_back((*PI)->getInsts()->getBeginAddr()); + for (MCBasicBlock::succ_const_iterator SI = MCBB.succ_begin(), + SE = MCBB.succ_end(); + SI != SE; ++SI) + BB.Succs.push_back((*SI)->getInsts()->getBeginAddr()); + } +} + +MCModuleYAML::Module &MCModule2YAML::getYAMLModule() { return YAMLModule; } + +YAML2MCModule::YAML2MCModule(MCModule &MCM) : MCM(MCM) {} + +StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) { + typedef std::vector<MCModuleYAML::Atom>::const_iterator AtomIt; + typedef std::vector<MCModuleYAML::Inst>::const_iterator InstIt; + typedef std::vector<MCModuleYAML::Operand>::const_iterator OpIt; + + typedef DenseMap<uint64_t, MCTextAtom *> AddrToTextAtomTy; + AddrToTextAtomTy TAByAddr; + + for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end(); + AI != AE; ++AI) { + uint64_t StartAddress = AI->StartAddress; + if (AI->Size == 0) + return "Atoms can't be empty!"; + uint64_t EndAddress = StartAddress + AI->Size - 1; + switch (AI->Type) { + case MCAtom::TextAtom: { + MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress); + TAByAddr[StartAddress] = TA; + for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE; + ++II) { + MCInst MI; + MI.setOpcode(II->Opcode); + for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE; + ++OI) + MI.addOperand(OI->MCOp); + TA->addInst(MI, II->Size); + } + break; + } + case MCAtom::DataAtom: { + MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress); + SmallVector<char, 64> Data; + raw_svector_ostream OS(Data); + AI->Data.writeAsBinary(OS); + OS.flush(); + for (size_t i = 0, e = Data.size(); i != e; ++i) + DA->addData((uint8_t)Data[i]); + break; + } + } + } + + typedef std::vector<MCModuleYAML::Function>::const_iterator FuncIt; + typedef std::vector<MCModuleYAML::BasicBlock>::const_iterator BBIt; + typedef std::vector<yaml::Hex64>::const_iterator AddrIt; + for (FuncIt FI = YAMLModule.Functions.begin(), + FE = YAMLModule.Functions.end(); + FI != FE; ++FI) { + MCFunction *MCFN = MCM.createFunction(FI->Name); + for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); + BBI != BBE; ++BBI) { + AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address); + if (It == TAByAddr.end()) + return "Basic block start address doesn't match any text atom!"; + MCFN->createBlock(*It->second); + } + for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); + BBI != BBE; ++BBI) { + MCBasicBlock *MCBB = MCFN->find(BBI->Address); + if (!MCBB) + return "Couldn't find matching basic block in function."; + for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE; + ++PI) { + MCBasicBlock *Pred = MCFN->find(*PI); + if (!Pred) + return "Couldn't find predecessor basic block."; + MCBB->addPredecessor(Pred); + } + for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE; + ++SI) { + MCBasicBlock *Succ = MCFN->find(*SI); + if (!Succ) + return "Couldn't find predecessor basic block."; + MCBB->addSuccessor(Succ); + } + } + } + return ""; +} + +StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM, + const MCInstrInfo &MII, const MCRegisterInfo &MRI) { + MCModule2YAML Dumper(MCM); + InstrRegInfoHolder IRI(MII, MRI); + yaml::Output YOut(OS, (void *)&IRI); + YOut << Dumper.getYAMLModule(); + return ""; +} + +StringRef yaml2mcmodule(std::unique_ptr<MCModule> &MCM, StringRef YamlContent, + const MCInstrInfo &MII, const MCRegisterInfo &MRI) { + MCM.reset(new MCModule); + YAML2MCModule Parser(*MCM); + MCModuleYAML::Module YAMLModule; + InstrRegInfoHolder IRI(MII, MRI); + yaml::Input YIn(YamlContent, (void *)&IRI); + YIn >> YAMLModule; + if (std::error_code ec = YIn.error()) + return ec.message(); + StringRef err = Parser.parse(YAMLModule); + if (!err.empty()) + return err; + return ""; +} + +} // end namespace llvm diff --git a/lib/MC/MCAnalysis/MCObjectDisassembler.cpp b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp new file mode 100644 index 0000000..0f789ff --- /dev/null +++ b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp @@ -0,0 +1,574 @@ +//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectDisassembler.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include "llvm/MC/MCAnalysis/MCModule.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCObjectSymbolizer.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/StringRefMemoryObject.h" +#include "llvm/Support/raw_ostream.h" +#include <map> + +using namespace llvm; +using namespace object; + +#define DEBUG_TYPE "mc" + +MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj, + const MCDisassembler &Dis, + const MCInstrAnalysis &MIA) + : Obj(Obj), Dis(Dis), MIA(MIA), MOS(nullptr) {} + +uint64_t MCObjectDisassembler::getEntrypoint() { + for (const SymbolRef &Symbol : Obj.symbols()) { + StringRef Name; + Symbol.getName(Name); + if (Name == "main" || Name == "_main") { + uint64_t Entrypoint; + Symbol.getAddress(Entrypoint); + return getEffectiveLoadAddr(Entrypoint); + } + } + return 0; +} + +ArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() { + return ArrayRef<uint64_t>(); +} + +ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() { + return ArrayRef<uint64_t>(); +} + +MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) { + // FIXME: Keep track of object sections. + return FallbackRegion.get(); +} + +uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { + return Addr; +} + +uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) { + return Addr; +} + +MCModule *MCObjectDisassembler::buildEmptyModule() { + MCModule *Module = new MCModule; + Module->Entrypoint = getEntrypoint(); + return Module; +} + +MCModule *MCObjectDisassembler::buildModule(bool withCFG) { + MCModule *Module = buildEmptyModule(); + + buildSectionAtoms(Module); + if (withCFG) + buildCFG(Module); + return Module; +} + +void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) { + for (const SectionRef &Section : Obj.sections()) { + bool isText; + Section.isText(isText); + bool isData; + Section.isData(isData); + if (!isData && !isText) + continue; + + uint64_t StartAddr; + Section.getAddress(StartAddr); + uint64_t SecSize; + Section.getSize(SecSize); + if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize) + continue; + StartAddr = getEffectiveLoadAddr(StartAddr); + + StringRef Contents; + Section.getContents(Contents); + StringRefMemoryObject memoryObject(Contents, StartAddr); + + // We don't care about things like non-file-backed sections yet. + if (Contents.size() != SecSize || !SecSize) + continue; + uint64_t EndAddr = StartAddr + SecSize - 1; + + StringRef SecName; + Section.getName(SecName); + + if (isText) { + MCTextAtom *Text = nullptr; + MCDataAtom *InvalidData = nullptr; + + uint64_t InstSize; + for (uint64_t Index = 0; Index < SecSize; Index += InstSize) { + const uint64_t CurAddr = StartAddr + Index; + MCInst Inst; + if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(), + nulls())) { + if (!Text) { + Text = Module->createTextAtom(CurAddr, CurAddr); + Text->setName(SecName); + } + Text->addInst(Inst, InstSize); + InvalidData = nullptr; + } else { + assert(InstSize && "getInstruction() consumed no bytes"); + if (!InvalidData) { + Text = nullptr; + InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1); + } + for (uint64_t I = 0; I < InstSize; ++I) + InvalidData->addData(Contents[Index+I]); + } + } + } else { + MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr); + Data->setName(SecName); + for (uint64_t Index = 0; Index < SecSize; ++Index) + Data->addData(Contents[Index]); + } + } +} + +namespace { + struct BBInfo; + typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy; + + struct BBInfo { + MCTextAtom *Atom; + MCBasicBlock *BB; + BBInfoSetTy Succs; + BBInfoSetTy Preds; + MCObjectDisassembler::AddressSetTy SuccAddrs; + + BBInfo() : Atom(nullptr), BB(nullptr) {} + + void addSucc(BBInfo &Succ) { + Succs.insert(&Succ); + Succ.Preds.insert(this); + } + }; +} + +static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) { + std::sort(V.begin(), V.end()); + V.erase(std::unique(V.begin(), V.end()), V.end()); +} + +void MCObjectDisassembler::buildCFG(MCModule *Module) { + typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; + BBInfoByAddrTy BBInfos; + AddressSetTy Splits; + AddressSetTy Calls; + + for (const SymbolRef &Symbol : Obj.symbols()) { + SymbolRef::Type SymType; + Symbol.getType(SymType); + if (SymType == SymbolRef::ST_Function) { + uint64_t SymAddr; + Symbol.getAddress(SymAddr); + SymAddr = getEffectiveLoadAddr(SymAddr); + Calls.push_back(SymAddr); + Splits.push_back(SymAddr); + } + } + + assert(Module->func_begin() == Module->func_end() + && "Module already has a CFG!"); + + // First, determine the basic block boundaries and call targets. + for (MCModule::atom_iterator AI = Module->atom_begin(), + AE = Module->atom_end(); + AI != AE; ++AI) { + MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); + if (!TA) continue; + Calls.push_back(TA->getBeginAddr()); + BBInfos[TA->getBeginAddr()].Atom = TA; + for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); + II != IE; ++II) { + if (MIA.isTerminator(II->Inst)) + Splits.push_back(II->Address + II->Size); + uint64_t Target; + if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) { + if (MIA.isCall(II->Inst)) + Calls.push_back(Target); + Splits.push_back(Target); + } + } + } + + RemoveDupsFromAddressVector(Splits); + RemoveDupsFromAddressVector(Calls); + + // Split text atoms into basic block atoms. + for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end(); + SI != SE; ++SI) { + MCAtom *A = Module->findAtomContaining(*SI); + if (!A) continue; + MCTextAtom *TA = cast<MCTextAtom>(A); + if (TA->getBeginAddr() == *SI) + continue; + MCTextAtom *NewAtom = TA->split(*SI); + BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom; + StringRef BBName = TA->getName(); + BBName = BBName.substr(0, BBName.find_last_of(':')); + NewAtom->setName((BBName + ":" + utohexstr(*SI)).str()); + } + + // Compute succs/preds. + for (MCModule::atom_iterator AI = Module->atom_begin(), + AE = Module->atom_end(); + AI != AE; ++AI) { + MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); + if (!TA) continue; + BBInfo &CurBB = BBInfos[TA->getBeginAddr()]; + const MCDecodedInst &LI = TA->back(); + if (MIA.isBranch(LI.Inst)) { + uint64_t Target; + if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target)) + CurBB.addSucc(BBInfos[Target]); + if (MIA.isConditionalBranch(LI.Inst)) + CurBB.addSucc(BBInfos[LI.Address + LI.Size]); + } else if (!MIA.isTerminator(LI.Inst)) + CurBB.addSucc(BBInfos[LI.Address + LI.Size]); + } + + + // Create functions and basic blocks. + for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end(); + CI != CE; ++CI) { + BBInfo &BBI = BBInfos[*CI]; + if (!BBI.Atom) continue; + + MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName()); + + // Create MCBBs. + SmallSetVector<BBInfo*, 16> Worklist; + Worklist.insert(&BBI); + for (size_t wi = 0; wi < Worklist.size(); ++wi) { + BBInfo *BBI = Worklist[wi]; + if (!BBI->Atom) + continue; + BBI->BB = &MCFN.createBlock(*BBI->Atom); + // Add all predecessors and successors to the worklist. + for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); + SI != SE; ++SI) + Worklist.insert(*SI); + for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); + PI != PE; ++PI) + Worklist.insert(*PI); + } + + // Set preds/succs. + for (size_t wi = 0; wi < Worklist.size(); ++wi) { + BBInfo *BBI = Worklist[wi]; + MCBasicBlock *MCBB = BBI->BB; + if (!MCBB) + continue; + for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); + SI != SE; ++SI) + if ((*SI)->BB) + MCBB->addSuccessor((*SI)->BB); + for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); + PI != PE; ++PI) + if ((*PI)->BB) + MCBB->addPredecessor((*PI)->BB); + } + } +} + +// Basic idea of the disassembly + discovery: +// +// start with the wanted address, insert it in the worklist +// while worklist not empty, take next address in the worklist: +// - check if atom exists there +// - if middle of atom: +// - split basic blocks referencing the atom +// - look for an already encountered BBInfo (using a map<atom, bbinfo>) +// - if there is, split it (new one, fallthrough, move succs, etc..) +// - if start of atom: nothing else to do +// - if no atom: create new atom and new bbinfo +// - look at the last instruction in the atom, add succs to worklist +// for all elements in the worklist: +// - create basic block, update preds/succs, etc.. +// +MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN, + uint64_t BBBeginAddr, + AddressSetTy &CallTargets, + AddressSetTy &TailCallTargets) { + typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; + typedef SmallSetVector<uint64_t, 16> AddrWorklistTy; + BBInfoByAddrTy BBInfos; + AddrWorklistTy Worklist; + + Worklist.insert(BBBeginAddr); + for (size_t wi = 0; wi < Worklist.size(); ++wi) { + const uint64_t BeginAddr = Worklist[wi]; + BBInfo *BBI = &BBInfos[BeginAddr]; + + MCTextAtom *&TA = BBI->Atom; + assert(!TA && "Discovered basic block already has an associated atom!"); + + // Look for an atom at BeginAddr. + if (MCAtom *A = Module->findAtomContaining(BeginAddr)) { + // FIXME: We don't care about mixed atoms, see above. + TA = cast<MCTextAtom>(A); + + // The found atom doesn't begin at BeginAddr, we have to split it. + if (TA->getBeginAddr() != BeginAddr) { + // FIXME: Handle overlapping atoms: middle-starting instructions, etc.. + MCTextAtom *NewTA = TA->split(BeginAddr); + + // Look for an already encountered basic block that needs splitting + BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr()); + if (It != BBInfos.end() && It->second.Atom) { + BBI->SuccAddrs = It->second.SuccAddrs; + It->second.SuccAddrs.clear(); + It->second.SuccAddrs.push_back(BeginAddr); + } + TA = NewTA; + } + BBI->Atom = TA; + } else { + // If we didn't find an atom, then we have to disassemble to create one! + + MemoryObject *Region = getRegionFor(BeginAddr); + if (!Region) + llvm_unreachable(("Couldn't find suitable region for disassembly at " + + utostr(BeginAddr)).c_str()); + + uint64_t InstSize; + uint64_t EndAddr = Region->getBase() + Region->getExtent(); + + // We want to stop before the next atom and have a fallthrough to it. + if (MCTextAtom *NextAtom = + cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr))) + EndAddr = std::min(EndAddr, NextAtom->getBeginAddr()); + + for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) { + MCInst Inst; + if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(), + nulls())) { + if (!TA) + TA = Module->createTextAtom(Addr, Addr); + TA->addInst(Inst, InstSize); + } else { + // We don't care about splitting mixed atoms either. + llvm_unreachable("Couldn't disassemble instruction in atom."); + } + + uint64_t BranchTarget; + if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { + if (MIA.isCall(Inst)) + CallTargets.push_back(BranchTarget); + } + + if (MIA.isTerminator(Inst)) + break; + } + BBI->Atom = TA; + } + + assert(TA && "Couldn't disassemble atom, none was created!"); + assert(TA->begin() != TA->end() && "Empty atom!"); + + MemoryObject *Region = getRegionFor(TA->getBeginAddr()); + assert(Region && "Couldn't find region for already disassembled code!"); + uint64_t EndRegion = Region->getBase() + Region->getExtent(); + + // Now we have a basic block atom, add successors. + // Add the fallthrough block. + if ((MIA.isConditionalBranch(TA->back().Inst) || + !MIA.isTerminator(TA->back().Inst)) && + (TA->getEndAddr() + 1 < EndRegion)) { + BBI->SuccAddrs.push_back(TA->getEndAddr() + 1); + Worklist.insert(TA->getEndAddr() + 1); + } + + // If the terminator is a branch, add the target block. + if (MIA.isBranch(TA->back().Inst)) { + uint64_t BranchTarget; + if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address, + TA->back().Size, BranchTarget)) { + StringRef ExtFnName; + if (MOS) + ExtFnName = + MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget)); + if (!ExtFnName.empty()) { + TailCallTargets.push_back(BranchTarget); + CallTargets.push_back(BranchTarget); + } else { + BBI->SuccAddrs.push_back(BranchTarget); + Worklist.insert(BranchTarget); + } + } + } + } + + for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { + const uint64_t BeginAddr = Worklist[wi]; + BBInfo *BBI = &BBInfos[BeginAddr]; + + assert(BBI->Atom && "Found a basic block without an associated atom!"); + + // Look for a basic block at BeginAddr. + BBI->BB = MCFN->find(BeginAddr); + if (BBI->BB) { + // FIXME: check that the succs/preds are the same + continue; + } + // If there was none, we have to create one from the atom. + BBI->BB = &MCFN->createBlock(*BBI->Atom); + } + + for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { + const uint64_t BeginAddr = Worklist[wi]; + BBInfo *BBI = &BBInfos[BeginAddr]; + MCBasicBlock *BB = BBI->BB; + + RemoveDupsFromAddressVector(BBI->SuccAddrs); + for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(), + SE = BBI->SuccAddrs.end(); + SE != SE; ++SI) { + MCBasicBlock *Succ = BBInfos[*SI].BB; + BB->addSuccessor(Succ); + Succ->addPredecessor(BB); + } + } + + assert(BBInfos[Worklist[0]].BB && + "No basic block created at requested address?"); + + return BBInfos[Worklist[0]].BB; +} + +MCFunction * +MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr, + AddressSetTy &CallTargets, + AddressSetTy &TailCallTargets) { + // First, check if this is an external function. + StringRef ExtFnName; + if (MOS) + ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr)); + if (!ExtFnName.empty()) + return Module->createFunction(ExtFnName); + + // If it's not, look for an existing function. + for (MCModule::func_iterator FI = Module->func_begin(), + FE = Module->func_end(); + FI != FE; ++FI) { + if ((*FI)->empty()) + continue; + // FIXME: MCModule should provide a findFunctionByAddr() + if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr) + return FI->get(); + } + + // Finally, just create a new one. + MCFunction *MCFN = Module->createFunction(""); + getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets); + return MCFN; +} + +// MachO MCObjectDisassembler implementation. + +MCMachOObjectDisassembler::MCMachOObjectDisassembler( + const MachOObjectFile &MOOF, const MCDisassembler &Dis, + const MCInstrAnalysis &MIA, uint64_t VMAddrSlide, + uint64_t HeaderLoadAddress) + : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF), + VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) { + + for (const SectionRef &Section : MOOF.sections()) { + StringRef Name; + Section.getName(Name); + // FIXME: We should use the S_ section type instead of the name. + if (Name == "__mod_init_func") { + DEBUG(dbgs() << "Found __mod_init_func section!\n"); + Section.getContents(ModInitContents); + } else if (Name == "__mod_exit_func") { + DEBUG(dbgs() << "Found __mod_exit_func section!\n"); + Section.getContents(ModExitContents); + } + } +} + +// FIXME: Only do the translations for addresses actually inside the object. +uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { + return Addr + VMAddrSlide; +} + +uint64_t +MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) { + return EffectiveAddr - VMAddrSlide; +} + +uint64_t MCMachOObjectDisassembler::getEntrypoint() { + uint64_t EntryFileOffset = 0; + + // Look for LC_MAIN. + { + uint32_t LoadCommandCount = MOOF.getHeader().ncmds; + MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo(); + for (unsigned I = 0;; ++I) { + if (Load.C.cmd == MachO::LC_MAIN) { + EntryFileOffset = + ((const MachO::entry_point_command *)Load.Ptr)->entryoff; + break; + } + + if (I == LoadCommandCount - 1) + break; + else + Load = MOOF.getNextLoadCommandInfo(Load); + } + } + + // If we didn't find anything, default to the common implementation. + // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends? + if (EntryFileOffset) + return MCObjectDisassembler::getEntrypoint(); + + return EntryFileOffset + HeaderLoadAddress; +} + +ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() { + // FIXME: We only handle 64bit mach-o + assert(MOOF.is64Bit()); + + size_t EntrySize = 8; + size_t EntryCount = ModInitContents.size() / EntrySize; + return ArrayRef<uint64_t>( + reinterpret_cast<const uint64_t *>(ModInitContents.data()), EntryCount); +} + +ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() { + // FIXME: We only handle 64bit mach-o + assert(MOOF.is64Bit()); + + size_t EntrySize = 8; + size_t EntryCount = ModExitContents.size() / EntrySize; + return ArrayRef<uint64_t>( + reinterpret_cast<const uint64_t *>(ModExitContents.data()), EntryCount); +} diff --git a/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp new file mode 100644 index 0000000..b149596 --- /dev/null +++ b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp @@ -0,0 +1,268 @@ +//===-- lib/MC/MCObjectSymbolizer.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectSymbolizer.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRelocationInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; +using namespace object; + +//===- MCMachObjectSymbolizer ---------------------------------------------===// + +namespace { +class MCMachObjectSymbolizer : public MCObjectSymbolizer { + const MachOObjectFile *MOOF; + // __TEXT;__stubs support. + uint64_t StubsStart; + uint64_t StubsCount; + uint64_t StubSize; + uint64_t StubsIndSymIndex; + +public: + MCMachObjectSymbolizer(MCContext &Ctx, + std::unique_ptr<MCRelocationInfo> RelInfo, + const MachOObjectFile *MOOF); + + StringRef findExternalFunctionAt(uint64_t Addr) override; + + void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, + uint64_t Address) override; +}; +} // End unnamed namespace + +MCMachObjectSymbolizer::MCMachObjectSymbolizer( + MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo, + const MachOObjectFile *MOOF) + : MCObjectSymbolizer(Ctx, std::move(RelInfo), MOOF), MOOF(MOOF), + StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) { + + for (const SectionRef &Section : MOOF->sections()) { + StringRef Name; + Section.getName(Name); + if (Name == "__stubs") { + SectionRef StubsSec = Section; + if (MOOF->is64Bit()) { + MachO::section_64 S = MOOF->getSection64(StubsSec.getRawDataRefImpl()); + StubsIndSymIndex = S.reserved1; + StubSize = S.reserved2; + } else { + MachO::section S = MOOF->getSection(StubsSec.getRawDataRefImpl()); + StubsIndSymIndex = S.reserved1; + StubSize = S.reserved2; + } + assert(StubSize && "Mach-O stub entry size can't be zero!"); + StubsSec.getAddress(StubsStart); + StubsSec.getSize(StubsCount); + StubsCount /= StubSize; + } + } +} + +StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) { + // FIXME: also, this can all be done at the very beginning, by iterating over + // all stubs and creating the calls to outside functions. Is it worth it + // though? + if (!StubSize) + return StringRef(); + uint64_t StubIdx = (Addr - StubsStart) / StubSize; + if (StubIdx >= StubsCount) + return StringRef(); + + uint32_t SymtabIdx = + MOOF->getIndirectSymbolTableEntry(MOOF->getDysymtabLoadCommand(), StubIdx); + + StringRef SymName; + symbol_iterator SI = MOOF->symbol_begin(); + for (uint32_t i = 0; i != SymtabIdx; ++i) + ++SI; + SI->getName(SymName); + assert(SI != MOOF->symbol_end() && "Stub wasn't found in the symbol table!"); + assert(SymName.front() == '_' && "Mach-O symbol doesn't start with '_'!"); + return SymName.substr(1); +} + +void MCMachObjectSymbolizer:: +tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, + uint64_t Address) { + if (const RelocationRef *R = findRelocationAt(Address)) { + const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R); + if (!RelExpr || RelExpr->EvaluateAsAbsolute(Value) == false) + return; + } + uint64_t Addr = Value; + if (const SectionRef *S = findSectionContaining(Addr)) { + StringRef Name; S->getName(Name); + uint64_t SAddr; S->getAddress(SAddr); + if (Name == "__cstring") { + StringRef Contents; + S->getContents(Contents); + Contents = Contents.substr(Addr - SAddr); + cStream << " ## literal pool for: " + << Contents.substr(0, Contents.find_first_of(0)); + } + } +} + +//===- MCObjectSymbolizer -------------------------------------------------===// + +MCObjectSymbolizer::MCObjectSymbolizer( + MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo, + const ObjectFile *Obj) + : MCSymbolizer(Ctx, std::move(RelInfo)), Obj(Obj), SortedSections(), + AddrToReloc() {} + +bool MCObjectSymbolizer:: +tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t InstSize) { + if (IsBranch) { + StringRef ExtFnName = findExternalFunctionAt((uint64_t)Value); + if (!ExtFnName.empty()) { + MCSymbol *Sym = Ctx.GetOrCreateSymbol(ExtFnName); + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + MI.addOperand(MCOperand::CreateExpr(Expr)); + return true; + } + } + + if (const RelocationRef *R = findRelocationAt(Address + Offset)) { + if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R)) { + MI.addOperand(MCOperand::CreateExpr(RelExpr)); + return true; + } + // Only try to create a symbol+offset expression if there is no relocation. + return false; + } + + // Interpret Value as a branch target. + if (IsBranch == false) + return false; + uint64_t UValue = Value; + // FIXME: map instead of looping each time? + for (const SymbolRef &Symbol : Obj->symbols()) { + uint64_t SymAddr; + Symbol.getAddress(SymAddr); + uint64_t SymSize; + Symbol.getSize(SymSize); + StringRef SymName; + Symbol.getName(SymName); + SymbolRef::Type SymType; + Symbol.getType(SymType); + if (SymAddr == UnknownAddressOrSize || SymSize == UnknownAddressOrSize || + SymName.empty() || SymType != SymbolRef::ST_Function) + continue; + + if ( SymAddr == UValue || + (SymAddr <= UValue && SymAddr + SymSize > UValue)) { + MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + if (SymAddr != UValue) { + const MCExpr *Off = MCConstantExpr::Create(UValue - SymAddr, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, Off, Ctx); + } + MI.addOperand(MCOperand::CreateExpr(Expr)); + return true; + } + } + return false; +} + +void MCObjectSymbolizer:: +tryAddingPcLoadReferenceComment(raw_ostream &cStream, + int64_t Value, uint64_t Address) { +} + +StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) { + return StringRef(); +} + +MCObjectSymbolizer *MCObjectSymbolizer::createObjectSymbolizer( + MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo, + const ObjectFile *Obj) { + if (const MachOObjectFile *MOOF = dyn_cast<MachOObjectFile>(Obj)) + return new MCMachObjectSymbolizer(Ctx, std::move(RelInfo), MOOF); + return new MCObjectSymbolizer(Ctx, std::move(RelInfo), Obj); +} + +// SortedSections implementation. + +static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) { + uint64_t SAddr; S.getAddress(SAddr); + return SAddr < Addr; +} + +const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) { + if (SortedSections.empty()) + buildSectionList(); + + SortedSectionList::iterator + EndIt = SortedSections.end(), + It = std::lower_bound(SortedSections.begin(), EndIt, + Addr, SectionStartsBefore); + if (It == EndIt) + return nullptr; + uint64_t SAddr; It->getAddress(SAddr); + uint64_t SSize; It->getSize(SSize); + if (Addr >= SAddr + SSize) + return nullptr; + return &*It; +} + +const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) { + if (AddrToReloc.empty()) + buildRelocationByAddrMap(); + + AddrToRelocMap::const_iterator RI = AddrToReloc.find(Addr); + if (RI == AddrToReloc.end()) + return nullptr; + return &RI->second; +} + +void MCObjectSymbolizer::buildSectionList() { + for (const SectionRef &Section : Obj->sections()) { + bool RequiredForExec; + Section.isRequiredForExecution(RequiredForExec); + if (RequiredForExec == false) + continue; + uint64_t SAddr; + Section.getAddress(SAddr); + uint64_t SSize; + Section.getSize(SSize); + SortedSectionList::iterator It = + std::lower_bound(SortedSections.begin(), SortedSections.end(), SAddr, + SectionStartsBefore); + if (It != SortedSections.end()) { + uint64_t FoundSAddr; It->getAddress(FoundSAddr); + if (FoundSAddr < SAddr + SSize) + llvm_unreachable("Inserting overlapping sections"); + } + SortedSections.insert(It, Section); + } +} + +void MCObjectSymbolizer::buildRelocationByAddrMap() { + for (const SectionRef &Section : Obj->sections()) { + for (const RelocationRef &Reloc : Section.relocations()) { + uint64_t Address; + Reloc.getAddress(Address); + // At a specific address, only keep the first relocation. + if (AddrToReloc.find(Address) == AddrToReloc.end()) + AddrToReloc[Address] = Reloc; + } + } +} diff --git a/lib/MC/MCAnalysis/Makefile b/lib/MC/MCAnalysis/Makefile new file mode 100644 index 0000000..add2dbd --- /dev/null +++ b/lib/MC/MCAnalysis/Makefile @@ -0,0 +1,14 @@ +##===- lib/MC/MCAnalysys/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMMCAnalysis +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common |