diff options
author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2013-05-24 01:07:04 +0000 |
---|---|---|
committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2013-05-24 01:07:04 +0000 |
commit | ef99356dfebb96f6f90efb912c2877214bad060e (patch) | |
tree | 76250a4be7eff5e9bec963f6ff0daef8cb8d84bf /tools/llvm-objdump/MCFunction.cpp | |
parent | 2c94d0faa0e1c268893d5e04dc77e8a35889db00 (diff) | |
download | external_llvm-ef99356dfebb96f6f90efb912c2877214bad060e.zip external_llvm-ef99356dfebb96f6f90efb912c2877214bad060e.tar.gz external_llvm-ef99356dfebb96f6f90efb912c2877214bad060e.tar.bz2 |
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., BBs that are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182628 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools/llvm-objdump/MCFunction.cpp')
-rw-r--r-- | tools/llvm-objdump/MCFunction.cpp | 138 |
1 files changed, 0 insertions, 138 deletions
diff --git a/tools/llvm-objdump/MCFunction.cpp b/tools/llvm-objdump/MCFunction.cpp deleted file mode 100644 index 5c67f1b..0000000 --- a/tools/llvm-objdump/MCFunction.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===-- MCFunction.cpp ----------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the algorithm to break down a region of machine code -// into basic blocks and try to reconstruct a CFG from it. -// -//===----------------------------------------------------------------------===// - -#include "MCFunction.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" -#include <set> -using namespace llvm; - -MCFunction -MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, - const MemoryObject &Region, uint64_t Start, - uint64_t End, const MCInstrAnalysis *Ana, - raw_ostream &DebugOut, - SmallVectorImpl<uint64_t> &Calls) { - std::vector<MCDecodedInst> Instructions; - std::set<uint64_t> Splits; - Splits.insert(Start); - uint64_t Size; - - MCFunction f(Name); - - { - DenseSet<uint64_t> VisitedInsts; - SmallVector<uint64_t, 16> WorkList; - WorkList.push_back(Start); - // Disassemble code and gather basic block split points. - while (!WorkList.empty()) { - uint64_t Index = WorkList.pop_back_val(); - if (VisitedInsts.find(Index) != VisitedInsts.end()) - continue; // Already visited this location. 
- - for (;Index < End; Index += Size) { - VisitedInsts.insert(Index); - - MCInst Inst; - if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){ - Instructions.push_back(MCDecodedInst(Index, Size, Inst)); - if (Ana->isBranch(Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); - if (targ != -1ULL && targ == Index+Size) - continue; // Skip nop jumps. - - // If we could determine the branch target, make a note to start a - // new basic block there and add the target to the worklist. - if (targ != -1ULL) { - Splits.insert(targ); - WorkList.push_back(targ); - WorkList.push_back(Index+Size); - } - Splits.insert(Index+Size); - break; - } else if (Ana->isReturn(Inst)) { - // Return instruction. This basic block ends here. - Splits.insert(Index+Size); - break; - } else if (Ana->isCall(Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); - // Add the call to the call list if the destination is known. - if (targ != -1ULL && targ != Index+Size) - Calls.push_back(targ); - } - } else { - errs().write_hex(Index) << ": warning: invalid instruction encoding\n"; - if (Size == 0) - Size = 1; // skip illegible bytes - } - } - } - } - - // Make sure the instruction list is sorted. - std::sort(Instructions.begin(), Instructions.end()); - - // Create basic blocks. - unsigned ii = 0, ie = Instructions.size(); - for (std::set<uint64_t>::iterator spi = Splits.begin(), - spe = llvm::prior(Splits.end()); spi != spe; ++spi) { - MCBasicBlock BB; - uint64_t BlockEnd = *llvm::next(spi); - // Add instructions to the BB. - for (; ii != ie; ++ii) { - if (Instructions[ii].Address < *spi || - Instructions[ii].Address >= BlockEnd) - break; - BB.addInst(Instructions[ii]); - } - f.addBlock(*spi, BB); - } - - std::sort(f.Blocks.begin(), f.Blocks.end()); - - // Calculate successors of each block. 
- for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { - MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second); - if (BB.getInsts().empty()) continue; - const MCDecodedInst &Inst = BB.getInsts().back(); - - if (Ana->isBranch(Inst.Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size); - if (targ == -1ULL) { - // Indirect branch. Bail and add all blocks of the function as a - // successor. - for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) - BB.addSucc(i->first); - } else if (targ != Inst.Address+Inst.Size) - BB.addSucc(targ); - // Conditional branches can also fall through to the next block. - if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e) - BB.addSucc(llvm::next(i)->first); - } else { - // No branch. Fall through to the next block. - if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e) - BB.addSucc(llvm::next(i)->first); - } - } - - return f; -} |