aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2011-07-20 19:37:35 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2011-07-20 19:37:35 +0000
commit685a2501b20baf688f6cc087f4b92bbafcd8028e (patch)
tree3f0fca76506239b383567c664fbc53c820cebabd /tools
parenta4d0bd84f7bdc78784e44b623ded448988022e4b (diff)
downloadexternal_llvm-685a2501b20baf688f6cc087f4b92bbafcd8028e.zip
external_llvm-685a2501b20baf688f6cc087f4b92bbafcd8028e.tar.gz
external_llvm-685a2501b20baf688f6cc087f4b92bbafcd8028e.tar.bz2
Sketch out a CFG reconstruction mode for llvm-objdump.
- Not great yet, but it's a start. - Requires an object file with a symbol table. (I really want to fix this, but it'll need a whole new algorithm) - ELF and COFF won't work at the moment due to libObject shortcomings. To try it out run $ llvm-objdump -d --cfg foo.o This will create a graphviz file for every symbol in the object file's text section containing a CFG. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135608 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools')
-rw-r--r--tools/llvm-objdump/MCFunction.cpp113
-rw-r--r--tools/llvm-objdump/MCFunction.h88
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp44
3 files changed, 242 insertions, 3 deletions
diff --git a/tools/llvm-objdump/MCFunction.cpp b/tools/llvm-objdump/MCFunction.cpp
new file mode 100644
index 0000000..dd31402
--- /dev/null
+++ b/tools/llvm-objdump/MCFunction.cpp
@@ -0,0 +1,113 @@
+//===-- MCFunction.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the algorithm to break down a region of machine code
+// into basic blocks and try to reconstruct a CFG from it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCFunction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <set>
+using namespace llvm;
+
+/// hasPCRelOperand - Return true if the first operand described by Desc is a
+/// PC-relative operand. Descriptions without any operands never qualify, so
+/// OpInfo is only dereferenced when it has at least one entry.
+static bool hasPCRelOperand(const MCInstrDesc &Desc) {
+  return Desc.getNumOperands() != 0 &&
+         Desc.OpInfo[0].OperandType == MCOI::OPERAND_PCREL;
+}
+
+/// getPCRelBranchTarget - Compute the destination of a PC-relative branch that
+/// was decoded at Address and is Size bytes long. The destination is the
+/// address following the instruction plus the immediate in operand 0.
+///
+/// Returns false and leaves Target untouched when no destination is known:
+/// operand 0 is missing, is not an immediate, or is a zero displacement.
+static bool getPCRelBranchTarget(const MCInst &Inst, uint64_t Address,
+                                 uint64_t Size, uint64_t &Target) {
+  if (Inst.getNumOperands() == 0 || !Inst.getOperand(0).isImm())
+    return false;
+
+  int64_t Imm = Inst.getOperand(0).getImm();
+  // FIXME: Distinguish relocations from nop jumps.
+  if (Imm == 0)
+    return false;
+
+  Target = Address + Size + Imm;
+  return true;
+}
+
+/// createFunctionFromMC - Disassemble the bytes of Region in [Start, End),
+/// split the decoded instructions into basic blocks and connect the blocks
+/// with successor edges.
+///
+/// \param Name      Name given to the resulting MCFunction.
+/// \param DisAsm    Disassembler used to decode the instructions.
+/// \param Region    Memory the instructions are read from.
+/// \param Start     Address of the first byte to decode.
+/// \param End       Address one past the last byte to decode.
+/// \param InstrInfo Source of the MCInstrDesc for every decoded opcode.
+/// \param DebugOut  Stream handed through to the disassembler.
+/// \returns the function with one block per split point.
+///
+/// A block starts at Start, at the address following every branch, and at the
+/// destination of every PC-relative branch with a nonzero displacement that
+/// lands inside [Start, End). Destinations outside of that range are reported
+/// with a warning and produce neither a block nor an edge; the input is
+/// arbitrary object code, so this is checked at runtime instead of asserted.
+MCFunction
+MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
+                                 const MemoryObject &Region, uint64_t Start,
+                                 uint64_t End, const MCInstrInfo *InstrInfo,
+                                 raw_ostream &DebugOut) {
+  std::set<uint64_t> Splits;
+  Splits.insert(Start);
+  std::vector<MCDecodedInst> Instructions;
+  // Initialized so the "Size == 0" test below never reads an indeterminate
+  // value, even if the disassembler fails without writing Size.
+  uint64_t Size = 0;
+
+  // Disassemble code and gather basic block split points.
+  for (uint64_t Index = Start; Index < End; Index += Size) {
+    MCInst Inst;
+
+    if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut)) {
+      const MCInstrDesc &Desc = InstrInfo->get(Inst.getOpcode());
+      if (Desc.isBranch()) {
+        uint64_t Target;
+        if (hasPCRelOperand(Desc) &&
+            getPCRelBranchTarget(Inst, Index, Size, Target)) {
+          if (Target >= Start && Target < End)
+            Splits.insert(Target);
+          else
+            errs() << "warning: branch out of function\n";
+        }
+        // Whatever follows a branch starts a new block.
+        Splits.insert(Index+Size);
+      }
+
+      Instructions.push_back(MCDecodedInst(Index, Size, Inst));
+    } else {
+      errs() << "warning: invalid instruction encoding\n";
+      if (Size == 0)
+        Size = 1; // skip illegible bytes
+    }
+  }
+
+  MCFunction f(Name);
+
+  // Create basic blocks. Splits and Instructions are both sorted by address,
+  // so a single forward pass over the instructions fills every block.
+  unsigned ii = 0, ie = Instructions.size();
+  for (std::set<uint64_t>::iterator spi = Splits.begin(),
+       spe = Splits.end(); spi != spe; ++spi) {
+    MCBasicBlock BB;
+    std::set<uint64_t>::iterator NextSplit = llvm::next(spi);
+    uint64_t BlockEnd = NextSplit == spe ? End : *NextSplit;
+    // Add instructions to the BB.
+    for (; ii != ie; ++ii) {
+      if (Instructions[ii].Address < *spi ||
+          Instructions[ii].Address >= BlockEnd)
+        break;
+      BB.addInst(Instructions[ii]);
+    }
+    f.addBlock(*spi, BB);
+  }
+
+  // Calculate successors of each block from its last instruction.
+  for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
+    MCBasicBlock &BB = i->second;
+    if (BB.getInsts().empty()) continue;
+    const MCDecodedInst &Inst = BB.getInsts().back();
+    const MCInstrDesc &Desc = InstrInfo->get(Inst.Inst.getOpcode());
+    // Qualified on purpose: an unqualified next() on a std::map iterator can
+    // also find std::next through argument dependent lookup.
+    MCFunction::iterator NextBB = llvm::next(i);
+
+    if (!Desc.isBranch()) {
+      // No branch. Fall through to the next block.
+      if (!Desc.isReturn() && NextBB != e)
+        BB.addSucc(&NextBB->second);
+      continue;
+    }
+
+    if (hasPCRelOperand(Desc)) {
+      // PCRel branch, we know the destination. The range test mirrors the one
+      // used while collecting splits, so a block exists for every accepted
+      // destination.
+      uint64_t Target;
+      if (getPCRelBranchTarget(Inst.Inst, Inst.Address, Inst.Size, Target) &&
+          Target >= Start && Target < End)
+        BB.addSucc(&f.getBlockAtAddress(Target));
+      // Conditional branches can also fall through to the next block.
+      if (Desc.isConditionalBranch() && NextBB != e)
+        BB.addSucc(&NextBB->second);
+    } else {
+      // Indirect branch. Bail and add all blocks of the function as a
+      // successor.
+      for (MCFunction::iterator bi = f.begin(); bi != e; ++bi)
+        BB.addSucc(&bi->second);
+    }
+  }
+
+  return f;
+}
diff --git a/tools/llvm-objdump/MCFunction.h b/tools/llvm-objdump/MCFunction.h
new file mode 100644
index 0000000..60f6429
--- /dev/null
+++ b/tools/llvm-objdump/MCFunction.h
@@ -0,0 +1,88 @@
+//===-- MCFunction.h ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the data structures to hold a CFG reconstructed from
+// machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/MC/MCInst.h"
+#include <map>
+
+namespace llvm {
+
+class MCDisassembler;
+class MCInstrInfo;
+class MemoryObject;
+class raw_ostream;
+
+/// MCDecodedInst - Small container to hold an MCInst and associated info like
+/// address and size.
+struct MCDecodedInst {
+  /// Address the instruction was decoded at.
+  uint64_t Address;
+  /// Size that was recorded for the instruction when it was decoded.
+  uint64_t Size;
+  /// The decoded instruction itself, stored by value.
+  MCInst Inst;
+
+  /// The instruction is taken by const reference so that it is copied exactly
+  /// once, into the Inst member.
+  MCDecodedInst(uint64_t Address, uint64_t Size, const MCInst &Inst)
+    : Address(Address), Size(Size), Inst(Inst) {}
+};
+
+/// MCBasicBlock - A sequence of MCDecodedInsts plus the set of MCBasicBlocks
+/// recorded as its successors.
+class MCBasicBlock {
+  typedef SmallPtrSet<MCBasicBlock*, 8> SuccSetTy;
+
+  /// Instructions of the block, in the order they were added.
+  SmallVector<MCDecodedInst, 8> Insts;
+  /// Successor blocks. Stored as a pointer set, so every block appears at most
+  /// once no matter how often it is added.
+  SuccSetTy Succs;
+
+public:
+  typedef SuccSetTy::const_iterator succ_iterator;
+
+  /// Append a copy of \p NewInst to the end of the block.
+  void addInst(const MCDecodedInst &NewInst) { Insts.push_back(NewInst); }
+
+  /// Record \p Succ as a successor of this block.
+  void addSucc(MCBasicBlock *Succ) { Succs.insert(Succ); }
+
+  /// Read-only view of the instructions in this block.
+  ArrayRef<MCDecodedInst> getInsts() const { return Insts; }
+
+  /// Iteration over the recorded successors.
+  succ_iterator succ_begin() const { return Succs.begin(); }
+  succ_iterator succ_end() const { return Succs.end(); }
+};
+
+/// MCFunction - Represents a named function in machine code, containing
+/// multiple MCBasicBlocks.
+class MCFunction {
+  /// Name of the function. Only a reference to the characters is kept.
+  const StringRef Name;
+  // Keep BBs sorted by address.
+  typedef std::map<uint64_t, MCBasicBlock> MapTy;
+  MapTy Blocks;
+public:
+  MCFunction(StringRef Name) : Name(Name) {}
+
+  // Create an MCFunction from a region of binary machine code.
+  static MCFunction
+  createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
+                       const MemoryObject &Region, uint64_t Start, uint64_t End,
+                       const MCInstrInfo *InstrInfo, raw_ostream &DebugOut);
+
+  /// Iteration over (address, block) pairs in ascending address order.
+  typedef MapTy::iterator iterator;
+  iterator begin() { return Blocks.begin(); }
+  iterator end() { return Blocks.end(); }
+
+  StringRef getName() const { return Name; }
+
+  /// addBlock - Store a copy of \p BB as the block starting at \p Address and
+  /// return a reference to the stored copy. There must not already be a block
+  /// at \p Address; this is asserted. A single insert() is used so the map is
+  /// searched only once.
+  MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) {
+    std::pair<MapTy::iterator, bool> Res =
+      Blocks.insert(MapTy::value_type(Address, BB));
+    assert(Res.second && "Already a BB at address.");
+    return Res.first->second;
+  }
+
+  /// getBlockAtAddress - Return the block that starts at \p Address. Such a
+  /// block must exist; this is asserted. find() is used instead of operator[]
+  /// so that a lookup can never add an empty block to the function.
+  MCBasicBlock &getBlockAtAddress(uint64_t Address) {
+    MapTy::iterator I = Blocks.find(Address);
+    assert(I != Blocks.end() && "No BB at address.");
+    return I->second;
+  }
+};
+
+}
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 8f6e9df..21df7cc 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCFunction.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Triple.h"
@@ -21,6 +22,8 @@
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
@@ -52,6 +55,10 @@ namespace {
Disassembled("d", cl::desc("Alias for --disassemble"),
cl::aliasopt(Disassemble));
+ // -cfg: write one graphviz file with a CFG per symbol when disassembling.
+ cl::opt<bool>
+ CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and "
+                     "write it to a graphviz file"));
+
cl::opt<std::string>
TripleName("triple", cl::desc("Target triple to disassemble for, "
"see -version for available targets"));
@@ -156,6 +163,7 @@ static void DisassembleInput(const StringRef &Filename) {
// GetTarget prints out stuff.
return;
}
+ const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo();
outs() << '\n';
outs() << Filename
@@ -233,15 +241,14 @@ static void DisassembleInput(const StringRef &Filename) {
uint64_t End = si == se-1 ? SectSize : Symbols[si + 1].first - 1;
outs() << '\n' << Symbols[si].second << ":\n";
- for (Index = Start; Index < End; Index += Size) {
- MCInst Inst;
-
#ifndef NDEBUG
raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
#else
raw_ostream &DebugOut = nulls();
#endif
+ for (Index = Start; Index < End; Index += Size) {
+ MCInst Inst;
if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut)) {
uint64_t addr;
if (error(i->getAddress(addr))) break;
@@ -255,6 +262,36 @@ static void DisassembleInput(const StringRef &Filename) {
Size = 1; // skip illegible bytes
}
}
+
+ if (CFG) {
+ MCFunction f =
+ MCFunction::createFunctionFromMC(Symbols[si].second, DisAsm.get(),
+ memoryObject, Start, End, InstrInfo,
+ DebugOut);
+
+ // Start a new dot file.
+ std::string Error;
+ raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error);
+
+ Out << "digraph " << f.getName() << " {\n";
+ Out << "graph [ rankdir = \"LR\" ];\n";
+ for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
+ Out << '"' << (uintptr_t)&i->second << "\" [ label=\"<a>";
+ // Print instructions.
+ for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
+ ++ii) {
+ IP->printInst(&i->second.getInsts()[ii].Inst, Out);
+ Out << '|';
+ }
+ Out << "<o>\" shape=\"record\" ];\n";
+
+ // Add edges.
+ for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
+ se = i->second.succ_end(); si != se; ++si)
+ Out << (uintptr_t)&i->second << ":o -> " << (uintptr_t)*si <<":a\n";
+ }
+ Out << "}\n";
+ }
}
}
}
@@ -271,6 +308,7 @@ int main(int argc, char **argv) {
llvm::InitializeAllTargets();
llvm::InitializeAllMCAsmInfos();
llvm::InitializeAllMCCodeGenInfos();
+ llvm::InitializeAllMCInstrInfos();
llvm::InitializeAllAsmPrinters();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllDisassemblers();