diff options
author | Kevin Enderby <enderby@apple.com> | 2013-06-06 17:20:50 +0000 |
---|---|---|
committer | Kevin Enderby <enderby@apple.com> | 2013-06-06 17:20:50 +0000 |
commit | 54154f3bf1ae3d2dfd68cc9474cad061b3338a40 (patch) | |
tree | 5ebe75661f64525d36edb04be5363c1d10e112c9 /tools | |
parent | f2988a00843bf5ddef7c9ee8b26534a1aa8c56cf (diff) | |
download | external_llvm-54154f3bf1ae3d2dfd68cc9474cad061b3338a40.zip external_llvm-54154f3bf1ae3d2dfd68cc9474cad061b3338a40.tar.gz external_llvm-54154f3bf1ae3d2dfd68cc9474cad061b3338a40.tar.bz2 |
Teach llvm-objdump with the -macho parser how to use the data in code table
from the LC_DATA_IN_CODE load command. And when disassembling print
the data in code formatted for the kind of data it and not disassemble those
bytes.
I added the format specific functionality to the derived class MachOObjectFile
since these tables only appears in Mach-O object files. This is my first
attempt to modify the libObject stuff so if folks have better suggestions
how to fit this in or suggestions on the implementation please let me know.
rdar://11791371
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183424 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools')
-rw-r--r-- | tools/llvm-objdump/MachODump.cpp | 118 | ||||
-rw-r--r-- | tools/macho-dump/macho-dump.cpp | 2 |
2 files changed, 113 insertions, 7 deletions
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 1ee3e42..27e1623 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -87,12 +87,73 @@ struct SymbolSorter { } }; +// Types for the storted data in code table that is built before disassembly +// and the predicate function to sort them. +typedef std::pair<uint64_t, DiceRef> DiceTableEntry; +typedef std::vector<DiceTableEntry> DiceTable; +typedef DiceTable::iterator dice_table_iterator; + +static bool +compareDiceTableEntries(const DiceTableEntry i, + const DiceTableEntry j) { + return i.first == j.first; +} + +static void DumpDataInCode(const char *bytes, uint64_t Size, + unsigned short Kind) { + uint64_t Value; + + switch (Kind) { + case macho::Data: + switch (Size) { + case 4: + Value = bytes[3] << 24 | + bytes[2] << 16 | + bytes[1] << 8 | + bytes[0]; + outs() << "\t.long " << Value; + break; + case 2: + Value = bytes[1] << 8 | + bytes[0]; + outs() << "\t.short " << Value; + break; + case 1: + Value = bytes[0]; + outs() << "\t.byte " << Value; + break; + } + outs() << "\t@ KIND_DATA\n"; + break; + case macho::JumpTable8: + Value = bytes[0]; + outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8"; + break; + case macho::JumpTable16: + Value = bytes[1] << 8 | + bytes[0]; + outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16"; + break; + case macho::JumpTable32: + Value = bytes[3] << 24 | + bytes[2] << 16 | + bytes[1] << 8 | + bytes[0]; + outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32"; + break; + default: + outs() << "\t@ data in code kind = " << Kind << "\n"; + break; + } +} + static void getSectionsAndSymbols(const macho::Header Header, MachOObjectFile *MachOObj, std::vector<SectionRef> &Sections, std::vector<SymbolRef> &Symbols, - SmallVectorImpl<uint64_t> &FoundFns) { + SmallVectorImpl<uint64_t> &FoundFns, + uint64_t &BaseSegmentAddress) { error_code ec; for (symbol_iterator SI = MachOObj->begin_symbols(), SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec)) @@ -108,6 +169,7 @@ getSectionsAndSymbols(const macho::Header Header, MachOObjectFile::LoadCommandInfo Command = MachOObj->getFirstLoadCommandInfo(); + bool BaseSegmentAddressSet = false; for (unsigned i = 0; ; ++i) { if (Command.C.Type == macho::LCT_FunctionStarts) { // We found a function starts segment, parse the addresses for later @@ -117,6 +179,15 @@ getSectionsAndSymbols(const macho::Header Header, MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns); } + else if (Command.C.Type == macho::LCT_Segment) { + macho::SegmentLoadCommand SLC = + MachOObj->getSegmentLoadCommand(Command); + StringRef SegName = SLC.Name; + if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") { + BaseSegmentAddressSet = true; + BaseSegmentAddress = SLC.VMAddress; + } + } if (i == Header.NumLoadCommands - 1) break; @@ -184,14 +255,32 @@ static void DisassembleInputMachO2(StringRef Filename, std::vector<SectionRef> Sections; std::vector<SymbolRef> Symbols; SmallVector<uint64_t, 8> FoundFns; + uint64_t BaseSegmentAddress; - getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns); + getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns, + BaseSegmentAddress); // Make a copy of the unsorted symbol list. FIXME: duplication std::vector<SymbolRef> UnsortedSymbols(Symbols); // Sort the symbols by address, just in case they didn't come in that way. std::sort(Symbols.begin(), Symbols.end(), SymbolSorter()); + // Build a data in code table that is sorted on by the address of each entry. + uint64_t BaseAddress = 0; + if (Header.FileType == macho::HFT_Object) + Sections[0].getAddress(BaseAddress); + else + BaseAddress = BaseSegmentAddress; + DiceTable Dices; + error_code ec; + for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices(); + DI != DE; DI.increment(ec)){ + uint32_t Offset; + DI->getOffset(Offset); + Dices.push_back(std::make_pair(BaseAddress + Offset, *DI)); + } + array_pod_sort(Dices.begin(), Dices.end()); + #ifndef NDEBUG raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); #else @@ -309,12 +398,29 @@ static void DisassembleInputMachO2(StringRef Filename, for (uint64_t Index = Start; Index < End; Index += Size) { MCInst Inst; + uint64_t SectAddress = 0; + Sections[SectIdx].getAddress(SectAddress); + outs() << format("%8" PRIx64 ":\t", SectAddress + Index); + + // Check the data in code table here to see if this is data not an + // instruction to be disassembled. + DiceTable Dice; + Dice.push_back(std::make_pair(SectAddress + Index, DiceRef())); + dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(), + Dice.begin(), Dice.end(), + compareDiceTableEntries); + if (DTI != Dices.end()){ + uint16_t Length; + DTI->second.getLength(Length); + DumpBytes(StringRef(Bytes.data() + Index, Length)); + uint16_t Kind; + DTI->second.getKind(Kind); + DumpDataInCode(Bytes.data() + Index, Length, Kind); + continue; + } + if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut, nulls())) { - uint64_t SectAddress = 0; - Sections[SectIdx].getAddress(SectAddress); - outs() << format("%8" PRIx64 ":\t", SectAddress + Index); - DumpBytes(StringRef(Bytes.data() + Index, Size)); IP->printInst(&Inst, outs(), ""); diff --git a/tools/macho-dump/macho-dump.cpp b/tools/macho-dump/macho-dump.cpp index 88fd452..897a785 100644 --- a/tools/macho-dump/macho-dump.cpp +++ b/tools/macho-dump/macho-dump.cpp @@ -292,7 +292,7 @@ DumpDataInCodeDataCommand(const MachOObjectFile &Obj, << " ('datasize', " << LLC.DataSize << ")\n" << " ('_data_regions', [\n"; - unsigned NumRegions = LLC.DataSize / 8; + unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry); for (unsigned i = 0; i < NumRegions; ++i) { macho::DataInCodeTableEntry DICE = Obj.getDataInCodeTableEntry(LLC.DataOffset, i); |