Add a new dwarf accelerator table prototype with the goal of replacing

the pubnames and pubtypes tables. LLDB can currently use this format and a full spec is forthcoming and submission for standardization is planned. A basic summary: The dwarf accelerator tables are an indirect hash table optimized for null lookup rather than access to known data. They are output into an on-disk format that looks like this: .-------------. | HEADER | |-------------| | BUCKETS | |-------------| | HASHES | |-------------| | OFFSETS | |-------------| | DATA | `-------------' where the header contains a magic number, version, type of hash function, the number of buckets, total number of hashes, and room for a special struct of data and the length of that struct. The buckets contain an index (e.g. 6) into the hashes array. The hashes section contains all of the 32-bit hash values in contiguous memory, and the offsets contain the offset into the data area for the particular hash. For a lookup example, we could hash a function name and take it modulo the number of buckets giving us our bucket. From there we take the bucket value as an index into the hashes table and look at each successive hash as long as the hash value is still the same modulo result (bucket value) as earlier. If we have a match we look at that same entry in the offsets table and grab the offset in the data for our final match. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143921 91177308-0d34-0410-b5e6-96231b3b80d8
author: Eric Christopher <echristo@apple.com> 2011-11-07 09:18:42 +0000
committer: Eric Christopher <echristo@apple.com> 2011-11-07 09:18:42 +0000
commit: bcbd3a4637f33036d05833364e180f9dfaabb67c (patch)
tree: 44a3d212cb1ef2ae0fc34b6b93a9a0656659bda8 /lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
parent: d8a8752d5b7f546fd7ebb295366a43b2b76afbd6 (diff)
download: external_llvm-bcbd3a4637f33036d05833364e180f9dfaabb67c.zip
external_llvm-bcbd3a4637f33036d05833364e180f9dfaabb67c.tar.gz
external_llvm-bcbd3a4637f33036d05833364e180f9dfaabb67c.tar.bz2
1 files changed, 250 insertions, 0 deletions
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
new file mode 100644
index 0000000..b7c8c6e
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -0,0 +1,250 @@
+//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "DwarfAccelTable.h"
+#include "DwarfDebug.h"
+#include "DIE.h"
+
+using namespace llvm;
+
+const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
+  switch (AT) {
+  default: llvm_unreachable("invalid AtomType!");
+  case eAtomTypeNULL: return "eAtomTypeNULL";
+  case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
+  case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
+  case eAtomTypeTag: return "eAtomTypeTag";
+  case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
+  case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
+  } 
+}
+
+// The general case would need to have a less hard coded size for the
+// length of the HeaderData, however, if we're constructing based on a
+// single Atom then we know it will always be: 4 + 4 + 2 + 2.
+DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) :
+  Header(12),
+  HeaderData(atom) {
+}
+
+void DwarfAccelTable::AddName(StringRef Name, DIE* die) {
+  // If the string is in the list already then add this die to the list
+  // otherwise add a new one.
+  DIEArray &DIEs = Entries[Name];
+  DIEs.push_back(die);
+}
+
+void DwarfAccelTable::ComputeBucketCount(void) {
+  // First get the number of unique hashes.
+  std::vector<uint32_t> uniques;
+  uniques.resize(Data.size());
+  for (size_t i = 0; i < Data.size(); ++i)
+    uniques[i] = Data[i]->HashValue;
+  std::sort(uniques.begin(), uniques.end());
+  std::vector<uint32_t>::iterator p =
+    std::unique(uniques.begin(), uniques.end());
+  uint32_t num = std::distance(uniques.begin(), p);
+
+  // Then compute the bucket size, minimum of 1 bucket.
+  if (num > 1024) Header.bucket_count = num/4;
+  if (num > 16) Header.bucket_count = num/2;
+  else Header.bucket_count = num > 0 ? num : 1;
+
+  Header.hashes_count = num;
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+  // Create the individual hash data outputs.
+  for (StringMap<DIEArray>::const_iterator
+         EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+    struct HashData *Entry = new HashData((*EI).getKeyData());
+    for (DIEArray::const_iterator DI = (*EI).second.begin(),
+           DE = (*EI).second.end();
+         DI != DE; ++DI)
+      Entry->addOffset((*DI)->getOffset());
+    Data.push_back(Entry);
+  }
+
+  // Figure out how many buckets we need, then compute the bucket
+  // contents and the final ordering. We'll emit the hashes and offsets
+  // by doing a walk during the emission phase. We add temporary
+  // symbols to the data so that we can reference them during the offset
+  // later, we'll emit them when we emit the data.
+  ComputeBucketCount();
+
+  // Compute bucket contents and final ordering.
+  Buckets.resize(Header.bucket_count);
+  for (size_t i = 0; i < Data.size(); ++i) {
+    uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
+    Buckets[bucket].push_back(Data[i]);
+    Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+  }
+}
+
+// Emits the header for the table via the AsmPrinter.
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
+  Asm->OutStreamer.AddComment("Header Magic");
+  Asm->EmitInt32(Header.magic);
+  Asm->OutStreamer.AddComment("Header Version");
+  Asm->EmitInt16(Header.version);
+  Asm->OutStreamer.AddComment("Header Hash Function");
+  Asm->EmitInt16(Header.hash_function);
+  Asm->OutStreamer.AddComment("Header Bucket Count");
+  Asm->EmitInt32(Header.bucket_count);
+  Asm->OutStreamer.AddComment("Header Hash Count");
+  Asm->EmitInt32(Header.hashes_count);
+  Asm->OutStreamer.AddComment("Header Data Length");
+  Asm->EmitInt32(Header.header_data_len);
+  Asm->OutStreamer.AddComment("HeaderData Die Offset Base");
+  Asm->EmitInt32(HeaderData.die_offset_base);
+  Asm->OutStreamer.AddComment("HeaderData Atom Count");
+  Asm->EmitInt32(HeaderData.Atoms.size());
+  for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
+    Atom A = HeaderData.Atoms[i];
+    Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+    Asm->EmitInt16(A.type);
+    Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
+    Asm->EmitInt16(A.form);
+  }
+}
+
+// Walk through and emit the buckets for the table. This will look
+// like a list of numbers of how many elements are in each bucket.
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
+  unsigned index = 0;
+  for (size_t i = 0; i < Buckets.size(); ++i) {
+    Twine Comment = Twine("Bucket ") + Twine(i);
+    Asm->OutStreamer.AddComment(Comment);
+    if (Buckets[i].size() != 0)
+      Asm->EmitInt32(index);
+    else
+      Asm->EmitInt32(UINT32_MAX);
+    index += Buckets[i].size();
+  }
+}
+
+// Walk through the buckets and emit the individual hashes for each
+// bucket.
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+  for (size_t i = 0; i < Buckets.size(); ++i) {
+    for (HashList::const_iterator HI = Buckets[i].begin(),
+           HE = Buckets[i].end(); HI != HE; ++HI) {
+      Twine Comment = Twine("Hash in Bucket ") + Twine(i);
+      Asm->OutStreamer.AddComment(Comment);
+      Asm->EmitInt32((*HI)->HashValue);
+    } 
+  }
+}
+
+// Walk through the buckets and emit the individual offsets for each
+// element in each bucket. This is done via a symbol subtraction from the
+// beginning of the section. The non-section symbol will be output later
+// when we emit the actual data.
+void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+  for (size_t i = 0; i < Buckets.size(); ++i) {
+    for (HashList::const_iterator HI = Buckets[i].begin(),
+           HE = Buckets[i].end(); HI != HE; ++HI) {
+      Twine Comment = Twine("Offset in Bucket ") + Twine(i);
+      Asm->OutStreamer.AddComment(Comment);
+      MCContext &Context = Asm->OutStreamer.getContext();
+      const MCExpr *Sub =
+        MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
+                                MCSymbolRefExpr::Create(SecBegin, Context),
+                                Context);
+      Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0);
+    }
+  }
+}
+
+// Walk through the buckets and emit the full data for each element in
+// the bucket. For the string case emit the dies and the various offsets.
+// Terminate each HashData bucket with 0.
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
+  uint64_t PrevHash = UINT64_MAX;
+  for (size_t i = 0; i < Buckets.size(); ++i) {
+    for (HashList::const_iterator HI = Buckets[i].begin(),
+           HE = Buckets[i].end(); HI != HE; ++HI) {
+      // Remember to emit the label for our offset.
+      Asm->OutStreamer.EmitLabel((*HI)->Sym);
+      Asm->OutStreamer.AddComment((*HI)->Str);
+      Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
+                             D->getDwarfStrSectionSym());
+      Asm->OutStreamer.AddComment("Num DIEs");
+      Asm->EmitInt32((*HI)->DIEOffsets.size());
+      for (std::vector<uint32_t>::const_iterator
+             DI = (*HI)->DIEOffsets.begin(), DE = (*HI)->DIEOffsets.end();
+           DI != DE; ++DI) {
+        Asm->EmitInt32((*DI));
+      }
+      // Emit a 0 to terminate the data unless we have a hash collision.
+      if (PrevHash != (*HI)->HashValue)
+        Asm->EmitInt32(0);
+      PrevHash = (*HI)->HashValue;
+    }
+  }
+}
+
+// Emit the entire data structure to the output file.
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
+                           DwarfDebug *D) {
+  // Emit the header.
+  EmitHeader(Asm);
+
+  // Emit the buckets.
+  EmitBuckets(Asm);
+
+  // Emit the hashes.
+  EmitHashes(Asm);
+
+  // Emit the offsets.
+  EmitOffsets(Asm, SecBegin);
+
+  // Emit the hash data.
+  EmitData(Asm, D);
+}
+
+#ifndef NDEBUG
+void DwarfAccelTable::print(raw_ostream &O) {
+
+  Header.print(O);
+  HeaderData.print(O);
+
+  O << "Entries: \n";
+  for (StringMap<DIEArray>::const_iterator
+         EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+    O << "Name: " << (*EI).getKeyData() << "\n";
+    for (DIEArray::const_iterator DI = (*EI).second.begin(),
+           DE = (*EI).second.end();
+         DI != DE; ++DI)
+      (*DI)->print(O);
+  }
+
+  O << "Buckets and Hashes: \n";
+  for (size_t i = 0; i < Buckets.size(); ++i)
+    for (HashList::const_iterator HI = Buckets[i].begin(),
+           HE = Buckets[i].end(); HI != HE; ++HI)
+      (*HI)->print(O);
+
+  O << "Data: \n";
+    for (std::vector<HashData*>::const_iterator
+           DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
+      (*DI)->print(O);
+  
+
+}
+#endif
author	Eric Christopher <echristo@apple.com>	2011-11-07 09:18:42 +0000
committer	Eric Christopher <echristo@apple.com>	2011-11-07 09:18:42 +0000
commit	bcbd3a4637f33036d05833364e180f9dfaabb67c (patch)
tree	44a3d212cb1ef2ae0fc34b6b93a9a0656659bda8 /lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
parent	d8a8752d5b7f546fd7ebb295366a43b2b76afbd6 (diff)
download	external_llvm-bcbd3a4637f33036d05833364e180f9dfaabb67c.zip external_llvm-bcbd3a4637f33036d05833364e180f9dfaabb67c.tar.gz external_llvm-bcbd3a4637f33036d05833364e180f9dfaabb67c.tar.bz2