From 23d7b3611759c7fb3a853dfce3ee3d43ef5ca67d Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 29 Oct 2006 23:42:03 +0000 Subject: add a highly efficient hash table that is specialized for mapping C strings to some other type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@31286 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/CStringMap.cpp | 134 +++++++++++++++++++++++++++++++++++++++++++++ lib/Support/StringMap.cpp | 134 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 lib/Support/CStringMap.cpp create mode 100644 lib/Support/StringMap.cpp (limited to 'lib/Support') diff --git a/lib/Support/CStringMap.cpp b/lib/Support/CStringMap.cpp new file mode 100644 index 0000000..3229bfe --- /dev/null +++ b/lib/Support/CStringMap.cpp @@ -0,0 +1,134 @@ +//===--- CStringMap.cpp - CString Hash table map implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CStringMap class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/CStringMap.h" +#include +using namespace llvm; + +CStringMapVisitor::~CStringMapVisitor() { +} + +CStringMapImpl::CStringMapImpl(unsigned InitSize, unsigned itemSize) { + assert((InitSize & (InitSize-1)) == 0 && + "Init Size must be a power of 2 or zero!"); + NumBuckets = InitSize ? InitSize : 512; + ItemSize = itemSize; + NumItems = 0; + + TheTable = new ItemBucket[NumBuckets](); + memset(TheTable, 0, NumBuckets*sizeof(ItemBucket)); +} + + +/// HashString - Compute a hash code for the specified string. +/// +static unsigned HashString(const char *Start, const char *End) { + unsigned int Result = 0; + // Perl hash function. + while (Start != End) + Result = Result * 33 + *Start++; + Result = Result + (Result >> 5); + return Result; +} + +/// LookupBucketFor - Look up the bucket that the specified string should end +/// up in. If it already exists as a key in the map, the Item pointer for the +/// specified bucket will be non-null. Otherwise, it will be null. In either +/// case, the FullHashValue field of the bucket will be set to the hash value +/// of the string. +unsigned CStringMapImpl::LookupBucketFor(const char *NameStart, + const char *NameEnd) { + unsigned HTSize = NumBuckets; + unsigned FullHashValue = HashString(NameStart, NameEnd); + unsigned BucketNo = FullHashValue & (HTSize-1); + + unsigned ProbeAmt = 1; + while (1) { + ItemBucket &Bucket = TheTable[BucketNo]; + void *BucketItem = Bucket.Item; + // If we found an empty bucket, this key isn't in the table yet, return it. + if (BucketItem == 0) { + Bucket.FullHashValue = FullHashValue; + return BucketNo; + } + + // If the full hash value matches, check deeply for a match. The common + // case here is that we are only looking at the buckets (for item info + // being non-null and for the full hash value) not at the items. This + // is important for cache locality. + if (Bucket.FullHashValue == FullHashValue) { + // Do the comparison like this because NameStart isn't necessarily + // null-terminated! + char *ItemStr = (char*)BucketItem+ItemSize; + if (strlen(ItemStr) == unsigned(NameEnd-NameStart) && + memcmp(ItemStr, NameStart, (NameEnd-NameStart)) == 0) { + // We found a match! + return BucketNo; + } + } + + // Okay, we didn't find the item. Probe to the next bucket. + BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); + + // Use quadratic probing, it has fewer clumping artifacts than linear + // probing and has good cache behavior in the common case. + ++ProbeAmt; + } +} + +/// RehashTable - Grow the table, redistributing values into the buckets with +/// the appropriate mod-of-hashtable-size. +void CStringMapImpl::RehashTable() { + unsigned NewSize = NumBuckets*2; + ItemBucket *NewTableArray = new ItemBucket[NewSize](); + memset(NewTableArray, 0, NewSize*sizeof(ItemBucket)); + + // Rehash all the items into their new buckets. Luckily :) we already have + // the hash values available, so we don't have to rehash any strings. + for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { + if (IB->Item) { + // Fast case, bucket available. + unsigned FullHash = IB->FullHashValue; + unsigned NewBucket = FullHash & (NewSize-1); + if (NewTableArray[NewBucket].Item == 0) { + NewTableArray[FullHash & (NewSize-1)].Item = IB->Item; + NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash; + continue; + } + + unsigned ProbeSize = 1; + do { + NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); + } while (NewTableArray[NewBucket].Item); + + // Finally found a slot. Fill it in. + NewTableArray[NewBucket].Item = IB->Item; + NewTableArray[NewBucket].FullHashValue = FullHash; + } + } + + delete[] TheTable; + + TheTable = NewTableArray; + NumBuckets = NewSize; +} + + +/// VisitEntries - This method walks through all of the items, +/// invoking Visitor.Visit for each of them. +void CStringMapImpl::VisitEntries(const CStringMapVisitor &Visitor) const { + for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { + if (void *Id = IB->Item) + Visitor.Visit((char*)Id + ItemSize, Id); + } +} diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp new file mode 100644 index 0000000..3229bfe --- /dev/null +++ b/lib/Support/StringMap.cpp @@ -0,0 +1,134 @@ +//===--- CStringMap.cpp - CString Hash table map implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CStringMap class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/CStringMap.h" +#include +using namespace llvm; + +CStringMapVisitor::~CStringMapVisitor() { +} + +CStringMapImpl::CStringMapImpl(unsigned InitSize, unsigned itemSize) { + assert((InitSize & (InitSize-1)) == 0 && + "Init Size must be a power of 2 or zero!"); + NumBuckets = InitSize ? InitSize : 512; + ItemSize = itemSize; + NumItems = 0; + + TheTable = new ItemBucket[NumBuckets](); + memset(TheTable, 0, NumBuckets*sizeof(ItemBucket)); +} + + +/// HashString - Compute a hash code for the specified string. +/// +static unsigned HashString(const char *Start, const char *End) { + unsigned int Result = 0; + // Perl hash function. + while (Start != End) + Result = Result * 33 + *Start++; + Result = Result + (Result >> 5); + return Result; +} + +/// LookupBucketFor - Look up the bucket that the specified string should end +/// up in. If it already exists as a key in the map, the Item pointer for the +/// specified bucket will be non-null. Otherwise, it will be null. In either +/// case, the FullHashValue field of the bucket will be set to the hash value +/// of the string. +unsigned CStringMapImpl::LookupBucketFor(const char *NameStart, + const char *NameEnd) { + unsigned HTSize = NumBuckets; + unsigned FullHashValue = HashString(NameStart, NameEnd); + unsigned BucketNo = FullHashValue & (HTSize-1); + + unsigned ProbeAmt = 1; + while (1) { + ItemBucket &Bucket = TheTable[BucketNo]; + void *BucketItem = Bucket.Item; + // If we found an empty bucket, this key isn't in the table yet, return it. + if (BucketItem == 0) { + Bucket.FullHashValue = FullHashValue; + return BucketNo; + } + + // If the full hash value matches, check deeply for a match. The common + // case here is that we are only looking at the buckets (for item info + // being non-null and for the full hash value) not at the items. This + // is important for cache locality. + if (Bucket.FullHashValue == FullHashValue) { + // Do the comparison like this because NameStart isn't necessarily + // null-terminated! + char *ItemStr = (char*)BucketItem+ItemSize; + if (strlen(ItemStr) == unsigned(NameEnd-NameStart) && + memcmp(ItemStr, NameStart, (NameEnd-NameStart)) == 0) { + // We found a match! + return BucketNo; + } + } + + // Okay, we didn't find the item. Probe to the next bucket. + BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); + + // Use quadratic probing, it has fewer clumping artifacts than linear + // probing and has good cache behavior in the common case. + ++ProbeAmt; + } +} + +/// RehashTable - Grow the table, redistributing values into the buckets with +/// the appropriate mod-of-hashtable-size. +void CStringMapImpl::RehashTable() { + unsigned NewSize = NumBuckets*2; + ItemBucket *NewTableArray = new ItemBucket[NewSize](); + memset(NewTableArray, 0, NewSize*sizeof(ItemBucket)); + + // Rehash all the items into their new buckets. Luckily :) we already have + // the hash values available, so we don't have to rehash any strings. + for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { + if (IB->Item) { + // Fast case, bucket available. + unsigned FullHash = IB->FullHashValue; + unsigned NewBucket = FullHash & (NewSize-1); + if (NewTableArray[NewBucket].Item == 0) { + NewTableArray[FullHash & (NewSize-1)].Item = IB->Item; + NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash; + continue; + } + + unsigned ProbeSize = 1; + do { + NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); + } while (NewTableArray[NewBucket].Item); + + // Finally found a slot. Fill it in. + NewTableArray[NewBucket].Item = IB->Item; + NewTableArray[NewBucket].FullHashValue = FullHash; + } + } + + delete[] TheTable; + + TheTable = NewTableArray; + NumBuckets = NewSize; +} + + +/// VisitEntries - This method walks through all of the items, +/// invoking Visitor.Visit for each of them. +void CStringMapImpl::VisitEntries(const CStringMapVisitor &Visitor) const { + for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { + if (void *Id = IB->Item) + Visitor.Visit((char*)Id + ItemSize, Id); + } +} -- cgit v1.1