/*
 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
 * Copyright (C) 2010 Patrick Gansterer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#include "config.h"

#include "AtomicString.h"

#include "StringHash.h"
#include <wtf/HashSet.h>
#include <wtf/Threading.h>
#include <wtf/WTFThreadData.h>
#include <wtf/unicode/UTF8.h>

namespace WTF {

using namespace Unicode;

COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);

// Owns the set of StringImpl pointers that are currently marked atomic.
// An instance is registered on the current WTFThreadData together with the
// function used to tear it down.
class AtomicStringTable {
public:
    // Allocates a table and records it (and its destructor) in wtfThreadData().
    static AtomicStringTable* create()
    {
        AtomicStringTable* table = new AtomicStringTable;

        WTFThreadData& data = wtfThreadData();
        data.m_atomicStringTable = table;
        data.m_atomicStringTableDestructor = AtomicStringTable::destroy;

        return table;
    }

    HashSet<StringImpl*>& table()
    {
        return m_table;
    }

private:
    // Clears the atomic flag on every string still in the table, then frees the table.
    static void destroy(AtomicStringTable* table)
    {
        HashSet<StringImpl*>::iterator end = table->m_table.end();
        for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
            (*iter)->setIsAtomic(false);
        delete table;
    }

    HashSet<StringImpl*> m_table;
};

// Returns the string set held by wtfThreadData(), creating the table on first use.
static inline HashSet<StringImpl*>& stringTable()
{
    // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
    AtomicStringTable* table = wtfThreadData().atomicStringTable();
    if (UNLIKELY(!table))
        table = AtomicStringTable::create();
    return table->table();
}

// Looks up |value| in the string table through HashTranslator, inserting a
// newly created StringImpl when no equal entry exists.
template<typename T, typename HashTranslator>
static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
{
    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<T, HashTranslator>(value);

    // If the string is newly-translated, then we need to adopt it.
    // The boolean in the pair tells us if that is so.
    return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
}

// Hash translator for NUL-terminated C strings.
struct CStringTranslator {
    static unsigned hash(const char* c)
    {
        return StringHasher::computeHash(c);
    }

    // Returns true when |r| and the NUL-terminated string |s| hold the same
    // characters, comparing each byte of |s| as an unsigned value.
    static bool equal(StringImpl* r, const char* s)
    {
        unsigned length = r->length();
        const UChar* d = r->characters();
        for (unsigned i = 0; i != length; ++i) {
            unsigned char c = s[i];
            // A terminator before |length| means |s| is shorter than |r|.
            // Bail out here: |r| may itself contain U+0000 at this index, and
            // continuing would read past the end of |s|.
            if (!c || d[i] != c)
                return false;
        }
        return !s[length];
    }

    static void translate(StringImpl*& location, const char* const& c, unsigned hash)
    {
        location = StringImpl::create(c).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

// A string with no impl or no character buffer compares equal only to a null |b|.
bool operator==(const AtomicString& a, const char* b)
{
    StringImpl* impl = a.impl();
    if ((!impl || !impl->characters()) && !b)
        return true;
    if ((!impl || !impl->characters()) || !b)
        return false;
    return CStringTranslator::equal(impl, b);
}

// Returns 0 for a null pointer and the shared empty string for "".
PassRefPtr<StringImpl> AtomicString::add(const char* c)
{
    if (!c)
        return 0;
    if (!*c)
        return StringImpl::empty();

    return addToStringTable<const char*, CStringTranslator>(c);
}

struct UCharBuffer {
    const UChar* s;
    unsigned length;
};

// Compares the characters of |string| against |length| UChars at |characters|.
static inline bool equal(StringImpl* string, const UChar* characters, unsigned length)
{
    if (string->length() != length)
        return false;

    // FIXME: perhaps we should have a more abstract macro that indicates when
    // going 4 bytes at a time is unsafe
#if CPU(ARM) || CPU(SH4) || CPU(MIPS) || CPU(SPARC)
    const UChar* stringCharacters = string->characters();
    for (unsigned i = 0; i != length; ++i) {
        if (*stringCharacters++ != *characters++)
            return false;
    }
    return true;
#else
    /* Do it 4-bytes-at-a-time on architectures where it's safe */

    const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters());
    const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters);

    unsigned halfLength = length >> 1;
    for (unsigned i = 0; i != halfLength; ++i) {
        if (*stringCharacters++ != *bufferCharacters++)
            return false;
    }

    // An odd length leaves one trailing 16-bit unit to compare.
    if (length & 1 && *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters))
        return false;

    return true;
#endif
}

// A null AtomicString never equals a vector, not even an empty one.
bool operator==(const AtomicString& string, const Vector<UChar>& vector)
{
    return string.impl() && equal(string.impl(), vector.data(), vector.size());
}

// Hash translator for a (pointer, length) UTF-16 buffer.
struct UCharBufferTranslator {
    static unsigned hash(const UCharBuffer& buf)
    {
        return StringHasher::computeHash(buf.s, buf.length);
    }

    static bool equal(StringImpl* const& str, const UCharBuffer& buf)
    {
        return WTF::equal(str, buf.s, buf.length);
    }

    static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
    {
        location = StringImpl::create(buf.s, buf.length).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

struct HashAndCharacters {
    unsigned hash;
    const UChar* characters;
    unsigned length;
};

// Hash translator for a UTF-16 buffer whose hash the caller already computed.
struct HashAndCharactersTranslator {
    static unsigned hash(const HashAndCharacters& buffer)
    {
        ASSERT(buffer.hash == StringHasher::computeHash(buffer.characters, buffer.length));
        return buffer.hash;
    }

    static bool equal(StringImpl* const& string, const HashAndCharacters& buffer)
    {
        return WTF::equal(string, buffer.characters, buffer.length);
    }

    static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash)
    {
        location = StringImpl::create(buffer.characters, buffer.length).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

struct HashAndUTF8Characters {
    unsigned hash;
    const char* characters;
    unsigned length;
    unsigned utf16Length;
};

// Hash translator for a UTF-8 buffer carrying a precomputed hash and the
// length the text has once converted to UTF-16.
struct HashAndUTF8CharactersTranslator {
    static unsigned hash(const HashAndUTF8Characters& buffer)
    {
        return buffer.hash;
    }

    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
    {
        if (buffer.utf16Length != string->length())
            return false;

        const UChar* stringCharacters = string->characters();

        // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
        if (buffer.utf16Length != buffer.length)
            return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length);

        for (unsigned i = 0; i < buffer.length; ++i) {
            ASSERT(isASCII(buffer.characters[i]));
            if (stringCharacters[i] != buffer.characters[i])
                return false;
        }

        return true;
    }

    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
    {
        UChar* target;
        // leakRef() matches the other translators in this file.
        location = StringImpl::createUninitialized(buffer.utf16Length, target).leakRef();

        const char* source = buffer.characters;
        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
            ASSERT_NOT_REACHED();

        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

// Returns 0 for a null pointer and the shared empty string for a zero length.
PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
{
    if (!s)
        return 0;

    if (!length)
        return StringImpl::empty();

    UCharBuffer buffer = { s, length };
    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
}

// Same as add(s, length), but reuses a hash the caller already computed.
PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
{
    ASSERT(s);
    ASSERT(existingHash);

    if (!length)
        return StringImpl::empty();

    HashAndCharacters buffer = { existingHash, s, length };
    return addToStringTable<HashAndCharacters, HashAndCharactersTranslator>(buffer);
}

// Adds a zero-terminated UTF-16 string; returns 0 for a null pointer.
PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
{
    if (!s)
        return 0;

    // Unsigned to match UCharBuffer::length; an int here would be narrowed
    // by the brace initialization below.
    unsigned length = 0;
    while (s[length] != UChar(0))
        ++length;

    if (!length)
        return StringImpl::empty();

    UCharBuffer buffer = { s, length };
    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
}

// Returns |r| itself when it is null or already atomic; otherwise returns the
// table's entry for its contents, marking |r| atomic if it became that entry.
PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
{
    if (!r || r->isAtomic())
        return r;

    if (!r->length())
        return StringImpl::empty();

    StringImpl* result = *stringTable().add(r).first;
    if (result == r)
        r->setIsAtomic(true);
    return result;
}

// Looks up an existing table entry without inserting; returns 0 when absent.
AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash)
{
    ASSERT(s);
    ASSERT(existingHash);

    if (!length)
        return static_cast<AtomicStringImpl*>(StringImpl::empty());

    HashAndCharacters buffer = { existingHash, s, length };
    // Fetch the table once instead of once for find() and again for end().
    HashSet<StringImpl*>& table = stringTable();
    HashSet<StringImpl*>::iterator iterator = table.find<HashAndCharacters, HashAndCharactersTranslator>(buffer);
    if (iterator == table.end())
        return 0;
    return static_cast<AtomicStringImpl*>(*iterator);
}

// Removes |r| from the string table.
void AtomicString::remove(StringImpl* r)
{
    stringTable().remove(r);
}

// Returns *this when the string is null, already lower case, or when
// lower() hands back the same impl.
AtomicString AtomicString::lower() const
{
    // Note: This is a hot function in the Dromaeo benchmark.
    StringImpl* impl = this->impl();
    if (UNLIKELY(!impl) || impl->isLower())
        return *this;
    RefPtr<StringImpl> newImpl = impl->lower();
    if (LIKELY(newImpl == impl))
        return *this;
    return AtomicString(newImpl);
}

// Builds an AtomicString from the UTF-8 range [charactersStart, charactersEnd).
// Returns nullAtom when calculateStringHashAndLengthFromUTF8 yields a zero hash.
AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
{
    HashAndUTF8Characters buffer;
    buffer.characters = charactersStart;
    buffer.hash = calculateStringHashAndLengthFromUTF8(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);

    if (!buffer.hash)
        return nullAtom;

    AtomicString atomicString;
    atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
    return atomicString;
}

} // namespace WTF