diff options
author | Steve Block <steveblock@google.com> | 2011-05-13 06:44:40 -0700 |
---|---|---|
committer | Android (Google) Code Review <android-gerrit@google.com> | 2011-05-13 06:44:40 -0700 |
commit | 08014c20784f3db5df3a89b73cce46037b77eb59 (patch) | |
tree | 47749210d31e19e6e2f64036fa8fae2ad693476f /Source/JavaScriptCore/wtf/text | |
parent | 860220379e56aeb66424861ad602b07ee22b4055 (diff) | |
parent | 4c3661f7918f8b3f139f824efb7855bedccb4c94 (diff) | |
download | external_webkit-08014c20784f3db5df3a89b73cce46037b77eb59.zip external_webkit-08014c20784f3db5df3a89b73cce46037b77eb59.tar.gz external_webkit-08014c20784f3db5df3a89b73cce46037b77eb59.tar.bz2 |
Merge changes Ide388898,Ic49f367c,I1158a808,Iacb6ca5d,I2100dd3a,I5c1abe54,Ib0ef9902,I31dbc523,I570314b3
* changes:
Merge WebKit at r75315: Update WebKit version
Merge WebKit at r75315: Add FrameLoaderClient PageCache stubs
Merge WebKit at r75315: Stub out AXObjectCache::remove()
Merge WebKit at r75315: Fix ImageBuffer
Merge WebKit at r75315: Fix PluginData::initPlugins()
Merge WebKit at r75315: Fix conflicts
Merge WebKit at r75315: Fix Makefiles
Merge WebKit at r75315: Move Android-specific WebCore files to Source
Merge WebKit at r75315: Initial merge by git.
Diffstat (limited to 'Source/JavaScriptCore/wtf/text')
18 files changed, 5268 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/wtf/text/AtomicString.cpp b/Source/JavaScriptCore/wtf/text/AtomicString.cpp new file mode 100644 index 0000000..93ad21d --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/AtomicString.cpp @@ -0,0 +1,389 @@ +/* + * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include "config.h" + +#include "AtomicString.h" + +#include "StringHash.h" +#include <wtf/HashSet.h> +#include <wtf/Threading.h> +#include <wtf/WTFThreadData.h> +#include <wtf/unicode/UTF8.h> + +namespace WTF { + +using namespace Unicode; + +COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size); + +class AtomicStringTable { +public: + static AtomicStringTable* create() + { + AtomicStringTable* table = new AtomicStringTable; + + WTFThreadData& data = wtfThreadData(); + data.m_atomicStringTable = table; + data.m_atomicStringTableDestructor = AtomicStringTable::destroy; + + return table; + } + + HashSet<StringImpl*>& table() + { + return m_table; + } + +private: + static void destroy(AtomicStringTable* table) + { + HashSet<StringImpl*>::iterator end = table->m_table.end(); + for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) + (*iter)->setIsAtomic(false); + delete table; + } + + HashSet<StringImpl*> m_table; +}; + +static inline HashSet<StringImpl*>& stringTable() +{ + // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor). + AtomicStringTable* table = wtfThreadData().atomicStringTable(); + if (UNLIKELY(!table)) + table = AtomicStringTable::create(); + return table->table(); +} + +struct CStringTranslator { + static unsigned hash(const char* c) + { + return StringImpl::computeHash(c); + } + + static bool equal(StringImpl* r, const char* s) + { + int length = r->length(); + const UChar* d = r->characters(); + for (int i = 0; i != length; ++i) { + unsigned char c = s[i]; + if (d[i] != c) + return false; + } + return !s[length]; + } + + static void translate(StringImpl*& location, const char* const& c, unsigned hash) + { + location = StringImpl::create(c).leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +bool operator==(const AtomicString& a, const char* b) +{ + StringImpl* impl = a.impl(); + if ((!impl || !impl->characters()) && !b) + return true; + if ((!impl || !impl->characters()) || !b) + return false; + return CStringTranslator::equal(impl, b); +} + +PassRefPtr<StringImpl> AtomicString::add(const char* c) +{ + if (!c) + return 0; + if (!*c) + return StringImpl::empty(); + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<const char*, CStringTranslator>(c); + if (!addResult.second) + return *addResult.first; + return adoptRef(*addResult.first); +} + +struct UCharBuffer { + const UChar* s; + unsigned length; +}; + +static inline bool equal(StringImpl* string, const UChar* characters, unsigned length) +{ + if (string->length() != length) + return false; + + // FIXME: perhaps we should have a more abstract macro that indicates when + // going 4 bytes at a time is unsafe +#if CPU(ARM) || CPU(SH4) || CPU(MIPS) + const UChar* stringCharacters = string->characters(); + for (unsigned i = 0; i != length; ++i) { + if (*stringCharacters++ != *characters++) + return false; + } + return true; +#else + /* Do it 4-bytes-at-a-time on architectures where it's safe */ + + const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters()); + const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters); + + unsigned halfLength = length >> 1; + for (unsigned i = 0; i != halfLength; ++i) { + if (*stringCharacters++ != *bufferCharacters++) + return false; + } + + if (length & 1 && *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters)) + return false; + + return true; +#endif +} + +bool operator==(const AtomicString& string, const Vector<UChar>& vector) +{ + return string.impl() && equal(string.impl(), vector.data(), vector.size()); +} + +struct UCharBufferTranslator { + static unsigned hash(const UCharBuffer& buf) + { + return StringImpl::computeHash(buf.s, buf.length); + } + + static bool equal(StringImpl* const& str, const UCharBuffer& buf) + { + return WTF::equal(str, buf.s, buf.length); + } + + static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) + { + location = StringImpl::create(buf.s, buf.length).leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +struct HashAndCharacters { + unsigned hash; + const UChar* characters; + unsigned length; +}; + +struct HashAndCharactersTranslator { + static unsigned hash(const HashAndCharacters& buffer) + { + ASSERT(buffer.hash == StringImpl::computeHash(buffer.characters, buffer.length)); + return buffer.hash; + } + + static bool equal(StringImpl* const& string, const HashAndCharacters& buffer) + { + return WTF::equal(string, buffer.characters, buffer.length); + } + + static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash) + { + location = StringImpl::create(buffer.characters, buffer.length).leakRef(); + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +struct HashAndUTF8Characters { + unsigned hash; + const char* characters; + unsigned length; + unsigned utf16Length; +}; + +struct HashAndUTF8CharactersTranslator { + static unsigned hash(const HashAndUTF8Characters& buffer) + { + return buffer.hash; + } + + static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) + { + if (buffer.utf16Length != string->length()) + return false; + + const UChar* stringCharacters = string->characters(); + + // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. + if (buffer.utf16Length != buffer.length) + return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length); + + for (unsigned i = 0; i < buffer.length; ++i) { + ASSERT(isASCII(buffer.characters[i])); + if (stringCharacters[i] != buffer.characters[i]) + return false; + } + + return true; + } + + static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) + { + UChar* target; + location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef(); + + const char* source = buffer.characters; + if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK) + ASSERT_NOT_REACHED(); + + location->setHash(hash); + location->setIsAtomic(true); + } +}; + +PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) +{ + if (!s) + return 0; + + if (!length) + return StringImpl::empty(); + + UCharBuffer buf = { s, length }; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(buf); + + // If the string is newly-translated, then we need to adopt it. + // The boolean in the pair tells us if that is so. + return addResult.second ? adoptRef(*addResult.first) : *addResult.first; +} + +PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash) +{ + ASSERT(s); + ASSERT(existingHash); + + if (!length) + return StringImpl::empty(); + + HashAndCharacters buffer = { existingHash, s, length }; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndCharacters, HashAndCharactersTranslator>(buffer); + if (!addResult.second) + return *addResult.first; + return adoptRef(*addResult.first); +} + +PassRefPtr<StringImpl> AtomicString::add(const UChar* s) +{ + if (!s) + return 0; + + int length = 0; + while (s[length] != UChar(0)) + length++; + + if (!length) + return StringImpl::empty(); + + UCharBuffer buf = {s, length}; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(buf); + + // If the string is newly-translated, then we need to adopt it. + // The boolean in the pair tells us if that is so. + return addResult.second ? adoptRef(*addResult.first) : *addResult.first; +} + +PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r) +{ + if (!r || r->isAtomic()) + return r; + + if (!r->length()) + return StringImpl::empty(); + + StringImpl* result = *stringTable().add(r).first; + if (result == r) + r->setIsAtomic(true); + return result; +} + +AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash) +{ + ASSERT(s); + ASSERT(existingHash); + + if (!length) + return static_cast<AtomicStringImpl*>(StringImpl::empty()); + + HashAndCharacters buffer = { existingHash, s, length }; + HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer); + if (iterator == stringTable().end()) + return 0; + return static_cast<AtomicStringImpl*>(*iterator); +} + +void AtomicString::remove(StringImpl* r) +{ + stringTable().remove(r); +} + +AtomicString AtomicString::lower() const +{ + // Note: This is a hot function in the Dromaeo benchmark. + StringImpl* impl = this->impl(); + if (UNLIKELY(!impl)) + return *this; + RefPtr<StringImpl> newImpl = impl->lower(); + if (LIKELY(newImpl == impl)) + return *this; + return AtomicString(newImpl); +} + +AtomicString AtomicString::fromUTF8(const char* characters, size_t length) +{ + if (!characters) + return AtomicString(); + + if (!length) + return emptyAtom; + + HashAndUTF8Characters buffer; + buffer.characters = characters; + buffer.length = length; + buffer.hash = calculateStringHashFromUTF8(characters, characters + length, buffer.utf16Length); + + if (!buffer.hash) + return AtomicString(); + + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); + + // If the string is newly-translated, then we need to adopt it. + // The boolean in the pair tells us if that is so. + AtomicString atomicString; + atomicString.m_string = addResult.second ? adoptRef(*addResult.first) : *addResult.first; + return atomicString; +} + +AtomicString AtomicString::fromUTF8(const char* characters) +{ + if (!characters) + return AtomicString(); + return fromUTF8(characters, strlen(characters)); +} + +} // namespace WTF diff --git a/Source/JavaScriptCore/wtf/text/AtomicString.h b/Source/JavaScriptCore/wtf/text/AtomicString.h new file mode 100644 index 0000000..ab5b366 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/AtomicString.h @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2004, 2005, 2006, 2008 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef AtomicString_h +#define AtomicString_h + +#include "AtomicStringImpl.h" +#include "WTFString.h" + +// Define 'NO_IMPLICIT_ATOMICSTRING' before including this header, +// to disallow (expensive) implicit String-->AtomicString conversions. +#ifdef NO_IMPLICIT_ATOMICSTRING +#define ATOMICSTRING_CONVERSION explicit +#else +#define ATOMICSTRING_CONVERSION +#endif + +namespace WTF { + +struct AtomicStringHash; + +class AtomicString { +public: + static void init(); + + AtomicString() { } + AtomicString(const char* s) : m_string(add(s)) { } + AtomicString(const UChar* s, unsigned length) : m_string(add(s, length)) { } + AtomicString(const UChar* s, unsigned length, unsigned existingHash) : m_string(add(s, length, existingHash)) { } + AtomicString(const UChar* s) : m_string(add(s)) { } + ATOMICSTRING_CONVERSION AtomicString(StringImpl* imp) : m_string(add(imp)) { } + AtomicString(AtomicStringImpl* imp) : m_string(imp) { } + ATOMICSTRING_CONVERSION AtomicString(const String& s) : m_string(add(s.impl())) { } + + // Hash table deleted values, which are only constructed and never copied or destroyed. + AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { } + bool isHashTableDeletedValue() const { return m_string.isHashTableDeletedValue(); } + + static AtomicStringImpl* find(const UChar* s, unsigned length, unsigned existingHash); + + operator const String&() const { return m_string; } + const String& string() const { return m_string; }; + + AtomicStringImpl* impl() const { return static_cast<AtomicStringImpl *>(m_string.impl()); } + + const UChar* characters() const { return m_string.characters(); } + unsigned length() const { return m_string.length(); } + + UChar operator[](unsigned int i) const { return m_string[i]; } + + bool contains(UChar c) const { return m_string.contains(c); } + bool contains(const char* s, bool caseSensitive = true) const + { return m_string.contains(s, caseSensitive); } + bool contains(const String& s, bool caseSensitive = true) const + { return m_string.contains(s, caseSensitive); } + + size_t find(UChar c, size_t start = 0) const { return m_string.find(c, start); } + size_t find(const char* s, size_t start = 0, bool caseSentitive = true) const + { return m_string.find(s, start, caseSentitive); } + size_t find(const String& s, size_t start = 0, bool caseSentitive = true) const + { return m_string.find(s, start, caseSentitive); } + + bool startsWith(const String& s, bool caseSensitive = true) const + { return m_string.startsWith(s, caseSensitive); } + bool endsWith(const String& s, bool caseSensitive = true) const + { return m_string.endsWith(s, caseSensitive); } + + AtomicString lower() const; + AtomicString upper() const { return AtomicString(impl()->upper()); } + + int toInt(bool* ok = 0) const { return m_string.toInt(ok); } + double toDouble(bool* ok = 0) const { return m_string.toDouble(ok); } + float toFloat(bool* ok = 0) const { return m_string.toFloat(ok); } + bool percentage(int& p) const { return m_string.percentage(p); } + + bool isNull() const { return m_string.isNull(); } + bool isEmpty() const { return m_string.isEmpty(); } + + static void remove(StringImpl*); + +#if PLATFORM(CF) + AtomicString(CFStringRef s) : m_string(add(String(s).impl())) { } + CFStringRef createCFString() const { return m_string.createCFString(); } +#endif +#ifdef __OBJC__ + AtomicString(NSString* s) : m_string(add(String(s).impl())) { } + operator NSString*() const { return m_string; } +#endif +#if PLATFORM(QT) + AtomicString(const QString& s) : m_string(add(String(s).impl())) { } + operator QString() const { return m_string; } +#endif + + // AtomicString::fromUTF8 will return a null string if + // the input data contains invalid UTF-8 sequences. + static AtomicString fromUTF8(const char*, size_t); + static AtomicString fromUTF8(const char*); + +private: + String m_string; + + static PassRefPtr<StringImpl> add(const char*); + static PassRefPtr<StringImpl> add(const UChar*, unsigned length); + static PassRefPtr<StringImpl> add(const UChar*, unsigned length, unsigned existingHash); + static PassRefPtr<StringImpl> add(const UChar*); + ALWAYS_INLINE PassRefPtr<StringImpl> add(StringImpl* r) + { + if (!r || r->isAtomic()) + return r; + return addSlowCase(r); + } + static PassRefPtr<StringImpl> addSlowCase(StringImpl*); +}; + +inline bool operator==(const AtomicString& a, const AtomicString& b) { return a.impl() == b.impl(); } +bool operator==(const AtomicString& a, const char* b); +bool operator==(const AtomicString& a, const Vector<UChar>& b); +inline bool operator==(const AtomicString& a, const String& b) { return equal(a.impl(), b.impl()); } +inline bool operator==(const char* a, const AtomicString& b) { return b == a; } +inline bool operator==(const String& a, const AtomicString& b) { return equal(a.impl(), b.impl()); } +inline bool operator==(const Vector<UChar>& a, const AtomicString& b) { return b == a; } + +inline bool operator!=(const AtomicString& a, const AtomicString& b) { return a.impl() != b.impl(); } +inline bool operator!=(const AtomicString& a, const char *b) { return !(a == b); } +inline bool operator!=(const AtomicString& a, const String& b) { return !equal(a.impl(), b.impl()); } +inline bool operator!=(const AtomicString& a, const Vector<UChar>& b) { return !(a == b); } +inline bool operator!=(const char* a, const AtomicString& b) { return !(b == a); } +inline bool operator!=(const String& a, const AtomicString& b) { return !equal(a.impl(), b.impl()); } +inline bool operator!=(const Vector<UChar>& a, const AtomicString& b) { return !(a == b); } + +inline bool equalIgnoringCase(const AtomicString& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); } +inline bool equalIgnoringCase(const AtomicString& a, const char* b) { return equalIgnoringCase(a.impl(), b); } +inline bool equalIgnoringCase(const AtomicString& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); } +inline bool equalIgnoringCase(const char* a, const AtomicString& b) { return equalIgnoringCase(a, b.impl()); } +inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); } + +// Define external global variables for the commonly used atomic strings. +// These are only usable from the main thread. +#ifndef ATOMICSTRING_HIDE_GLOBALS + extern const JS_EXPORTDATA AtomicString nullAtom; + extern const JS_EXPORTDATA AtomicString emptyAtom; + extern const JS_EXPORTDATA AtomicString textAtom; + extern const JS_EXPORTDATA AtomicString commentAtom; + extern const JS_EXPORTDATA AtomicString starAtom; + extern const JS_EXPORTDATA AtomicString xmlAtom; + extern const JS_EXPORTDATA AtomicString xmlnsAtom; +#endif + + // AtomicStringHash is the default hash for AtomicString + template<typename T> struct DefaultHash; + template<> struct DefaultHash<AtomicString> { + typedef AtomicStringHash Hash; + }; + +} // namespace WTF + +#ifndef ATOMICSTRING_HIDE_GLOBALS +using WTF::AtomicString; +using WTF::nullAtom; +using WTF::emptyAtom; +using WTF::textAtom; +using WTF::commentAtom; +using WTF::starAtom; +using WTF::xmlAtom; +using WTF::xmlnsAtom; +#endif + +#endif // AtomicString_h diff --git a/Source/JavaScriptCore/wtf/text/AtomicStringHash.h b/Source/JavaScriptCore/wtf/text/AtomicStringHash.h new file mode 100644 index 0000000..f6e4ad1 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/AtomicStringHash.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AtomicStringHash_h +#define AtomicStringHash_h + +#include <wtf/text/AtomicString.h> +#include <wtf/HashTraits.h> + +namespace WTF { + + struct AtomicStringHash { + static unsigned hash(const AtomicString& key) + { + return key.impl()->existingHash(); + } + + static bool equal(const AtomicString& a, const AtomicString& b) + { + return a == b; + } + + static const bool safeToCompareToEmptyOrDeleted = false; + }; + + // AtomicStringHash is the default hash for AtomicString + template<> struct HashTraits<WTF::AtomicString> : GenericHashTraits<WTF::AtomicString> { + static const bool emptyValueIsZero = true; + static void constructDeletedValue(WTF::AtomicString& slot) { new (&slot) WTF::AtomicString(HashTableDeletedValue); } + static bool isDeletedValue(const WTF::AtomicString& slot) { return slot.isHashTableDeletedValue(); } + }; + +} + +using WTF::AtomicStringHash; + +#endif diff --git a/Source/JavaScriptCore/wtf/text/AtomicStringImpl.h b/Source/JavaScriptCore/wtf/text/AtomicStringImpl.h new file mode 100644 index 0000000..3f0c376 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/AtomicStringImpl.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2006 Apple Computer, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef AtomicStringImpl_h +#define AtomicStringImpl_h + +#include "StringImpl.h" + +namespace WTF { + +class AtomicStringImpl : public StringImpl +{ +public: + AtomicStringImpl() : StringImpl(0) {} +}; + +} + +using WTF::AtomicStringImpl; + +#endif diff --git a/Source/JavaScriptCore/wtf/text/CString.cpp b/Source/JavaScriptCore/wtf/text/CString.cpp new file mode 100644 index 0000000..981d77a --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/CString.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2003, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "config.h" +#include "CString.h" + +using namespace std; + +namespace WTF { + +CString::CString(const char* str) +{ + if (!str) + return; + + init(str, strlen(str)); +} + +CString::CString(const char* str, size_t length) +{ + init(str, length); +} + +void CString::init(const char* str, size_t length) +{ + if (!str) + return; + + // We need to be sure we can add 1 to length without overflowing. + // Since the passed-in length is the length of an actual existing + // string, and we know the string doesn't occupy the entire address + // space, we can assert here and there's no need for a runtime check. + ASSERT(length < numeric_limits<size_t>::max()); + + m_buffer = CStringBuffer::create(length + 1); + memcpy(m_buffer->mutableData(), str, length); + m_buffer->mutableData()[length] = '\0'; +} + +char* CString::mutableData() +{ + copyBufferIfNeeded(); + if (!m_buffer) + return 0; + return m_buffer->mutableData(); +} + +CString CString::newUninitialized(size_t length, char*& characterBuffer) +{ + if (length >= numeric_limits<size_t>::max()) + CRASH(); + + CString result; + result.m_buffer = CStringBuffer::create(length + 1); + char* bytes = result.m_buffer->mutableData(); + bytes[length] = '\0'; + characterBuffer = bytes; + return result; +} + +void CString::copyBufferIfNeeded() +{ + if (!m_buffer || m_buffer->hasOneRef()) + return; + + RefPtr<CStringBuffer> buffer = m_buffer.release(); + size_t length = buffer->length(); + m_buffer = CStringBuffer::create(length); + memcpy(m_buffer->mutableData(), buffer->data(), length); +} + +bool operator==(const CString& a, const CString& b) +{ + if (a.isNull() != b.isNull()) + return false; + if (a.length() != b.length()) + return false; + return !strncmp(a.data(), b.data(), min(a.length(), b.length())); +} + +} // namespace WTF diff --git a/Source/JavaScriptCore/wtf/text/CString.h b/Source/JavaScriptCore/wtf/text/CString.h new file mode 100644 index 0000000..343a7a5 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/CString.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2003, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CString_h +#define CString_h + +#include "PassRefPtr.h" +#include "RefCounted.h" +#include "Vector.h" + +namespace WTF { + +class CStringBuffer : public RefCounted<CStringBuffer> { +public: + const char* data() { return m_vector.data(); } + size_t length() { return m_vector.size(); } + +private: + friend class CString; + + static PassRefPtr<CStringBuffer> create(size_t length) { return adoptRef(new CStringBuffer(length)); } + CStringBuffer(size_t length) : m_vector(length) { } + char* mutableData() { return m_vector.data(); } + + Vector<char> m_vector; +}; + +// A container for a null-terminated char array supporting copy-on-write +// assignment. The contained char array may be null. +class CString { +public: + CString() { } + CString(const char*); + CString(const char*, size_t length); + CString(CStringBuffer* buffer) : m_buffer(buffer) { } + static CString newUninitialized(size_t length, char*& characterBuffer); + + const char* data() const + { + return m_buffer ? m_buffer->data() : 0; + } + char* mutableData(); + size_t length() const + { + return m_buffer ? m_buffer->length() - 1 : 0; + } + + bool isNull() const { return !m_buffer; } + + CStringBuffer* buffer() const { return m_buffer.get(); } + +private: + void copyBufferIfNeeded(); + void init(const char*, size_t length); + RefPtr<CStringBuffer> m_buffer; +}; + +bool operator==(const CString& a, const CString& b); +inline bool operator!=(const CString& a, const CString& b) { return !(a == b); } + +} // namespace WTF + +using WTF::CString; + +#endif // CString_h diff --git a/Source/JavaScriptCore/wtf/text/StringBuffer.h b/Source/JavaScriptCore/wtf/text/StringBuffer.h new file mode 100644 index 0000000..a546bf3 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringBuffer.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2008, 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef StringBuffer_h +#define StringBuffer_h + +#include <wtf/Assertions.h> +#include <wtf/Noncopyable.h> +#include <wtf/unicode/Unicode.h> +#include <limits> + +namespace WTF { + +class StringBuffer : public Noncopyable { +public: + explicit StringBuffer(unsigned length) + : m_length(length) + { + if (m_length > std::numeric_limits<unsigned>::max() / sizeof(UChar)) + CRASH(); + m_data = static_cast<UChar*>(fastMalloc(m_length * sizeof(UChar))); + } + + ~StringBuffer() + { + fastFree(m_data); + } + + void shrink(unsigned newLength) + { + ASSERT(newLength <= m_length); + m_length = newLength; + } + + void resize(unsigned newLength) + { + if (newLength > m_length) { + if (newLength > std::numeric_limits<unsigned>::max() / sizeof(UChar)) + CRASH(); + m_data = static_cast<UChar*>(fastRealloc(m_data, newLength * sizeof(UChar))); + } + m_length = newLength; + } + + unsigned length() const { return m_length; } + UChar* characters() { return m_data; } + + UChar& operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; } + + UChar* release() { UChar* data = m_data; m_data = 0; return data; } + +private: + unsigned m_length; + UChar* m_data; +}; + +} // namespace WTF + +using WTF::StringBuffer; + +#endif // StringBuffer_h diff --git a/Source/JavaScriptCore/wtf/text/StringBuilder.cpp b/Source/JavaScriptCore/wtf/text/StringBuilder.cpp new file mode 100644 index 0000000..dfc9ff3 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringBuilder.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "StringBuilder.h" + +#include "WTFString.h" + +namespace WTF { + +void StringBuilder::reifyString() +{ + // Check if the string already exists. + if (!m_string.isNull()) { + ASSERT(m_string.length() == m_length); + return; + } + + // Check for empty. + if (!m_length) { + m_string = StringImpl::empty(); + return; + } + + // Must be valid in the buffer, take a substring (unless string fills the buffer). + ASSERT(m_buffer && m_length <= m_buffer->length()); + m_string = (m_length == m_buffer->length()) + ? m_buffer.get() + : StringImpl::create(m_buffer, 0, m_length); +} + +void StringBuilder::resize(unsigned newSize) +{ + // Check newSize < m_length, hence m_length > 0. + ASSERT(newSize <= m_length); + if (newSize == m_length) + return; + ASSERT(m_length); + + // If there is a buffer, we only need to duplicate it if it has more than one ref. + if (m_buffer) { + if (!m_buffer->hasOneRef()) + allocateBuffer(m_buffer->characters(), m_buffer->length()); + m_length = newSize; + m_string = String(); + return; + } + + // Since m_length && !m_buffer, the string must be valid in m_string, and m_string.length() > 0. + ASSERT(!m_string.isEmpty()); + ASSERT(m_length == m_string.length()); + ASSERT(newSize < m_string.length()); + m_length = newSize; + m_string = StringImpl::create(m_string.impl(), 0, newSize); +} + +void StringBuilder::reserveCapacity(unsigned newCapacity) +{ + if (m_buffer) { + // If there is already a buffer, then grow if necessary. + if (newCapacity > m_buffer->length()) + allocateBuffer(m_buffer->characters(), newCapacity); + } else { + // Grow the string, if necessary. + if (newCapacity > m_length) + allocateBuffer(m_string.characters(), newCapacity); + } +} + +// Allocate a new buffer, copying in currentCharacters (these may come from either m_string +// or m_buffer, neither will be reassigned until the copy has completed). +void StringBuilder::allocateBuffer(const UChar* currentCharacters, unsigned requiredLength) +{ + // Copy the existing data into a new buffer, set result to point to the end of the existing data. + RefPtr<StringImpl> buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters); + memcpy(m_bufferCharacters, currentCharacters, static_cast<size_t>(m_length) * sizeof(UChar)); // This can't overflow. + + // Update the builder state. + m_buffer = buffer.release(); + m_string = String(); +} + +// Make 'length' additional capacity be available in m_buffer, update m_string & m_length, +// return a pointer to the newly allocated storage. +UChar* StringBuilder::appendUninitialized(unsigned length) +{ + ASSERT(length); + + // Calcuate the new size of the builder after appending. + unsigned requiredLength = length + m_length; + if (requiredLength < length) + CRASH(); + + if (m_buffer) { + // If the buffer is valid it must be at least as long as the current builder contents! + ASSERT(m_buffer->length() >= m_length); + + // Check if the buffer already has sufficient capacity. + if (requiredLength <= m_buffer->length()) { + unsigned currentLength = m_length; + m_string = String(); + m_length = requiredLength; + return m_bufferCharacters + currentLength; + } + + // We need to realloc the buffer. + allocateBuffer(m_buffer->characters(), std::max(requiredLength, m_buffer->length() * 2)); + } else { + ASSERT(m_string.length() == m_length); + allocateBuffer(m_string.characters(), std::max(requiredLength, requiredLength * 2)); + } + + UChar* result = m_bufferCharacters + m_length; + m_length = requiredLength; + return result; +} + +void StringBuilder::append(const UChar* characters, unsigned length) +{ + if (!length) + return; + ASSERT(characters); + + memcpy(appendUninitialized(length), characters, static_cast<size_t>(length) * 2); +} + +void StringBuilder::append(const char* characters, unsigned length) +{ + if (!length) + return; + ASSERT(characters); + + UChar* dest = appendUninitialized(length); + const char* end = characters + length; + while (characters < end) + *(dest++) = *(const unsigned char*)(characters++); +} + +void StringBuilder::shrinkToFit() +{ + // If the buffer is at least 80% full, don't bother copying. Need to tune this heuristic! + if (m_buffer && m_buffer->length() > (m_length + (m_length >> 2))) { + UChar* result; + m_string = StringImpl::createUninitialized(m_length, result); + memcpy(result, m_buffer->characters(), static_cast<size_t>(m_length) * 2); // This can't overflow. + m_buffer = 0; + } +} + +} // namespace WTF diff --git a/Source/JavaScriptCore/wtf/text/StringBuilder.h b/Source/JavaScriptCore/wtf/text/StringBuilder.h new file mode 100644 index 0000000..f10af64 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringBuilder.h @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef StringBuilder_h +#define StringBuilder_h + +#include <wtf/Vector.h> +#include <wtf/text/WTFString.h> + +namespace WTF { + +class StringBuilder { +public: + StringBuilder() + : m_length(0) + { + } + + void append(const UChar*, unsigned); + void append(const char*, unsigned); + + void append(const String& string) + { + // If we're appending to an empty string, and there is not buffer + // (in case reserveCapacity has been called) then just retain the + // string. + if (!m_length && !m_buffer) { + m_string = string; + m_length = string.length(); + return; + } + append(string.characters(), string.length()); + } + + void append(const char* characters) + { + if (characters) + append(characters, strlen(characters)); + } + + void append(UChar c) + { + if (m_buffer && m_length < m_buffer->length() && m_string.isNull()) + m_bufferCharacters[m_length++] = c; + else + append(&c, 1); + } + + void append(char c) + { + if (m_buffer && m_length < m_buffer->length() && m_string.isNull()) + m_bufferCharacters[m_length++] = (unsigned char)c; + else + append(&c, 1); + } + + String toString() + { + if (m_string.isNull()) { + shrinkToFit(); + reifyString(); + } + return m_string; + } + + String toStringPreserveCapacity() + { + if (m_string.isNull()) + reifyString(); + return m_string; + } + + unsigned length() const + { + return m_length; + } + + bool isEmpty() const { return !length(); } + + void reserveCapacity(unsigned newCapacity); + + void resize(unsigned newSize); + + void shrinkToFit(); + + UChar operator[](unsigned i) const + { + ASSERT(i < m_length); + if (!m_string.isNull()) + return m_string[i]; + ASSERT(m_buffer); + return m_buffer->characters()[i]; + } + + void clear() + { + m_length = 0; + m_string = String(); + m_buffer = 0; + } + +private: + void allocateBuffer(const UChar* currentCharacters, unsigned requiredLength); + UChar* appendUninitialized(unsigned length); + void reifyString(); + + unsigned m_length; + String m_string; + RefPtr<StringImpl> m_buffer; + UChar* m_bufferCharacters; +}; + +} // namespace WTF + +using WTF::StringBuilder; + +#endif // StringBuilder_h diff --git a/Source/JavaScriptCore/wtf/text/StringConcatenate.h b/Source/JavaScriptCore/wtf/text/StringConcatenate.h new file mode 100644 index 0000000..92a2d06 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringConcatenate.h @@ -0,0 +1,493 @@ +/* + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef StringConcatenate_h +#define StringConcatenate_h + +#include <wtf/text/WTFString.h> + +namespace WTF { + +template<typename StringType> +class StringTypeAdapter { +}; + +template<> +class StringTypeAdapter<char> { +public: + StringTypeAdapter<char>(char buffer) + : m_buffer(buffer) + { + } + + unsigned length() { return 1; } + void writeTo(UChar* destination) { *destination = m_buffer; } + +private: + unsigned char m_buffer; +}; + +template<> +class StringTypeAdapter<UChar> { +public: + StringTypeAdapter<UChar>(UChar buffer) + : m_buffer(buffer) + { + } + + unsigned length() { return 1; } + void writeTo(UChar* destination) { *destination = m_buffer; } + +private: + UChar m_buffer; +}; + +template<> +class StringTypeAdapter<char*> { +public: + StringTypeAdapter<char*>(char* buffer) + : m_buffer(buffer) + , m_length(strlen(buffer)) + { + } + + unsigned length() { return m_length; } + + void writeTo(UChar* destination) + { + for (unsigned i = 0; i < m_length; ++i) { + unsigned char c = m_buffer[i]; + destination[i] = c; + } + } + +private: + const char* m_buffer; + unsigned m_length; +}; + +template<> +class StringTypeAdapter<const char*> { +public: + StringTypeAdapter<const char*>(const char* buffer) + : m_buffer(buffer) + , m_length(strlen(buffer)) + { + } + + unsigned length() { return m_length; } + + void writeTo(UChar* destination) + { + for (unsigned i = 0; i < m_length; ++i) { + unsigned char c = m_buffer[i]; + destination[i] = c; + } + } + +private: + const char* m_buffer; + unsigned m_length; +}; + +template<> +class StringTypeAdapter<Vector<char> > { +public: + StringTypeAdapter<Vector<char> >(const Vector<char>& buffer) + : m_buffer(buffer) + { + } + + size_t length() { return m_buffer.size(); } + + void writeTo(UChar* destination) + { + for (size_t i = 0; i < m_buffer.size(); ++i) { + unsigned char c = m_buffer[i]; + destination[i] = c; + } + } + +private: + const Vector<char>& m_buffer; +}; + +template<> +class StringTypeAdapter<String> { +public: + StringTypeAdapter<String>(const String& string) + : m_buffer(string) + { + } + + unsigned length() { return m_buffer.length(); } + + void writeTo(UChar* destination) + { + const UChar* data = m_buffer.characters(); + unsigned length = m_buffer.length(); + for (unsigned i = 0; i < length; ++i) + destination[i] = data[i]; + } + +private: + const String& m_buffer; +}; + +inline void sumWithOverflow(unsigned& total, unsigned addend, bool& overflow) +{ + unsigned oldTotal = total; + total = oldTotal + addend; + if (total < oldTotal) + overflow = true; +} + +template<typename StringType1, typename StringType2> +PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2) +{ + StringTypeAdapter<StringType1> adapter1(string1); + StringTypeAdapter<StringType2> adapter2(string2); + + UChar* buffer; + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + if (overflow) + return 0; + PassRefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3> +PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3) +{ + StringTypeAdapter<StringType1> adapter1(string1); + StringTypeAdapter<StringType2> adapter2(string2); + StringTypeAdapter<StringType3> adapter3(string3); + + UChar* buffer = 0; + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + if (overflow) + return 0; + PassRefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4> +PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4) +{ + StringTypeAdapter<StringType1> adapter1(string1); + StringTypeAdapter<StringType2> adapter2(string2); + StringTypeAdapter<StringType3> adapter3(string3); + StringTypeAdapter<StringType4> adapter4(string4); + + UChar* buffer; + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + sumWithOverflow(length, adapter4.length(), overflow); + if (overflow) + return 0; + PassRefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5> +PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5) +{ + StringTypeAdapter<StringType1> adapter1(string1); + StringTypeAdapter<StringType2> adapter2(string2); + StringTypeAdapter<StringType3> adapter3(string3); + StringTypeAdapter<StringType4> adapter4(string4); + StringTypeAdapter<StringType5> adapter5(string5); + + UChar* buffer; + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + sumWithOverflow(length, adapter4.length(), overflow); + sumWithOverflow(length, adapter5.length(), overflow); + if (overflow) + return 0; + PassRefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6> +PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6) +{ + StringTypeAdapter<StringType1> adapter1(string1); + StringTypeAdapter<StringType2> adapter2(string2); + StringTypeAdapter<StringType3> adapter3(string3); + StringTypeAdapter<StringType4> adapter4(string4); + StringTypeAdapter<StringType5> adapter5(string5); + StringTypeAdapter<StringType6> adapter6(string6); + + UChar* buffer; + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + sumWithOverflow(length, adapter4.length(), overflow); + sumWithOverflow(length, adapter5.length(), overflow); + sumWithOverflow(length, adapter6.length(), overflow); + if (overflow) + return 0; + PassRefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + result += adapter5.length(); + adapter6.writeTo(result); + + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7> +PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7) +{ + StringTypeAdapter<StringType1> adapter1(string1); + StringTypeAdapter<StringType2> adapter2(string2); + StringTypeAdapter<StringType3> adapter3(string3); + StringTypeAdapter<StringType4> adapter4(string4); + StringTypeAdapter<StringType5> adapter5(string5); + StringTypeAdapter<StringType6> adapter6(string6); + StringTypeAdapter<StringType7> adapter7(string7); + + UChar* buffer; + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + sumWithOverflow(length, adapter4.length(), overflow); + sumWithOverflow(length, adapter5.length(), overflow); + sumWithOverflow(length, adapter6.length(), overflow); + sumWithOverflow(length, adapter7.length(), overflow); + if (overflow) + return 0; + PassRefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + result += adapter5.length(); + adapter6.writeTo(result); + result += adapter6.length(); + adapter7.writeTo(result); + + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8> +PassRefPtr<StringImpl> tryMakeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8) +{ + StringTypeAdapter<StringType1> adapter1(string1); + StringTypeAdapter<StringType2> adapter2(string2); + StringTypeAdapter<StringType3> adapter3(string3); + StringTypeAdapter<StringType4> adapter4(string4); + StringTypeAdapter<StringType5> adapter5(string5); + StringTypeAdapter<StringType6> adapter6(string6); + StringTypeAdapter<StringType7> adapter7(string7); + StringTypeAdapter<StringType8> adapter8(string8); + + UChar* buffer; + bool overflow = false; + unsigned length = adapter1.length(); + sumWithOverflow(length, adapter2.length(), overflow); + sumWithOverflow(length, adapter3.length(), overflow); + sumWithOverflow(length, adapter4.length(), overflow); + sumWithOverflow(length, adapter5.length(), overflow); + sumWithOverflow(length, adapter6.length(), overflow); + sumWithOverflow(length, adapter7.length(), overflow); + sumWithOverflow(length, adapter8.length(), overflow); + if (overflow) + return 0; + PassRefPtr<StringImpl> resultImpl = StringImpl::tryCreateUninitialized(length, buffer); + if (!resultImpl) + return 0; + + UChar* result = buffer; + adapter1.writeTo(result); + result += adapter1.length(); + adapter2.writeTo(result); + result += adapter2.length(); + adapter3.writeTo(result); + result += adapter3.length(); + adapter4.writeTo(result); + result += adapter4.length(); + adapter5.writeTo(result); + result += adapter5.length(); + adapter6.writeTo(result); + result += adapter6.length(); + adapter7.writeTo(result); + result += adapter7.length(); + adapter8.writeTo(result); + + return resultImpl; +} + +// Convenience only. +template<typename StringType1> +String makeString(StringType1 string1) +{ + return String(string1); +} + +template<typename StringType1, typename StringType2> +String makeString(StringType1 string1, StringType2 string2) +{ + PassRefPtr<StringImpl> resultImpl = tryMakeString(string1, string2); + if (!resultImpl) + CRASH(); + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3> +String makeString(StringType1 string1, StringType2 string2, StringType3 string3) +{ + PassRefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3); + if (!resultImpl) + CRASH(); + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4> +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4) +{ + PassRefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4); + if (!resultImpl) + CRASH(); + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5> +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5) +{ + PassRefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5); + if (!resultImpl) + CRASH(); + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6> +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6) +{ + PassRefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6); + if (!resultImpl) + CRASH(); + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7> +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7) +{ + PassRefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7); + if (!resultImpl) + CRASH(); + return resultImpl; +} + +template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8> +String makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8) +{ + PassRefPtr<StringImpl> resultImpl = tryMakeString(string1, string2, string3, string4, string5, string6, string7, string8); + if (!resultImpl) + CRASH(); + return resultImpl; +} + +} // namespace WTF + +using WTF::makeString; + +#endif diff --git a/Source/JavaScriptCore/wtf/text/StringHash.h b/Source/JavaScriptCore/wtf/text/StringHash.h new file mode 100644 index 0000000..d7aabdb --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringHash.h @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved + * Copyright (C) Research In Motion Limited 2009. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef StringHash_h +#define StringHash_h + +#include "AtomicString.h" +#include "WTFString.h" +#include <wtf/Forward.h> +#include <wtf/HashTraits.h> +#include <wtf/StringHasher.h> +#include <wtf/unicode/Unicode.h> + +namespace WTF { + + // The hash() functions on StringHash and CaseFoldingHash do not support + // null strings. get(), contains(), and add() on HashMap<String,..., StringHash> + // cause a null-pointer dereference when passed null strings. + + // FIXME: We should really figure out a way to put the computeHash function that's + // currently a member function of StringImpl into this file so we can be a little + // closer to having all the nearly-identical hash functions in one place. + + struct StringHash { + static unsigned hash(StringImpl* key) { return key->hash(); } + static bool equal(const StringImpl* a, const StringImpl* b) + { + if (a == b) + return true; + if (!a || !b) + return false; + + unsigned aLength = a->length(); + unsigned bLength = b->length(); + if (aLength != bLength) + return false; + + // FIXME: perhaps we should have a more abstract macro that indicates when + // going 4 bytes at a time is unsafe +#if CPU(ARM) || CPU(SH4) || CPU(MIPS) + const UChar* aChars = a->characters(); + const UChar* bChars = b->characters(); + for (unsigned i = 0; i != aLength; ++i) { + if (*aChars++ != *bChars++) + return false; + } + return true; +#else + /* Do it 4-bytes-at-a-time on architectures where it's safe */ + const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters()); + const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters()); + + unsigned halfLength = aLength >> 1; + for (unsigned i = 0; i != halfLength; ++i) + if (*aChars++ != *bChars++) + return false; + + if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars)) + return false; + + return true; +#endif + } + + static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); } + static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) + { + return equal(a.get(), b.get()); + } + + static unsigned hash(const String& key) { return key.impl()->hash(); } + static bool equal(const String& a, const String& b) + { + return equal(a.impl(), b.impl()); + } + + static const bool safeToCompareToEmptyOrDeleted = false; + }; + + class CaseFoldingHash { + public: + template<typename T> static inline UChar foldCase(T ch) + { + return WTF::Unicode::foldCase(ch); + } + + static unsigned hash(const UChar* data, unsigned length) + { + return StringHasher::createHash<UChar, foldCase<UChar> >(data, length); + } + + static unsigned hash(StringImpl* str) + { + return hash(str->characters(), str->length()); + } + + static unsigned hash(const char* data, unsigned length) + { + return StringHasher::createHash<char, foldCase<char> >(data, length); + } + + static bool equal(const StringImpl* a, const StringImpl* b) + { + if (a == b) + return true; + if (!a || !b) + return false; + unsigned length = a->length(); + if (length != b->length()) + return false; + return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0; + } + + static unsigned hash(const RefPtr<StringImpl>& key) + { + return hash(key.get()); + } + + static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) + { + return equal(a.get(), b.get()); + } + + static unsigned hash(const String& key) + { + return hash(key.impl()); + } + static unsigned hash(const AtomicString& key) + { + return hash(key.impl()); + } + static bool equal(const String& a, const String& b) + { + return equal(a.impl(), b.impl()); + } + static bool equal(const AtomicString& a, const AtomicString& b) + { + return (a == b) || equal(a.impl(), b.impl()); + } + + static const bool safeToCompareToEmptyOrDeleted = false; + }; + + // This hash can be used in cases where the key is a hash of a string, but we don't + // want to store the string. It's not really specific to string hashing, but all our + // current uses of it are for strings. + struct AlreadyHashed : IntHash<unsigned> { + static unsigned hash(unsigned key) { return key; } + + // To use a hash value as a key for a hash table, we need to eliminate the + // "deleted" value, which is negative one. That could be done by changing + // the string hash function to never generate negative one, but this works + // and is still relatively efficient. + static unsigned avoidDeletedValue(unsigned hash) + { + ASSERT(hash); + unsigned newHash = hash | (!(hash + 1) << 31); + ASSERT(newHash); + ASSERT(newHash != 0xFFFFFFFF); + return newHash; + } + }; + + template<> struct HashTraits<String> : GenericHashTraits<String> { + static const bool emptyValueIsZero = true; + static void constructDeletedValue(String& slot) { new (&slot) String(HashTableDeletedValue); } + static bool isDeletedValue(const String& slot) { return slot.isHashTableDeletedValue(); } + }; + +} + +using WTF::StringHash; +using WTF::CaseFoldingHash; +using WTF::AlreadyHashed; + +#endif diff --git a/Source/JavaScriptCore/wtf/text/StringImpl.cpp b/Source/JavaScriptCore/wtf/text/StringImpl.cpp new file mode 100644 index 0000000..c83ec42 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringImpl.cpp @@ -0,0 +1,1073 @@ +/* + * Copyright (C) 1999 Lars Knoll (knoll@kde.org) + * (C) 1999 Antti Koivisto (koivisto@kde.org) + * (C) 2001 Dirk Mueller ( mueller@kde.org ) + * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. + * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include "config.h" +#include "StringImpl.h" + +#include "AtomicString.h" +#include "StringBuffer.h" +#include "StringHash.h" +#include <wtf/StdLibExtras.h> +#include <wtf/WTFThreadData.h> + +using namespace std; + +namespace WTF { + +using namespace Unicode; + +static const unsigned minLengthToShare = 20; + +COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small); + +StringImpl::~StringImpl() +{ + ASSERT(!isStatic()); + + if (isAtomic()) + AtomicString::remove(this); +#if USE(JSC) + if (isIdentifier()) { + if (!wtfThreadData().currentIdentifierTable()->remove(this)) + CRASH(); + } +#endif + + BufferOwnership ownership = bufferOwnership(); + if (ownership != BufferInternal) { + if (ownership == BufferOwned) { + ASSERT(!m_sharedBuffer); + ASSERT(m_data); + fastFree(const_cast<UChar*>(m_data)); + } else if (ownership == BufferSubstring) { + ASSERT(m_substringBuffer); + m_substringBuffer->deref(); + } else { + ASSERT(ownership == BufferShared); + ASSERT(m_sharedBuffer); + m_sharedBuffer->deref(); + } + } +} + +PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) +{ + if (!length) { + data = 0; + return empty(); + } + + // Allocate a single buffer large enough to contain the StringImpl + // struct as well as the data which it contains. This removes one + // heap allocation from this call. + if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) + CRASH(); + size_t size = sizeof(StringImpl) + length * sizeof(UChar); + StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); + + data = reinterpret_cast<UChar*>(string + 1); + return adoptRef(new (string) StringImpl(length)); +} + +PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length) +{ + if (!characters || !length) + return empty(); + + UChar* data; + RefPtr<StringImpl> string = createUninitialized(length, data); + memcpy(data, characters, length * sizeof(UChar)); + return string.release(); +} + +PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length) +{ + if (!characters || !length) + return empty(); + + UChar* data; + RefPtr<StringImpl> string = createUninitialized(length, data); + for (unsigned i = 0; i != length; ++i) { + unsigned char c = characters[i]; + data[i] = c; + } + return string.release(); +} + +PassRefPtr<StringImpl> StringImpl::create(const char* string) +{ + if (!string) + return empty(); + size_t length = strlen(string); + if (length > numeric_limits<unsigned>::max()) + CRASH(); + return create(string, length); +} + +PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer) +{ + ASSERT(characters); + ASSERT(minLengthToShare && length >= minLengthToShare); + return adoptRef(new StringImpl(characters, length, sharedBuffer)); +} + +SharedUChar* StringImpl::sharedBuffer() +{ + if (m_length < minLengthToShare) + return 0; + // All static strings are smaller that the minimim length to share. + ASSERT(!isStatic()); + + BufferOwnership ownership = bufferOwnership(); + + if (ownership == BufferInternal) + return 0; + if (ownership == BufferSubstring) + return m_substringBuffer->sharedBuffer(); + if (ownership == BufferOwned) { + ASSERT(!m_sharedBuffer); + m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).leakRef(); + m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared; + } + + ASSERT(bufferOwnership() == BufferShared); + ASSERT(m_sharedBuffer); + return m_sharedBuffer; +} + +bool StringImpl::containsOnlyWhitespace() +{ + // FIXME: The definition of whitespace here includes a number of characters + // that are not whitespace from the point of view of RenderText; I wonder if + // that's a problem in practice. + for (unsigned i = 0; i < m_length; i++) + if (!isASCIISpace(m_data[i])) + return false; + return true; +} + +PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) +{ + if (start >= m_length) + return empty(); + unsigned maxLength = m_length - start; + if (length >= maxLength) { + if (!start) + return this; + length = maxLength; + } + return create(m_data + start, length); +} + +UChar32 StringImpl::characterStartingAt(unsigned i) +{ + if (U16_IS_SINGLE(m_data[i])) + return m_data[i]; + if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1])) + return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]); + return 0; +} + +PassRefPtr<StringImpl> StringImpl::lower() +{ + // Note: This is a hot function in the Dromaeo benchmark, specifically the + // no-op code path up through the first 'return' statement. + + // First scan the string for uppercase and non-ASCII characters: + UChar ored = 0; + bool noUpper = true; + const UChar *end = m_data + m_length; + for (const UChar* chp = m_data; chp != end; chp++) { + if (UNLIKELY(isASCIIUpper(*chp))) + noUpper = false; + ored |= *chp; + } + + // Nothing to do if the string is all ASCII with no uppercase. + if (noUpper && !(ored & ~0x7F)) + return this; + + if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) + CRASH(); + int32_t length = m_length; + + UChar* data; + RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + + if (!(ored & ~0x7F)) { + // Do a faster loop for the case where all the characters are ASCII. + for (int i = 0; i < length; i++) { + UChar c = m_data[i]; + data[i] = toASCIILower(c); + } + return newImpl; + } + + // Do a slower implementation for cases that include non-ASCII characters. + bool error; + int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error); + if (!error && realLength == length) + return newImpl; + newImpl = createUninitialized(realLength, data); + Unicode::toLower(data, realLength, m_data, m_length, &error); + if (error) + return this; + return newImpl; +} + +PassRefPtr<StringImpl> StringImpl::upper() +{ + // This function could be optimized for no-op cases the way lower() is, + // but in empirical testing, few actual calls to upper() are no-ops, so + // it wouldn't be worth the extra time for pre-scanning. + UChar* data; + RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + + if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) + CRASH(); + int32_t length = m_length; + + // Do a faster loop for the case where all the characters are ASCII. + UChar ored = 0; + for (int i = 0; i < length; i++) { + UChar c = m_data[i]; + ored |= c; + data[i] = toASCIIUpper(c); + } + if (!(ored & ~0x7F)) + return newImpl.release(); + + // Do a slower implementation for cases that include non-ASCII characters. + bool error; + int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error); + if (!error && realLength == length) + return newImpl; + newImpl = createUninitialized(realLength, data); + Unicode::toUpper(data, realLength, m_data, m_length, &error); + if (error) + return this; + return newImpl.release(); +} + +PassRefPtr<StringImpl> StringImpl::secure(UChar character, LastCharacterBehavior behavior) +{ + if (!m_length) + return this; + + UChar* data; + RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + unsigned lastCharacterIndex = m_length - 1; + for (unsigned i = 0; i < lastCharacterIndex; ++i) + data[i] = character; + data[lastCharacterIndex] = (behavior == ObscureLastCharacter) ? character : m_data[lastCharacterIndex]; + return newImpl.release(); +} + +PassRefPtr<StringImpl> StringImpl::foldCase() +{ + UChar* data; + RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + + if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) + CRASH(); + int32_t length = m_length; + + // Do a faster loop for the case where all the characters are ASCII. + UChar ored = 0; + for (int32_t i = 0; i < length; i++) { + UChar c = m_data[i]; + ored |= c; + data[i] = toASCIILower(c); + } + if (!(ored & ~0x7F)) + return newImpl.release(); + + // Do a slower implementation for cases that include non-ASCII characters. + bool error; + int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error); + if (!error && realLength == length) + return newImpl.release(); + newImpl = createUninitialized(realLength, data); + Unicode::foldCase(data, realLength, m_data, m_length, &error); + if (error) + return this; + return newImpl.release(); +} + +PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() +{ + if (!m_length) + return empty(); + + unsigned start = 0; + unsigned end = m_length - 1; + + // skip white space from start + while (start <= end && isSpaceOrNewline(m_data[start])) + start++; + + // only white space + if (start > end) + return empty(); + + // skip white space from end + while (end && isSpaceOrNewline(m_data[end])) + end--; + + if (!start && end == m_length - 1) + return this; + return create(m_data + start, end + 1 - start); +} + +PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) +{ + const UChar* from = m_data; + const UChar* fromend = from + m_length; + + // Assume the common case will not remove any characters + while (from != fromend && !findMatch(*from)) + from++; + if (from == fromend) + return this; + + StringBuffer data(m_length); + UChar* to = data.characters(); + unsigned outc = from - m_data; + + if (outc) + memcpy(to, m_data, outc * sizeof(UChar)); + + while (true) { + while (from != fromend && findMatch(*from)) + from++; + while (from != fromend && !findMatch(*from)) + to[outc++] = *from++; + if (from == fromend) + break; + } + + data.shrink(outc); + + return adopt(data); +} + +PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace() +{ + StringBuffer data(m_length); + + const UChar* from = m_data; + const UChar* fromend = from + m_length; + int outc = 0; + bool changedToSpace = false; + + UChar* to = data.characters(); + + while (true) { + while (from != fromend && isSpaceOrNewline(*from)) { + if (*from != ' ') + changedToSpace = true; + from++; + } + while (from != fromend && !isSpaceOrNewline(*from)) + to[outc++] = *from++; + if (from != fromend) + to[outc++] = ' '; + else + break; + } + + if (outc > 0 && to[outc - 1] == ' ') + outc--; + + if (static_cast<unsigned>(outc) == m_length && !changedToSpace) + return this; + + data.shrink(outc); + + return adopt(data); +} + +int StringImpl::toIntStrict(bool* ok, int base) +{ + return charactersToIntStrict(m_data, m_length, ok, base); +} + +unsigned StringImpl::toUIntStrict(bool* ok, int base) +{ + return charactersToUIntStrict(m_data, m_length, ok, base); +} + +int64_t StringImpl::toInt64Strict(bool* ok, int base) +{ + return charactersToInt64Strict(m_data, m_length, ok, base); +} + +uint64_t StringImpl::toUInt64Strict(bool* ok, int base) +{ + return charactersToUInt64Strict(m_data, m_length, ok, base); +} + +intptr_t StringImpl::toIntPtrStrict(bool* ok, int base) +{ + return charactersToIntPtrStrict(m_data, m_length, ok, base); +} + +int StringImpl::toInt(bool* ok) +{ + return charactersToInt(m_data, m_length, ok); +} + +unsigned StringImpl::toUInt(bool* ok) +{ + return charactersToUInt(m_data, m_length, ok); +} + +int64_t StringImpl::toInt64(bool* ok) +{ + return charactersToInt64(m_data, m_length, ok); +} + +uint64_t StringImpl::toUInt64(bool* ok) +{ + return charactersToUInt64(m_data, m_length, ok); +} + +intptr_t StringImpl::toIntPtr(bool* ok) +{ + return charactersToIntPtr(m_data, m_length, ok); +} + +double StringImpl::toDouble(bool* ok) +{ + return charactersToDouble(m_data, m_length, ok); +} + +float StringImpl::toFloat(bool* ok) +{ + return charactersToFloat(m_data, m_length, ok); +} + +static bool equal(const UChar* a, const char* b, int length) +{ + ASSERT(length >= 0); + while (length--) { + unsigned char bc = *b++; + if (*a++ != bc) + return false; + } + return true; +} + +bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) +{ + while (length--) { + unsigned char bc = *b++; + if (foldCase(*a++) != foldCase(bc)) + return false; + } + return true; +} + +static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) +{ + ASSERT(length >= 0); + return umemcasecmp(a, b, length) == 0; +} + +int codePointCompare(const StringImpl* s1, const StringImpl* s2) +{ + const unsigned l1 = s1 ? s1->length() : 0; + const unsigned l2 = s2 ? s2->length() : 0; + const unsigned lmin = l1 < l2 ? l1 : l2; + const UChar* c1 = s1 ? s1->characters() : 0; + const UChar* c2 = s2 ? s2->characters() : 0; + unsigned pos = 0; + while (pos < lmin && *c1 == *c2) { + c1++; + c2++; + pos++; + } + + if (pos < lmin) + return (c1[0] > c2[0]) ? 1 : -1; + + if (l1 == l2) + return 0; + + return (l1 > l2) ? 1 : -1; +} + +size_t StringImpl::find(UChar c, unsigned start) +{ + return WTF::find(m_data, m_length, c, start); +} + +size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) +{ + return WTF::find(m_data, m_length, matchFunction, start); +} + +size_t StringImpl::find(const char* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + size_t matchStringLength = strlen(matchString); + if (matchStringLength > numeric_limits<unsigned>::max()) + CRASH(); + unsigned matchLength = matchStringLength; + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. + if (matchLength == 1) + return WTF::find(characters(), length(), *(const unsigned char*)matchString, index); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const unsigned char* matchCharacters = (const unsigned char*)matchString; + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchCharacters[i]; + } + + unsigned i = 0; + // keep looping until we match + while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; +} + +size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + size_t matchStringLength = strlen(matchString); + if (matchStringLength > numeric_limits<unsigned>::max()) + CRASH(); + unsigned matchLength = matchStringLength; + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + + unsigned i = 0; + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + ++i; + } + return index + i; +} + +size_t StringImpl::find(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. + if (matchLength == 1) + return WTF::find(characters(), length(), matchString->characters()[0], index); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const UChar* matchCharacters = matchString->characters(); + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchCharacters[i]; + } + + unsigned i = 0; + // keep looping until we match + while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; +} + +size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const UChar* matchCharacters = matchString->characters(); + + unsigned i = 0; + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { + if (i == delta) + return notFound; + ++i; + } + return index + i; +} + +size_t StringImpl::reverseFind(UChar c, unsigned index) +{ + return WTF::reverseFind(m_data, m_length, c, index); +} + +size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. + if (matchLength == 1) + return WTF::reverseFind(characters(), length(), matchString->characters()[0], index); + + // Check index & matchLength are in range. + if (matchLength > length()) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = min(index, length() - matchLength); + + const UChar *searchCharacters = characters(); + const UChar *matchCharacters = matchString->characters(); + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[delta + i]; + matchHash += matchCharacters[i]; + } + + // keep looping until we match + while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) { + if (!delta) + return notFound; + delta--; + searchHash -= searchCharacters[delta + matchLength]; + searchHash += searchCharacters[delta]; + } + return delta; +} + +size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (matchLength > length()) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = min(index, length() - matchLength); + + const UChar *searchCharacters = characters(); + const UChar *matchCharacters = matchString->characters(); + + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { + if (!delta) + return notFound; + delta--; + } + return delta; +} + +bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive) +{ + ASSERT(m_data); + if (m_length >= m_data->m_length) { + unsigned start = m_length - m_data->m_length; + return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start; + } + return false; +} + +PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) +{ + if (oldC == newC) + return this; + unsigned i; + for (i = 0; i != m_length; ++i) + if (m_data[i] == oldC) + break; + if (i == m_length) + return this; + + UChar* data; + RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + + for (i = 0; i != m_length; ++i) { + UChar ch = m_data[i]; + if (ch == oldC) + ch = newC; + data[i] = ch; + } + return newImpl.release(); +} + +PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) +{ + position = min(position, length()); + lengthToReplace = min(lengthToReplace, length() - position); + unsigned lengthToInsert = str ? str->length() : 0; + if (!lengthToReplace && !lengthToInsert) + return this; + UChar* data; + + if ((length() - lengthToReplace) >= (numeric_limits<unsigned>::max() - lengthToInsert)) + CRASH(); + + RefPtr<StringImpl> newImpl = + createUninitialized(length() - lengthToReplace + lengthToInsert, data); + memcpy(data, characters(), position * sizeof(UChar)); + if (str) + memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar)); + memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace, + (length() - position - lengthToReplace) * sizeof(UChar)); + return newImpl.release(); +} + +PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement) +{ + if (!replacement) + return this; + + unsigned repStrLength = replacement->length(); + size_t srcSegmentStart = 0; + unsigned matchCount = 0; + + // Count the matches + while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { + ++matchCount; + ++srcSegmentStart; + } + + // If we have 0 matches, we don't have to do any more work + if (!matchCount) + return this; + + if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength) + CRASH(); + + unsigned replaceSize = matchCount * repStrLength; + unsigned newSize = m_length - matchCount; + if (newSize >= (numeric_limits<unsigned>::max() - replaceSize)) + CRASH(); + + newSize += replaceSize; + + UChar* data; + RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); + + // Construct the new data + size_t srcSegmentEnd; + unsigned srcSegmentLength; + srcSegmentStart = 0; + unsigned dstOffset = 0; + + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { + srcSegmentLength = srcSegmentEnd - srcSegmentStart; + memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + dstOffset += srcSegmentLength; + memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); + dstOffset += repStrLength; + srcSegmentStart = srcSegmentEnd + 1; + } + + srcSegmentLength = m_length - srcSegmentStart; + memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + + ASSERT(dstOffset + srcSegmentLength == newImpl->length()); + + return newImpl.release(); +} + +PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement) +{ + if (!pattern || !replacement) + return this; + + unsigned patternLength = pattern->length(); + if (!patternLength) + return this; + + unsigned repStrLength = replacement->length(); + size_t srcSegmentStart = 0; + unsigned matchCount = 0; + + // Count the matches + while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { + ++matchCount; + srcSegmentStart += patternLength; + } + + // If we have 0 matches, we don't have to do any more work + if (!matchCount) + return this; + + unsigned newSize = m_length - matchCount * patternLength; + if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength) + CRASH(); + + if (newSize > (numeric_limits<unsigned>::max() - matchCount * repStrLength)) + CRASH(); + + newSize += matchCount * repStrLength; + + UChar* data; + RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); + + // Construct the new data + size_t srcSegmentEnd; + unsigned srcSegmentLength; + srcSegmentStart = 0; + unsigned dstOffset = 0; + + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { + srcSegmentLength = srcSegmentEnd - srcSegmentStart; + memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + dstOffset += srcSegmentLength; + memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); + dstOffset += repStrLength; + srcSegmentStart = srcSegmentEnd + patternLength; + } + + srcSegmentLength = m_length - srcSegmentStart; + memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + + ASSERT(dstOffset + srcSegmentLength == newImpl->length()); + + return newImpl.release(); +} + +bool equal(const StringImpl* a, const StringImpl* b) +{ + return StringHash::equal(a, b); +} + +bool equal(const StringImpl* a, const char* b) +{ + if (!a) + return !b; + if (!b) + return !a; + + unsigned length = a->length(); + const UChar* as = a->characters(); + for (unsigned i = 0; i != length; ++i) { + unsigned char bc = b[i]; + if (!bc) + return false; + if (as[i] != bc) + return false; + } + + return !b[length]; +} + +bool equalIgnoringCase(StringImpl* a, StringImpl* b) +{ + return CaseFoldingHash::equal(a, b); +} + +bool equalIgnoringCase(StringImpl* a, const char* b) +{ + if (!a) + return !b; + if (!b) + return !a; + + unsigned length = a->length(); + const UChar* as = a->characters(); + + // Do a faster loop for the case where all the characters are ASCII. + UChar ored = 0; + bool equal = true; + for (unsigned i = 0; i != length; ++i) { + char bc = b[i]; + if (!bc) + return false; + UChar ac = as[i]; + ored |= ac; + equal = equal && (toASCIILower(ac) == toASCIILower(bc)); + } + + // Do a slower implementation for cases that include non-ASCII characters. + if (ored & ~0x7F) { + equal = true; + for (unsigned i = 0; i != length; ++i) { + unsigned char bc = b[i]; + equal = equal && (foldCase(as[i]) == foldCase(bc)); + } + } + + return equal && !b[length]; +} + +bool equalIgnoringNullity(StringImpl* a, StringImpl* b) +{ + if (StringHash::equal(a, b)) + return true; + if (!a && b && !b->length()) + return true; + if (!b && a && !a->length()) + return true; + + return false; +} + +WTF::Unicode::Direction StringImpl::defaultWritingDirection() +{ + for (unsigned i = 0; i < m_length; ++i) { + WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]); + if (charDirection == WTF::Unicode::LeftToRight) + return WTF::Unicode::LeftToRight; + if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) + return WTF::Unicode::RightToLeft; + } + return WTF::Unicode::LeftToRight; +} + +// This is a hot function because it's used when parsing HTML. +PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length) +{ + StringBuffer strippedCopy(length); + unsigned strippedLength = 0; + for (unsigned i = 0; i < length; i++) { + if (int c = characters[i]) + strippedCopy[strippedLength++] = c; + } + ASSERT(strippedLength < length); // Only take the slow case when stripping. + strippedCopy.shrink(strippedLength); + return adopt(strippedCopy); +} + +PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer) +{ + unsigned length = buffer.length(); + if (length == 0) + return empty(); + return adoptRef(new StringImpl(buffer.release(), length)); +} + +PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string) +{ + // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer + // get allocated in a single memory block. + UChar* data; + unsigned length = string.m_length; + if (length >= numeric_limits<unsigned>::max()) + CRASH(); + RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data); + memcpy(data, string.m_data, length * sizeof(UChar)); + data[length] = 0; + terminatedString->m_length--; + terminatedString->m_hash = string.m_hash; + terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter; + return terminatedString.release(); +} + +PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const +{ + return create(m_data, m_length); +} + +PassRefPtr<StringImpl> StringImpl::crossThreadString() +{ + if (SharedUChar* sharedBuffer = this->sharedBuffer()) + return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy())); + + // If no shared buffer is available, create a copy. + return threadsafeCopy(); +} + +} // namespace WTF diff --git a/Source/JavaScriptCore/wtf/text/StringImpl.h b/Source/JavaScriptCore/wtf/text/StringImpl.h new file mode 100644 index 0000000..dc1dbb2 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringImpl.h @@ -0,0 +1,408 @@ +/* + * Copyright (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. + * Copyright (C) 2009 Google Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef StringImpl_h +#define StringImpl_h + +#include <limits.h> +#include <wtf/ASCIICType.h> +#include <wtf/CrossThreadRefCounted.h> +#include <wtf/Forward.h> +#include <wtf/OwnFastMallocPtr.h> +#include <wtf/StdLibExtras.h> +#include <wtf/StringHasher.h> +#include <wtf/Vector.h> +#include <wtf/text/StringImplBase.h> +#include <wtf/unicode/Unicode.h> + +#if PLATFORM(CF) +typedef const struct __CFString * CFStringRef; +#endif + +#ifdef __OBJC__ +@class NSString; +#endif + +// FIXME: This is a temporary layering violation while we move string code to WTF. +// Landing the file moves in one patch, will follow on with patches to change the namespaces. +namespace JSC { +struct IdentifierCStringTranslator; +struct IdentifierUCharBufferTranslator; +} + +namespace WTF { + +struct CStringTranslator; +struct HashAndCharactersTranslator; +struct HashAndUTF8CharactersTranslator; +struct UCharBufferTranslator; + +enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; + +typedef OwnFastMallocPtr<const UChar> SharableUChar; +typedef CrossThreadRefCounted<SharableUChar> SharedUChar; +typedef bool (*CharacterMatchFunctionPtr)(UChar); + +class StringImpl : public StringImplBase { + friend struct JSC::IdentifierCStringTranslator; + friend struct JSC::IdentifierUCharBufferTranslator; + friend struct WTF::CStringTranslator; + friend struct WTF::HashAndCharactersTranslator; + friend struct WTF::HashAndUTF8CharactersTranslator; + friend struct WTF::UCharBufferTranslator; + friend class AtomicStringImpl; +private: + // Used to construct static strings, which have an special refCount that can never hit zero. + // This means that the static string will never be destroyed, which is important because + // static strings will be shared across threads & ref-counted in a non-threadsafe manner. + StringImpl(const UChar* characters, unsigned length, StaticStringConstructType) + : StringImplBase(length, ConstructStaticString) + , m_data(characters) + , m_buffer(0) + , m_hash(0) + { + // Ensure that the hash is computed so that AtomicStringHash can call existingHash() + // with impunity. The empty string is special because it is never entered into + // AtomicString's HashKey, but still needs to compare correctly. + hash(); + } + + // Create a normal string with internal storage (BufferInternal) + StringImpl(unsigned length) + : StringImplBase(length, BufferInternal) + , m_data(reinterpret_cast<const UChar*>(this + 1)) + , m_buffer(0) + , m_hash(0) + { + ASSERT(m_data); + ASSERT(m_length); + } + + // Create a StringImpl adopting ownership of the provided buffer (BufferOwned) + StringImpl(const UChar* characters, unsigned length) + : StringImplBase(length, BufferOwned) + , m_data(characters) + , m_buffer(0) + , m_hash(0) + { + ASSERT(m_data); + ASSERT(m_length); + } + + // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring) + StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base) + : StringImplBase(length, BufferSubstring) + , m_data(characters) + , m_substringBuffer(base.leakRef()) + , m_hash(0) + { + ASSERT(m_data); + ASSERT(m_length); + ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); + } + + // Used to construct new strings sharing an existing SharedUChar (BufferShared) + StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer) + : StringImplBase(length, BufferShared) + , m_data(characters) + , m_sharedBuffer(sharedBuffer.leakRef()) + , m_hash(0) + { + ASSERT(m_data); + ASSERT(m_length); + } + + // For use only by AtomicString's XXXTranslator helpers. + void setHash(unsigned hash) + { + ASSERT(!isStatic()); + ASSERT(!m_hash); + ASSERT(hash == computeHash(m_data, m_length)); + m_hash = hash; + } + +public: + ~StringImpl(); + + static PassRefPtr<StringImpl> create(const UChar*, unsigned length); + static PassRefPtr<StringImpl> create(const char*, unsigned length); + static PassRefPtr<StringImpl> create(const char*); + static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer); + static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) + { + ASSERT(rep); + ASSERT(length <= rep->length()); + + if (!length) + return empty(); + + StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get(); + return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep)); + } + + static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data); + static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output) + { + if (!length) { + output = 0; + return empty(); + } + + if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) { + output = 0; + return 0; + } + StringImpl* resultImpl; + if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) { + output = 0; + return 0; + } + output = reinterpret_cast<UChar*>(resultImpl + 1); + return adoptRef(new(resultImpl) StringImpl(length)); + } + + static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); } + static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&); + static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length); + + template<size_t inlineCapacity> + static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector) + { + if (size_t size = vector.size()) { + ASSERT(vector.data()); + if (size > std::numeric_limits<unsigned>::max()) + CRASH(); + return adoptRef(new StringImpl(vector.releaseBuffer(), size)); + } + return empty(); + } + static PassRefPtr<StringImpl> adopt(StringBuffer&); + + SharedUChar* sharedBuffer(); + const UChar* characters() const { return m_data; } + + size_t cost() + { + // For substrings, return the cost of the base string. + if (bufferOwnership() == BufferSubstring) + return m_substringBuffer->cost(); + + if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) { + m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost; + return m_length; + } + return 0; + } + + bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; } + void setIsIdentifier(bool isIdentifier) + { + ASSERT(!isStatic()); + if (isIdentifier) + m_refCountAndFlags |= s_refCountFlagIsIdentifier; + else + m_refCountAndFlags &= ~s_refCountFlagIsIdentifier; + } + + bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; } + + bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; } + void setIsAtomic(bool isIdentifier) + { + ASSERT(!isStatic()); + if (isIdentifier) + m_refCountAndFlags |= s_refCountFlagIsAtomic; + else + m_refCountAndFlags &= ~s_refCountFlagIsAtomic; + } + + unsigned hash() const { if (!m_hash) m_hash = computeHash(m_data, m_length); return m_hash; } + unsigned existingHash() const { ASSERT(m_hash); return m_hash; } + static unsigned computeHash(const UChar* data, unsigned length) { return WTF::StringHasher::createHash<UChar>(data, length); } + static unsigned computeHash(const char* data, unsigned length) { return WTF::StringHasher::createHash<char>(data, length); } + static unsigned computeHash(const char* data) { return WTF::StringHasher::createHash<char>(data); } + + ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; } + ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; } + + static StringImpl* empty(); + + static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters) + { + if (numCharacters <= s_copyCharsInlineCutOff) { + for (unsigned i = 0; i < numCharacters; ++i) + destination[i] = source[i]; + } else + memcpy(destination, source, numCharacters * sizeof(UChar)); + } + + // Returns a StringImpl suitable for use on another thread. + PassRefPtr<StringImpl> crossThreadString(); + // Makes a deep copy. Helpful only if you need to use a String on another thread + // (use crossThreadString if the method call doesn't need to be threadsafe). + // Since StringImpl objects are immutable, there's no other reason to make a copy. + PassRefPtr<StringImpl> threadsafeCopy() const; + + PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); + + UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; } + UChar32 characterStartingAt(unsigned); + + bool containsOnlyWhitespace(); + + int toIntStrict(bool* ok = 0, int base = 10); + unsigned toUIntStrict(bool* ok = 0, int base = 10); + int64_t toInt64Strict(bool* ok = 0, int base = 10); + uint64_t toUInt64Strict(bool* ok = 0, int base = 10); + intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); + + int toInt(bool* ok = 0); // ignores trailing garbage + unsigned toUInt(bool* ok = 0); // ignores trailing garbage + int64_t toInt64(bool* ok = 0); // ignores trailing garbage + uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage + intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage + + double toDouble(bool* ok = 0); + float toFloat(bool* ok = 0); + + PassRefPtr<StringImpl> lower(); + PassRefPtr<StringImpl> upper(); + + enum LastCharacterBehavior { ObscureLastCharacter, DisplayLastCharacter }; + + PassRefPtr<StringImpl> secure(UChar, LastCharacterBehavior = ObscureLastCharacter); + PassRefPtr<StringImpl> foldCase(); + + PassRefPtr<StringImpl> stripWhiteSpace(); + PassRefPtr<StringImpl> simplifyWhiteSpace(); + + PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); + + size_t find(UChar, unsigned index = 0); + size_t find(CharacterMatchFunctionPtr, unsigned index = 0); + size_t find(const char*, unsigned index = 0); + size_t find(StringImpl*, unsigned index = 0); + size_t findIgnoringCase(const char*, unsigned index = 0); + size_t findIgnoringCase(StringImpl*, unsigned index = 0); + + size_t reverseFind(UChar, unsigned index = UINT_MAX); + size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); + size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); + + bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; } + bool endsWith(StringImpl*, bool caseSensitive = true); + + PassRefPtr<StringImpl> replace(UChar, UChar); + PassRefPtr<StringImpl> replace(UChar, StringImpl*); + PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); + PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); + + WTF::Unicode::Direction defaultWritingDirection(); + +#if PLATFORM(CF) + CFStringRef createCFString(); +#endif +#ifdef __OBJC__ + operator NSString*(); +#endif + +private: + // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. + static const unsigned s_copyCharsInlineCutOff = 20; + + static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length); + + BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); } + bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; } + const UChar* m_data; + union { + void* m_buffer; + StringImpl* m_substringBuffer; + SharedUChar* m_sharedBuffer; + }; + mutable unsigned m_hash; +}; + +bool equal(const StringImpl*, const StringImpl*); +bool equal(const StringImpl*, const char*); +inline bool equal(const char* a, StringImpl* b) { return equal(b, a); } + +bool equalIgnoringCase(StringImpl*, StringImpl*); +bool equalIgnoringCase(StringImpl*, const char*); +inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); } +bool equalIgnoringCase(const UChar* a, const char* b, unsigned length); +inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } + +bool equalIgnoringNullity(StringImpl*, StringImpl*); + +int codePointCompare(const StringImpl*, const StringImpl*); + +static inline bool isSpaceOrNewline(UChar c) +{ + // Use isASCIISpace() for basic Latin-1. + // This will include newlines, which aren't included in Unicode DirWS. + return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral; +} + +// This is a hot function because it's used when parsing HTML. +inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length) +{ + ASSERT(characters); + ASSERT(length); + + // Optimize for the case where there are no Null characters by quickly + // searching for nulls, and then using StringImpl::create, which will + // memcpy the whole buffer. This is faster than assigning character by + // character during the loop. + + // Fast case. + int foundNull = 0; + for (unsigned i = 0; !foundNull && i < length; i++) { + int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS) + foundNull |= !c; + } + if (!foundNull) + return StringImpl::create(characters, length); + + return StringImpl::createStrippingNullCharactersSlowCase(characters, length); +} + +struct StringHash; + +// StringHash is the default hash for StringImpl* and RefPtr<StringImpl> +template<typename T> struct DefaultHash; +template<> struct DefaultHash<StringImpl*> { + typedef StringHash Hash; +}; +template<> struct DefaultHash<RefPtr<StringImpl> > { + typedef StringHash Hash; +}; + +} + +using WTF::StringImpl; +using WTF::equal; +using WTF::TextCaseSensitivity; +using WTF::TextCaseSensitive; +using WTF::TextCaseInsensitive; + +#endif diff --git a/Source/JavaScriptCore/wtf/text/StringImplBase.h b/Source/JavaScriptCore/wtf/text/StringImplBase.h new file mode 100644 index 0000000..6567672 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringImplBase.h @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef StringImplBase_h +#define StringImplBase_h + +#include <wtf/Noncopyable.h> +#include <wtf/unicode/Unicode.h> + +namespace WTF { + +class StringImplBase : public Noncopyable { +public: + bool isStringImpl() { return (m_refCountAndFlags & s_refCountInvalidForStringImpl) != s_refCountInvalidForStringImpl; } + unsigned length() const { return m_length; } + void ref() { m_refCountAndFlags += s_refCountIncrement; } + +protected: + enum BufferOwnership { + BufferInternal, + BufferOwned, + BufferSubstring, + BufferShared, + }; + + using Noncopyable::operator new; + void* operator new(size_t, void* inPlace) { ASSERT(inPlace); return inPlace; } + + // For SmallStringStorage, which allocates an array and uses an in-place new. + StringImplBase() { } + + StringImplBase(unsigned length, BufferOwnership ownership) + : m_refCountAndFlags(s_refCountIncrement | s_refCountFlagShouldReportedCost | ownership) + , m_length(length) + { + ASSERT(isStringImpl()); + } + + enum StaticStringConstructType { ConstructStaticString }; + StringImplBase(unsigned length, StaticStringConstructType) + : m_refCountAndFlags(s_refCountFlagStatic | s_refCountFlagIsIdentifier | BufferOwned) + , m_length(length) + { + ASSERT(isStringImpl()); + } + + // This constructor is not used when creating StringImpl objects, + // and sets the flags into a state marking the object as such. + enum NonStringImplConstructType { ConstructNonStringImpl }; + StringImplBase(NonStringImplConstructType) + : m_refCountAndFlags(s_refCountIncrement | s_refCountInvalidForStringImpl) + , m_length(0) + { + ASSERT(!isStringImpl()); + } + + // The bottom 7 bits hold flags, the top 25 bits hold the ref count. + // When dereferencing StringImpls we check for the ref count AND the + // static bit both being zero - static strings are never deleted. + static const unsigned s_refCountMask = 0xFFFFFF80; + static const unsigned s_refCountIncrement = 0x80; + static const unsigned s_refCountFlagStatic = 0x40; + static const unsigned s_refCountFlagHasTerminatingNullCharacter = 0x20; + static const unsigned s_refCountFlagIsAtomic = 0x10; + static const unsigned s_refCountFlagShouldReportedCost = 0x8; + static const unsigned s_refCountFlagIsIdentifier = 0x4; + static const unsigned s_refCountMaskBufferOwnership = 0x3; + // An invalid permutation of flags (static & shouldReportedCost - static strings do not + // set shouldReportedCost in the constructor, and this bit is only ever cleared, not set). + // Used by "ConstructNonStringImpl" constructor, above. + static const unsigned s_refCountInvalidForStringImpl = s_refCountFlagStatic | s_refCountFlagShouldReportedCost; + + unsigned m_refCountAndFlags; + unsigned m_length; +}; + +} // namespace WTF + +using WTF::StringImplBase; + +#endif diff --git a/Source/JavaScriptCore/wtf/text/StringStatics.cpp b/Source/JavaScriptCore/wtf/text/StringStatics.cpp new file mode 100644 index 0000000..5654044 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/StringStatics.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2010 Apple Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#ifdef SKIP_STATIC_CONSTRUCTORS_ON_GCC +#define ATOMICSTRING_HIDE_GLOBALS 1 +#endif + +#include "AtomicString.h" +#include "StaticConstructors.h" +#include "StringImpl.h" + +namespace WTF { + +StringImpl* StringImpl::empty() +{ + // FIXME: This works around a bug in our port of PCRE, that a regular expression + // run on the empty string may still perform a read from the first element, and + // as such we need this to be a valid pointer. No code should ever be reading + // from a zero length string, so this should be able to be a non-null pointer + // into the zero-page. + // Replace this with 'reinterpret_cast<UChar*>(static_cast<intptr_t>(1))' once + // PCRE goes away. + static UChar emptyUCharData = 0; + DEFINE_STATIC_LOCAL(StringImpl, emptyString, (&emptyUCharData, 0, ConstructStaticString)); + return &emptyString; +} + +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, nullAtom) +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, emptyAtom, "") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, textAtom, "#text") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, commentAtom, "#comment") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, starAtom, "*") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlAtom, "xml") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlnsAtom, "xmlns") + +void AtomicString::init() +{ + static bool initialized; + if (!initialized) { + // Initialization is not thread safe, so this function must be called from the main thread first. + ASSERT(isMainThread()); + + // Use placement new to initialize the globals. + new ((void*)&nullAtom) AtomicString; + new ((void*)&emptyAtom) AtomicString(""); + new ((void*)&textAtom) AtomicString("#text"); + new ((void*)&commentAtom) AtomicString("#comment"); + new ((void*)&starAtom) AtomicString("*"); + new ((void*)&xmlAtom) AtomicString("xml"); + new ((void*)&xmlnsAtom) AtomicString("xmlns"); + + initialized = true; + } +} + +} diff --git a/Source/JavaScriptCore/wtf/text/TextPosition.h b/Source/JavaScriptCore/wtf/text/TextPosition.h new file mode 100644 index 0000000..9f426ea --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/TextPosition.h @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2010, Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TextPosition_h +#define TextPosition_h + +#include <wtf/Assertions.h> + +namespace WTF { + +/* + * Text Position + * + * TextPosition structure specifies coordinates within an text resource. It is used mostly + * for saving script source position. + * + * Later TextPosition0 and TextPosition1 and both number types can be merged together quite easily. + * + * 0-based and 1-based + * + * Line and column numbers could be interpreted as zero-based or 1-based. Since + * both practices coexist in WebKit source base, 'int' type should be replaced with + * a dedicated wrapper types, so that compiler helped us with this ambiguity. + * + * Here we introduce 2 types of numbers: ZeroBasedNumber and OneBasedNumber and + * 2 corresponding types of TextPosition structure. While only one type ought to be enough, + * this is done to keep transition to the new types as transparent as possible: + * e.g. in areas where 0-based integers are used, TextPosition0 should be introduced. This + * way all changes will remain trackable. + * + * Later both number types can be merged in one type quite easily. + * + * For type safety and for the future type merge it is important that all operations in API + * that accept or return integer have a name explicitly defining base of integer. For this reason + * int-receiving constructors are hidden from API. + */ + +template<typename NUMBER> +class TextPosition { +public: + TextPosition(NUMBER line, NUMBER column) + : m_line(line) + , m_column(column) + { + } + TextPosition() {} + + bool operator==(const TextPosition& other) { return m_line == other.m_line && m_column == other.m_column; } + bool operator!=(const TextPosition& other) { return !((*this) == other); } + + // A 'minimum' value of position, used as a default value. + static TextPosition<NUMBER> minimumPosition() { return TextPosition<NUMBER>(NUMBER::base(), NUMBER::base()); } + + // A value with line value less than a minimum; used as an impossible position. + static TextPosition<NUMBER> belowRangePosition() { return TextPosition<NUMBER>(NUMBER::belowBase(), NUMBER::belowBase()); } + + NUMBER m_line; + NUMBER m_column; +}; + +class OneBasedNumber; + +// An int wrapper that always reminds you that the number should be 0-based +class ZeroBasedNumber { +public: + static ZeroBasedNumber fromZeroBasedInt(int zeroBasedInt) { return ZeroBasedNumber(zeroBasedInt); } + + ZeroBasedNumber() {} + + int zeroBasedInt() const { return m_value; } + + OneBasedNumber convertToOneBased() const; + + bool operator==(ZeroBasedNumber other) { return m_value == other.m_value; } + bool operator!=(ZeroBasedNumber other) { return !((*this) == other); } + + static ZeroBasedNumber base() { return 0; } + static ZeroBasedNumber belowBase() { return -1; } + +private: + ZeroBasedNumber(int value) : m_value(value) {} + int m_value; +}; + +// An int wrapper that always reminds you that the number should be 1-based +class OneBasedNumber { +public: + static OneBasedNumber fromOneBasedInt(int oneBasedInt) { return OneBasedNumber(oneBasedInt); } + OneBasedNumber() {} + + int oneBasedInt() const { return m_value; } + int convertAsZeroBasedInt() const { return m_value - 1; } + ZeroBasedNumber convertToZeroBased() const { return ZeroBasedNumber::fromZeroBasedInt(m_value - 1); } + + bool operator==(OneBasedNumber other) { return m_value == other.m_value; } + bool operator!=(OneBasedNumber other) { return !((*this) == other); } + + static OneBasedNumber base() { return 1; } + static OneBasedNumber belowBase() { return 0; } + +private: + OneBasedNumber(int value) : m_value(value) {} + int m_value; +}; + +typedef TextPosition<ZeroBasedNumber> TextPosition0; +typedef TextPosition<OneBasedNumber> TextPosition1; + +inline TextPosition0 toZeroBasedTextPosition(const TextPosition1& position) +{ + return TextPosition0(position.m_line.convertToZeroBased(), position.m_column.convertToZeroBased()); +} + +inline TextPosition1 toOneBasedTextPosition(const TextPosition0& position) +{ + return TextPosition1(position.m_line.convertToOneBased(), position.m_column.convertToOneBased()); +} + +inline OneBasedNumber ZeroBasedNumber::convertToOneBased() const +{ + return OneBasedNumber::fromOneBasedInt(m_value + 1); +} + +} + +using WTF::TextPosition0; +using WTF::TextPosition1; + +#endif // TextPosition_h diff --git a/Source/JavaScriptCore/wtf/text/WTFString.cpp b/Source/JavaScriptCore/wtf/text/WTFString.cpp new file mode 100644 index 0000000..6bb74f6 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/WTFString.cpp @@ -0,0 +1,1003 @@ +/* + * (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved. + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "config.h" +#include "WTFString.h" + +#include <stdarg.h> +#include <wtf/ASCIICType.h> +#include <wtf/text/CString.h> +#include <wtf/StringExtras.h> +#include <wtf/Vector.h> +#include <wtf/dtoa.h> +#include <wtf/unicode/UTF8.h> +#include <wtf/unicode/Unicode.h> + +using namespace std; + +namespace WTF { + +using namespace Unicode; +using namespace std; + +// Construct a string with UTF-16 data. +String::String(const UChar* characters, unsigned length) + : m_impl(characters ? StringImpl::create(characters, length) : 0) +{ +} + +// Construct a string with UTF-16 data, from a null-terminated source. +String::String(const UChar* str) +{ + if (!str) + return; + + size_t len = 0; + while (str[len] != UChar(0)) + len++; + + if (len > numeric_limits<unsigned>::max()) + CRASH(); + + m_impl = StringImpl::create(str, len); +} + +// Construct a string with latin1 data. +String::String(const char* characters, unsigned length) + : m_impl(characters ? StringImpl::create(characters, length) : 0) +{ +} + +// Construct a string with latin1 data, from a null-terminated source. +String::String(const char* characters) + : m_impl(characters ? StringImpl::create(characters) : 0) +{ +} + +void String::append(const String& str) +{ + if (str.isEmpty()) + return; + + // FIXME: This is extremely inefficient. So much so that we might want to take this + // out of String's API. We can make it better by optimizing the case where exactly + // one String is pointing at this StringImpl, but even then it's going to require a + // call to fastMalloc every single time. + if (str.m_impl) { + if (m_impl) { + UChar* data; + if (str.length() > numeric_limits<unsigned>::max() - m_impl->length()) + CRASH(); + RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data); + memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); + memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar)); + m_impl = newImpl.release(); + } else + m_impl = str.m_impl; + } +} + +void String::append(char c) +{ + // FIXME: This is extremely inefficient. So much so that we might want to take this + // out of String's API. We can make it better by optimizing the case where exactly + // one String is pointing at this StringImpl, but even then it's going to require a + // call to fastMalloc every single time. + if (m_impl) { + UChar* data; + if (m_impl->length() >= numeric_limits<unsigned>::max()) + CRASH(); + RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); + memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); + data[m_impl->length()] = c; + m_impl = newImpl.release(); + } else + m_impl = StringImpl::create(&c, 1); +} + +void String::append(UChar c) +{ + // FIXME: This is extremely inefficient. So much so that we might want to take this + // out of String's API. We can make it better by optimizing the case where exactly + // one String is pointing at this StringImpl, but even then it's going to require a + // call to fastMalloc every single time. + if (m_impl) { + UChar* data; + if (m_impl->length() >= numeric_limits<unsigned>::max()) + CRASH(); + RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); + memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); + data[m_impl->length()] = c; + m_impl = newImpl.release(); + } else + m_impl = StringImpl::create(&c, 1); +} + +String operator+(const String& a, const String& b) +{ + if (a.isEmpty()) + return b; + if (b.isEmpty()) + return a; + String c = a; + c += b; + return c; +} + +String operator+(const String& s, const char* cs) +{ + return s + String(cs); +} + +String operator+(const char* cs, const String& s) +{ + return String(cs) + s; +} + +int codePointCompare(const String& a, const String& b) +{ + return codePointCompare(a.impl(), b.impl()); +} + +void String::insert(const String& str, unsigned pos) +{ + if (str.isEmpty()) { + if (str.isNull()) + return; + if (isNull()) + m_impl = str.impl(); + return; + } + insert(str.characters(), str.length(), pos); +} + +void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) +{ + if (!m_impl) { + if (!charactersToAppend) + return; + m_impl = StringImpl::create(charactersToAppend, lengthToAppend); + return; + } + + if (!lengthToAppend) + return; + + ASSERT(charactersToAppend); + UChar* data; + if (lengthToAppend > numeric_limits<unsigned>::max() - length()) + CRASH(); + RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data); + memcpy(data, characters(), length() * sizeof(UChar)); + memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar)); + m_impl = newImpl.release(); +} + +void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position) +{ + if (position >= length()) { + append(charactersToInsert, lengthToInsert); + return; + } + + ASSERT(m_impl); + + if (!lengthToInsert) + return; + + ASSERT(charactersToInsert); + UChar* data; + if (lengthToInsert > numeric_limits<unsigned>::max() - length()) + CRASH(); + RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data); + memcpy(data, characters(), position * sizeof(UChar)); + memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar)); + memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar)); + m_impl = newImpl.release(); +} + +UChar32 String::characterStartingAt(unsigned i) const +{ + if (!m_impl || i >= m_impl->length()) + return 0; + return m_impl->characterStartingAt(i); +} + +void String::truncate(unsigned position) +{ + if (position >= length()) + return; + UChar* data; + RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data); + memcpy(data, characters(), position * sizeof(UChar)); + m_impl = newImpl.release(); +} + +void String::remove(unsigned position, int lengthToRemove) +{ + if (lengthToRemove <= 0) + return; + if (position >= length()) + return; + if (static_cast<unsigned>(lengthToRemove) > length() - position) + lengthToRemove = length() - position; + UChar* data; + RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data); + memcpy(data, characters(), position * sizeof(UChar)); + memcpy(data + position, characters() + position + lengthToRemove, + (length() - lengthToRemove - position) * sizeof(UChar)); + m_impl = newImpl.release(); +} + +String String::substring(unsigned pos, unsigned len) const +{ + if (!m_impl) + return String(); + return m_impl->substring(pos, len); +} + +String String::substringSharingImpl(unsigned offset, unsigned length) const +{ + // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar). + + unsigned stringLength = this->length(); + offset = min(offset, stringLength); + length = min(length, stringLength - offset); + + if (!offset && length == stringLength) + return *this; + return String(StringImpl::create(m_impl, offset, length)); +} + +String String::lower() const +{ + if (!m_impl) + return String(); + return m_impl->lower(); +} + +String String::upper() const +{ + if (!m_impl) + return String(); + return m_impl->upper(); +} + +String String::stripWhiteSpace() const +{ + if (!m_impl) + return String(); + return m_impl->stripWhiteSpace(); +} + +String String::simplifyWhiteSpace() const +{ + if (!m_impl) + return String(); + return m_impl->simplifyWhiteSpace(); +} + +String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const +{ + if (!m_impl) + return String(); + return m_impl->removeCharacters(findMatch); +} + +String String::foldCase() const +{ + if (!m_impl) + return String(); + return m_impl->foldCase(); +} + +bool String::percentage(int& result) const +{ + if (!m_impl || !m_impl->length()) + return false; + + if ((*m_impl)[m_impl->length() - 1] != '%') + return false; + + result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1); + return true; +} + +const UChar* String::charactersWithNullTermination() +{ + if (!m_impl) + return 0; + if (m_impl->hasTerminatingNullCharacter()) + return m_impl->characters(); + m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl); + return m_impl->characters(); +} + +String String::format(const char *format, ...) +{ +#if PLATFORM(QT) + // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf. + // https://bugs.webkit.org/show_bug.cgi?id=18994 + va_list args; + va_start(args, format); + + QString buffer; + buffer.vsprintf(format, args); + + va_end(args); + + QByteArray ba = buffer.toUtf8(); + return StringImpl::create(ba.constData(), ba.length()); + +#elif OS(WINCE) + va_list args; + va_start(args, format); + + Vector<char, 256> buffer; + + int bufferSize = 256; + buffer.resize(bufferSize); + for (;;) { + int written = vsnprintf(buffer.data(), bufferSize, format, args); + va_end(args); + + if (written == 0) + return String(""); + if (written > 0) + return StringImpl::create(buffer.data(), written); + + bufferSize <<= 1; + buffer.resize(bufferSize); + va_start(args, format); + } + +#else + va_list args; + va_start(args, format); + + Vector<char, 256> buffer; + + // Do the format once to get the length. +#if COMPILER(MSVC) + int result = _vscprintf(format, args); +#else + char ch; + int result = vsnprintf(&ch, 1, format, args); + // We need to call va_end() and then va_start() again here, as the + // contents of args is undefined after the call to vsnprintf + // according to http://man.cx/snprintf(3) + // + // Not calling va_end/va_start here happens to work on lots of + // systems, but fails e.g. on 64bit Linux. + va_end(args); + va_start(args, format); +#endif + + if (result == 0) + return String(""); + if (result < 0) + return String(); + unsigned len = result; + buffer.grow(len + 1); + + // Now do the formatting again, guaranteed to fit. + vsnprintf(buffer.data(), buffer.size(), format, args); + + va_end(args); + + return StringImpl::create(buffer.data(), len); +#endif +} + +String String::number(short n) +{ + return String::format("%hd", n); +} + +String String::number(unsigned short n) +{ + return String::format("%hu", n); +} + +String String::number(int n) +{ + return String::format("%d", n); +} + +String String::number(unsigned n) +{ + return String::format("%u", n); +} + +String String::number(long n) +{ + return String::format("%ld", n); +} + +String String::number(unsigned long n) +{ + return String::format("%lu", n); +} + +String String::number(long long n) +{ +#if OS(WINDOWS) && !PLATFORM(QT) + return String::format("%I64i", n); +#else + return String::format("%lli", n); +#endif +} + +String String::number(unsigned long long n) +{ +#if OS(WINDOWS) && !PLATFORM(QT) + return String::format("%I64u", n); +#else + return String::format("%llu", n); +#endif +} + +String String::number(double n) +{ + return String::format("%.6lg", n); +} + +int String::toIntStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntStrict(ok, base); +} + +unsigned String::toUIntStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUIntStrict(ok, base); +} + +int64_t String::toInt64Strict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt64Strict(ok, base); +} + +uint64_t String::toUInt64Strict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt64Strict(ok, base); +} + +intptr_t String::toIntPtrStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntPtrStrict(ok, base); +} + + +int String::toInt(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt(ok); +} + +unsigned String::toUInt(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt(ok); +} + +int64_t String::toInt64(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt64(ok); +} + +uint64_t String::toUInt64(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt64(ok); +} + +intptr_t String::toIntPtr(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntPtr(ok); +} + +double String::toDouble(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0.0; + } + return m_impl->toDouble(ok); +} + +float String::toFloat(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0.0f; + } + return m_impl->toFloat(ok); +} + +String String::threadsafeCopy() const +{ + if (!m_impl) + return String(); + return m_impl->threadsafeCopy(); +} + +String String::crossThreadString() const +{ + if (!m_impl) + return String(); + return m_impl->crossThreadString(); +} + +void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const +{ + result.clear(); + + unsigned startPos = 0; + size_t endPos; + while ((endPos = find(separator, startPos)) != notFound) { + if (allowEmptyEntries || startPos != endPos) + result.append(substring(startPos, endPos - startPos)); + startPos = endPos + separator.length(); + } + if (allowEmptyEntries || startPos != length()) + result.append(substring(startPos)); +} + +void String::split(const String& separator, Vector<String>& result) const +{ + return split(separator, false, result); +} + +void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const +{ + result.clear(); + + unsigned startPos = 0; + size_t endPos; + while ((endPos = find(separator, startPos)) != notFound) { + if (allowEmptyEntries || startPos != endPos) + result.append(substring(startPos, endPos - startPos)); + startPos = endPos + 1; + } + if (allowEmptyEntries || startPos != length()) + result.append(substring(startPos)); +} + +void String::split(UChar separator, Vector<String>& result) const +{ + return split(String(&separator, 1), false, result); +} + +CString String::ascii() const +{ + // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are + // preserved, characters outside of this range are converted to '?'. + + unsigned length = this->length(); + const UChar* characters = this->characters(); + + char* characterBuffer; + CString result = CString::newUninitialized(length, characterBuffer); + + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; + } + + return result; +} + +CString String::latin1() const +{ + // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are + // preserved, characters outside of this range are converted to '?'. + + unsigned length = this->length(); + const UChar* characters = this->characters(); + + char* characterBuffer; + CString result = CString::newUninitialized(length, characterBuffer); + + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + characterBuffer[i] = ch > 0xff ? '?' : ch; + } + + return result; +} + +// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available. +static inline void putUTF8Triple(char*& buffer, UChar ch) +{ + ASSERT(ch >= 0x0800); + *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); + *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); + *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); +} + +CString String::utf8(bool strict) const +{ + unsigned length = this->length(); + const UChar* characters = this->characters(); + + // Allocate a buffer big enough to hold all the characters + // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). + // Optimization ideas, if we find this function is hot: + // * We could speculatively create a CStringBuffer to contain 'length' + // characters, and resize if necessary (i.e. if the buffer contains + // non-ascii characters). (Alternatively, scan the buffer first for + // ascii characters, so we know this will be sufficient). + // * We could allocate a CStringBuffer with an appropriate size to + // have a good chance of being able to write the string into the + // buffer without reallocing (say, 1.5 x length). + if (length > numeric_limits<unsigned>::max() / 3) + return CString(); + Vector<char, 1024> bufferVector(length * 3); + + char* buffer = bufferVector.data(); + ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict); + ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion + + // Only produced from strict conversion. + if (result == sourceIllegal) + return CString(); + + // Check for an unconverted high surrogate. + if (result == sourceExhausted) { + if (strict) + return CString(); + // This should be one unpaired high surrogate. Treat it the same + // was as an unpaired high surrogate would have been handled in + // the middle of a string with non-strict conversion - which is + // to say, simply encode it to UTF-8. + ASSERT((characters + 1) == (this->characters() + length)); + ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); + // There should be room left, since one UChar hasn't been converted. + ASSERT((buffer + 3) <= (buffer + bufferVector.size())); + putUTF8Triple(buffer, *characters); + } + + return CString(bufferVector.data(), buffer - bufferVector.data()); +} + +String String::fromUTF8(const char* stringStart, size_t length) +{ + if (length > numeric_limits<unsigned>::max()) + CRASH(); + + if (!stringStart) + return String(); + + // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be + // the right length, if there are any multi-byte sequences this buffer will be too large. + UChar* buffer; + String stringBuffer(StringImpl::createUninitialized(length, buffer)); + UChar* bufferEnd = buffer + length; + + // Try converting into the buffer. + const char* stringCurrent = stringStart; + if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK) + return String(); + + // stringBuffer is full (the input must have been all ascii) so just return it! + if (buffer == bufferEnd) + return stringBuffer; + + // stringBuffer served its purpose as a buffer, copy the contents out into a new string. + unsigned utf16Length = buffer - stringBuffer.characters(); + ASSERT(utf16Length < length); + return String(stringBuffer.characters(), utf16Length); +} + +String String::fromUTF8(const char* string) +{ + if (!string) + return String(); + return fromUTF8(string, strlen(string)); +} + +String String::fromUTF8WithLatin1Fallback(const char* string, size_t size) +{ + String utf8 = fromUTF8(string, size); + if (!utf8) + return String(string, size); + return utf8; +} + +// String Operations + +static bool isCharacterAllowedInBase(UChar c, int base) +{ + if (c > 0x7F) + return false; + if (isASCIIDigit(c)) + return c - '0' < base; + if (isASCIIAlpha(c)) { + if (base > 36) + base = 36; + return (c >= 'a' && c < 'a' + base - 10) + || (c >= 'A' && c < 'A' + base - 10); + } + return false; +} + +template <typename IntegralType> +static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base) +{ + static const IntegralType integralMax = numeric_limits<IntegralType>::max(); + static const bool isSigned = numeric_limits<IntegralType>::is_signed; + const IntegralType maxMultiplier = integralMax / base; + + IntegralType value = 0; + bool isOk = false; + bool isNegative = false; + + if (!data) + goto bye; + + // skip leading whitespace + while (length && isSpaceOrNewline(*data)) { + length--; + data++; + } + + if (isSigned && length && *data == '-') { + length--; + data++; + isNegative = true; + } else if (length && *data == '+') { + length--; + data++; + } + + if (!length || !isCharacterAllowedInBase(*data, base)) + goto bye; + + while (length && isCharacterAllowedInBase(*data, base)) { + length--; + IntegralType digitValue; + UChar c = *data; + if (isASCIIDigit(c)) + digitValue = c - '0'; + else if (c >= 'a') + digitValue = c - 'a' + 10; + else + digitValue = c - 'A' + 10; + + if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative)) + goto bye; + + value = base * value + digitValue; + data++; + } + +#if COMPILER(MSVC) +#pragma warning(push, 0) +#pragma warning(disable:4146) +#endif + + if (isNegative) + value = -value; + +#if COMPILER(MSVC) +#pragma warning(pop) +#endif + + // skip trailing space + while (length && isSpaceOrNewline(*data)) { + length--; + data++; + } + + if (!length) + isOk = true; +bye: + if (ok) + *ok = isOk; + return isOk ? value : 0; +} + +static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length) +{ + size_t i = 0; + + // Allow leading spaces. + for (; i != length; ++i) { + if (!isSpaceOrNewline(data[i])) + break; + } + + // Allow sign. + if (i != length && (data[i] == '+' || data[i] == '-')) + ++i; + + // Allow digits. + for (; i != length; ++i) { + if (!isASCIIDigit(data[i])) + break; + } + + return i; +} + +int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<int>(data, length, ok, base); +} + +unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<unsigned>(data, length, ok, base); +} + +int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<int64_t>(data, length, ok, base); +} + +uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<uint64_t>(data, length, ok, base); +} + +intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<intptr_t>(data, length, ok, base); +} + +int charactersToInt(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<intptr_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +double charactersToDouble(const UChar* data, size_t length, bool* ok) +{ + if (!length) { + if (ok) + *ok = false; + return 0.0; + } + + Vector<char, 256> bytes(length + 1); + for (unsigned i = 0; i < length; ++i) + bytes[i] = data[i] < 0x7F ? data[i] : '?'; + bytes[length] = '\0'; + char* end; + double val = WTF::strtod(bytes.data(), &end); + if (ok) + *ok = (end == 0 || *end == '\0'); + return val; +} + +float charactersToFloat(const UChar* data, size_t length, bool* ok) +{ + // FIXME: This will return ok even when the string fits into a double but not a float. + return static_cast<float>(charactersToDouble(data, length, ok)); +} + +} // namespace WTF + +#ifndef NDEBUG +// For use in the debugger +String* string(const char*); +Vector<char> asciiDebug(StringImpl* impl); +Vector<char> asciiDebug(String& string); + +String* string(const char* s) +{ + // leaks memory! + return new String(s); +} + +Vector<char> asciiDebug(StringImpl* impl) +{ + if (!impl) + return asciiDebug(String("[null]").impl()); + + Vector<char> buffer; + unsigned length = impl->length(); + const UChar* characters = impl->characters(); + + buffer.resize(length + 1); + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + buffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; + } + buffer[length] = '\0'; + + return buffer; +} + +Vector<char> asciiDebug(String& string) +{ + return asciiDebug(string.impl()); +} + +#endif diff --git a/Source/JavaScriptCore/wtf/text/WTFString.h b/Source/JavaScriptCore/wtf/text/WTFString.h new file mode 100644 index 0000000..4d853d2 --- /dev/null +++ b/Source/JavaScriptCore/wtf/text/WTFString.h @@ -0,0 +1,502 @@ +/* + * (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef WTFString_h +#define WTFString_h + +// This file would be called String.h, but that conflicts with <string.h> +// on systems without case-sensitive file systems. + +#include "StringImpl.h" + +#ifdef __OBJC__ +#include <objc/objc.h> +#endif + +#if PLATFORM(CF) +typedef const struct __CFString * CFStringRef; +#endif + +#if PLATFORM(QT) +QT_BEGIN_NAMESPACE +class QString; +QT_END_NAMESPACE +#include <QDataStream> +#endif + +#if PLATFORM(WX) +class wxString; +#endif + +#if PLATFORM(HAIKU) +class BString; +#endif + +#if PLATFORM(BREWMP) +// AECHAR is defined in AEEStdDef.h, but don't include it here to avoid conflicts. +#ifndef _AECHAR_DEFINED +typedef uint16 AECHAR; +#define _AECHAR_DEFINED +#endif +#endif + +namespace WTF { + +class CString; +struct StringHash; + +// Declarations of string operations + +bool charactersAreAllASCII(const UChar*, size_t); +int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); +unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); +int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); +uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); +intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10); + +int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage + +double charactersToDouble(const UChar*, size_t, bool* ok = 0); +float charactersToFloat(const UChar*, size_t, bool* ok = 0); + +template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters(const UChar*, size_t); + +class String { +public: + // Construct a null string, distinguishable from an empty string. + String() { } + + // Construct a string with UTF-16 data. + String(const UChar* characters, unsigned length); + + // Construct a string with UTF-16 data, from a null-terminated source. + String(const UChar*); + + // Construct a string with latin1 data. + String(const char* characters, unsigned length); + + // Construct a string with latin1 data, from a null-terminated source. + String(const char* characters); + + // Construct a string referencing an existing StringImpl. + String(StringImpl* impl) : m_impl(impl) { } + String(PassRefPtr<StringImpl> impl) : m_impl(impl) { } + String(RefPtr<StringImpl> impl) : m_impl(impl) { } + + // Inline the destructor. + ALWAYS_INLINE ~String() { } + + void swap(String& o) { m_impl.swap(o.m_impl); } + + static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); } + template<size_t inlineCapacity> + static String adopt(Vector<UChar, inlineCapacity>& vector) { return StringImpl::adopt(vector); } + + bool isNull() const { return !m_impl; } + bool isEmpty() const { return !m_impl || !m_impl->length(); } + + StringImpl* impl() const { return m_impl.get(); } + + unsigned length() const + { + if (!m_impl) + return 0; + return m_impl->length(); + } + + const UChar* characters() const + { + if (!m_impl) + return 0; + return m_impl->characters(); + } + + CString ascii() const; + CString latin1() const; + CString utf8(bool strict = false) const; + + UChar operator[](unsigned index) const + { + if (!m_impl || index >= m_impl->length()) + return 0; + return m_impl->characters()[index]; + } + + static String number(short); + static String number(unsigned short); + static String number(int); + static String number(unsigned); + static String number(long); + static String number(unsigned long); + static String number(long long); + static String number(unsigned long long); + static String number(double); + + // Find a single character or string, also with match function & latin1 forms. + size_t find(UChar c, unsigned start = 0) const + { return m_impl ? m_impl->find(c, start) : notFound; } + size_t find(const String& str, unsigned start = 0) const + { return m_impl ? m_impl->find(str.impl(), start) : notFound; } + size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const + { return m_impl ? m_impl->find(matchFunction, start) : notFound; } + size_t find(const char* str, unsigned start = 0) const + { return m_impl ? m_impl->find(str, start) : notFound; } + + // Find the last instance of a single character or string. + size_t reverseFind(UChar c, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFind(c, start) : notFound; } + size_t reverseFind(const String& str, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; } + + // Case insensitive string matching. + size_t findIgnoringCase(const char* str, unsigned start = 0) const + { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; } + size_t findIgnoringCase(const String& str, unsigned start = 0) const + { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; } + size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; } + + // Wrappers for find & reverseFind adding dynamic sensitivity check. + size_t find(const char* str, unsigned start, bool caseSensitive) const + { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); } + size_t find(const String& str, unsigned start, bool caseSensitive) const + { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); } + size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const + { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); } + + const UChar* charactersWithNullTermination(); + + UChar32 characterStartingAt(unsigned) const; // Ditto. + + bool contains(UChar c) const { return find(c) != notFound; } + bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } + bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } + + bool startsWith(const String& s, bool caseSensitive = true) const + { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); } + bool endsWith(const String& s, bool caseSensitive = true) const + { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); } + + void append(const String&); + void append(char); + void append(UChar); + void append(const UChar*, unsigned length); + void insert(const String&, unsigned pos); + void insert(const UChar*, unsigned length, unsigned pos); + + String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; } + String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; } + String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; } + String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; } + + void makeLower() { if (m_impl) m_impl = m_impl->lower(); } + void makeUpper() { if (m_impl) m_impl = m_impl->upper(); } + void makeSecure(UChar aChar) { if (m_impl) m_impl = m_impl->secure(aChar); } + + void truncate(unsigned len); + void remove(unsigned pos, int len = 1); + + String substring(unsigned pos, unsigned len = UINT_MAX) const; + String substringSharingImpl(unsigned pos, unsigned len = UINT_MAX) const; + String left(unsigned len) const { return substring(0, len); } + String right(unsigned len) const { return substring(length() - len, len); } + + // Returns a lowercase/uppercase version of the string + String lower() const; + String upper() const; + + String stripWhiteSpace() const; + String simplifyWhiteSpace() const; + + String removeCharacters(CharacterMatchFunctionPtr) const; + template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const; + + // Return the string with case folded for case insensitive comparison. + String foldCase() const; + +#if !PLATFORM(QT) + static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2); +#else + static String format(const char *, ...); +#endif + + // Returns an uninitialized string. The characters needs to be written + // into the buffer returned in data before the returned string is used. + // Failure to do this will have unpredictable results. + static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); } + + void split(const String& separator, Vector<String>& result) const; + void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const; + void split(UChar separator, Vector<String>& result) const; + void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const; + + int toIntStrict(bool* ok = 0, int base = 10) const; + unsigned toUIntStrict(bool* ok = 0, int base = 10) const; + int64_t toInt64Strict(bool* ok = 0, int base = 10) const; + uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const; + intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const; + + int toInt(bool* ok = 0) const; + unsigned toUInt(bool* ok = 0) const; + int64_t toInt64(bool* ok = 0) const; + uint64_t toUInt64(bool* ok = 0) const; + intptr_t toIntPtr(bool* ok = 0) const; + double toDouble(bool* ok = 0) const; + float toFloat(bool* ok = 0) const; + + bool percentage(int& percentage) const; + + // Returns a StringImpl suitable for use on another thread. + String crossThreadString() const; + // Makes a deep copy. Helpful only if you need to use a String on another thread + // (use crossThreadString if the method call doesn't need to be threadsafe). + // Since the underlying StringImpl objects are immutable, there's no other reason + // to ever prefer copy() over plain old assignment. + String threadsafeCopy() const; + + // Prevent Strings from being implicitly convertable to bool as it will be ambiguous on any platform that + // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*). + typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA* (String::*UnspecifiedBoolTypeA); + typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB* (String::*UnspecifiedBoolTypeB); + operator UnspecifiedBoolTypeA() const; + operator UnspecifiedBoolTypeB() const; + +#if PLATFORM(CF) + String(CFStringRef); + CFStringRef createCFString() const; +#endif + +#ifdef __OBJC__ + String(NSString*); + + // This conversion maps NULL to "", which loses the meaning of NULL, but we + // need this mapping because AppKit crashes when passed nil NSStrings. + operator NSString*() const { if (!m_impl) return @""; return *m_impl; } +#endif + +#if PLATFORM(QT) + String(const QString&); + String(const QStringRef&); + operator QString() const; +#endif + +#if PLATFORM(WX) + String(const wxString&); + operator wxString() const; +#endif + +#if PLATFORM(HAIKU) + String(const BString&); + operator BString() const; +#endif + +#if PLATFORM(BREWMP) + String(const AECHAR*); +#endif + + // String::fromUTF8 will return a null string if + // the input data contains invalid UTF-8 sequences. + static String fromUTF8(const char*, size_t); + static String fromUTF8(const char*); + + // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8. + static String fromUTF8WithLatin1Fallback(const char*, size_t); + + // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3. + WTF::Unicode::Direction defaultWritingDirection() const { return m_impl ? m_impl->defaultWritingDirection() : WTF::Unicode::LeftToRight; } + + bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); } + + // Hash table deleted values, which are only constructed and never copied or destroyed. + String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { } + bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); } + +private: + RefPtr<StringImpl> m_impl; +}; + +#if PLATFORM(QT) +QDataStream& operator<<(QDataStream& stream, const String& str); +QDataStream& operator>>(QDataStream& stream, String& str); +#endif + +String operator+(const String&, const String&); +String operator+(const String&, const char*); +String operator+(const char*, const String&); + +inline String& operator+=(String& a, const String& b) { a.append(b); return a; } + +inline bool operator==(const String& a, const String& b) { return equal(a.impl(), b.impl()); } +inline bool operator==(const String& a, const char* b) { return equal(a.impl(), b); } +inline bool operator==(const char* a, const String& b) { return equal(a, b.impl()); } + +inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); } +inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), b); } +inline bool operator!=(const char* a, const String& b) { return !equal(a, b.impl()); } + +inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); } +inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); } +inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); } + +inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase) +{ + return ignoreCase ? equalIgnoringCase(a, b) : (a == b); +} + +inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); } + +inline bool operator!(const String& str) { return str.isNull(); } + +inline void swap(String& a, String& b) { a.swap(b); } + +// Definitions of string operations + +#ifdef __OBJC__ +// This is for situations in WebKit where the long standing behavior has been +// "nil if empty", so we try to maintain longstanding behavior for the sake of +// entrenched clients +inline NSString* nsStringNilIfEmpty(const String& str) { return str.isEmpty() ? nil : (NSString*)str; } +#endif + +inline bool charactersAreAllASCII(const UChar* characters, size_t length) +{ + UChar ored = 0; + for (size_t i = 0; i < length; ++i) + ored |= characters[i]; + return !(ored & 0xFF80); +} + +int codePointCompare(const String&, const String&); + +inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) +{ + while (index < length) { + if (characters[index] == matchCharacter) + return index; + ++index; + } + return notFound; +} + +inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) +{ + while (index < length) { + if (matchFunction(characters[index])) + return index; + ++index; + } + return notFound; +} + +inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX) +{ + if (!length) + return notFound; + if (index >= length) + index = length - 1; + while (characters[index] != matchCharacter) { + if (!index--) + return notFound; + } + return index; +} + +inline void append(Vector<UChar>& vector, const String& string) +{ + vector.append(string.characters(), string.length()); +} + +inline void appendNumber(Vector<UChar>& vector, unsigned char number) +{ + int numberLength = number > 99 ? 3 : (number > 9 ? 2 : 1); + size_t vectorSize = vector.size(); + vector.grow(vectorSize + numberLength); + + switch (numberLength) { + case 3: + vector[vectorSize + 2] = number % 10 + '0'; + number /= 10; + + case 2: + vector[vectorSize + 1] = number % 10 + '0'; + number /= 10; + + case 1: + vector[vectorSize] = number % 10 + '0'; + } +} + +template<bool isSpecialCharacter(UChar)> inline bool isAllSpecialCharacters(const UChar* characters, size_t length) +{ + for (size_t i = 0; i < length; ++i) { + if (!isSpecialCharacter(characters[i])) + return false; + } + return true; +} + +template<bool isSpecialCharacter(UChar)> inline bool String::isAllSpecialCharacters() const +{ + return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters(), length()); +} + +// StringHash is the default hash for String +template<typename T> struct DefaultHash; +template<> struct DefaultHash<String> { + typedef StringHash Hash; +}; + +template <> struct VectorTraits<String> : SimpleClassVectorTraits +{ + static const bool canInitializeWithMemset = true; +}; + +} + +using WTF::CString; +using WTF::String; +using WTF::append; +using WTF::appendNumber; +using WTF::charactersAreAllASCII; +using WTF::charactersToIntStrict; +using WTF::charactersToUIntStrict; +using WTF::charactersToInt64Strict; +using WTF::charactersToUInt64Strict; +using WTF::charactersToIntPtrStrict; +using WTF::charactersToInt; +using WTF::charactersToUInt; +using WTF::charactersToInt64; +using WTF::charactersToUInt64; +using WTF::charactersToIntPtr; +using WTF::charactersToDouble; +using WTF::charactersToFloat; +using WTF::equal; +using WTF::equalIgnoringCase; +using WTF::find; +using WTF::isAllSpecialCharacters; +using WTF::isSpaceOrNewline; +using WTF::reverseFind; + +#endif |