diff options
author | Steve Block <steveblock@google.com> | 2010-04-27 16:31:00 +0100 |
---|---|---|
committer | Steve Block <steveblock@google.com> | 2010-05-11 14:42:12 +0100 |
commit | dcc8cf2e65d1aa555cce12431a16547e66b469ee (patch) | |
tree | 92a8d65cd5383bca9749f5327fb5e440563926e6 /JavaScriptCore/wtf/text | |
parent | ccac38a6b48843126402088a309597e682f40fe6 (diff) | |
download | external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.zip external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.tar.gz external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.tar.bz2 |
Merge webkit.org at r58033 : Initial merge by git
Change-Id: If006c38561af287c50cd578d251629b51e4d8cd1
Diffstat (limited to 'JavaScriptCore/wtf/text')
-rw-r--r-- | JavaScriptCore/wtf/text/AtomicString.cpp | 327 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/AtomicString.h | 166 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/AtomicStringImpl.h | 36 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/CString.cpp | 102 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/CString.h | 81 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/StringBuffer.h | 77 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/StringHash.h | 268 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/StringImpl.cpp | 953 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/StringImpl.h | 389 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/StringImplBase.h | 103 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/WTFString.cpp | 960 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/WTFString.h | 398 |
12 files changed, 3860 insertions, 0 deletions
diff --git a/JavaScriptCore/wtf/text/AtomicString.cpp b/JavaScriptCore/wtf/text/AtomicString.cpp new file mode 100644 index 0000000..79b9ab5 --- /dev/null +++ b/JavaScriptCore/wtf/text/AtomicString.cpp @@ -0,0 +1,327 @@ +/* + * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include "config.h" + +#ifdef SKIP_STATIC_CONSTRUCTORS_ON_GCC +#define ATOMICSTRING_HIDE_GLOBALS 1 +#endif + +#include "AtomicString.h" + +#include "StaticConstructors.h" +#include "StringHash.h" +#include <wtf/Threading.h> +#include <wtf/HashSet.h> +#include <wtf/WTFThreadData.h> + +namespace WebCore { + +class AtomicStringTable { +public: + static AtomicStringTable* create() + { + AtomicStringTable* table = new AtomicStringTable; + + WTFThreadData& data = wtfThreadData(); + data.m_atomicStringTable = table; + data.m_atomicStringTableDestructor = AtomicStringTable::destroy; + + return table; + } + + HashSet<StringImpl*>& table() + { + return m_table; + } + +private: + static void destroy(AtomicStringTable* table) + { + delete table; + } + + HashSet<StringImpl*> m_table; +}; + +static inline HashSet<StringImpl*>& stringTable() +{ + // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor). + AtomicStringTable* table = wtfThreadData().atomicStringTable(); + if (UNLIKELY(!table)) + table = AtomicStringTable::create(); + return table->table(); +} + +struct CStringTranslator { + static unsigned hash(const char* c) + { + return StringImpl::computeHash(c); + } + + static bool equal(StringImpl* r, const char* s) + { + int length = r->length(); + const UChar* d = r->characters(); + for (int i = 0; i != length; ++i) { + unsigned char c = s[i]; + if (d[i] != c) + return false; + } + return s[length] == 0; + } + + static void translate(StringImpl*& location, const char* const& c, unsigned hash) + { + location = StringImpl::create(c).releaseRef(); + location->setHash(hash); + location->setInTable(); + } +}; + +bool operator==(const AtomicString& a, const char* b) +{ + StringImpl* impl = a.impl(); + if ((!impl || !impl->characters()) && !b) + return true; + if ((!impl || !impl->characters()) || !b) + return false; + return CStringTranslator::equal(impl, b); +} + +PassRefPtr<StringImpl> AtomicString::add(const char* c) +{ + if (!c) + return 0; + if (!*c) + return StringImpl::empty(); + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<const char*, CStringTranslator>(c); + if (!addResult.second) + return *addResult.first; + return adoptRef(*addResult.first); +} + +struct UCharBuffer { + const UChar* s; + unsigned length; +}; + +static inline bool equal(StringImpl* string, const UChar* characters, unsigned length) +{ + if (string->length() != length) + return false; + + // FIXME: perhaps we should have a more abstract macro that indicates when + // going 4 bytes at a time is unsafe +#if CPU(ARM) || CPU(SH4) + const UChar* stringCharacters = string->characters(); + for (unsigned i = 0; i != length; ++i) { + if (*stringCharacters++ != *characters++) + return false; + } + return true; +#else + /* Do it 4-bytes-at-a-time on architectures where it's safe */ + + const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters()); + const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters); + + unsigned halfLength = length >> 1; + for (unsigned i = 0; i != halfLength; ++i) { + if (*stringCharacters++ != *bufferCharacters++) + return false; + } + + if (length & 1 && *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters)) + return false; + + return true; +#endif +} + +struct UCharBufferTranslator { + static unsigned hash(const UCharBuffer& buf) + { + return StringImpl::computeHash(buf.s, buf.length); + } + + static bool equal(StringImpl* const& str, const UCharBuffer& buf) + { + return WebCore::equal(str, buf.s, buf.length); + } + + static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) + { + location = StringImpl::create(buf.s, buf.length).releaseRef(); + location->setHash(hash); + location->setInTable(); + } +}; + +struct HashAndCharacters { + unsigned hash; + const UChar* characters; + unsigned length; +}; + +struct HashAndCharactersTranslator { + static unsigned hash(const HashAndCharacters& buffer) + { + ASSERT(buffer.hash == StringImpl::computeHash(buffer.characters, buffer.length)); + return buffer.hash; + } + + static bool equal(StringImpl* const& string, const HashAndCharacters& buffer) + { + return WebCore::equal(string, buffer.characters, buffer.length); + } + + static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash) + { + location = StringImpl::create(buffer.characters, buffer.length).releaseRef(); + location->setHash(hash); + location->setInTable(); + } +}; + +PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) +{ + if (!s) + return 0; + + if (length == 0) + return StringImpl::empty(); + + UCharBuffer buf = { s, length }; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(buf); + + // If the string is newly-translated, then we need to adopt it. + // The boolean in the pair tells us if that is so. + return addResult.second ? adoptRef(*addResult.first) : *addResult.first; +} + +PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash) +{ + ASSERT(s); + ASSERT(existingHash); + + if (length == 0) + return StringImpl::empty(); + + HashAndCharacters buffer = { existingHash, s, length }; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndCharacters, HashAndCharactersTranslator>(buffer); + if (!addResult.second) + return *addResult.first; + return adoptRef(*addResult.first); +} + +PassRefPtr<StringImpl> AtomicString::add(const UChar* s) +{ + if (!s) + return 0; + + int length = 0; + while (s[length] != UChar(0)) + length++; + + if (length == 0) + return StringImpl::empty(); + + UCharBuffer buf = {s, length}; + pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(buf); + + // If the string is newly-translated, then we need to adopt it. + // The boolean in the pair tells us if that is so. + return addResult.second ? adoptRef(*addResult.first) : *addResult.first; +} + +PassRefPtr<StringImpl> AtomicString::add(StringImpl* r) +{ + if (!r || r->inTable()) + return r; + + if (r->length() == 0) + return StringImpl::empty(); + + StringImpl* result = *stringTable().add(r).first; + if (result == r) + r->setInTable(); + return result; +} + +AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash) +{ + ASSERT(s); + ASSERT(existingHash); + + if (length == 0) + return static_cast<AtomicStringImpl*>(StringImpl::empty()); + + HashAndCharacters buffer = { existingHash, s, length }; + HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer); + if (iterator == stringTable().end()) + return 0; + return static_cast<AtomicStringImpl*>(*iterator); +} + +void AtomicString::remove(StringImpl* r) +{ + stringTable().remove(r); +} + +AtomicString AtomicString::lower() const +{ + // Note: This is a hot function in the Dromaeo benchmark. + StringImpl* impl = this->impl(); + RefPtr<StringImpl> newImpl = impl->lower(); + if (LIKELY(newImpl == impl)) + return *this; + return AtomicString(newImpl); +} + +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, nullAtom) +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, emptyAtom, "") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, textAtom, "#text") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, commentAtom, "#comment") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, starAtom, "*") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlAtom, "xml") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlnsAtom, "xmlns") + +void AtomicString::init() +{ + static bool initialized; + if (!initialized) { + // Initialization is not thread safe, so this function must be called from the main thread first. + ASSERT(isMainThread()); + + // Use placement new to initialize the globals. + new ((void*)&nullAtom) AtomicString; + new ((void*)&emptyAtom) AtomicString(""); + new ((void*)&textAtom) AtomicString("#text"); + new ((void*)&commentAtom) AtomicString("#comment"); + new ((void*)&starAtom) AtomicString("*"); + new ((void*)&xmlAtom) AtomicString("xml"); + new ((void*)&xmlnsAtom) AtomicString("xmlns"); + + initialized = true; + } +} + +} diff --git a/JavaScriptCore/wtf/text/AtomicString.h b/JavaScriptCore/wtf/text/AtomicString.h new file mode 100644 index 0000000..9db70f4 --- /dev/null +++ b/JavaScriptCore/wtf/text/AtomicString.h @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2004, 2005, 2006, 2008 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef AtomicString_h +#define AtomicString_h + +#include "AtomicStringImpl.h" +#include "WTFString.h" + +// Define 'NO_IMPLICIT_ATOMICSTRING' before including this header, +// to disallow (expensive) implicit String-->AtomicString conversions. +#ifdef NO_IMPLICIT_ATOMICSTRING +#define ATOMICSTRING_CONVERSION explicit +#else +#define ATOMICSTRING_CONVERSION +#endif + +// FIXME: This is a temporary layering violation while we move string code to WTF. +// Landing the file moves in one patch, will follow on with patches to change the namespaces. +namespace WebCore { + +struct AtomicStringHash; + +class AtomicString : public FastAllocBase { +public: + static void init(); + + AtomicString() { } + AtomicString(const char* s) : m_string(add(s)) { } + AtomicString(const UChar* s, unsigned length) : m_string(add(s, length)) { } + AtomicString(const UChar* s, unsigned length, unsigned existingHash) : m_string(add(s, length, existingHash)) { } + AtomicString(const UChar* s) : m_string(add(s)) { } + ATOMICSTRING_CONVERSION AtomicString(StringImpl* imp) : m_string(add(imp)) { } + AtomicString(AtomicStringImpl* imp) : m_string(imp) { } + ATOMICSTRING_CONVERSION AtomicString(const String& s) : m_string(add(s.impl())) { } + + // Hash table deleted values, which are only constructed and never copied or destroyed. + AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { } + bool isHashTableDeletedValue() const { return m_string.isHashTableDeletedValue(); } + + static AtomicStringImpl* find(const UChar* s, unsigned length, unsigned existingHash); + + operator const String&() const { return m_string; } + const String& string() const { return m_string; }; + + AtomicStringImpl* impl() const { return static_cast<AtomicStringImpl *>(m_string.impl()); } + + const UChar* characters() const { return m_string.characters(); } + unsigned length() const { return m_string.length(); } + + UChar operator[](unsigned int i) const { return m_string[i]; } + + bool contains(UChar c) const { return m_string.contains(c); } + bool contains(const char* s, bool caseSensitive = true) const + { return m_string.contains(s, caseSensitive); } + bool contains(const String& s, bool caseSensitive = true) const + { return m_string.contains(s, caseSensitive); } + + int find(UChar c, int start = 0) const { return m_string.find(c, start); } + int find(const char* s, int start = 0, bool caseSentitive = true) const + { return m_string.find(s, start, caseSentitive); } + int find(const String& s, int start = 0, bool caseSentitive = true) const + { return m_string.find(s, start, caseSentitive); } + + bool startsWith(const String& s, bool caseSensitive = true) const + { return m_string.startsWith(s, caseSensitive); } + bool endsWith(const String& s, bool caseSensitive = true) const + { return m_string.endsWith(s, caseSensitive); } + + AtomicString lower() const; + AtomicString upper() const { return AtomicString(impl()->upper()); } + + int toInt(bool* ok = 0) const { return m_string.toInt(ok); } + double toDouble(bool* ok = 0) const { return m_string.toDouble(ok); } + float toFloat(bool* ok = 0) const { return m_string.toFloat(ok); } + bool percentage(int& p) const { return m_string.percentage(p); } + + bool isNull() const { return m_string.isNull(); } + bool isEmpty() const { return m_string.isEmpty(); } + + static void remove(StringImpl*); + +#if PLATFORM(CF) + AtomicString(CFStringRef s) : m_string(add(String(s).impl())) { } + CFStringRef createCFString() const { return m_string.createCFString(); } +#endif +#ifdef __OBJC__ + AtomicString(NSString* s) : m_string(add(String(s).impl())) { } + operator NSString*() const { return m_string; } +#endif +#if PLATFORM(QT) + AtomicString(const QString& s) : m_string(add(String(s).impl())) { } + operator QString() const { return m_string; } +#endif + +private: + String m_string; + + static PassRefPtr<StringImpl> add(const char*); + static PassRefPtr<StringImpl> add(const UChar*, unsigned length); + static PassRefPtr<StringImpl> add(const UChar*, unsigned length, unsigned existingHash); + static PassRefPtr<StringImpl> add(const UChar*); + static PassRefPtr<StringImpl> add(StringImpl*); +}; + +inline bool operator==(const AtomicString& a, const AtomicString& b) { return a.impl() == b.impl(); } +bool operator==(const AtomicString& a, const char* b); +inline bool operator==(const AtomicString& a, const String& b) { return equal(a.impl(), b.impl()); } +inline bool operator==(const char* a, const AtomicString& b) { return b == a; } +inline bool operator==(const String& a, const AtomicString& b) { return equal(a.impl(), b.impl()); } + +inline bool operator!=(const AtomicString& a, const AtomicString& b) { return a.impl() != b.impl(); } +inline bool operator!=(const AtomicString& a, const char *b) { return !(a == b); } +inline bool operator!=(const AtomicString& a, const String& b) { return !equal(a.impl(), b.impl()); } +inline bool operator!=(const char* a, const AtomicString& b) { return !(b == a); } +inline bool operator!=(const String& a, const AtomicString& b) { return !equal(a.impl(), b.impl()); } + +inline bool equalIgnoringCase(const AtomicString& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); } +inline bool equalIgnoringCase(const AtomicString& a, const char* b) { return equalIgnoringCase(a.impl(), b); } +inline bool equalIgnoringCase(const AtomicString& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); } +inline bool equalIgnoringCase(const char* a, const AtomicString& b) { return equalIgnoringCase(a, b.impl()); } +inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); } + +// Define external global variables for the commonly used atomic strings. +// These are only usable from the main thread. +#ifndef ATOMICSTRING_HIDE_GLOBALS + extern const JS_EXPORTDATA AtomicString nullAtom; + extern const JS_EXPORTDATA AtomicString emptyAtom; + extern const JS_EXPORTDATA AtomicString textAtom; + extern const JS_EXPORTDATA AtomicString commentAtom; + extern const JS_EXPORTDATA AtomicString starAtom; + extern const JS_EXPORTDATA AtomicString xmlAtom; + extern const JS_EXPORTDATA AtomicString xmlnsAtom; +#endif + +} // namespace WebCore + + +namespace WTF { + + // AtomicStringHash is the default hash for AtomicString + template<typename T> struct DefaultHash; + template<> struct DefaultHash<WebCore::AtomicString> { + typedef WebCore::AtomicStringHash Hash; + }; + +} // namespace WTF + +#endif // AtomicString_h diff --git a/JavaScriptCore/wtf/text/AtomicStringImpl.h b/JavaScriptCore/wtf/text/AtomicStringImpl.h new file mode 100644 index 0000000..d21a00a --- /dev/null +++ b/JavaScriptCore/wtf/text/AtomicStringImpl.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2006 Apple Computer, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef AtomicStringImpl_h +#define AtomicStringImpl_h + +#include "StringImpl.h" + +// FIXME: This is a temporary layering violation while we move string code to WTF. +// Landing the file moves in one patch, will follow on with patches to change the namespaces. +namespace WebCore { + +class AtomicStringImpl : public StringImpl +{ +}; + +} + +#endif diff --git a/JavaScriptCore/wtf/text/CString.cpp b/JavaScriptCore/wtf/text/CString.cpp new file mode 100644 index 0000000..d93a5a3 --- /dev/null +++ b/JavaScriptCore/wtf/text/CString.cpp @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2003, 2006, 2008, 2009 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "config.h" +#include "CString.h" + +using std::min; + +namespace WTF { + +CString::CString(const char* str) +{ + init(str, strlen(str)); +} + +CString::CString(const char* str, unsigned length) +{ + init(str, length); +} + +void CString::init(const char* str, unsigned length) +{ + if (!str) + return; + + m_buffer = CStringBuffer::create(length + 1); + memcpy(m_buffer->mutableData(), str, length); + m_buffer->mutableData()[length] = '\0'; +} + +const char* CString::data() const +{ + return m_buffer ? m_buffer->data() : 0; +} + +char* CString::mutableData() +{ + copyBufferIfNeeded(); + if (!m_buffer) + return 0; + return m_buffer->mutableData(); +} + +unsigned CString::length() const +{ + return m_buffer ? m_buffer->length() - 1 : 0; +} + +CString CString::newUninitialized(size_t length, char*& characterBuffer) +{ + CString result; + result.m_buffer = CStringBuffer::create(length + 1); + char* bytes = result.m_buffer->mutableData(); + bytes[length] = '\0'; + characterBuffer = bytes; + return result; +} + +void CString::copyBufferIfNeeded() +{ + if (!m_buffer || m_buffer->hasOneRef()) + return; + + int len = m_buffer->length(); + RefPtr<CStringBuffer> m_temp = m_buffer; + m_buffer = CStringBuffer::create(len); + memcpy(m_buffer->mutableData(), m_temp->data(), len); +} + +bool operator==(const CString& a, const CString& b) +{ + if (a.isNull() != b.isNull()) + return false; + if (a.length() != b.length()) + return false; + return !strncmp(a.data(), b.data(), min(a.length(), b.length())); +} + +} // namespace WTF diff --git a/JavaScriptCore/wtf/text/CString.h b/JavaScriptCore/wtf/text/CString.h new file mode 100644 index 0000000..47f7675 --- /dev/null +++ b/JavaScriptCore/wtf/text/CString.h @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2003, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CString_h +#define CString_h + +#include "PassRefPtr.h" +#include "RefCounted.h" +#include "Vector.h" + +namespace WTF { + +class CStringBuffer : public RefCounted<CStringBuffer> { +public: + const char* data() { return m_vector.data(); } + size_t length() { return m_vector.size(); } + +private: + friend class CString; + + static PassRefPtr<CStringBuffer> create(unsigned length) { return adoptRef(new CStringBuffer(length)); } + CStringBuffer(unsigned length) : m_vector(length) { } + char* mutableData() { return m_vector.data(); } + + Vector<char> m_vector; +}; + +// A container for a null-terminated char array supporting copy-on-write +// assignment. The contained char array may be null. +class CString { +public: + CString() { } + CString(const char*); + CString(const char*, unsigned length); + CString(CStringBuffer* buffer) : m_buffer(buffer) { } + static CString newUninitialized(size_t length, char*& characterBuffer); + + const char* data() const; + char* mutableData(); + unsigned length() const; + + bool isNull() const { return !m_buffer; } + + CStringBuffer* buffer() const { return m_buffer.get(); } + +private: + void copyBufferIfNeeded(); + void init(const char*, unsigned length); + RefPtr<CStringBuffer> m_buffer; +}; + +bool operator==(const CString& a, const CString& b); +inline bool operator!=(const CString& a, const CString& b) { return !(a == b); } + +} // namespace WTF + +using WTF::CString; + +#endif // CString_h diff --git a/JavaScriptCore/wtf/text/StringBuffer.h b/JavaScriptCore/wtf/text/StringBuffer.h new file mode 100644 index 0000000..353a44a --- /dev/null +++ b/JavaScriptCore/wtf/text/StringBuffer.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef StringBuffer_h +#define StringBuffer_h + +#include <wtf/Assertions.h> +#include <wtf/Noncopyable.h> +#include <wtf/unicode/Unicode.h> + +namespace WebCore { + +class StringBuffer : public Noncopyable { +public: + explicit StringBuffer(unsigned length) + : m_length(length) + , m_data(static_cast<UChar*>(fastMalloc(length * sizeof(UChar)))) + { + } + ~StringBuffer() + { + fastFree(m_data); + } + + void shrink(unsigned newLength) + { + ASSERT(newLength <= m_length); + m_length = newLength; + } + + void resize(unsigned newLength) + { + if (newLength > m_length) + m_data = static_cast<UChar*>(fastRealloc(m_data, newLength * sizeof(UChar))); + m_length = newLength; + } + + unsigned length() const { return m_length; } + UChar* characters() { return m_data; } + + UChar& operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; } + + UChar* release() { UChar* data = m_data; m_data = 0; return data; } + +private: + unsigned m_length; + UChar* m_data; +}; + +} + +#endif diff --git a/JavaScriptCore/wtf/text/StringHash.h b/JavaScriptCore/wtf/text/StringHash.h new file mode 100644 index 0000000..b820004 --- /dev/null +++ b/JavaScriptCore/wtf/text/StringHash.h @@ -0,0 +1,268 @@ +/* + * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved + * Copyright (C) Research In Motion Limited 2009. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef StringHash_h +#define StringHash_h + +#include "AtomicString.h" +#include "WTFString.h" +#include <wtf/HashTraits.h> +#include <wtf/StringHashFunctions.h> +#include <wtf/unicode/Unicode.h> + +// FIXME: This is a temporary layering violation while we move string code to WTF. +// Landing the file moves in one patch, will follow on with patches to change the namespaces. +namespace WebCore { + + // The hash() functions on StringHash and CaseFoldingHash do not support + // null strings. get(), contains(), and add() on HashMap<String,..., StringHash> + // cause a null-pointer dereference when passed null strings. + + // FIXME: We should really figure out a way to put the computeHash function that's + // currently a member function of StringImpl into this file so we can be a little + // closer to having all the nearly-identical hash functions in one place. + + struct StringHash { + static unsigned hash(StringImpl* key) { return key->hash(); } + static bool equal(const StringImpl* a, const StringImpl* b) + { + if (a == b) + return true; + if (!a || !b) + return false; + + unsigned aLength = a->length(); + unsigned bLength = b->length(); + if (aLength != bLength) + return false; + + // FIXME: perhaps we should have a more abstract macro that indicates when + // going 4 bytes at a time is unsafe +#if CPU(ARM) || CPU(SH4) + const UChar* aChars = a->characters(); + const UChar* bChars = b->characters(); + for (unsigned i = 0; i != aLength; ++i) { + if (*aChars++ != *bChars++) + return false; + } + return true; +#else + /* Do it 4-bytes-at-a-time on architectures where it's safe */ + const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters()); + const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters()); + + unsigned halfLength = aLength >> 1; + for (unsigned i = 0; i != halfLength; ++i) + if (*aChars++ != *bChars++) + return false; + + if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars)) + return false; + + return true; +#endif + } + + static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); } + static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) + { + return equal(a.get(), b.get()); + } + + static unsigned hash(const String& key) { return key.impl()->hash(); } + static bool equal(const String& a, const String& b) + { + return equal(a.impl(), b.impl()); + } + + static const bool safeToCompareToEmptyOrDeleted = false; + }; + + class CaseFoldingHash { + public: + // Paul Hsieh's SuperFastHash + // http://www.azillionmonkeys.com/qed/hash.html + static unsigned hash(const UChar* data, unsigned length) + { + unsigned l = length; + const UChar* s = data; + uint32_t hash = WTF::stringHashingStartValue; + uint32_t tmp; + + int rem = l & 1; + l >>= 1; + + // Main loop. + for (; l > 0; l--) { + hash += WTF::Unicode::foldCase(s[0]); + tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + s += 2; + hash += hash >> 11; + } + + // Handle end case. + if (rem) { + hash += WTF::Unicode::foldCase(s[0]); + hash ^= hash << 11; + hash += hash >> 17; + } + + // Force "avalanching" of final 127 bits. + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 2; + hash += hash >> 15; + hash ^= hash << 10; + + // This avoids ever returning a hash code of 0, since that is used to + // signal "hash not computed yet", using a value that is likely to be + // effectively the same as 0 when the low bits are masked. + hash |= !hash << 31; + + return hash; + } + + static unsigned hash(StringImpl* str) + { + return hash(str->characters(), str->length()); + } + + static unsigned hash(const char* str, unsigned length) + { + // This hash is designed to work on 16-bit chunks at a time. But since the normal case + // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they + // were 16-bit chunks, which will give matching results. + + unsigned l = length; + const char* s = str; + uint32_t hash = WTF::stringHashingStartValue; + uint32_t tmp; + + int rem = l & 1; + l >>= 1; + + // Main loop + for (; l > 0; l--) { + hash += WTF::Unicode::foldCase(s[0]); + tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + s += 2; + hash += hash >> 11; + } + + // Handle end case + if (rem) { + hash += WTF::Unicode::foldCase(s[0]); + hash ^= hash << 11; + hash += hash >> 17; + } + + // Force "avalanching" of final 127 bits + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 2; + hash += hash >> 15; + hash ^= hash << 10; + + // this avoids ever returning a hash code of 0, since that is used to + // signal "hash not computed yet", using a value that is likely to be + // effectively the same as 0 when the low bits are masked + hash |= !hash << 31; + + return hash; + } + + static bool equal(const StringImpl* a, const StringImpl* b) + { + if (a == b) + return true; + if (!a || !b) + return false; + unsigned length = a->length(); + if (length != b->length()) + return false; + return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0; + } + + static unsigned hash(const RefPtr<StringImpl>& key) + { + return hash(key.get()); + } + + static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) + { + return equal(a.get(), b.get()); + } + + static unsigned hash(const String& key) + { + return hash(key.impl()); + } + static unsigned hash(const AtomicString& key) + { + return hash(key.impl()); + } + static bool equal(const String& a, const String& b) + { + return equal(a.impl(), b.impl()); + } + static bool equal(const AtomicString& a, const AtomicString& b) + { + return (a == b) || equal(a.impl(), b.impl()); + } + + static const bool safeToCompareToEmptyOrDeleted = false; + }; + + // This hash can be used in cases where the key is a hash of a string, but we don't + // want to store the string. It's not really specific to string hashing, but all our + // current uses of it are for strings. + struct AlreadyHashed : IntHash<unsigned> { + static unsigned hash(unsigned key) { return key; } + + // To use a hash value as a key for a hash table, we need to eliminate the + // "deleted" value, which is negative one. That could be done by changing + // the string hash function to never generate negative one, but this works + // and is still relatively efficient. + static unsigned avoidDeletedValue(unsigned hash) + { + ASSERT(hash); + unsigned newHash = hash | (!(hash + 1) << 31); + ASSERT(newHash); + ASSERT(newHash != 0xFFFFFFFF); + return newHash; + } + }; + +} + +namespace WTF { + + template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> { + static const bool emptyValueIsZero = true; + static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); } + static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); } + }; + +} + +#endif diff --git a/JavaScriptCore/wtf/text/StringImpl.cpp b/JavaScriptCore/wtf/text/StringImpl.cpp new file mode 100644 index 0000000..287e529 --- /dev/null +++ b/JavaScriptCore/wtf/text/StringImpl.cpp @@ -0,0 +1,953 @@ +/* + * Copyright (C) 1999 Lars Knoll (knoll@kde.org) + * (C) 1999 Antti Koivisto (koivisto@kde.org) + * (C) 2001 Dirk Mueller ( mueller@kde.org ) + * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. + * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include "config.h" +#include "StringImpl.h" + +#include "AtomicString.h" +#include "StringBuffer.h" +#include "StringHash.h" +#include <wtf/StdLibExtras.h> +#include <wtf/WTFThreadData.h> + +using namespace WTF; +using namespace Unicode; + +namespace WebCore { + +static const unsigned minLengthToShare = 20; + +StringImpl::~StringImpl() +{ + ASSERT(!isStatic()); + + if (inTable()) + AtomicString::remove(this); +#if USE(JSC) + if (isIdentifier()) + wtfThreadData().currentIdentifierTable()->remove(this); +#endif + + BufferOwnership ownership = bufferOwnership(); + if (ownership != BufferInternal) { + if (ownership == BufferOwned) { + ASSERT(!m_sharedBuffer); + ASSERT(m_data); + fastFree(const_cast<UChar*>(m_data)); + } else if (ownership == BufferSubstring) { + ASSERT(m_substringBuffer); + m_substringBuffer->deref(); + } else { + ASSERT(ownership == BufferShared); + ASSERT(m_sharedBuffer); + m_sharedBuffer->deref(); + } + } +} + +StringImpl* StringImpl::empty() +{ + // FIXME: This works around a bug in our port of PCRE, that a regular expression + // run on the empty string may still perform a read from the first element, and + // as such we need this to be a valid pointer. No code should ever be reading + // from a zero length string, so this should be able to be a non-null pointer + // into the zero-page. + // Replace this with 'reinterpret_cast<UChar*>(static_cast<intptr_t>(1))' once + // PCRE goes away. + static UChar emptyUCharData = 0; + DEFINE_STATIC_LOCAL(StringImpl, emptyString, (&emptyUCharData, 0, ConstructStaticString)); + return &emptyString; +} + +PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) +{ + if (!length) { + data = 0; + return empty(); + } + + // Allocate a single buffer large enough to contain the StringImpl + // struct as well as the data which it contains. This removes one + // heap allocation from this call. + if (length > ((std::numeric_limits<size_t>::max() - sizeof(StringImpl)) / sizeof(UChar))) + CRASH(); + size_t size = sizeof(StringImpl) + length * sizeof(UChar); + StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); + + data = reinterpret_cast<UChar*>(string + 1); + return adoptRef(new (string) StringImpl(length)); +} + +PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length) +{ + if (!characters || !length) + return empty(); + + UChar* data; + PassRefPtr<StringImpl> string = createUninitialized(length, data); + memcpy(data, characters, length * sizeof(UChar)); + return string; +} + +PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length) +{ + if (!characters || !length) + return empty(); + + UChar* data; + PassRefPtr<StringImpl> string = createUninitialized(length, data); + for (unsigned i = 0; i != length; ++i) { + unsigned char c = characters[i]; + data[i] = c; + } + return string; +} + +PassRefPtr<StringImpl> StringImpl::create(const char* string) +{ + if (!string) + return empty(); + return create(string, strlen(string)); +} + +PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer) +{ + ASSERT(characters); + ASSERT(minLengthToShare && length >= minLengthToShare); + return adoptRef(new StringImpl(characters, length, sharedBuffer)); +} + +SharedUChar* StringImpl::sharedBuffer() +{ + if (m_length < minLengthToShare) + return 0; + // All static strings are smaller that the minimim length to share. + ASSERT(!isStatic()); + + BufferOwnership ownership = bufferOwnership(); + + if (ownership == BufferInternal) + return 0; + if (ownership == BufferSubstring) + return m_substringBuffer->sharedBuffer(); + if (ownership == BufferOwned) { + ASSERT(!m_sharedBuffer); + m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).releaseRef(); + m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared; + } + + ASSERT(bufferOwnership() == BufferShared); + ASSERT(m_sharedBuffer); + return m_sharedBuffer; +} + +bool StringImpl::containsOnlyWhitespace() +{ + // FIXME: The definition of whitespace here includes a number of characters + // that are not whitespace from the point of view of RenderText; I wonder if + // that's a problem in practice. + for (unsigned i = 0; i < m_length; i++) + if (!isASCIISpace(m_data[i])) + return false; + return true; +} + +PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) +{ + if (start >= m_length) + return empty(); + unsigned maxLength = m_length - start; + if (length >= maxLength) { + if (!start) + return this; + length = maxLength; + } + return create(m_data + start, length); +} + +UChar32 StringImpl::characterStartingAt(unsigned i) +{ + if (U16_IS_SINGLE(m_data[i])) + return m_data[i]; + if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1])) + return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]); + return 0; +} + +PassRefPtr<StringImpl> StringImpl::lower() +{ + // Note: This is a hot function in the Dromaeo benchmark, specifically the + // no-op code path up through the first 'return' statement. + + // First scan the string for uppercase and non-ASCII characters: + UChar ored = 0; + bool noUpper = true; + const UChar *end = m_data + m_length; + for (const UChar* chp = m_data; chp != end; chp++) { + if (UNLIKELY(isASCIIUpper(*chp))) + noUpper = false; + ored |= *chp; + } + + // Nothing to do if the string is all ASCII with no uppercase. + if (noUpper && !(ored & ~0x7F)) + return this; + + int32_t length = m_length; + UChar* data; + RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + + if (!(ored & ~0x7F)) { + // Do a faster loop for the case where all the characters are ASCII. + for (int i = 0; i < length; i++) { + UChar c = m_data[i]; + data[i] = toASCIILower(c); + } + return newImpl; + } + + // Do a slower implementation for cases that include non-ASCII characters. + bool error; + int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error); + if (!error && realLength == length) + return newImpl; + newImpl = createUninitialized(realLength, data); + Unicode::toLower(data, realLength, m_data, m_length, &error); + if (error) + return this; + return newImpl; +} + +PassRefPtr<StringImpl> StringImpl::upper() +{ + // This function could be optimized for no-op cases the way lower() is, + // but in empirical testing, few actual calls to upper() are no-ops, so + // it wouldn't be worth the extra time for pre-scanning. + UChar* data; + PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + int32_t length = m_length; + + // Do a faster loop for the case where all the characters are ASCII. + UChar ored = 0; + for (int i = 0; i < length; i++) { + UChar c = m_data[i]; + ored |= c; + data[i] = toASCIIUpper(c); + } + if (!(ored & ~0x7F)) + return newImpl; + + // Do a slower implementation for cases that include non-ASCII characters. + bool error; + int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error); + if (!error && realLength == length) + return newImpl; + newImpl = createUninitialized(realLength, data); + Unicode::toUpper(data, realLength, m_data, m_length, &error); + if (error) + return this; + return newImpl; +} + +PassRefPtr<StringImpl> StringImpl::secure(UChar aChar) +{ + UChar* data; + PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + int32_t length = m_length; + for (int i = 0; i < length; ++i) + data[i] = aChar; + return newImpl; +} + +PassRefPtr<StringImpl> StringImpl::foldCase() +{ + UChar* data; + PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + int32_t length = m_length; + + // Do a faster loop for the case where all the characters are ASCII. + UChar ored = 0; + for (int i = 0; i < length; i++) { + UChar c = m_data[i]; + ored |= c; + data[i] = toASCIILower(c); + } + if (!(ored & ~0x7F)) + return newImpl; + + // Do a slower implementation for cases that include non-ASCII characters. + bool error; + int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error); + if (!error && realLength == length) + return newImpl; + newImpl = createUninitialized(realLength, data); + Unicode::foldCase(data, realLength, m_data, m_length, &error); + if (error) + return this; + return newImpl; +} + +PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() +{ + if (!m_length) + return empty(); + + unsigned start = 0; + unsigned end = m_length - 1; + + // skip white space from start + while (start <= end && isSpaceOrNewline(m_data[start])) + start++; + + // only white space + if (start > end) + return empty(); + + // skip white space from end + while (end && isSpaceOrNewline(m_data[end])) + end--; + + if (!start && end == m_length - 1) + return this; + return create(m_data + start, end + 1 - start); +} + +PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) +{ + const UChar* from = m_data; + const UChar* fromend = from + m_length; + + // Assume the common case will not remove any characters + while (from != fromend && !findMatch(*from)) + from++; + if (from == fromend) + return this; + + StringBuffer data(m_length); + UChar* to = data.characters(); + unsigned outc = from - m_data; + + if (outc) + memcpy(to, m_data, outc * sizeof(UChar)); + + while (true) { + while (from != fromend && findMatch(*from)) + from++; + while (from != fromend && !findMatch(*from)) + to[outc++] = *from++; + if (from == fromend) + break; + } + + data.shrink(outc); + + return adopt(data); +} + +PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace() +{ + StringBuffer data(m_length); + + const UChar* from = m_data; + const UChar* fromend = from + m_length; + int outc = 0; + bool changedToSpace = false; + + UChar* to = data.characters(); + + while (true) { + while (from != fromend && isSpaceOrNewline(*from)) { + if (*from != ' ') + changedToSpace = true; + from++; + } + while (from != fromend && !isSpaceOrNewline(*from)) + to[outc++] = *from++; + if (from != fromend) + to[outc++] = ' '; + else + break; + } + + if (outc > 0 && to[outc - 1] == ' ') + outc--; + + if (static_cast<unsigned>(outc) == m_length && !changedToSpace) + return this; + + data.shrink(outc); + + return adopt(data); +} + +int StringImpl::toIntStrict(bool* ok, int base) +{ + return charactersToIntStrict(m_data, m_length, ok, base); +} + +unsigned StringImpl::toUIntStrict(bool* ok, int base) +{ + return charactersToUIntStrict(m_data, m_length, ok, base); +} + +int64_t StringImpl::toInt64Strict(bool* ok, int base) +{ + return charactersToInt64Strict(m_data, m_length, ok, base); +} + +uint64_t StringImpl::toUInt64Strict(bool* ok, int base) +{ + return charactersToUInt64Strict(m_data, m_length, ok, base); +} + +intptr_t StringImpl::toIntPtrStrict(bool* ok, int base) +{ + return charactersToIntPtrStrict(m_data, m_length, ok, base); +} + +int StringImpl::toInt(bool* ok) +{ + return charactersToInt(m_data, m_length, ok); +} + +unsigned StringImpl::toUInt(bool* ok) +{ + return charactersToUInt(m_data, m_length, ok); +} + +int64_t StringImpl::toInt64(bool* ok) +{ + return charactersToInt64(m_data, m_length, ok); +} + +uint64_t StringImpl::toUInt64(bool* ok) +{ + return charactersToUInt64(m_data, m_length, ok); +} + +intptr_t StringImpl::toIntPtr(bool* ok) +{ + return charactersToIntPtr(m_data, m_length, ok); +} + +double StringImpl::toDouble(bool* ok) +{ + return charactersToDouble(m_data, m_length, ok); +} + +float StringImpl::toFloat(bool* ok) +{ + return charactersToFloat(m_data, m_length, ok); +} + +static bool equal(const UChar* a, const char* b, int length) +{ + ASSERT(length >= 0); + while (length--) { + unsigned char bc = *b++; + if (*a++ != bc) + return false; + } + return true; +} + +bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) +{ + while (length--) { + unsigned char bc = *b++; + if (foldCase(*a++) != foldCase(bc)) + return false; + } + return true; +} + +static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) +{ + ASSERT(length >= 0); + return umemcasecmp(a, b, length) == 0; +} + +int StringImpl::find(const char* chs, int index, bool caseSensitive) +{ + if (!chs || index < 0) + return -1; + + int chsLength = strlen(chs); + int n = m_length - index; + if (n < 0) + return -1; + n -= chsLength - 1; + if (n <= 0) + return -1; + + const char* chsPlusOne = chs + 1; + int chsLengthMinusOne = chsLength - 1; + + const UChar* ptr = m_data + index - 1; + if (caseSensitive) { + UChar c = *chs; + do { + if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne)) + return m_length - chsLength - n + 1; + } while (--n); + } else { + UChar lc = Unicode::foldCase(*chs); + do { + if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne)) + return m_length - chsLength - n + 1; + } while (--n); + } + + return -1; +} + +int StringImpl::find(UChar c, int start) +{ + return WebCore::find(m_data, m_length, c, start); +} + +int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start) +{ + return WebCore::find(m_data, m_length, matchFunction, start); +} + +int StringImpl::find(StringImpl* str, int index, bool caseSensitive) +{ + /* + We use a simple trick for efficiency's sake. Instead of + comparing strings, we compare the sum of str with that of + a part of this string. Only if that matches, we call memcmp + or ucstrnicmp. + */ + ASSERT(str); + if (index < 0) + index += m_length; + int lstr = str->m_length; + int lthis = m_length - index; + if ((unsigned)lthis > m_length) + return -1; + int delta = lthis - lstr; + if (delta < 0) + return -1; + + const UChar* uthis = m_data + index; + const UChar* ustr = str->m_data; + unsigned hthis = 0; + unsigned hstr = 0; + if (caseSensitive) { + for (int i = 0; i < lstr; i++) { + hthis += uthis[i]; + hstr += ustr[i]; + } + int i = 0; + while (1) { + if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) + return index + i; + if (i == delta) + return -1; + hthis += uthis[i + lstr]; + hthis -= uthis[i]; + i++; + } + } else { + for (int i = 0; i < lstr; i++ ) { + hthis += toASCIILower(uthis[i]); + hstr += toASCIILower(ustr[i]); + } + int i = 0; + while (1) { + if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr)) + return index + i; + if (i == delta) + return -1; + hthis += toASCIILower(uthis[i + lstr]); + hthis -= toASCIILower(uthis[i]); + i++; + } + } +} + +int StringImpl::reverseFind(UChar c, int index) +{ + return WebCore::reverseFind(m_data, m_length, c, index); +} + +int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive) +{ + /* + See StringImpl::find() for explanations. + */ + ASSERT(str); + int lthis = m_length; + if (index < 0) + index += lthis; + + int lstr = str->m_length; + int delta = lthis - lstr; + if ( index < 0 || index > lthis || delta < 0 ) + return -1; + if ( index > delta ) + index = delta; + + const UChar *uthis = m_data; + const UChar *ustr = str->m_data; + unsigned hthis = 0; + unsigned hstr = 0; + int i; + if (caseSensitive) { + for ( i = 0; i < lstr; i++ ) { + hthis += uthis[index + i]; + hstr += ustr[i]; + } + i = index; + while (1) { + if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) + return i; + if (i == 0) + return -1; + i--; + hthis -= uthis[i + lstr]; + hthis += uthis[i]; + } + } else { + for (i = 0; i < lstr; i++) { + hthis += toASCIILower(uthis[index + i]); + hstr += toASCIILower(ustr[i]); + } + i = index; + while (1) { + if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) ) + return i; + if (i == 0) + return -1; + i--; + hthis -= toASCIILower(uthis[i + lstr]); + hthis += toASCIILower(uthis[i]); + } + } + + // Should never get here. + return -1; +} + +bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive) +{ + ASSERT(m_data); + int start = m_length - m_data->m_length; + if (start >= 0) + return (find(m_data, start, caseSensitive) == start); + return false; +} + +PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) +{ + if (oldC == newC) + return this; + unsigned i; + for (i = 0; i != m_length; ++i) + if (m_data[i] == oldC) + break; + if (i == m_length) + return this; + + UChar* data; + PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data); + + for (i = 0; i != m_length; ++i) { + UChar ch = m_data[i]; + if (ch == oldC) + ch = newC; + data[i] = ch; + } + return newImpl; +} + +PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) +{ + position = min(position, length()); + lengthToReplace = min(lengthToReplace, length() - position); + unsigned lengthToInsert = str ? str->length() : 0; + if (!lengthToReplace && !lengthToInsert) + return this; + UChar* data; + PassRefPtr<StringImpl> newImpl = + createUninitialized(length() - lengthToReplace + lengthToInsert, data); + memcpy(data, characters(), position * sizeof(UChar)); + if (str) + memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar)); + memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace, + (length() - position - lengthToReplace) * sizeof(UChar)); + return newImpl; +} + +PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement) +{ + if (!replacement) + return this; + + int repStrLength = replacement->length(); + int srcSegmentStart = 0; + int matchCount = 0; + + // Count the matches + while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { + ++matchCount; + ++srcSegmentStart; + } + + // If we have 0 matches, we don't have to do any more work + if (!matchCount) + return this; + + UChar* data; + PassRefPtr<StringImpl> newImpl = + createUninitialized(m_length - matchCount + (matchCount * repStrLength), data); + + // Construct the new data + int srcSegmentEnd; + int srcSegmentLength; + srcSegmentStart = 0; + int dstOffset = 0; + + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { + srcSegmentLength = srcSegmentEnd - srcSegmentStart; + memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + dstOffset += srcSegmentLength; + memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); + dstOffset += repStrLength; + srcSegmentStart = srcSegmentEnd + 1; + } + + srcSegmentLength = m_length - srcSegmentStart; + memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + + ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); + + return newImpl; +} + +PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement) +{ + if (!pattern || !replacement) + return this; + + int patternLength = pattern->length(); + if (!patternLength) + return this; + + int repStrLength = replacement->length(); + int srcSegmentStart = 0; + int matchCount = 0; + + // Count the matches + while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { + ++matchCount; + srcSegmentStart += patternLength; + } + + // If we have 0 matches, we don't have to do any more work + if (!matchCount) + return this; + + UChar* data; + PassRefPtr<StringImpl> newImpl = + createUninitialized(m_length + matchCount * (repStrLength - patternLength), data); + + // Construct the new data + int srcSegmentEnd; + int srcSegmentLength; + srcSegmentStart = 0; + int dstOffset = 0; + + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { + srcSegmentLength = srcSegmentEnd - srcSegmentStart; + memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + dstOffset += srcSegmentLength; + memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); + dstOffset += repStrLength; + srcSegmentStart = srcSegmentEnd + patternLength; + } + + srcSegmentLength = m_length - srcSegmentStart; + memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); + + ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); + + return newImpl; +} + +bool equal(const StringImpl* a, const StringImpl* b) +{ + return StringHash::equal(a, b); +} + +bool equal(const StringImpl* a, const char* b) +{ + if (!a) + return !b; + if (!b) + return !a; + + unsigned length = a->length(); + const UChar* as = a->characters(); + for (unsigned i = 0; i != length; ++i) { + unsigned char bc = b[i]; + if (!bc) + return false; + if (as[i] != bc) + return false; + } + + return !b[length]; +} + +bool equalIgnoringCase(StringImpl* a, StringImpl* b) +{ + return CaseFoldingHash::equal(a, b); +} + +bool equalIgnoringCase(StringImpl* a, const char* b) +{ + if (!a) + return !b; + if (!b) + return !a; + + unsigned length = a->length(); + const UChar* as = a->characters(); + + // Do a faster loop for the case where all the characters are ASCII. + UChar ored = 0; + bool equal = true; + for (unsigned i = 0; i != length; ++i) { + char bc = b[i]; + if (!bc) + return false; + UChar ac = as[i]; + ored |= ac; + equal = equal && (toASCIILower(ac) == toASCIILower(bc)); + } + + // Do a slower implementation for cases that include non-ASCII characters. + if (ored & ~0x7F) { + equal = true; + for (unsigned i = 0; i != length; ++i) { + unsigned char bc = b[i]; + equal = equal && (foldCase(as[i]) == foldCase(bc)); + } + } + + return equal && !b[length]; +} + +bool equalIgnoringNullity(StringImpl* a, StringImpl* b) +{ + if (StringHash::equal(a, b)) + return true; + if (!a && b && !b->length()) + return true; + if (!b && a && !a->length()) + return true; + + return false; +} + +Vector<char> StringImpl::ascii() +{ + Vector<char> buffer(m_length + 1); + for (unsigned i = 0; i != m_length; ++i) { + UChar c = m_data[i]; + if ((c >= 0x20 && c < 0x7F) || c == 0x00) + buffer[i] = c; + else + buffer[i] = '?'; + } + buffer[m_length] = '\0'; + return buffer; +} + +WTF::Unicode::Direction StringImpl::defaultWritingDirection() +{ + for (unsigned i = 0; i < m_length; ++i) { + WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]); + if (charDirection == WTF::Unicode::LeftToRight) + return WTF::Unicode::LeftToRight; + if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) + return WTF::Unicode::RightToLeft; + } + return WTF::Unicode::LeftToRight; +} + +// This is a hot function because it's used when parsing HTML. +PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length) +{ + StringBuffer strippedCopy(length); + unsigned strippedLength = 0; + for (unsigned i = 0; i < length; i++) { + if (int c = characters[i]) + strippedCopy[strippedLength++] = c; + } + ASSERT(strippedLength < length); // Only take the slow case when stripping. + strippedCopy.shrink(strippedLength); + return adopt(strippedCopy); +} + +PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer) +{ + unsigned length = buffer.length(); + if (length == 0) + return empty(); + return adoptRef(new StringImpl(buffer.release(), length)); +} + +PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string) +{ + // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer + // get allocated in a single malloc block. + UChar* data; + int length = string.m_length; + RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data); + memcpy(data, string.m_data, length * sizeof(UChar)); + data[length] = 0; + terminatedString->m_length--; + terminatedString->m_hash = string.m_hash; + terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter; + return terminatedString.release(); +} + +PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const +{ + return create(m_data, m_length); +} + +PassRefPtr<StringImpl> StringImpl::crossThreadString() +{ + if (SharedUChar* sharedBuffer = this->sharedBuffer()) + return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy())); + + // If no shared buffer is available, create a copy. + return threadsafeCopy(); +} + +} // namespace WebCore diff --git a/JavaScriptCore/wtf/text/StringImpl.h b/JavaScriptCore/wtf/text/StringImpl.h new file mode 100644 index 0000000..6ac9e40 --- /dev/null +++ b/JavaScriptCore/wtf/text/StringImpl.h @@ -0,0 +1,389 @@ +/* + * Copyright (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. + * Copyright (C) 2009 Google Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef StringImpl_h +#define StringImpl_h + +#include <limits.h> +#include <wtf/ASCIICType.h> +#include <wtf/CrossThreadRefCounted.h> +#include <wtf/OwnFastMallocPtr.h> +#include <wtf/StringHashFunctions.h> +#include <wtf/Vector.h> +#include <wtf/text/StringImplBase.h> +#include <wtf/unicode/Unicode.h> + +#if PLATFORM(CF) +typedef const struct __CFString * CFStringRef; +#endif + +#ifdef __OBJC__ +@class NSString; +#endif + +// FIXME: This is a temporary layering violation while we move string code to WTF. +// Landing the file moves in one patch, will follow on with patches to change the namespaces. +namespace JSC { + +struct IdentifierCStringTranslator; +struct IdentifierUCharBufferTranslator; + +} + +// FIXME: This is a temporary layering violation while we move string code to WTF. +// Landing the file moves in one patch, will follow on with patches to change the namespaces. +namespace WebCore { + +class StringBuffer; + +struct CStringTranslator; +struct HashAndCharactersTranslator; +struct StringHash; +struct UCharBufferTranslator; + +enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; + +typedef OwnFastMallocPtr<const UChar> SharableUChar; +typedef CrossThreadRefCounted<SharableUChar> SharedUChar; +typedef bool (*CharacterMatchFunctionPtr)(UChar); + +class StringImpl : public StringImplBase { + friend struct JSC::IdentifierCStringTranslator; + friend struct JSC::IdentifierUCharBufferTranslator; + friend struct CStringTranslator; + friend struct HashAndCharactersTranslator; + friend struct UCharBufferTranslator; +private: + // Used to construct static strings, which have an special refCount that can never hit zero. + // This means that the static string will never be destroyed, which is important because + // static strings will be shared across threads & ref-counted in a non-threadsafe manner. + StringImpl(const UChar* characters, unsigned length, StaticStringConstructType) + : StringImplBase(length, ConstructStaticString) + , m_data(characters) + , m_buffer(0) + , m_hash(0) + { + // Ensure that the hash is computed so that AtomicStringHash can call existingHash() + // with impunity. The empty string is special because it is never entered into + // AtomicString's HashKey, but still needs to compare correctly. + hash(); + } + + // Create a normal string with internal storage (BufferInternal) + StringImpl(unsigned length) + : StringImplBase(length, BufferInternal) + , m_data(reinterpret_cast<const UChar*>(this + 1)) + , m_buffer(0) + , m_hash(0) + { + ASSERT(m_data); + ASSERT(m_length); + } + + // Create a StringImpl adopting ownership of the provided buffer (BufferOwned) + StringImpl(const UChar* characters, unsigned length) + : StringImplBase(length, BufferOwned) + , m_data(characters) + , m_buffer(0) + , m_hash(0) + { + ASSERT(m_data); + ASSERT(m_length); + } + + // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring) + StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base) + : StringImplBase(length, BufferSubstring) + , m_data(characters) + , m_substringBuffer(base.releaseRef()) + , m_hash(0) + { + ASSERT(m_data); + ASSERT(m_length); + ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring); + } + + // Used to construct new strings sharing an existing SharedUChar (BufferShared) + StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer) + : StringImplBase(length, BufferShared) + , m_data(characters) + , m_sharedBuffer(sharedBuffer.releaseRef()) + , m_hash(0) + { + ASSERT(m_data); + ASSERT(m_length); + } + + // For use only by AtomicString's XXXTranslator helpers. + void setHash(unsigned hash) + { + ASSERT(!isStatic()); + ASSERT(!m_hash); + ASSERT(hash == computeHash(m_data, m_length)); + m_hash = hash; + } + +public: + ~StringImpl(); + + static PassRefPtr<StringImpl> create(const UChar*, unsigned length); + static PassRefPtr<StringImpl> create(const char*, unsigned length); + static PassRefPtr<StringImpl> create(const char*); + static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer); + static PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) + { + ASSERT(rep); + ASSERT(length <= rep->length()); + + if (!length) + return empty(); + + StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get(); + return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep)); + } + + static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data); + static PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output) + { + if (!length) { + output = 0; + return empty(); + } + + if (length > ((std::numeric_limits<size_t>::max() - sizeof(StringImpl)) / sizeof(UChar))) + return 0; + StringImpl* resultImpl; + if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) + return 0; + output = reinterpret_cast<UChar*>(resultImpl + 1); + return adoptRef(new(resultImpl) StringImpl(length)); + } + + static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&); + static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length); + + template<size_t inlineCapacity> + static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector) + { + if (size_t size = vector.size()) { + ASSERT(vector.data()); + return adoptRef(new StringImpl(vector.releaseBuffer(), size)); + } + return empty(); + } + static PassRefPtr<StringImpl> adopt(StringBuffer&); + + SharedUChar* sharedBuffer(); + const UChar* characters() const { return m_data; } + + size_t cost() + { + // For substrings, return the cost of the base string. + if (bufferOwnership() == BufferSubstring) + return m_substringBuffer->cost(); + + if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) { + m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost; + return m_length; + } + return 0; + } + + bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; } + void setIsIdentifier(bool isIdentifier) + { + ASSERT(!isStatic()); + if (isIdentifier) + m_refCountAndFlags |= s_refCountFlagIsIdentifier; + else + m_refCountAndFlags &= ~s_refCountFlagIsIdentifier; + } + + bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; } + + bool inTable() const { return m_refCountAndFlags & s_refCountFlagInTable; } + void setInTable() { m_refCountAndFlags |= s_refCountFlagInTable; } + + unsigned hash() const { if (!m_hash) m_hash = computeHash(m_data, m_length); return m_hash; } + unsigned existingHash() const { ASSERT(m_hash); return m_hash; } + static unsigned computeHash(const UChar* data, unsigned length) { return WTF::stringHash(data, length); } + static unsigned computeHash(const char* data, unsigned length) { return WTF::stringHash(data, length); } + static unsigned computeHash(const char* data) { return WTF::stringHash(data); } + + ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; } + ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; } + + static StringImpl* empty(); + + static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters) + { + if (numCharacters <= s_copyCharsInlineCutOff) { + for (unsigned i = 0; i < numCharacters; ++i) + destination[i] = source[i]; + } else + memcpy(destination, source, numCharacters * sizeof(UChar)); + } + + // Returns a StringImpl suitable for use on another thread. + PassRefPtr<StringImpl> crossThreadString(); + // Makes a deep copy. Helpful only if you need to use a String on another thread + // (use crossThreadString if the method call doesn't need to be threadsafe). + // Since StringImpl objects are immutable, there's no other reason to make a copy. + PassRefPtr<StringImpl> threadsafeCopy() const; + + PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX); + + UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; } + UChar32 characterStartingAt(unsigned); + + bool containsOnlyWhitespace(); + + int toIntStrict(bool* ok = 0, int base = 10); + unsigned toUIntStrict(bool* ok = 0, int base = 10); + int64_t toInt64Strict(bool* ok = 0, int base = 10); + uint64_t toUInt64Strict(bool* ok = 0, int base = 10); + intptr_t toIntPtrStrict(bool* ok = 0, int base = 10); + + int toInt(bool* ok = 0); // ignores trailing garbage + unsigned toUInt(bool* ok = 0); // ignores trailing garbage + int64_t toInt64(bool* ok = 0); // ignores trailing garbage + uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage + intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage + + double toDouble(bool* ok = 0); + float toFloat(bool* ok = 0); + + PassRefPtr<StringImpl> lower(); + PassRefPtr<StringImpl> upper(); + PassRefPtr<StringImpl> secure(UChar aChar); + PassRefPtr<StringImpl> foldCase(); + + PassRefPtr<StringImpl> stripWhiteSpace(); + PassRefPtr<StringImpl> simplifyWhiteSpace(); + + PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); + + int find(const char*, int index = 0, bool caseSensitive = true); + int find(UChar, int index = 0); + int find(CharacterMatchFunctionPtr, int index = 0); + int find(StringImpl*, int index, bool caseSensitive = true); + + int reverseFind(UChar, int index); + int reverseFind(StringImpl*, int index, bool caseSensitive = true); + + bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; } + bool endsWith(StringImpl*, bool caseSensitive = true); + + PassRefPtr<StringImpl> replace(UChar, UChar); + PassRefPtr<StringImpl> replace(UChar, StringImpl*); + PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); + PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); + + Vector<char> ascii(); + + WTF::Unicode::Direction defaultWritingDirection(); + +#if PLATFORM(CF) + CFStringRef createCFString(); +#endif +#ifdef __OBJC__ + operator NSString*(); +#endif + +private: + // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. + static const unsigned s_copyCharsInlineCutOff = 20; + + static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length); + + BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); } + bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; } + + const UChar* m_data; + union { + void* m_buffer; + StringImpl* m_substringBuffer; + SharedUChar* m_sharedBuffer; + }; + mutable unsigned m_hash; +}; + +bool equal(const StringImpl*, const StringImpl*); +bool equal(const StringImpl*, const char*); +inline bool equal(const char* a, StringImpl* b) { return equal(b, a); } + +bool equalIgnoringCase(StringImpl*, StringImpl*); +bool equalIgnoringCase(StringImpl*, const char*); +inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); } +bool equalIgnoringCase(const UChar* a, const char* b, unsigned length); +inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); } + +bool equalIgnoringNullity(StringImpl*, StringImpl*); + +static inline bool isSpaceOrNewline(UChar c) +{ + // Use isASCIISpace() for basic Latin-1. + // This will include newlines, which aren't included in Unicode DirWS. + return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral; +} + +// This is a hot function because it's used when parsing HTML. +inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length) +{ + ASSERT(characters); + ASSERT(length); + + // Optimize for the case where there are no Null characters by quickly + // searching for nulls, and then using StringImpl::create, which will + // memcpy the whole buffer. This is faster than assigning character by + // character during the loop. + + // Fast case. + int foundNull = 0; + for (unsigned i = 0; !foundNull && i < length; i++) { + int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS) + foundNull |= !c; + } + if (!foundNull) + return StringImpl::create(characters, length); + + return StringImpl::createStrippingNullCharactersSlowCase(characters, length); +} + +} + +using WebCore::equal; + +namespace WTF { + + // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl> + template<typename T> struct DefaultHash; + template<> struct DefaultHash<WebCore::StringImpl*> { + typedef WebCore::StringHash Hash; + }; + template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > { + typedef WebCore::StringHash Hash; + }; + +} + +#endif diff --git a/JavaScriptCore/wtf/text/StringImplBase.h b/JavaScriptCore/wtf/text/StringImplBase.h new file mode 100644 index 0000000..a8e3385 --- /dev/null +++ b/JavaScriptCore/wtf/text/StringImplBase.h @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2010 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef StringImplBase_h +#define StringImplBase_h + +#include <wtf/Noncopyable.h> +#include <wtf/unicode/Unicode.h> + +namespace WTF { + +class StringImplBase : public Noncopyable { +public: + bool isStringImpl() { return (m_refCountAndFlags & s_refCountInvalidForStringImpl) != s_refCountInvalidForStringImpl; } + unsigned length() const { return m_length; } + void ref() { m_refCountAndFlags += s_refCountIncrement; } + +protected: + enum BufferOwnership { + BufferInternal, + BufferOwned, + BufferSubstring, + BufferShared, + }; + + using Noncopyable::operator new; + void* operator new(size_t, void* inPlace) { ASSERT(inPlace); return inPlace; } + + // For SmallStringStorage, which allocates an array and uses an in-place new. + StringImplBase() { } + + StringImplBase(unsigned length, BufferOwnership ownership) + : m_refCountAndFlags(s_refCountIncrement | s_refCountFlagShouldReportedCost | ownership) + , m_length(length) + { + ASSERT(isStringImpl()); + } + + enum StaticStringConstructType { ConstructStaticString }; + StringImplBase(unsigned length, StaticStringConstructType) + : m_refCountAndFlags(s_refCountFlagStatic | s_refCountFlagIsIdentifier | BufferOwned) + , m_length(length) + { + ASSERT(isStringImpl()); + } + + // This constructor is not used when creating StringImpl objects, + // and sets the flags into a state marking the object as such. + enum NonStringImplConstructType { ConstructNonStringImpl }; + StringImplBase(NonStringImplConstructType) + : m_refCountAndFlags(s_refCountIncrement | s_refCountInvalidForStringImpl) + , m_length(0) + { + ASSERT(!isStringImpl()); + } + + // The bottom 7 bits hold flags, the top 25 bits hold the ref count. + // When dereferencing StringImpls we check for the ref count AND the + // static bit both being zero - static strings are never deleted. + static const unsigned s_refCountMask = 0xFFFFFF80; + static const unsigned s_refCountIncrement = 0x80; + static const unsigned s_refCountFlagStatic = 0x40; + static const unsigned s_refCountFlagHasTerminatingNullCharacter = 0x20; + static const unsigned s_refCountFlagInTable = 0x10; + static const unsigned s_refCountFlagShouldReportedCost = 0x8; + static const unsigned s_refCountFlagIsIdentifier = 0x4; + static const unsigned s_refCountMaskBufferOwnership = 0x3; + // An invalid permutation of flags (static & shouldReportedCost - static strings do not + // set shouldReportedCost in the constructor, and this bit is only ever cleared, not set). + // Used by "ConstructNonStringImpl" constructor, above. + static const unsigned s_refCountInvalidForStringImpl = s_refCountFlagStatic | s_refCountFlagShouldReportedCost; + + unsigned m_refCountAndFlags; + unsigned m_length; +}; + +} // namespace WTF + +using WTF::StringImplBase; + +#endif diff --git a/JavaScriptCore/wtf/text/WTFString.cpp b/JavaScriptCore/wtf/text/WTFString.cpp new file mode 100644 index 0000000..a683e3d --- /dev/null +++ b/JavaScriptCore/wtf/text/WTFString.cpp @@ -0,0 +1,960 @@ +/* + * (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#include "config.h" +#include "WTFString.h" + +#include <limits> +#include <stdarg.h> +#include <wtf/ASCIICType.h> +#include <wtf/text/CString.h> +#include <wtf/StringExtras.h> +#include <wtf/Vector.h> +#include <wtf/dtoa.h> +#include <wtf/unicode/UTF8.h> +#include <wtf/unicode/Unicode.h> + +using namespace WTF; +using namespace WTF::Unicode; + +namespace WebCore { + +String::String(const UChar* str, unsigned len) +{ + if (!str) + return; + m_impl = StringImpl::create(str, len); +} + +String::String(const UChar* str) +{ + if (!str) + return; + + int len = 0; + while (str[len] != UChar(0)) + len++; + + m_impl = StringImpl::create(str, len); +} + +String::String(const char* str) +{ + if (!str) + return; + m_impl = StringImpl::create(str); +} + +String::String(const char* str, unsigned length) +{ + if (!str) + return; + m_impl = StringImpl::create(str, length); +} + +void String::append(const String& str) +{ + if (str.isEmpty()) + return; + + // FIXME: This is extremely inefficient. So much so that we might want to take this + // out of String's API. We can make it better by optimizing the case where exactly + // one String is pointing at this StringImpl, but even then it's going to require a + // call to fastMalloc every single time. + if (str.m_impl) { + if (m_impl) { + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(m_impl->length() + str.length(), data); + memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); + memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar)); + m_impl = newImpl.release(); + } else + m_impl = str.m_impl; + } +} + +void String::append(char c) +{ + // FIXME: This is extremely inefficient. So much so that we might want to take this + // out of String's API. We can make it better by optimizing the case where exactly + // one String is pointing at this StringImpl, but even then it's going to require a + // call to fastMalloc every single time. + if (m_impl) { + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(m_impl->length() + 1, data); + memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); + data[m_impl->length()] = c; + m_impl = newImpl.release(); + } else + m_impl = StringImpl::create(&c, 1); +} + +void String::append(UChar c) +{ + // FIXME: This is extremely inefficient. So much so that we might want to take this + // out of String's API. We can make it better by optimizing the case where exactly + // one String is pointing at this StringImpl, but even then it's going to require a + // call to fastMalloc every single time. + if (m_impl) { + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(m_impl->length() + 1, data); + memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); + data[m_impl->length()] = c; + m_impl = newImpl.release(); + } else + m_impl = StringImpl::create(&c, 1); +} + +String operator+(const String& a, const String& b) +{ + if (a.isEmpty()) + return b; + if (b.isEmpty()) + return a; + String c = a; + c += b; + return c; +} + +String operator+(const String& s, const char* cs) +{ + return s + String(cs); +} + +String operator+(const char* cs, const String& s) +{ + return String(cs) + s; +} + +void String::insert(const String& str, unsigned pos) +{ + if (str.isEmpty()) { + if (str.isNull()) + return; + if (isNull()) + m_impl = str.impl(); + return; + } + insert(str.characters(), str.length(), pos); +} + +void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) +{ + if (!m_impl) { + if (!charactersToAppend) + return; + m_impl = StringImpl::create(charactersToAppend, lengthToAppend); + return; + } + + if (!lengthToAppend) + return; + + ASSERT(charactersToAppend); + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(length() + lengthToAppend, data); + memcpy(data, characters(), length() * sizeof(UChar)); + memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar)); + m_impl = newImpl.release(); +} + +void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position) +{ + if (position >= length()) { + append(charactersToInsert, lengthToInsert); + return; + } + + ASSERT(m_impl); + + if (!lengthToInsert) + return; + + ASSERT(charactersToInsert); + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(length() + lengthToInsert, data); + memcpy(data, characters(), position * sizeof(UChar)); + memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar)); + memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar)); + m_impl = newImpl.release(); +} + +UChar String::operator[](unsigned i) const +{ + if (!m_impl || i >= m_impl->length()) + return 0; + return m_impl->characters()[i]; +} + +UChar32 String::characterStartingAt(unsigned i) const +{ + if (!m_impl || i >= m_impl->length()) + return 0; + return m_impl->characterStartingAt(i); +} + +unsigned String::length() const +{ + if (!m_impl) + return 0; + return m_impl->length(); +} + +void String::truncate(unsigned position) +{ + if (position >= length()) + return; + UChar* data; + RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data); + memcpy(data, characters(), position * sizeof(UChar)); + m_impl = newImpl.release(); +} + +void String::remove(unsigned position, int lengthToRemove) +{ + if (lengthToRemove <= 0) + return; + if (position >= length()) + return; + if (static_cast<unsigned>(lengthToRemove) > length() - position) + lengthToRemove = length() - position; + UChar* data; + RefPtr<StringImpl> newImpl = + StringImpl::createUninitialized(length() - lengthToRemove, data); + memcpy(data, characters(), position * sizeof(UChar)); + memcpy(data + position, characters() + position + lengthToRemove, + (length() - lengthToRemove - position) * sizeof(UChar)); + m_impl = newImpl.release(); +} + +String String::substring(unsigned pos, unsigned len) const +{ + if (!m_impl) + return String(); + return m_impl->substring(pos, len); +} + +String String::lower() const +{ + if (!m_impl) + return String(); + return m_impl->lower(); +} + +String String::upper() const +{ + if (!m_impl) + return String(); + return m_impl->upper(); +} + +String String::stripWhiteSpace() const +{ + if (!m_impl) + return String(); + return m_impl->stripWhiteSpace(); +} + +String String::simplifyWhiteSpace() const +{ + if (!m_impl) + return String(); + return m_impl->simplifyWhiteSpace(); +} + +String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const +{ + if (!m_impl) + return String(); + return m_impl->removeCharacters(findMatch); +} + +String String::foldCase() const +{ + if (!m_impl) + return String(); + return m_impl->foldCase(); +} + +bool String::percentage(int& result) const +{ + if (!m_impl || !m_impl->length()) + return false; + + if ((*m_impl)[m_impl->length() - 1] != '%') + return false; + + result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1); + return true; +} + +const UChar* String::characters() const +{ + if (!m_impl) + return 0; + return m_impl->characters(); +} + +const UChar* String::charactersWithNullTermination() +{ + if (!m_impl) + return 0; + if (m_impl->hasTerminatingNullCharacter()) + return m_impl->characters(); + m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl); + return m_impl->characters(); +} + +String String::format(const char *format, ...) +{ +#if PLATFORM(QT) + // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf. + // https://bugs.webkit.org/show_bug.cgi?id=18994 + va_list args; + va_start(args, format); + + QString buffer; + buffer.vsprintf(format, args); + + va_end(args); + + return buffer; + +#elif OS(WINCE) + va_list args; + va_start(args, format); + + Vector<char, 256> buffer; + + int bufferSize = 256; + buffer.resize(bufferSize); + for (;;) { + int written = vsnprintf(buffer.data(), bufferSize, format, args); + va_end(args); + + if (written == 0) + return String(""); + if (written > 0) + return StringImpl::create(buffer.data(), written); + + bufferSize <<= 1; + buffer.resize(bufferSize); + va_start(args, format); + } + +#else + va_list args; + va_start(args, format); + + Vector<char, 256> buffer; + + // Do the format once to get the length. +#if COMPILER(MSVC) + int result = _vscprintf(format, args); +#else + char ch; + int result = vsnprintf(&ch, 1, format, args); + // We need to call va_end() and then va_start() again here, as the + // contents of args is undefined after the call to vsnprintf + // according to http://man.cx/snprintf(3) + // + // Not calling va_end/va_start here happens to work on lots of + // systems, but fails e.g. on 64bit Linux. + va_end(args); + va_start(args, format); +#endif + + if (result == 0) + return String(""); + if (result < 0) + return String(); + unsigned len = result; + buffer.grow(len + 1); + + // Now do the formatting again, guaranteed to fit. + vsnprintf(buffer.data(), buffer.size(), format, args); + + va_end(args); + + return StringImpl::create(buffer.data(), len); +#endif +} + +String String::number(short n) +{ + return String::format("%hd", n); +} + +String String::number(unsigned short n) +{ + return String::format("%hu", n); +} + +String String::number(int n) +{ + return String::format("%d", n); +} + +String String::number(unsigned n) +{ + return String::format("%u", n); +} + +String String::number(long n) +{ + return String::format("%ld", n); +} + +String String::number(unsigned long n) +{ + return String::format("%lu", n); +} + +String String::number(long long n) +{ +#if OS(WINDOWS) && !PLATFORM(QT) + return String::format("%I64i", n); +#else + return String::format("%lli", n); +#endif +} + +String String::number(unsigned long long n) +{ +#if OS(WINDOWS) && !PLATFORM(QT) + return String::format("%I64u", n); +#else + return String::format("%llu", n); +#endif +} + +String String::number(double n) +{ + return String::format("%.6lg", n); +} + +int String::toIntStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntStrict(ok, base); +} + +unsigned String::toUIntStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUIntStrict(ok, base); +} + +int64_t String::toInt64Strict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt64Strict(ok, base); +} + +uint64_t String::toUInt64Strict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt64Strict(ok, base); +} + +intptr_t String::toIntPtrStrict(bool* ok, int base) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntPtrStrict(ok, base); +} + + +int String::toInt(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt(ok); +} + +unsigned String::toUInt(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt(ok); +} + +int64_t String::toInt64(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toInt64(ok); +} + +uint64_t String::toUInt64(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toUInt64(ok); +} + +intptr_t String::toIntPtr(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0; + } + return m_impl->toIntPtr(ok); +} + +double String::toDouble(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0.0; + } + return m_impl->toDouble(ok); +} + +float String::toFloat(bool* ok) const +{ + if (!m_impl) { + if (ok) + *ok = false; + return 0.0f; + } + return m_impl->toFloat(ok); +} + +String String::threadsafeCopy() const +{ + if (!m_impl) + return String(); + return m_impl->threadsafeCopy(); +} + +String String::crossThreadString() const +{ + if (!m_impl) + return String(); + return m_impl->crossThreadString(); +} + +bool String::isEmpty() const +{ + return !m_impl || !m_impl->length(); +} + +void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const +{ + result.clear(); + + int startPos = 0; + int endPos; + while ((endPos = find(separator, startPos)) != -1) { + if (allowEmptyEntries || startPos != endPos) + result.append(substring(startPos, endPos - startPos)); + startPos = endPos + separator.length(); + } + if (allowEmptyEntries || startPos != static_cast<int>(length())) + result.append(substring(startPos)); +} + +void String::split(const String& separator, Vector<String>& result) const +{ + return split(separator, false, result); +} + +void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const +{ + result.clear(); + + int startPos = 0; + int endPos; + while ((endPos = find(separator, startPos)) != -1) { + if (allowEmptyEntries || startPos != endPos) + result.append(substring(startPos, endPos - startPos)); + startPos = endPos + 1; + } + if (allowEmptyEntries || startPos != static_cast<int>(length())) + result.append(substring(startPos)); +} + +void String::split(UChar separator, Vector<String>& result) const +{ + return split(String(&separator, 1), false, result); +} + +Vector<char> String::ascii() const +{ + if (m_impl) + return m_impl->ascii(); + + const char* nullMsg = "(null impl)"; + Vector<char, 2048> buffer; + for (int i = 0; nullMsg[i]; ++i) + buffer.append(nullMsg[i]); + + buffer.append('\0'); + return buffer; +} + +CString String::latin1() const +{ + // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are + // preserved, characters outside of this range are converted to '?'. + + unsigned length = this->length(); + const UChar* characters = this->characters(); + + char* characterBuffer; + CString result = CString::newUninitialized(length, characterBuffer); + + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + characterBuffer[i] = ch > 255 ? '?' : ch; + } + + return result; +} + +// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available. +static inline void putUTF8Triple(char*& buffer, UChar ch) +{ + ASSERT(ch >= 0x0800); + *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); + *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); + *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); +} + +CString String::utf8() const +{ + unsigned length = this->length(); + const UChar* characters = this->characters(); + + // Allocate a buffer big enough to hold all the characters + // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). + // Optimization ideas, if we find this function is hot: + // * We could speculatively create a CStringBuffer to contain 'length' + // characters, and resize if necessary (i.e. if the buffer contains + // non-ascii characters). (Alternatively, scan the buffer first for + // ascii characters, so we know this will be sufficient). + // * We could allocate a CStringBuffer with an appropriate size to + // have a good chance of being able to write the string into the + // buffer without reallocing (say, 1.5 x length). + Vector<char, 1024> bufferVector(length * 3); + + char* buffer = bufferVector.data(); + ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), false); + ASSERT(result != sourceIllegal); // Only produced from strict conversion. + ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion + + // If a high surrogate is left unconverted, treat it the same was as an unpaired high surrogate + // would have been handled in the middle of a string with non-strict conversion - which is to say, + // simply encode it to UTF-8. + if (result == sourceExhausted) { + // This should be one unpaired high surrogate. + ASSERT((characters + 1) == (characters + length)); + ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); + // There should be room left, since one UChar hasn't been converted. + ASSERT((buffer + 3) <= (buffer + bufferVector.size())); + putUTF8Triple(buffer, *characters); + } + + return CString(bufferVector.data(), buffer - bufferVector.data()); +} + +String String::fromUTF8(const char* stringStart, size_t length) +{ + if (!stringStart) + return String(); + + // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be + // the right length, if there are any multi-byte sequences this buffer will be too large. + UChar* buffer; + String stringBuffer(StringImpl::createUninitialized(length, buffer)); + UChar* bufferEnd = buffer + length; + + // Try converting into the buffer. + const char* stringCurrent = stringStart; + if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK) + return String(); + + // stringBuffer is full (the input must have been all ascii) so just return it! + if (buffer == bufferEnd) + return stringBuffer; + + // stringBuffer served its purpose as a buffer, copy the contents out into a new string. + unsigned utf16Length = buffer - stringBuffer.characters(); + ASSERT(utf16Length < length); + return String(stringBuffer.characters(), utf16Length); +} + +String String::fromUTF8(const char* string) +{ + if (!string) + return String(); + return fromUTF8(string, strlen(string)); +} + +String String::fromUTF8WithLatin1Fallback(const char* string, size_t size) +{ + String utf8 = fromUTF8(string, size); + if (!utf8) + return String(string, size); + return utf8; +} + +// String Operations + +static bool isCharacterAllowedInBase(UChar c, int base) +{ + if (c > 0x7F) + return false; + if (isASCIIDigit(c)) + return c - '0' < base; + if (isASCIIAlpha(c)) { + if (base > 36) + base = 36; + return (c >= 'a' && c < 'a' + base - 10) + || (c >= 'A' && c < 'A' + base - 10); + } + return false; +} + +template <typename IntegralType> +static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base) +{ + static const IntegralType integralMax = std::numeric_limits<IntegralType>::max(); + static const bool isSigned = std::numeric_limits<IntegralType>::is_signed; + const IntegralType maxMultiplier = integralMax / base; + + IntegralType value = 0; + bool isOk = false; + bool isNegative = false; + + if (!data) + goto bye; + + // skip leading whitespace + while (length && isSpaceOrNewline(*data)) { + length--; + data++; + } + + if (isSigned && length && *data == '-') { + length--; + data++; + isNegative = true; + } else if (length && *data == '+') { + length--; + data++; + } + + if (!length || !isCharacterAllowedInBase(*data, base)) + goto bye; + + while (length && isCharacterAllowedInBase(*data, base)) { + length--; + IntegralType digitValue; + UChar c = *data; + if (isASCIIDigit(c)) + digitValue = c - '0'; + else if (c >= 'a') + digitValue = c - 'a' + 10; + else + digitValue = c - 'A' + 10; + + if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative)) + goto bye; + + value = base * value + digitValue; + data++; + } + +#if COMPILER(MSVC) +#pragma warning(push, 0) +#pragma warning(disable:4146) +#endif + + if (isNegative) + value = -value; + +#if COMPILER(MSVC) +#pragma warning(pop) +#endif + + // skip trailing space + while (length && isSpaceOrNewline(*data)) { + length--; + data++; + } + + if (!length) + isOk = true; +bye: + if (ok) + *ok = isOk; + return isOk ? value : 0; +} + +static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length) +{ + size_t i = 0; + + // Allow leading spaces. + for (; i != length; ++i) { + if (!isSpaceOrNewline(data[i])) + break; + } + + // Allow sign. + if (i != length && (data[i] == '+' || data[i] == '-')) + ++i; + + // Allow digits. + for (; i != length; ++i) { + if (!isASCIIDigit(data[i])) + break; + } + + return i; +} + +int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<int>(data, length, ok, base); +} + +unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<unsigned>(data, length, ok, base); +} + +int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<int64_t>(data, length, ok, base); +} + +uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<uint64_t>(data, length, ok, base); +} + +intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base) +{ + return toIntegralType<intptr_t>(data, length, ok, base); +} + +int charactersToInt(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) +{ + return toIntegralType<intptr_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); +} + +double charactersToDouble(const UChar* data, size_t length, bool* ok) +{ + if (!length) { + if (ok) + *ok = false; + return 0.0; + } + + Vector<char, 256> bytes(length + 1); + for (unsigned i = 0; i < length; ++i) + bytes[i] = data[i] < 0x7F ? data[i] : '?'; + bytes[length] = '\0'; + char* end; + double val = WTF::strtod(bytes.data(), &end); + if (ok) + *ok = (end == 0 || *end == '\0'); + return val; +} + +float charactersToFloat(const UChar* data, size_t length, bool* ok) +{ + // FIXME: This will return ok even when the string fits into a double but not a float. + return static_cast<float>(charactersToDouble(data, length, ok)); +} + +} // namespace WebCore + +#ifndef NDEBUG +// For use in the debugger - leaks memory +WebCore::String* string(const char*); + +WebCore::String* string(const char* s) +{ + return new WebCore::String(s); +} +#endif diff --git a/JavaScriptCore/wtf/text/WTFString.h b/JavaScriptCore/wtf/text/WTFString.h new file mode 100644 index 0000000..7c3c2dd --- /dev/null +++ b/JavaScriptCore/wtf/text/WTFString.h @@ -0,0 +1,398 @@ +/* + * (C) 1999 Lars Knoll (knoll@kde.org) + * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef WTFString_h +#define WTFString_h + +// This file would be called String.h, but that conflicts with <string.h> +// on systems without case-sensitive file systems. + +#include "StringImpl.h" + +#ifdef __OBJC__ +#include <objc/objc.h> +#endif + +#if PLATFORM(CF) +typedef const struct __CFString * CFStringRef; +#endif + +#if PLATFORM(QT) +QT_BEGIN_NAMESPACE +class QString; +QT_END_NAMESPACE +#include <QDataStream> +#endif + +#if PLATFORM(WX) +class wxString; +#endif + +#if PLATFORM(HAIKU) +class BString; +#endif + +namespace WTF { +class CString; +} +using WTF::CString; + +// FIXME: This is a temporary layering violation while we move string code to WTF. +// Landing the file moves in one patch, will follow on with patches to change the namespaces. +namespace WebCore { + +class SharedBuffer; +struct StringHash; + +// Declarations of string operations + +bool charactersAreAllASCII(const UChar*, size_t); +int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); +unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); +int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); +uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); +intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10); + +int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage +intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage + +double charactersToDouble(const UChar*, size_t, bool* ok = 0); +float charactersToFloat(const UChar*, size_t, bool* ok = 0); + +int find(const UChar*, size_t, UChar, int startPosition = 0); +int reverseFind(const UChar*, size_t, UChar, int startPosition = -1); + +class String { +public: + String() { } // gives null string, distinguishable from an empty string + String(const UChar*, unsigned length); + String(const UChar*); // Specifically for null terminated UTF-16 + String(const char*); + String(const char*, unsigned length); + String(StringImpl* i) : m_impl(i) { } + String(PassRefPtr<StringImpl> i) : m_impl(i) { } + String(RefPtr<StringImpl> i) : m_impl(i) { } + + void swap(String& o) { m_impl.swap(o.m_impl); } + + // Hash table deleted values, which are only constructed and never copied or destroyed. + String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { } + bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); } + + static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); } + static String adopt(Vector<UChar>& vector) { return StringImpl::adopt(vector); } + + unsigned length() const; + const UChar* characters() const; + const UChar* charactersWithNullTermination(); + + UChar operator[](unsigned i) const; // if i >= length(), returns 0 + UChar32 characterStartingAt(unsigned) const; // Ditto. + + bool contains(UChar c) const { return find(c) != -1; } + bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; } + bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; } + + int find(UChar c, int start = 0) const + { return m_impl ? m_impl->find(c, start) : -1; } + int find(CharacterMatchFunctionPtr matchFunction, int start = 0) const + { return m_impl ? m_impl->find(matchFunction, start) : -1; } + int find(const char* str, int start = 0, bool caseSensitive = true) const + { return m_impl ? m_impl->find(str, start, caseSensitive) : -1; } + int find(const String& str, int start = 0, bool caseSensitive = true) const + { return m_impl ? m_impl->find(str.impl(), start, caseSensitive) : -1; } + + int reverseFind(UChar c, int start = -1) const + { return m_impl ? m_impl->reverseFind(c, start) : -1; } + int reverseFind(const String& str, int start = -1, bool caseSensitive = true) const + { return m_impl ? m_impl->reverseFind(str.impl(), start, caseSensitive) : -1; } + + bool startsWith(const String& s, bool caseSensitive = true) const + { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); } + bool endsWith(const String& s, bool caseSensitive = true) const + { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); } + + void append(const String&); + void append(char); + void append(UChar); + void append(const UChar*, unsigned length); + void insert(const String&, unsigned pos); + void insert(const UChar*, unsigned length, unsigned pos); + + String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; } + String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; } + String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; } + String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; } + + void makeLower() { if (m_impl) m_impl = m_impl->lower(); } + void makeUpper() { if (m_impl) m_impl = m_impl->upper(); } + void makeSecure(UChar aChar) { if (m_impl) m_impl = m_impl->secure(aChar); } + + void truncate(unsigned len); + void remove(unsigned pos, int len = 1); + + String substring(unsigned pos, unsigned len = UINT_MAX) const; + String left(unsigned len) const { return substring(0, len); } + String right(unsigned len) const { return substring(length() - len, len); } + + // Returns a lowercase/uppercase version of the string + String lower() const; + String upper() const; + + String stripWhiteSpace() const; + String simplifyWhiteSpace() const; + + String removeCharacters(CharacterMatchFunctionPtr) const; + + // Return the string with case folded for case insensitive comparison. + String foldCase() const; + + static String number(short); + static String number(unsigned short); + static String number(int); + static String number(unsigned); + static String number(long); + static String number(unsigned long); + static String number(long long); + static String number(unsigned long long); + static String number(double); + + static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2); + + // Returns an uninitialized string. The characters needs to be written + // into the buffer returned in data before the returned string is used. + // Failure to do this will have unpredictable results. + static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); } + + void split(const String& separator, Vector<String>& result) const; + void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const; + void split(UChar separator, Vector<String>& result) const; + void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const; + + int toIntStrict(bool* ok = 0, int base = 10) const; + unsigned toUIntStrict(bool* ok = 0, int base = 10) const; + int64_t toInt64Strict(bool* ok = 0, int base = 10) const; + uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const; + intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const; + + int toInt(bool* ok = 0) const; + unsigned toUInt(bool* ok = 0) const; + int64_t toInt64(bool* ok = 0) const; + uint64_t toUInt64(bool* ok = 0) const; + intptr_t toIntPtr(bool* ok = 0) const; + double toDouble(bool* ok = 0) const; + float toFloat(bool* ok = 0) const; + + bool percentage(int& percentage) const; + + // Returns a StringImpl suitable for use on another thread. + String crossThreadString() const; + // Makes a deep copy. Helpful only if you need to use a String on another thread + // (use crossThreadString if the method call doesn't need to be threadsafe). + // Since the underlying StringImpl objects are immutable, there's no other reason + // to ever prefer copy() over plain old assignment. + String threadsafeCopy() const; + + bool isNull() const { return !m_impl; } + bool isEmpty() const; + + StringImpl* impl() const { return m_impl.get(); } + +#if PLATFORM(CF) + String(CFStringRef); + CFStringRef createCFString() const; +#endif + +#ifdef __OBJC__ + String(NSString*); + + // This conversion maps NULL to "", which loses the meaning of NULL, but we + // need this mapping because AppKit crashes when passed nil NSStrings. + operator NSString*() const { if (!m_impl) return @""; return *m_impl; } +#endif + +#if PLATFORM(QT) + String(const QString&); + String(const QStringRef&); + operator QString() const; +#endif + +#if PLATFORM(WX) + String(const wxString&); + operator wxString() const; +#endif + +#if PLATFORM(HAIKU) + String(const BString&); + operator BString() const; +#endif + + Vector<char> ascii() const; + + CString latin1() const; + CString utf8() const; + + static String fromUTF8(const char*, size_t); + static String fromUTF8(const char*); + + // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8. + static String fromUTF8WithLatin1Fallback(const char*, size_t); + + // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3. + WTF::Unicode::Direction defaultWritingDirection() const { return m_impl ? m_impl->defaultWritingDirection() : WTF::Unicode::LeftToRight; } + + bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); } + +private: + RefPtr<StringImpl> m_impl; +}; + +#if PLATFORM(QT) +QDataStream& operator<<(QDataStream& stream, const String& str); +QDataStream& operator>>(QDataStream& stream, String& str); +#endif + +String operator+(const String&, const String&); +String operator+(const String&, const char*); +String operator+(const char*, const String&); + +inline String& operator+=(String& a, const String& b) { a.append(b); return a; } + +inline bool operator==(const String& a, const String& b) { return equal(a.impl(), b.impl()); } +inline bool operator==(const String& a, const char* b) { return equal(a.impl(), b); } +inline bool operator==(const char* a, const String& b) { return equal(a, b.impl()); } + +inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); } +inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), b); } +inline bool operator!=(const char* a, const String& b) { return !equal(a, b.impl()); } + +inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); } +inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); } +inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); } + +inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase) +{ + return ignoreCase ? equalIgnoringCase(a, b) : (a == b); +} + +inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); } + +inline bool operator!(const String& str) { return str.isNull(); } + +inline void swap(String& a, String& b) { a.swap(b); } + +// Definitions of string operations + +#ifdef __OBJC__ +// This is for situations in WebKit where the long standing behavior has been +// "nil if empty", so we try to maintain longstanding behavior for the sake of +// entrenched clients +inline NSString* nsStringNilIfEmpty(const String& str) { return str.isEmpty() ? nil : (NSString*)str; } +#endif + +inline bool charactersAreAllASCII(const UChar* characters, size_t length) +{ + UChar ored = 0; + for (size_t i = 0; i < length; ++i) + ored |= characters[i]; + return !(ored & 0xFF80); +} + +inline int find(const UChar* characters, size_t length, UChar character, int startPosition) +{ + if (startPosition >= static_cast<int>(length)) + return -1; + for (size_t i = startPosition; i < length; ++i) { + if (characters[i] == character) + return static_cast<int>(i); + } + return -1; +} + +inline int find(const UChar* characters, size_t length, CharacterMatchFunctionPtr matchFunction, int startPosition) +{ + if (startPosition >= static_cast<int>(length)) + return -1; + for (size_t i = startPosition; i < length; ++i) { + if (matchFunction(characters[i])) + return static_cast<int>(i); + } + return -1; +} + +inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition) +{ + if (startPosition >= static_cast<int>(length) || !length) + return -1; + if (startPosition < 0) + startPosition += static_cast<int>(length); + while (true) { + if (characters[startPosition] == character) + return startPosition; + if (!startPosition) + return -1; + startPosition--; + } + ASSERT_NOT_REACHED(); + return -1; +} + +inline void append(Vector<UChar>& vector, const String& string) +{ + vector.append(string.characters(), string.length()); +} + +inline void appendNumber(Vector<UChar>& vector, unsigned char number) +{ + int numberLength = number > 99 ? 3 : (number > 9 ? 2 : 1); + size_t vectorSize = vector.size(); + vector.grow(vectorSize + numberLength); + + switch (numberLength) { + case 3: + vector[vectorSize + 2] = number % 10 + '0'; + number /= 10; + + case 2: + vector[vectorSize + 1] = number % 10 + '0'; + number /= 10; + + case 1: + vector[vectorSize] = number % 10 + '0'; + } +} + +} // namespace WebCore + +namespace WTF { + + // StringHash is the default hash for String + template<typename T> struct DefaultHash; + template<> struct DefaultHash<WebCore::String> { + typedef WebCore::StringHash Hash; + }; + +} + +#endif |