summary refs log tree commit diff stats
path: root/JavaScriptCore/wtf/text
diff options
context:
space:
mode:
author Steve Block <steveblock@google.com> 2010-04-27 16:31:00 +0100
committer Steve Block <steveblock@google.com> 2010-05-11 14:42:12 +0100
commit dcc8cf2e65d1aa555cce12431a16547e66b469ee (patch)
tree 92a8d65cd5383bca9749f5327fb5e440563926e6 /JavaScriptCore/wtf/text
parent ccac38a6b48843126402088a309597e682f40fe6 (diff)
download external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.zip
external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.tar.gz
external_webkit-dcc8cf2e65d1aa555cce12431a16547e66b469ee.tar.bz2
Merge webkit.org at r58033 : Initial merge by git
Change-Id: If006c38561af287c50cd578d251629b51e4d8cd1
Diffstat (limited to 'JavaScriptCore/wtf/text')
-rw-r--r-- JavaScriptCore/wtf/text/AtomicString.cpp 327
-rw-r--r-- JavaScriptCore/wtf/text/AtomicString.h 166
-rw-r--r-- JavaScriptCore/wtf/text/AtomicStringImpl.h 36
-rw-r--r-- JavaScriptCore/wtf/text/CString.cpp 102
-rw-r--r-- JavaScriptCore/wtf/text/CString.h 81
-rw-r--r-- JavaScriptCore/wtf/text/StringBuffer.h 77
-rw-r--r-- JavaScriptCore/wtf/text/StringHash.h 268
-rw-r--r-- JavaScriptCore/wtf/text/StringImpl.cpp 953
-rw-r--r-- JavaScriptCore/wtf/text/StringImpl.h 389
-rw-r--r-- JavaScriptCore/wtf/text/StringImplBase.h 103
-rw-r--r-- JavaScriptCore/wtf/text/WTFString.cpp 960
-rw-r--r-- JavaScriptCore/wtf/text/WTFString.h 398
12 files changed, 3860 insertions, 0 deletions
diff --git a/JavaScriptCore/wtf/text/AtomicString.cpp b/JavaScriptCore/wtf/text/AtomicString.cpp
new file mode 100644
index 0000000..79b9ab5
--- /dev/null
+++ b/JavaScriptCore/wtf/text/AtomicString.cpp
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+
+#ifdef SKIP_STATIC_CONSTRUCTORS_ON_GCC
+#define ATOMICSTRING_HIDE_GLOBALS 1
+#endif
+
+#include "AtomicString.h"
+
+#include "StaticConstructors.h"
+#include "StringHash.h"
+#include <wtf/Threading.h>
+#include <wtf/HashSet.h>
+#include <wtf/WTFThreadData.h>
+
+namespace WebCore {
+
+// Wraps the HashSet of StringImpl pointers that backs AtomicString, and knows
+// how to register itself on the WTFThreadData returned by wtfThreadData().
+class AtomicStringTable {
+public:
+    // Allocates a new table and stores both it and the function that deletes
+    // it on the current WTFThreadData. Returns the new table.
+    static AtomicStringTable* create()
+    {
+        AtomicStringTable* newTable = new AtomicStringTable;
+
+        WTFThreadData& threadData = wtfThreadData();
+        threadData.m_atomicStringTable = newTable;
+        threadData.m_atomicStringTableDestructor = AtomicStringTable::destroy;
+
+        return newTable;
+    }
+
+    // Mutable access to the underlying set.
+    HashSet<StringImpl*>& table() { return m_table; }
+
+private:
+    // Handed to WTFThreadData by create(); deletes the table it is given.
+    static void destroy(AtomicStringTable* table)
+    {
+        delete table;
+    }
+
+    HashSet<StringImpl*> m_table;
+};
+
+// Returns the atomic string set registered on the current WTFThreadData,
+// creating and registering it on first use.
+static inline HashSet<StringImpl*>& stringTable()
+{
+    // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
+    AtomicStringTable* existing = wtfThreadData().atomicStringTable();
+    if (LIKELY(existing))
+        return existing->table();
+    return AtomicStringTable::create()->table();
+}
+
+// HashSet translator used by AtomicString::add(const char*): lets the atomic
+// string table be probed with a NUL-terminated C string, and builds the
+// StringImpl only when a new entry has to be stored.
+struct CStringTranslator {
+    static unsigned hash(const char* c)
+    {
+        return StringImpl::computeHash(c);
+    }
+
+    // Returns true when |r| holds exactly the characters of the NUL-terminated
+    // string |s|. Each byte of |s| is compared as an unsigned value.
+    static bool equal(StringImpl* r, const char* s)
+    {
+        unsigned length = r->length();
+        const UChar* d = r->characters();
+        for (unsigned i = 0; i != length; ++i) {
+            unsigned char c = s[i];
+            // Stop at the terminator of |s|. Without this check a U+0000 stored
+            // inside |r| would "match" the terminator and the loop would go on
+            // to read bytes beyond the end of |s|.
+            if (!c || d[i] != c)
+                return false;
+        }
+        // |s| must end exactly where |r| does.
+        return s[length] == 0;
+    }
+
+    // Stores a newly created StringImpl for |c| in the table slot |location|,
+    // recording the hash already computed for the lookup and flagging the
+    // string as being in the table.
+    static void translate(StringImpl*& location, const char* const& c, unsigned hash)
+    {
+        location = StringImpl::create(c).releaseRef();
+        location->setHash(hash);
+        location->setInTable();
+    }
+};
+
+// Compares an AtomicString with a C string. An AtomicString with no impl, or
+// whose impl has no character buffer, is treated as null and equals only a
+// null pointer.
+bool operator==(const AtomicString& a, const char* b)
+{
+    StringImpl* impl = a.impl();
+    const bool aIsNull = !impl || !impl->characters();
+    const bool bIsNull = !b;
+    if (aIsNull || bIsNull)
+        return aIsNull && bIsNull;
+    return CStringTranslator::equal(impl, b);
+}
+
+// Returns the table entry for the NUL-terminated string |c|, inserting one if
+// needed. A null pointer yields a null result; an empty string yields
+// StringImpl::empty() without touching the table.
+PassRefPtr<StringImpl> AtomicString::add(const char* c)
+{
+    if (!c)
+        return 0;
+    if (*c == '\0')
+        return StringImpl::empty();
+
+    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<const char*, CStringTranslator>(c);
+
+    // If the string is newly-translated, then we need to adopt it.
+    // The boolean in the pair tells us if that is so.
+    return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
+}
+
+struct UCharBuffer { // Lookup key consumed by UCharBufferTranslator: a UChar pointer plus an explicit length.
+ const UChar* s; // First character of the run; passed with length to StringImpl::computeHash() and StringImpl::create().
+ unsigned length; // Number of UChars starting at s.
+};
+
+static inline bool equal(StringImpl* string, const UChar* characters, unsigned length) // True when string holds exactly the length UChars starting at characters.
+{
+ if (string->length() != length) // Different lengths can never match.
+ return false;
+
+ // FIXME: perhaps we should have a more abstract macro that indicates when
+ // going 4 bytes at a time is unsafe
+#if CPU(ARM) || CPU(SH4)
+ const UChar* stringCharacters = string->characters();
+ for (unsigned i = 0; i != length; ++i) { // Compare one UChar per iteration.
+ if (*stringCharacters++ != *characters++)
+ return false;
+ }
+ return true;
+#else
+ /* Do it 4-bytes-at-a-time on architectures where it's safe */
+
+ const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters());
+ const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters);
+
+ unsigned halfLength = length >> 1; // Number of 32-bit words compared by the loop below.
+ for (unsigned i = 0; i != halfLength; ++i) {
+ if (*stringCharacters++ != *bufferCharacters++)
+ return false;
+ }
+
+ if (length & 1 && *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters)) // Odd length: one 16-bit unit is left after the word loop.
+ return false;
+
+ return true;
+#endif
+}
+
+// HashSet translator that probes the atomic string table with a UCharBuffer
+// and builds the StringImpl only when a new entry has to be stored.
+struct UCharBufferTranslator {
+    static unsigned hash(const UCharBuffer& buf)
+    {
+        return StringImpl::computeHash(buf.s, buf.length);
+    }
+
+    static bool equal(StringImpl* const& str, const UCharBuffer& buf)
+    {
+        return WebCore::equal(str, buf.s, buf.length);
+    }
+
+    // Fills the table slot |location| with a new StringImpl copied from |buf|,
+    // tagged with the lookup hash and flagged as being in the table.
+    static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
+    {
+        StringImpl* created = StringImpl::create(buf.s, buf.length).releaseRef();
+        created->setHash(hash);
+        created->setInTable();
+        location = created;
+    }
+};
+
+struct HashAndCharacters { // Lookup key consumed by HashAndCharactersTranslator: characters plus a hash supplied by the caller.
+ unsigned hash; // Caller-supplied hash; HashAndCharactersTranslator::hash() asserts it equals StringImpl::computeHash(characters, length).
+ const UChar* characters; // First character of the run.
+ unsigned length; // Number of UChars starting at characters.
+};
+
+// HashSet translator for lookups whose hash has already been computed by the
+// caller, so the characters are not hashed a second time.
+struct HashAndCharactersTranslator {
+    // Returns the caller-supplied hash; debug builds verify it against the characters.
+    static unsigned hash(const HashAndCharacters& buffer)
+    {
+        ASSERT(buffer.hash == StringImpl::computeHash(buffer.characters, buffer.length));
+        return buffer.hash;
+    }
+
+    static bool equal(StringImpl* const& string, const HashAndCharacters& buffer)
+    {
+        return WebCore::equal(string, buffer.characters, buffer.length);
+    }
+
+    // Fills the table slot |location| with a new StringImpl copied from
+    // |buffer|, tagged with the lookup hash and flagged as being in the table.
+    static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash)
+    {
+        StringImpl* created = StringImpl::create(buffer.characters, buffer.length).releaseRef();
+        created->setHash(hash);
+        created->setInTable();
+        location = created;
+    }
+};
+
+// Returns the table entry for the |length| UChars at |s|, inserting one if
+// needed. A null |s| yields a null result; a zero length yields
+// StringImpl::empty() without touching the table.
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
+{
+    if (!s)
+        return 0;
+    if (!length)
+        return StringImpl::empty();
+
+    UCharBuffer key = { s, length };
+    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(key);
+
+    // The boolean in the pair says whether the entry was newly translated;
+    // only then do we adopt the reference created by the translator.
+    if (addResult.second)
+        return adoptRef(*addResult.first);
+    return *addResult.first;
+}
+
+// Same as add(const UChar*, unsigned), but reuses |existingHash| instead of
+// hashing the characters again. |s| and |existingHash| are asserted non-zero.
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
+{
+    ASSERT(s);
+    ASSERT(existingHash);
+
+    if (!length)
+        return StringImpl::empty();
+
+    HashAndCharacters key = { existingHash, s, length };
+    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndCharacters, HashAndCharactersTranslator>(key);
+
+    // If the string is newly-translated, then we need to adopt it.
+    // The boolean in the pair tells us if that is so.
+    return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
+}
+
+// Returns the table entry for the zero-terminated UChar string |s|, inserting
+// one if needed. A null |s| yields a null result; an empty string yields
+// StringImpl::empty() without touching the table.
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
+{
+    if (!s)
+        return 0;
+
+    // Count in the same unsigned type as UCharBuffer::length so the brace
+    // initialiser below involves no signed-to-unsigned narrowing.
+    unsigned length = 0;
+    while (s[length] != UChar(0))
+        ++length;
+
+    if (!length)
+        return StringImpl::empty();
+
+    UCharBuffer buf = { s, length };
+    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(buf);
+
+    // If the string is newly-translated, then we need to adopt it.
+    // The boolean in the pair tells us if that is so.
+    return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
+}
+
+// Returns the table entry corresponding to |r|. Null pointers and strings
+// already flagged as in the table are returned unchanged; a zero-length
+// string yields StringImpl::empty().
+PassRefPtr<StringImpl> AtomicString::add(StringImpl* r)
+{
+    if (!r)
+        return r;
+    if (r->inTable())
+        return r;
+    if (!r->length())
+        return StringImpl::empty();
+
+    HashSet<StringImpl*>& table = stringTable();
+    StringImpl* entry = *table.add(r).first;
+    // If the entry the table reports is |r| itself, flag it as being in the table.
+    if (entry == r)
+        r->setInTable();
+    return entry;
+}
+
+// Looks up the |length| UChars at |s| using the caller-supplied
+// |existingHash|, without inserting. Returns 0 when there is no such entry;
+// a zero length yields StringImpl::empty().
+AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash)
+{
+    ASSERT(s);
+    ASSERT(existingHash);
+
+    if (!length)
+        return static_cast<AtomicStringImpl*>(StringImpl::empty());
+
+    HashAndCharacters key = { existingHash, s, length };
+    HashSet<StringImpl*>& table = stringTable();
+    HashSet<StringImpl*>::iterator match = table.find<HashAndCharacters, HashAndCharactersTranslator>(key);
+    if (match == table.end())
+        return 0;
+    return static_cast<AtomicStringImpl*>(*match);
+}
+
+// Removes |r| from the atomic string table.
+void AtomicString::remove(StringImpl* r)
+{
+    HashSet<StringImpl*>& table = stringTable();
+    table.remove(r);
+}
+
+// Returns the lower-cased form of this string. When StringImpl::lower() hands
+// back the same impl, *this is returned and no table lookup is done.
+AtomicString AtomicString::lower() const
+{
+    // Note: This is a hot function in the Dromaeo benchmark.
+    StringImpl* impl = this->impl();
+    // A null AtomicString has no impl to call lower() on; return it unchanged
+    // instead of dereferencing a null pointer.
+    if (UNLIKELY(!impl))
+        return *this;
+    RefPtr<StringImpl> newImpl = impl->lower();
+    if (LIKELY(newImpl == impl))
+        return *this;
+    return AtomicString(newImpl);
+}
+
+JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, nullAtom) // Storage for the global atoms; AtomicString::init() constructs each of them with placement new.
+JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, emptyAtom, "") // Constructed from "" in init().
+JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, textAtom, "#text") // Constructed from "#text" in init().
+JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, commentAtom, "#comment") // Constructed from "#comment" in init().
+JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, starAtom, "*") // Constructed from "*" in init().
+JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlAtom, "xml") // Constructed from "xml" in init().
+JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlnsAtom, "xmlns") // Constructed from "xmlns" in init().
+
+// Constructs the global atoms defined above. Only the first call does any
+// work; later calls return immediately.
+void AtomicString::init()
+{
+    static bool initialized;
+    if (initialized)
+        return;
+
+    // Initialization is not thread safe, so this function must be called from the main thread first.
+    ASSERT(isMainThread());
+
+    // Use placement new to initialize the globals.
+    new ((void*)&nullAtom) AtomicString;
+    new ((void*)&emptyAtom) AtomicString("");
+    new ((void*)&textAtom) AtomicString("#text");
+    new ((void*)&commentAtom) AtomicString("#comment");
+    new ((void*)&starAtom) AtomicString("*");
+    new ((void*)&xmlAtom) AtomicString("xml");
+    new ((void*)&xmlnsAtom) AtomicString("xmlns");
+
+    initialized = true;
+}
+
+}
diff --git a/JavaScriptCore/wtf/text/AtomicString.h b/JavaScriptCore/wtf/text/AtomicString.h
new file mode 100644
index 0000000..9db70f4
--- /dev/null
+++ b/JavaScriptCore/wtf/text/AtomicString.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2004, 2005, 2006, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef AtomicString_h
+#define AtomicString_h
+
+#include "AtomicStringImpl.h"
+#include "WTFString.h"
+
+// Define 'NO_IMPLICIT_ATOMICSTRING' before including this header,
+// to disallow (expensive) implicit String-->AtomicString conversions.
+#ifdef NO_IMPLICIT_ATOMICSTRING
+#define ATOMICSTRING_CONVERSION explicit
+#else
+#define ATOMICSTRING_CONVERSION
+#endif
+
+// FIXME: This is a temporary layering violation while we move string code to WTF.
+// The file moves land in one patch; follow-up patches will change the namespaces.
+namespace WebCore {
+
+struct AtomicStringHash;
+
+// A String whose StringImpl is obtained through the atomic string table (see
+// the private add() overloads), so two AtomicStrings can be compared by impl
+// pointer alone (see operator== below).
+class AtomicString : public FastAllocBase {
+public:
+    // Constructs the global atoms (nullAtom, emptyAtom, ...).
+    static void init();
+
+    AtomicString() { }
+    AtomicString(const char* s) : m_string(add(s)) { }
+    AtomicString(const UChar* s, unsigned length) : m_string(add(s, length)) { }
+    // |existingHash| must be the hash of the |length| characters at |s|.
+    AtomicString(const UChar* s, unsigned length, unsigned existingHash) : m_string(add(s, length, existingHash)) { }
+    AtomicString(const UChar* s) : m_string(add(s)) { }
+    ATOMICSTRING_CONVERSION AtomicString(StringImpl* imp) : m_string(add(imp)) { }
+    // Wraps |imp| directly, without going through add().
+    AtomicString(AtomicStringImpl* imp) : m_string(imp) { }
+    ATOMICSTRING_CONVERSION AtomicString(const String& s) : m_string(add(s.impl())) { }
+
+    // Hash table deleted values, which are only constructed and never copied or destroyed.
+    AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
+    bool isHashTableDeletedValue() const { return m_string.isHashTableDeletedValue(); }
+
+    // Table lookup that never inserts; defined in AtomicString.cpp.
+    static AtomicStringImpl* find(const UChar* s, unsigned length, unsigned existingHash);
+
+    operator const String&() const { return m_string; }
+    const String& string() const { return m_string; }
+
+    AtomicStringImpl* impl() const { return static_cast<AtomicStringImpl *>(m_string.impl()); }
+
+    const UChar* characters() const { return m_string.characters(); }
+    unsigned length() const { return m_string.length(); }
+
+    UChar operator[](unsigned int i) const { return m_string[i]; }
+
+    // The query functions below forward to the wrapped String.
+    bool contains(UChar c) const { return m_string.contains(c); }
+    bool contains(const char* s, bool caseSensitive = true) const
+        { return m_string.contains(s, caseSensitive); }
+    bool contains(const String& s, bool caseSensitive = true) const
+        { return m_string.contains(s, caseSensitive); }
+
+    int find(UChar c, int start = 0) const { return m_string.find(c, start); }
+    int find(const char* s, int start = 0, bool caseSensitive = true) const
+        { return m_string.find(s, start, caseSensitive); }
+    int find(const String& s, int start = 0, bool caseSensitive = true) const
+        { return m_string.find(s, start, caseSensitive); }
+
+    bool startsWith(const String& s, bool caseSensitive = true) const
+        { return m_string.startsWith(s, caseSensitive); }
+    bool endsWith(const String& s, bool caseSensitive = true) const
+        { return m_string.endsWith(s, caseSensitive); }
+
+    AtomicString lower() const;
+    // NOTE(review): calls upper() through impl() without a null check.
+    AtomicString upper() const { return AtomicString(impl()->upper()); }
+
+    int toInt(bool* ok = 0) const { return m_string.toInt(ok); }
+    double toDouble(bool* ok = 0) const { return m_string.toDouble(ok); }
+    float toFloat(bool* ok = 0) const { return m_string.toFloat(ok); }
+    bool percentage(int& p) const { return m_string.percentage(p); }
+
+    bool isNull() const { return m_string.isNull(); }
+    bool isEmpty() const { return m_string.isEmpty(); }
+
+    // Removes |StringImpl| from the atomic string table.
+    static void remove(StringImpl*);
+
+#if PLATFORM(CF)
+    AtomicString(CFStringRef s) : m_string(add(String(s).impl())) { }
+    CFStringRef createCFString() const { return m_string.createCFString(); }
+#endif
+#ifdef __OBJC__
+    AtomicString(NSString* s) : m_string(add(String(s).impl())) { }
+    operator NSString*() const { return m_string; }
+#endif
+#if PLATFORM(QT)
+    AtomicString(const QString& s) : m_string(add(String(s).impl())) { }
+    operator QString() const { return m_string; }
+#endif
+
+private:
+    String m_string;
+
+    // Each overload returns the table entry for its argument, inserting one
+    // if needed; defined in AtomicString.cpp.
+    static PassRefPtr<StringImpl> add(const char*);
+    static PassRefPtr<StringImpl> add(const UChar*, unsigned length);
+    static PassRefPtr<StringImpl> add(const UChar*, unsigned length, unsigned existingHash);
+    static PassRefPtr<StringImpl> add(const UChar*);
+    static PassRefPtr<StringImpl> add(StringImpl*);
+};
+
+// Equality. Two AtomicStrings are compared by impl pointer; comparisons
+// against String go through equal() on the two impls.
+inline bool operator==(const AtomicString& a, const AtomicString& b)
+{
+    return a.impl() == b.impl();
+}
+bool operator==(const AtomicString& a, const char* b);
+inline bool operator==(const AtomicString& a, const String& b)
+{
+    return equal(a.impl(), b.impl());
+}
+inline bool operator==(const char* a, const AtomicString& b)
+{
+    return b == a;
+}
+inline bool operator==(const String& a, const AtomicString& b)
+{
+    return equal(a.impl(), b.impl());
+}
+
+// Inequality: the negation of the matching operator== above.
+inline bool operator!=(const AtomicString& a, const AtomicString& b)
+{
+    return !(a.impl() == b.impl());
+}
+inline bool operator!=(const AtomicString& a, const char *b)
+{
+    return !(a == b);
+}
+inline bool operator!=(const AtomicString& a, const String& b)
+{
+    return !equal(a.impl(), b.impl());
+}
+inline bool operator!=(const char* a, const AtomicString& b)
+{
+    return !(b == a);
+}
+inline bool operator!=(const String& a, const AtomicString& b)
+{
+    return !equal(a.impl(), b.impl());
+}
+
+// Case-insensitive comparison, forwarded to the overloads taking impls / C strings.
+inline bool equalIgnoringCase(const AtomicString& a, const AtomicString& b)
+{
+    return equalIgnoringCase(a.impl(), b.impl());
+}
+inline bool equalIgnoringCase(const AtomicString& a, const char* b)
+{
+    return equalIgnoringCase(a.impl(), b);
+}
+inline bool equalIgnoringCase(const AtomicString& a, const String& b)
+{
+    return equalIgnoringCase(a.impl(), b.impl());
+}
+inline bool equalIgnoringCase(const char* a, const AtomicString& b)
+{
+    return equalIgnoringCase(a, b.impl());
+}
+inline bool equalIgnoringCase(const String& a, const AtomicString& b)
+{
+    return equalIgnoringCase(a.impl(), b.impl());
+}
+
+// Define external global variables for the commonly used atomic strings.
+// These are only usable from the main thread.
+#ifndef ATOMICSTRING_HIDE_GLOBALS
+ extern const JS_EXPORTDATA AtomicString nullAtom;
+ extern const JS_EXPORTDATA AtomicString emptyAtom;
+ extern const JS_EXPORTDATA AtomicString textAtom;
+ extern const JS_EXPORTDATA AtomicString commentAtom;
+ extern const JS_EXPORTDATA AtomicString starAtom;
+ extern const JS_EXPORTDATA AtomicString xmlAtom;
+ extern const JS_EXPORTDATA AtomicString xmlnsAtom;
+#endif
+
+} // namespace WebCore
+
+
+namespace WTF {
+
+ // Make WebCore::AtomicStringHash the default hash for WebCore::AtomicString by specializing DefaultHash.
+ template<typename T> struct DefaultHash;
+ template<> struct DefaultHash<WebCore::AtomicString> {
+ typedef WebCore::AtomicStringHash Hash; // AtomicStringHash is only forward-declared in this header.
+ };
+
+} // namespace WTF
+
+#endif // AtomicString_h
diff --git a/JavaScriptCore/wtf/text/AtomicStringImpl.h b/JavaScriptCore/wtf/text/AtomicStringImpl.h
new file mode 100644
index 0000000..d21a00a
--- /dev/null
+++ b/JavaScriptCore/wtf/text/AtomicStringImpl.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef AtomicStringImpl_h
+#define AtomicStringImpl_h
+
+#include "StringImpl.h"
+
+// FIXME: This is a temporary layering violation while we move string code to WTF.
+// The file moves land in one patch; follow-up patches will change the namespaces.
+namespace WebCore {
+
+class AtomicStringImpl : public StringImpl // Adds no members; it is the pointer type AtomicString::impl() and AtomicString::find() hand out via static_cast.
+{
+};
+
+}
+
+#endif
diff --git a/JavaScriptCore/wtf/text/CString.cpp b/JavaScriptCore/wtf/text/CString.cpp
new file mode 100644
index 0000000..d93a5a3
--- /dev/null
+++ b/JavaScriptCore/wtf/text/CString.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2003, 2006, 2008, 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "CString.h"
+
+using std::min;
+
+namespace WTF {
+
+// Copies the NUL-terminated string |str|. A null |str| produces a null
+// CString.
+CString::CString(const char* str)
+{
+    // strlen() must not be given a null pointer, so handle null here rather
+    // than relying on the check inside init(), which runs too late.
+    if (!str)
+        return;
+
+    init(str, strlen(str));
+}
+
+CString::CString(const char* str, unsigned length) // Copies length bytes from str; a null str leaves the CString null (see init()).
+{
+ init(str, length);
+}
+
+// Allocates a buffer of |length| + 1 bytes, copies |length| bytes from |str|
+// into it and appends a terminating NUL. Does nothing when |str| is null.
+void CString::init(const char* str, unsigned length)
+{
+    if (!str)
+        return;
+
+    m_buffer = CStringBuffer::create(length + 1);
+    char* bytes = m_buffer->mutableData();
+    memcpy(bytes, str, length);
+    bytes[length] = '\0';
+}
+
+// Read-only pointer to the characters, or 0 for a null CString.
+const char* CString::data() const
+{
+    if (!m_buffer)
+        return 0;
+    return m_buffer->data();
+}
+
+// Writable pointer to the characters, or 0 for a null CString. Calls
+// copyBufferIfNeeded() first so the buffer written to is not shared.
+char* CString::mutableData()
+{
+    copyBufferIfNeeded();
+    return m_buffer ? m_buffer->mutableData() : 0;
+}
+
+// Number of characters, not counting the terminating NUL stored in the
+// buffer; 0 for a null CString.
+unsigned CString::length() const
+{
+    if (!m_buffer)
+        return 0;
+    return m_buffer->length() - 1;
+}
+
+// Creates a CString with room for |length| characters plus a terminating NUL
+// (which is written here) and hands the writable buffer back through
+// |characterBuffer| for the caller to fill in.
+CString CString::newUninitialized(size_t length, char*& characterBuffer)
+{
+    CString result;
+    result.m_buffer = CStringBuffer::create(length + 1);
+    characterBuffer = result.m_buffer->mutableData();
+    characterBuffer[length] = '\0';
+    return result;
+}
+
+// Gives this CString its own copy of the buffer when the buffer has more than
+// one reference. Does nothing for a null CString or a buffer with one reference.
+void CString::copyBufferIfNeeded()
+{
+    if (!m_buffer || m_buffer->hasOneRef())
+        return;
+
+    // Keep the size unsigned, matching CStringBuffer::create(), so a very
+    // large buffer can never turn into a negative byte count for memcpy.
+    unsigned length = m_buffer->length();
+    // Hold the old buffer alive while m_buffer is replaced.
+    RefPtr<CStringBuffer> original = m_buffer;
+    m_buffer = CStringBuffer::create(length);
+    memcpy(m_buffer->mutableData(), original->data(), length);
+}
+
+// Two CStrings are equal when both are null, or when both are non-null and
+// hold the same bytes over the same length.
+bool operator==(const CString& a, const CString& b)
+{
+    if (a.isNull() != b.isNull())
+        return false;
+    // Both null: equal. Return before handing null data pointers to a
+    // comparison function.
+    if (a.isNull())
+        return true;
+    if (a.length() != b.length())
+        return false;
+    // CString is length-based, so compare every byte; strncmp would stop at
+    // the first embedded NUL and ignore whatever follows it.
+    return !memcmp(a.data(), b.data(), a.length());
+}
+
+} // namespace WTF
diff --git a/JavaScriptCore/wtf/text/CString.h b/JavaScriptCore/wtf/text/CString.h
new file mode 100644
index 0000000..47f7675
--- /dev/null
+++ b/JavaScriptCore/wtf/text/CString.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2003, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CString_h
+#define CString_h
+
+#include "PassRefPtr.h"
+#include "RefCounted.h"
+#include "Vector.h"
+
+namespace WTF {
+
+// Reference-counted char storage used by CString. Only CString can create
+// one or obtain a writable pointer into it.
+class CStringBuffer : public RefCounted<CStringBuffer> {
+public:
+    // Read-only accessors; const-qualified so they can be used through a
+    // const CStringBuffer as well.
+    const char* data() const { return m_vector.data(); }
+    // Size of the underlying vector in bytes.
+    size_t length() const { return m_vector.size(); }
+
+private:
+    friend class CString;
+
+    static PassRefPtr<CStringBuffer> create(unsigned length) { return adoptRef(new CStringBuffer(length)); }
+    CStringBuffer(unsigned length) : m_vector(length) { }
+    char* mutableData() { return m_vector.data(); }
+
+    Vector<char> m_vector;
+};
+
+// A container for a null-terminated char array supporting copy-on-write
+// assignment. The contained char array may be null.
+class CString {
+public:
+ CString() { } // Null string: no buffer.
+ CString(const char*); // Copies a NUL-terminated string.
+ CString(const char*, unsigned length); // Copies length bytes and appends a NUL.
+ CString(CStringBuffer* buffer) : m_buffer(buffer) { } // Takes a reference to an existing buffer; nothing is copied.
+ static CString newUninitialized(size_t length, char*& characterBuffer); // Allocates length + 1 bytes, writes the trailing NUL, returns the writable buffer via characterBuffer.
+
+ const char* data() const; // 0 when null.
+ char* mutableData(); // Calls copyBufferIfNeeded() first; 0 when null.
+ unsigned length() const; // Buffer size minus one; 0 when null.
+
+ bool isNull() const { return !m_buffer; }
+
+ CStringBuffer* buffer() const { return m_buffer.get(); }
+
+private:
+ void copyBufferIfNeeded(); // Replaces m_buffer with a private copy unless it is null or has one reference.
+ void init(const char*, unsigned length); // Shared by the two copying constructors.
+ RefPtr<CStringBuffer> m_buffer;
+};
+
+bool operator==(const CString& a, const CString& b);
+inline bool operator!=(const CString& a, const CString& b) { return !(a == b); }
+
+} // namespace WTF
+
+using WTF::CString;
+
+#endif // CString_h
diff --git a/JavaScriptCore/wtf/text/StringBuffer.h b/JavaScriptCore/wtf/text/StringBuffer.h
new file mode 100644
index 0000000..353a44a
--- /dev/null
+++ b/JavaScriptCore/wtf/text/StringBuffer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Inc. ("Apple") nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef StringBuffer_h
+#define StringBuffer_h
+
+#include <wtf/Assertions.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// A non-copyable, heap-allocated array of UChars obtained from fastMalloc and
+// released with fastFree. The contents are not initialized.
+class StringBuffer : public Noncopyable {
+public:
+    explicit StringBuffer(unsigned length)
+        : m_length(length)
+        , m_data(static_cast<UChar*>(fastMalloc(allocationSize(length))))
+    {
+    }
+    ~StringBuffer()
+    {
+        fastFree(m_data);
+    }
+
+    // Lowers the logical length; the allocation itself is left untouched.
+    void shrink(unsigned newLength)
+    {
+        ASSERT(newLength <= m_length);
+        m_length = newLength;
+    }
+
+    // Sets the logical length, reallocating only when the buffer has to grow.
+    void resize(unsigned newLength)
+    {
+        if (newLength > m_length)
+            m_data = static_cast<UChar*>(fastRealloc(m_data, allocationSize(newLength)));
+        m_length = newLength;
+    }
+
+    unsigned length() const { return m_length; }
+    UChar* characters() { return m_data; }
+
+    UChar& operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
+
+    // Hands the allocation to the caller; afterwards the destructor frees nothing.
+    UChar* release() { UChar* data = m_data; m_data = 0; return data; }
+
+private:
+    // Byte size of |length| UChars. Crashes instead of letting the
+    // multiplication wrap around, which would yield a buffer smaller than
+    // the length recorded in m_length.
+    static size_t allocationSize(unsigned length)
+    {
+        if (length > static_cast<size_t>(-1) / sizeof(UChar))
+            CRASH();
+        return length * sizeof(UChar);
+    }
+
+    unsigned m_length;
+    UChar* m_data;
+};
+
+}
+
+#endif
diff --git a/JavaScriptCore/wtf/text/StringHash.h b/JavaScriptCore/wtf/text/StringHash.h
new file mode 100644
index 0000000..b820004
--- /dev/null
+++ b/JavaScriptCore/wtf/text/StringHash.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved
+ * Copyright (C) Research In Motion Limited 2009. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef StringHash_h
+#define StringHash_h
+
+#include "AtomicString.h"
+#include "WTFString.h"
+#include <wtf/HashTraits.h>
+#include <wtf/StringHashFunctions.h>
+#include <wtf/unicode/Unicode.h>
+
+// FIXME: This is a temporary layering violation while we move string code to WTF.
+// Landing the file moves in one patch, will follow on with patches to change the namespaces.
+namespace WebCore {
+
+ // The hash() functions on StringHash and CaseFoldingHash do not support
+ // null strings. get(), contains(), and add() on HashMap<String,..., StringHash>
+ // cause a null-pointer dereference when passed null strings.
+
+ // FIXME: We should really figure out a way to put the computeHash function that's
+ // currently a member function of StringImpl into this file so we can be a little
+ // closer to having all the nearly-identical hash functions in one place.
+
+ // Hash functions for hash tables keyed by StringImpl*, RefPtr<StringImpl>
+ // or String. Equality is an exact comparison of length and code units.
+ struct StringHash {
+ // Dereferences key without a null check.
+ static unsigned hash(StringImpl* key) { return key->hash(); }
+ // Null-tolerant: two null pointers compare equal, null never equals non-null.
+ static bool equal(const StringImpl* a, const StringImpl* b)
+ {
+ if (a == b)
+ return true;
+ if (!a || !b)
+ return false;
+
+ unsigned aLength = a->length();
+ unsigned bLength = b->length();
+ if (aLength != bLength)
+ return false;
+
+ // FIXME: perhaps we should have a more abstract macro that indicates when
+ // going 4 bytes at a time is unsafe
+#if CPU(ARM) || CPU(SH4)
+ // Compare one UChar at a time.
+ const UChar* aChars = a->characters();
+ const UChar* bChars = b->characters();
+ for (unsigned i = 0; i != aLength; ++i) {
+ if (*aChars++ != *bChars++)
+ return false;
+ }
+ return true;
+#else
+ /* Do it 4-bytes-at-a-time on architectures where it's safe */
+ // NOTE(review): this reads the UChar buffers through uint32_t pointers, so it
+ // presumably relies on characters() being suitably aligned -- confirm.
+ const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters());
+ const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters());
+
+ // Each 32-bit word covers two UChars.
+ unsigned halfLength = aLength >> 1;
+ for (unsigned i = 0; i != halfLength; ++i)
+ if (*aChars++ != *bChars++)
+ return false;
+
+ // Odd length: compare the one remaining UChar.
+ if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars))
+ return false;
+
+ return true;
+#endif
+ }
+
+ // Dereferences key without a null check.
+ static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); }
+ static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
+ {
+ return equal(a.get(), b.get());
+ }
+
+ // Dereferences key.impl() without a null check.
+ static unsigned hash(const String& key) { return key.impl()->hash(); }
+ static bool equal(const String& a, const String& b)
+ {
+ return equal(a.impl(), b.impl());
+ }
+
+ static const bool safeToCompareToEmptyOrDeleted = false;
+ };
+
+ // Hash functions that treat strings differing only by case as the same key.
+ // Each code unit is passed through WTF::Unicode::foldCase before hashing,
+ // and equality is decided by WTF::Unicode::umemcasecmp.
+ class CaseFoldingHash {
+ public:
+     // Paul Hsieh's SuperFastHash
+     // http://www.azillionmonkeys.com/qed/hash.html
+     static unsigned hash(const UChar* data, unsigned length)
+     {
+         return foldedHash(data, length);
+     }
+
+     // Dereferences str without a null check.
+     static unsigned hash(StringImpl* str)
+     {
+         return hash(str->characters(), str->length());
+     }
+
+     static unsigned hash(const char* str, unsigned length)
+     {
+         // This hash is designed to work on 16-bit chunks at a time. But since the normal case
+         // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
+         // were 16-bit chunks, which will give matching results.
+         // The bytes are read as unsigned char: where plain char is signed, a byte
+         // >= 0x80 would otherwise reach foldCase as a negative value and hash
+         // differently from the same character stored as a UChar.
+         return foldedHash(reinterpret_cast<const unsigned char*>(str), length);
+     }
+
+     // Null-tolerant: two null pointers compare equal, null never equals non-null.
+     static bool equal(const StringImpl* a, const StringImpl* b)
+     {
+         if (a == b)
+             return true;
+         if (!a || !b)
+             return false;
+         unsigned length = a->length();
+         if (length != b->length())
+             return false;
+         return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0;
+     }
+
+     static unsigned hash(const RefPtr<StringImpl>& key)
+     {
+         return hash(key.get());
+     }
+
+     static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
+     {
+         return equal(a.get(), b.get());
+     }
+
+     static unsigned hash(const String& key)
+     {
+         return hash(key.impl());
+     }
+     static unsigned hash(const AtomicString& key)
+     {
+         return hash(key.impl());
+     }
+     static bool equal(const String& a, const String& b)
+     {
+         return equal(a.impl(), b.impl());
+     }
+     static bool equal(const AtomicString& a, const AtomicString& b)
+     {
+         return (a == b) || equal(a.impl(), b.impl());
+     }
+
+     static const bool safeToCompareToEmptyOrDeleted = false;
+
+ private:
+     // Shared body of the UChar and 8-bit hash() overloads. CharType must be
+     // an unsigned code-unit type so every unit reaches foldCase non-negative.
+     template<typename CharType>
+     static unsigned foldedHash(const CharType* s, unsigned l)
+     {
+         uint32_t hash = WTF::stringHashingStartValue;
+         uint32_t tmp;
+
+         int rem = l & 1;
+         l >>= 1;
+
+         // Main loop: consume two code units per iteration.
+         for (; l > 0; l--) {
+             hash += WTF::Unicode::foldCase(s[0]);
+             tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
+             hash = (hash << 16) ^ tmp;
+             s += 2;
+             hash += hash >> 11;
+         }
+
+         // Handle end case: one trailing code unit when the length is odd.
+         if (rem) {
+             hash += WTF::Unicode::foldCase(s[0]);
+             hash ^= hash << 11;
+             hash += hash >> 17;
+         }
+
+         // Force "avalanching" of final 127 bits.
+         hash ^= hash << 3;
+         hash += hash >> 5;
+         hash ^= hash << 2;
+         hash += hash >> 15;
+         hash ^= hash << 10;
+
+         // This avoids ever returning a hash code of 0, since that is used to
+         // signal "hash not computed yet", using a value that is likely to be
+         // effectively the same as 0 when the low bits are masked.
+         hash |= !hash << 31;
+
+         return hash;
+     }
+ };
+
+ // This hash can be used in cases where the key is a hash of a string, but we don't
+ // want to store the string. It's not really specific to string hashing, but all our
+ // current uses of it are for strings.
+ // Hash functions for tables whose key is itself a precomputed string hash.
+ struct AlreadyHashed : IntHash<unsigned> {
+     // The key already is the hash value.
+     static unsigned hash(unsigned key) { return key; }
+
+     // To use a hash value as a key for a hash table, we need to eliminate the
+     // "deleted" value, which is negative one. That could be done by changing
+     // the string hash function to never generate negative one, but this works
+     // and is still relatively efficient.
+     static unsigned avoidDeletedValue(unsigned hash)
+     {
+         ASSERT(hash);
+         // (hash + 1) is zero only for 0xFFFFFFFF. Bit 31 must be flipped with
+         // XOR: ORing it in would leave 0xFFFFFFFF unchanged. Every other
+         // value passes through untouched.
+         unsigned newHash = hash ^ (static_cast<unsigned>(!(hash + 1)) << 31);
+         ASSERT(newHash);
+         ASSERT(newHash != 0xFFFFFFFF);
+         return newHash;
+     }
+ };
+
+}
+
+namespace WTF {
+
+ // Hash-table traits for WebCore::String keys and values.
+ template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> {
+ static const bool emptyValueIsZero = true;
+ // Constructs, in place, a String built from the HashTableDeletedValue tag.
+ static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); }
+ static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); }
+ };
+
+}
+
+#endif
diff --git a/JavaScriptCore/wtf/text/StringImpl.cpp b/JavaScriptCore/wtf/text/StringImpl.cpp
new file mode 100644
index 0000000..287e529
--- /dev/null
+++ b/JavaScriptCore/wtf/text/StringImpl.cpp
@@ -0,0 +1,953 @@
+/*
+ * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
+ * (C) 1999 Antti Koivisto (koivisto@kde.org)
+ * (C) 2001 Dirk Mueller ( mueller@kde.org )
+ * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "StringImpl.h"
+
+#include "AtomicString.h"
+#include "StringBuffer.h"
+#include "StringHash.h"
+#include <wtf/StdLibExtras.h>
+#include <wtf/WTFThreadData.h>
+
+using namespace WTF;
+using namespace Unicode;
+
+namespace WebCore {
+
+static const unsigned minLengthToShare = 20;
+
+// Unregisters the string from the tables that reference it, then releases
+// the character buffer according to how that buffer is owned.
+StringImpl::~StringImpl()
+{
+    ASSERT(!isStatic());
+
+    if (inTable())
+        AtomicString::remove(this);
+#if USE(JSC)
+    if (isIdentifier())
+        wtfThreadData().currentIdentifierTable()->remove(this);
+#endif
+
+    const BufferOwnership ownership = bufferOwnership();
+    switch (ownership) {
+    case BufferInternal:
+        // Nothing to release separately for an internal buffer.
+        break;
+    case BufferOwned:
+        ASSERT(!m_sharedBuffer);
+        ASSERT(m_data);
+        fastFree(const_cast<UChar*>(m_data));
+        break;
+    case BufferSubstring:
+        ASSERT(m_substringBuffer);
+        m_substringBuffer->deref();
+        break;
+    default:
+        ASSERT(ownership == BufferShared);
+        ASSERT(m_sharedBuffer);
+        m_sharedBuffer->deref();
+        break;
+    }
+}
+
+// Returns the single static zero-length StringImpl.
+StringImpl* StringImpl::empty()
+{
+ // FIXME: This works around a bug in our port of PCRE, that a regular expression
+ // run on the empty string may still perform a read from the first element, and
+ // as such we need this to be a valid pointer. No code should ever be reading
+ // from a zero length string, so this should be able to be a non-null pointer
+ // into the zero-page.
+ // Replace this with 'reinterpret_cast<UChar*>(static_cast<intptr_t>(1))' once
+ // PCRE goes away.
+ static UChar emptyUCharData = 0;
+ DEFINE_STATIC_LOCAL(StringImpl, emptyString, (&emptyUCharData, 0, ConstructStaticString));
+ return &emptyString;
+}
+
+// Creates a string of |length| characters whose contents are not initialized;
+// |data| is set to the character storage for the caller to fill in. A zero
+// length yields the shared empty string and a null |data|.
+PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
+{
+    if (!length) {
+        data = 0;
+        return empty();
+    }
+
+    // The StringImpl header and its characters live in one heap block, which
+    // saves an allocation. Refuse lengths whose total byte size would overflow.
+    const size_t maxLength = (std::numeric_limits<size_t>::max() - sizeof(StringImpl)) / sizeof(UChar);
+    if (length > maxLength)
+        CRASH();
+
+    const size_t blockSize = sizeof(StringImpl) + length * sizeof(UChar);
+    StringImpl* header = static_cast<StringImpl*>(fastMalloc(blockSize));
+
+    // The characters start immediately after the header.
+    data = reinterpret_cast<UChar*>(header + 1);
+    return adoptRef(new (header) StringImpl(length));
+}
+
+// Creates a string holding a copy of |length| UChars from |characters|.
+// A null pointer or a zero length yields the shared empty string.
+PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
+{
+    if (!(characters && length))
+        return empty();
+
+    UChar* destination;
+    PassRefPtr<StringImpl> result = createUninitialized(length, destination);
+    memcpy(destination, characters, length * sizeof(UChar));
+    return result;
+}
+
+// Creates a string from |length| 8-bit characters, widening each byte to a
+// UChar as an unsigned value. A null pointer or a zero length yields the
+// shared empty string.
+PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
+{
+    if (!(characters && length))
+        return empty();
+
+    UChar* destination;
+    PassRefPtr<StringImpl> result = createUninitialized(length, destination);
+    for (unsigned i = 0; i != length; ++i)
+        destination[i] = static_cast<unsigned char>(characters[i]);
+    return result;
+}
+
+// Creates a string from a null-terminated 8-bit string, measuring it with
+// strlen. A null pointer yields the shared empty string.
+PassRefPtr<StringImpl> StringImpl::create(const char* string)
+{
+ if (!string)
+ return empty();
+ return create(string, strlen(string));
+}
+
+// Creates a string over |characters| that is associated with |sharedBuffer|.
+// Asserts that |characters| is non-null and that |length| is at least
+// minLengthToShare.
+PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
+{
+ ASSERT(characters);
+ ASSERT(minLengthToShare && length >= minLengthToShare);
+ return adoptRef(new StringImpl(characters, length, sharedBuffer));
+}
+
+// Returns the shared buffer behind this string's characters, or 0 when the
+// string is shorter than minLengthToShare or uses an internal buffer. An
+// owned buffer is converted to a shared one on first request.
+SharedUChar* StringImpl::sharedBuffer()
+{
+    if (m_length < minLengthToShare)
+        return 0;
+    // All static strings are smaller than the minimum length to share.
+    ASSERT(!isStatic());
+
+    switch (bufferOwnership()) {
+    case BufferInternal:
+        return 0;
+    case BufferSubstring:
+        // A substring defers to the string it was taken from.
+        return m_substringBuffer->sharedBuffer();
+    case BufferOwned:
+        ASSERT(!m_sharedBuffer);
+        // Wrap the owned characters and record the new ownership in the flags.
+        m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).releaseRef();
+        m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared;
+        break;
+    default:
+        break;
+    }
+
+    ASSERT(bufferOwnership() == BufferShared);
+    ASSERT(m_sharedBuffer);
+    return m_sharedBuffer;
+}
+
+// Returns true when every character satisfies isASCIISpace; an empty string
+// therefore returns true.
+bool StringImpl::containsOnlyWhitespace()
+{
+    // FIXME: The definition of whitespace here includes a number of characters
+    // that are not whitespace from the point of view of RenderText; I wonder if
+    // that's a problem in practice.
+    const UChar* const last = m_data + m_length;
+    for (const UChar* cursor = m_data; cursor != last; ++cursor) {
+        if (!isASCIISpace(*cursor))
+            return false;
+    }
+    return true;
+}
+
+// Returns up to |length| characters starting at |start|. A start at or past
+// the end yields the empty string; a request covering the whole string
+// returns this object instead of a copy.
+PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
+{
+    if (start >= m_length)
+        return empty();
+
+    const unsigned available = m_length - start;
+    if (length < available)
+        return create(m_data + start, length);
+
+    // The request reaches (or passes) the end of the string.
+    if (!start)
+        return this;
+    return create(m_data + start, available);
+}
+
+// Returns the code point that begins at index |i|: the code unit itself when
+// it is not a surrogate, the combined value for a lead/trail surrogate pair,
+// and 0 otherwise. |i| is not range-checked here.
+UChar32 StringImpl::characterStartingAt(unsigned i)
+{
+    const UChar first = m_data[i];
+    if (U16_IS_SINGLE(first))
+        return first;
+
+    const bool hasPair = i + 1 < m_length && U16_IS_LEAD(first) && U16_IS_TRAIL(m_data[i + 1]);
+    if (hasPair)
+        return U16_GET_SUPPLEMENTARY(first, m_data[i + 1]);
+    return 0;
+}
+
+// Returns a lowercased version of this string, or this object itself when
+// the string is pure ASCII with no uppercase letters, or when the Unicode
+// conversion reports an error.
+PassRefPtr<StringImpl> StringImpl::lower()
+{
+ // Note: This is a hot function in the Dromaeo benchmark, specifically the
+ // no-op code path up through the first 'return' statement.
+
+ // First scan the string for uppercase and non-ASCII characters:
+ // |ored| accumulates every code unit so one mask test detects non-ASCII.
+ UChar ored = 0;
+ bool noUpper = true;
+ const UChar *end = m_data + m_length;
+ for (const UChar* chp = m_data; chp != end; chp++) {
+ if (UNLIKELY(isASCIIUpper(*chp)))
+ noUpper = false;
+ ored |= *chp;
+ }
+
+ // Nothing to do if the string is all ASCII with no uppercase.
+ if (noUpper && !(ored & ~0x7F))
+ return this;
+
+ int32_t length = m_length;
+ UChar* data;
+ RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
+
+ if (!(ored & ~0x7F)) {
+ // Do a faster loop for the case where all the characters are ASCII.
+ for (int i = 0; i < length; i++) {
+ UChar c = m_data[i];
+ data[i] = toASCIILower(c);
+ }
+ return newImpl;
+ }
+
+ // Do a slower implementation for cases that include non-ASCII characters.
+ bool error;
+ int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
+ if (!error && realLength == length)
+ return newImpl;
+ // Retry into a buffer of realLength code units (presumably the size the
+ // conversion actually needs -- confirm against Unicode::toLower).
+ newImpl = createUninitialized(realLength, data);
+ Unicode::toLower(data, realLength, m_data, m_length, &error);
+ if (error)
+ return this;
+ return newImpl;
+}
+
+// Returns an uppercased copy of this string, or this object itself when the
+// Unicode conversion reports an error.
+PassRefPtr<StringImpl> StringImpl::upper()
+{
+ // This function could be optimized for no-op cases the way lower() is,
+ // but in empirical testing, few actual calls to upper() are no-ops, so
+ // it wouldn't be worth the extra time for pre-scanning.
+ UChar* data;
+ PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
+ int32_t length = m_length;
+
+ // Do a faster loop for the case where all the characters are ASCII.
+ // |ored| accumulates every code unit so one mask test detects non-ASCII.
+ UChar ored = 0;
+ for (int i = 0; i < length; i++) {
+ UChar c = m_data[i];
+ ored |= c;
+ data[i] = toASCIIUpper(c);
+ }
+ if (!(ored & ~0x7F))
+ return newImpl;
+
+ // Do a slower implementation for cases that include non-ASCII characters.
+ bool error;
+ int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
+ if (!error && realLength == length)
+ return newImpl;
+ // Retry into a buffer of realLength code units (presumably the size the
+ // conversion actually needs -- confirm against Unicode::toUpper).
+ newImpl = createUninitialized(realLength, data);
+ Unicode::toUpper(data, realLength, m_data, m_length, &error);
+ if (error)
+ return this;
+ return newImpl;
+}
+
+// Returns a string of the same length in which every character is |aChar|.
+PassRefPtr<StringImpl> StringImpl::secure(UChar aChar)
+{
+    UChar* masked;
+    PassRefPtr<StringImpl> result = createUninitialized(m_length, masked);
+    const int32_t count = m_length;
+    int32_t i = 0;
+    while (i < count) {
+        masked[i] = aChar;
+        ++i;
+    }
+    return result;
+}
+
+// Returns a case-folded copy of this string, or this object itself when the
+// Unicode conversion reports an error.
+PassRefPtr<StringImpl> StringImpl::foldCase()
+{
+ UChar* data;
+ PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
+ int32_t length = m_length;
+
+ // Do a faster loop for the case where all the characters are ASCII.
+ // |ored| accumulates every code unit so one mask test detects non-ASCII.
+ UChar ored = 0;
+ for (int i = 0; i < length; i++) {
+ UChar c = m_data[i];
+ ored |= c;
+ data[i] = toASCIILower(c);
+ }
+ if (!(ored & ~0x7F))
+ return newImpl;
+
+ // Do a slower implementation for cases that include non-ASCII characters.
+ bool error;
+ int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
+ if (!error && realLength == length)
+ return newImpl;
+ // Retry into a buffer of realLength code units (presumably the size the
+ // conversion actually needs -- confirm against Unicode::foldCase).
+ newImpl = createUninitialized(realLength, data);
+ Unicode::foldCase(data, realLength, m_data, m_length, &error);
+ if (error)
+ return this;
+ return newImpl;
+}
+
+// Returns this string without leading and trailing characters that satisfy
+// isSpaceOrNewline. Returns this object when there is nothing to strip and
+// the empty string when nothing else remains.
+PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
+{
+    if (!m_length)
+        return empty();
+
+    unsigned first = 0;
+    unsigned last = m_length - 1;
+
+    // Advance past leading white space.
+    while (first <= last && isSpaceOrNewline(m_data[first]))
+        ++first;
+
+    // The whole string was white space.
+    if (first > last)
+        return empty();
+
+    // Back up over trailing white space.
+    while (last && isSpaceOrNewline(m_data[last]))
+        --last;
+
+    const bool nothingStripped = !first && last == m_length - 1;
+    if (nothingStripped)
+        return this;
+    return create(m_data + first, last + 1 - first);
+}
+
+// Returns a string with every character for which |findMatch| returns true
+// removed. Returns this object when no character matches.
+PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
+{
+    const UChar* from = m_data;
+    const UChar* const fromend = from + m_length;
+
+    // Assume the common case will not remove any characters
+    for (; from != fromend; ++from) {
+        if (findMatch(*from))
+            break;
+    }
+    if (from == fromend)
+        return this;
+
+    StringBuffer data(m_length);
+    UChar* const to = data.characters();
+    unsigned outc = from - m_data;
+
+    // Copy the prefix that is already known to contain no matches.
+    if (outc)
+        memcpy(to, m_data, outc * sizeof(UChar));
+
+    // Alternate between skipping a run of matches and copying a run of
+    // non-matches until the input is used up.
+    do {
+        while (from != fromend && findMatch(*from))
+            from++;
+        while (from != fromend && !findMatch(*from))
+            to[outc++] = *from++;
+    } while (from != fromend);
+
+    data.shrink(outc);
+
+    return adopt(data);
+}
+
+// Returns a string with leading and trailing white space removed and each
+// internal run of white space replaced by a single ' '. Returns this object
+// when the result would be identical to the original.
+PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
+{
+    StringBuffer data(m_length);
+    UChar* const to = data.characters();
+
+    const UChar* from = m_data;
+    const UChar* const fromend = from + m_length;
+    int outc = 0;
+    // Set when any skipped white space character was something other than ' '.
+    bool changedToSpace = false;
+
+    for (;;) {
+        // Skip a run of white space.
+        for (; from != fromend && isSpaceOrNewline(*from); from++) {
+            if (*from != ' ')
+                changedToSpace = true;
+        }
+        // Copy a run of non-white-space.
+        while (from != fromend && !isSpaceOrNewline(*from))
+            to[outc++] = *from++;
+        if (from == fromend)
+            break;
+        // More input follows, so separate the runs with a single space.
+        to[outc++] = ' ';
+    }
+
+    // Drop the separator written before trailing white space.
+    if (outc > 0 && to[outc - 1] == ' ')
+        outc--;
+
+    const bool unchanged = static_cast<unsigned>(outc) == m_length && !changedToSpace;
+    if (unchanged)
+        return this;
+
+    data.shrink(outc);
+
+    return adopt(data);
+}
+
+// Forwards this string's characters, with |ok| and |base|, to
+// charactersToIntStrict and returns its result.
+int StringImpl::toIntStrict(bool* ok, int base)
+{
+ return charactersToIntStrict(m_data, m_length, ok, base);
+}
+
+// Forwards this string's characters, with |ok| and |base|, to
+// charactersToUIntStrict and returns its result.
+unsigned StringImpl::toUIntStrict(bool* ok, int base)
+{
+ return charactersToUIntStrict(m_data, m_length, ok, base);
+}
+
+// Forwards this string's characters, with |ok| and |base|, to
+// charactersToInt64Strict and returns its result.
+int64_t StringImpl::toInt64Strict(bool* ok, int base)
+{
+ return charactersToInt64Strict(m_data, m_length, ok, base);
+}
+
+// Forwards this string's characters, with |ok| and |base|, to
+// charactersToUInt64Strict and returns its result.
+uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
+{
+ return charactersToUInt64Strict(m_data, m_length, ok, base);
+}
+
+// Forwards this string's characters, with |ok| and |base|, to
+// charactersToIntPtrStrict and returns its result.
+intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
+{
+ return charactersToIntPtrStrict(m_data, m_length, ok, base);
+}
+
+// Forwards this string's characters and |ok| to charactersToInt and returns
+// its result.
+int StringImpl::toInt(bool* ok)
+{
+ return charactersToInt(m_data, m_length, ok);
+}
+
+// Forwards this string's characters and |ok| to charactersToUInt and returns
+// its result.
+unsigned StringImpl::toUInt(bool* ok)
+{
+ return charactersToUInt(m_data, m_length, ok);
+}
+
+// Forwards this string's characters and |ok| to charactersToInt64 and
+// returns its result.
+int64_t StringImpl::toInt64(bool* ok)
+{
+ return charactersToInt64(m_data, m_length, ok);
+}
+
+// Forwards this string's characters and |ok| to charactersToUInt64 and
+// returns its result.
+uint64_t StringImpl::toUInt64(bool* ok)
+{
+ return charactersToUInt64(m_data, m_length, ok);
+}
+
+// Forwards this string's characters and |ok| to charactersToIntPtr and
+// returns its result.
+intptr_t StringImpl::toIntPtr(bool* ok)
+{
+ return charactersToIntPtr(m_data, m_length, ok);
+}
+
+// Forwards this string's characters and |ok| to charactersToDouble and
+// returns its result.
+double StringImpl::toDouble(bool* ok)
+{
+ return charactersToDouble(m_data, m_length, ok);
+}
+
+// Forwards this string's characters and |ok| to charactersToFloat and
+// returns its result.
+float StringImpl::toFloat(bool* ok)
+{
+ return charactersToFloat(m_data, m_length, ok);
+}
+
+// Returns true when the first |length| UChars of |a| equal the first
+// |length| bytes of |b|, each byte being read as an unsigned value.
+static bool equal(const UChar* a, const char* b, int length)
+{
+    ASSERT(length >= 0);
+    for (; length--; ++a, ++b) {
+        if (*a != static_cast<unsigned char>(*b))
+            return false;
+    }
+    return true;
+}
+
+// Returns true when the first |length| characters of |a| and |b| are equal
+// after case folding; each byte of |b| is read as an unsigned value.
+bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
+{
+    for (; length--; ++a, ++b) {
+        const unsigned char narrow = *b;
+        if (foldCase(*a) != foldCase(narrow))
+            return false;
+    }
+    return true;
+}
+
+// Returns true when umemcasecmp reports the first |length| UChars of |a|
+// and |b| as equal. |length| must not be negative.
+static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
+{
+ ASSERT(length >= 0);
+ return umemcasecmp(a, b, length) == 0;
+}
+
+// Searches for the null-terminated 8-bit string |chs| starting at |index|.
+// Returns the index of the first match, or -1 when there is none, when |chs|
+// is null, or when |index| is negative or past the end. An empty |chs|
+// matches at |index|, as the StringImpl* overload does.
+int StringImpl::find(const char* chs, int index, bool caseSensitive)
+{
+    if (!chs || index < 0)
+        return -1;
+
+    int chsLength = strlen(chs);
+    int n = m_length - index;
+    if (n < 0)
+        return -1;
+    // An empty needle must not reach the loops below: they would pass a
+    // length of -1 to the comparison helpers and read one element past the
+    // end of the buffer.
+    if (!chsLength)
+        return index;
+    // n becomes the number of start positions at which |chs| still fits.
+    n -= chsLength - 1;
+    if (n <= 0)
+        return -1;
+
+    const char* chsPlusOne = chs + 1;
+    int chsLengthMinusOne = chsLength - 1;
+
+    // Starts one before |index| because the loops pre-increment.
+    const UChar* ptr = m_data + index - 1;
+    if (caseSensitive) {
+        UChar c = *chs;
+        do {
+            // Check the first character cheaply before comparing the rest.
+            if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
+                return m_length - chsLength - n + 1;
+        } while (--n);
+    } else {
+        UChar lc = Unicode::foldCase(*chs);
+        do {
+            if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
+                return m_length - chsLength - n + 1;
+        } while (--n);
+    }
+
+    return -1;
+}
+
+// Forwards to the free function WebCore::find over this string's characters
+// to look for |c| from |start|.
+int StringImpl::find(UChar c, int start)
+{
+ return WebCore::find(m_data, m_length, c, start);
+}
+
+// Forwards to the free function WebCore::find over this string's characters,
+// passing |matchFunction| and |start|.
+int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start)
+{
+ return WebCore::find(m_data, m_length, matchFunction, start);
+}
+
+// Searches for |str| starting at |index| and returns the index of the first
+// match or -1. A negative |index| has the string length added to it first.
+// |str| must not be null.
+int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
+{
+ /*
+ We use a simple trick for efficiency's sake. Instead of
+ comparing strings, we compare the sum of str with that of
+ a part of this string. Only if that matches, we call memcmp
+ or ucstrnicmp.
+ */
+ ASSERT(str);
+ if (index < 0)
+ index += m_length;
+ int lstr = str->m_length;
+ int lthis = m_length - index;
+ // Rejects an index that is still negative or lies past the end.
+ if ((unsigned)lthis > m_length)
+ return -1;
+ // delta is the last offset from |index| at which |str| still fits.
+ int delta = lthis - lstr;
+ if (delta < 0)
+ return -1;
+
+ const UChar* uthis = m_data + index;
+ const UChar* ustr = str->m_data;
+ unsigned hthis = 0;
+ unsigned hstr = 0;
+ if (caseSensitive) {
+ // Sum the pattern and the first window of this string.
+ for (int i = 0; i < lstr; i++) {
+ hthis += uthis[i];
+ hstr += ustr[i];
+ }
+ int i = 0;
+ while (1) {
+ if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
+ return index + i;
+ if (i == delta)
+ return -1;
+ // Slide the window by one: add the entering unit, drop the leaving one.
+ hthis += uthis[i + lstr];
+ hthis -= uthis[i];
+ i++;
+ }
+ } else {
+ // NOTE(review): the sums use toASCIILower while the comparison uses
+ // equalIgnoringCase, so non-ASCII characters that differ only by case
+ // presumably produce unequal sums and never reach the comparison -- confirm.
+ for (int i = 0; i < lstr; i++ ) {
+ hthis += toASCIILower(uthis[i]);
+ hstr += toASCIILower(ustr[i]);
+ }
+ int i = 0;
+ while (1) {
+ if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
+ return index + i;
+ if (i == delta)
+ return -1;
+ hthis += toASCIILower(uthis[i + lstr]);
+ hthis -= toASCIILower(uthis[i]);
+ i++;
+ }
+ }
+}
+
+// Forwards to the free function WebCore::reverseFind over this string's
+// characters, passing |c| and |index|.
+int StringImpl::reverseFind(UChar c, int index)
+{
+ return WebCore::reverseFind(m_data, m_length, c, index);
+}
+
+// Searches backwards for |str|, starting no later than |index|, and returns
+// the index of the match found or -1. A negative |index| has the string
+// length added to it first. |str| must not be null.
+int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
+{
+ /*
+ See StringImpl::find() for explanations.
+ */
+ ASSERT(str);
+ int lthis = m_length;
+ if (index < 0)
+ index += lthis;
+
+ int lstr = str->m_length;
+ // delta is the largest start position at which |str| still fits.
+ int delta = lthis - lstr;
+ if ( index < 0 || index > lthis || delta < 0 )
+ return -1;
+ if ( index > delta )
+ index = delta;
+
+ const UChar *uthis = m_data;
+ const UChar *ustr = str->m_data;
+ unsigned hthis = 0;
+ unsigned hstr = 0;
+ int i;
+ if (caseSensitive) {
+ // Sum the pattern and the window of this string that starts at |index|.
+ for ( i = 0; i < lstr; i++ ) {
+ hthis += uthis[index + i];
+ hstr += ustr[i];
+ }
+ i = index;
+ while (1) {
+ if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
+ return i;
+ if (i == 0)
+ return -1;
+ // Slide the window one position towards the start.
+ i--;
+ hthis -= uthis[i + lstr];
+ hthis += uthis[i];
+ }
+ } else {
+ for (i = 0; i < lstr; i++) {
+ hthis += toASCIILower(uthis[index + i]);
+ hstr += toASCIILower(ustr[i]);
+ }
+ i = index;
+ while (1) {
+ if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
+ return i;
+ if (i == 0)
+ return -1;
+ i--;
+ hthis -= toASCIILower(uthis[i + lstr]);
+ hthis += toASCIILower(uthis[i]);
+ }
+ }
+
+ // Should never get here.
+ return -1;
+}
+
+// Returns true when this string ends with |suffix|. |suffix| must not be
+// null. (The parameter was previously named m_data, which shadowed the
+// member of the same name.)
+bool StringImpl::endsWith(StringImpl* suffix, bool caseSensitive)
+{
+    ASSERT(suffix);
+    // Position at which the suffix would have to begin.
+    int start = m_length - suffix->m_length;
+    if (start >= 0)
+        return (find(suffix, start, caseSensitive) == start);
+    return false;
+}
+
+// Returns a string in which every |oldC| is replaced by |newC|. Returns this
+// object when the two characters are equal or |oldC| does not occur.
+PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
+{
+    if (oldC == newC)
+        return this;
+
+    // Look for a first occurrence before allocating anything.
+    unsigned firstMatch = 0;
+    while (firstMatch != m_length && m_data[firstMatch] != oldC)
+        ++firstMatch;
+    if (firstMatch == m_length)
+        return this;
+
+    UChar* data;
+    PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
+
+    for (unsigned i = 0; i != m_length; ++i) {
+        const UChar source = m_data[i];
+        data[i] = source == oldC ? newC : source;
+    }
+    return newImpl;
+}
+
+// Returns a string in which |lengthToReplace| characters starting at
+// |position| are replaced by |str| (a null |str| inserts nothing). Both
+// |position| and |lengthToReplace| are clamped to the string. Returns this
+// object when nothing would be removed or inserted.
+PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
+{
+    position = min(position, length());
+    lengthToReplace = min(lengthToReplace, length() - position);
+    unsigned lengthToInsert = str ? str->length() : 0;
+    if (!lengthToReplace && !lengthToInsert)
+        return this;
+
+    // Crash rather than let the new length wrap around: a wrapped length
+    // would allocate a buffer smaller than what the memcpy calls below write.
+    const unsigned keptLength = length() - lengthToReplace;
+    if (keptLength > std::numeric_limits<unsigned>::max() - lengthToInsert)
+        CRASH();
+
+    UChar* data;
+    PassRefPtr<StringImpl> newImpl =
+        createUninitialized(keptLength + lengthToInsert, data);
+    // Prefix before the replaced range.
+    memcpy(data, characters(), position * sizeof(UChar));
+    if (str)
+        memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
+    // Suffix after the replaced range.
+    memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
+        (length() - position - lengthToReplace) * sizeof(UChar));
+    return newImpl;
+}
+
+// Returns a string in which every occurrence of the character |pattern| is
+// replaced by |replacement|. Returns this object when |replacement| is null
+// or |pattern| does not occur.
+PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
+{
+    if (!replacement)
+        return this;
+
+    int repStrLength = replacement->length();
+    int srcSegmentStart = 0;
+    int matchCount = 0;
+
+    // Count the matches
+    while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+        ++matchCount;
+        ++srcSegmentStart;
+    }
+
+    // If we have 0 matches, we don't have to do any more work
+    if (!matchCount)
+        return this;
+
+    // Compute the new length in unsigned arithmetic and crash rather than
+    // let it wrap: a wrapped length would allocate a buffer smaller than
+    // what the memcpy calls below write.
+    const unsigned matches = matchCount;
+    const unsigned replacementLength = replacement->length();
+    if (replacementLength && matches > std::numeric_limits<unsigned>::max() / replacementLength)
+        CRASH();
+    const unsigned insertedLength = matches * replacementLength;
+    const unsigned keptLength = m_length - matches;
+    if (keptLength > std::numeric_limits<unsigned>::max() - insertedLength)
+        CRASH();
+
+    UChar* data;
+    PassRefPtr<StringImpl> newImpl =
+        createUninitialized(keptLength + insertedLength, data);
+
+    // Construct the new data
+    int srcSegmentEnd;
+    int srcSegmentLength;
+    srcSegmentStart = 0;
+    int dstOffset = 0;
+
+    // For each match, copy the text before it and then the replacement.
+    while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+        srcSegmentLength = srcSegmentEnd - srcSegmentStart;
+        memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
+        dstOffset += srcSegmentLength;
+        memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
+        dstOffset += repStrLength;
+        srcSegmentStart = srcSegmentEnd + 1;
+    }
+
+    // Copy whatever follows the last match.
+    srcSegmentLength = m_length - srcSegmentStart;
+    memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
+
+    ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
+
+    return newImpl;
+}
+
+// Returns a string in which every non-overlapping occurrence of |pattern| is
+// replaced by |replacement|. Returns this object when either argument is
+// null, |pattern| is empty, or |pattern| does not occur.
+PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
+{
+    if (!pattern || !replacement)
+        return this;
+
+    int patternLength = pattern->length();
+    if (!patternLength)
+        return this;
+
+    int repStrLength = replacement->length();
+    int srcSegmentStart = 0;
+    int matchCount = 0;
+
+    // Count the matches
+    while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+        ++matchCount;
+        srcSegmentStart += patternLength;
+    }
+
+    // If we have 0 matches, we don't have to do any more work
+    if (!matchCount)
+        return this;
+
+    // Compute the new length in unsigned arithmetic and crash rather than
+    // let it wrap: a wrapped length would allocate a buffer smaller than
+    // what the memcpy calls below write. The matches do not overlap, so
+    // matches * pattern length never exceeds m_length.
+    const unsigned matches = matchCount;
+    const unsigned replacementLength = replacement->length();
+    if (replacementLength && matches > std::numeric_limits<unsigned>::max() / replacementLength)
+        CRASH();
+    const unsigned insertedLength = matches * replacementLength;
+    const unsigned keptLength = m_length - matches * static_cast<unsigned>(patternLength);
+    if (keptLength > std::numeric_limits<unsigned>::max() - insertedLength)
+        CRASH();
+
+    UChar* data;
+    PassRefPtr<StringImpl> newImpl =
+        createUninitialized(keptLength + insertedLength, data);
+
+    // Construct the new data
+    int srcSegmentEnd;
+    int srcSegmentLength;
+    srcSegmentStart = 0;
+    int dstOffset = 0;
+
+    // For each match, copy the text before it and then the replacement.
+    while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+        srcSegmentLength = srcSegmentEnd - srcSegmentStart;
+        memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
+        dstOffset += srcSegmentLength;
+        memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
+        dstOffset += repStrLength;
+        srcSegmentStart = srcSegmentEnd + patternLength;
+    }
+
+    // Copy whatever follows the last match.
+    srcSegmentLength = m_length - srcSegmentStart;
+    memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
+
+    ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
+
+    return newImpl;
+}
+
+// Exact comparison of two strings; forwards to StringHash::equal.
+bool equal(const StringImpl* a, const StringImpl* b)
+{
+ return StringHash::equal(a, b);
+}
+
+// Compares a string with a null-terminated 8-bit string, reading each byte
+// as an unsigned value. Two null arguments compare equal; a null argument
+// never equals a non-null one.
+bool equal(const StringImpl* a, const char* b)
+{
+    if (!a)
+        return !b;
+    if (!b)
+        return false; // a is known to be non-null here.
+
+    const unsigned length = a->length();
+    const UChar* as = a->characters();
+    for (unsigned i = 0; i != length; ++i) {
+        const unsigned char bc = b[i];
+        // A terminator inside the range means b is shorter than a.
+        if (!bc || as[i] != bc)
+            return false;
+    }
+
+    // Equal only if b ends exactly where a does.
+    return !b[length];
+}
+
+// Case-insensitive comparison of two strings; forwards to
+// CaseFoldingHash::equal.
+bool equalIgnoringCase(StringImpl* a, StringImpl* b)
+{
+ return CaseFoldingHash::equal(a, b);
+}
+
+// Case-insensitive comparison of a string with a null-terminated 8-bit
+// string. Two null arguments compare equal; a null argument never equals a
+// non-null one.
+bool equalIgnoringCase(StringImpl* a, const char* b)
+{
+    if (!a)
+        return !b;
+    if (!b)
+        return false; // a is known to be non-null here.
+
+    const unsigned length = a->length();
+    const UChar* as = a->characters();
+
+    // Do a faster loop for the case where all the characters are ASCII.
+    // |seenBits| accumulates every code unit of a so one mask test afterwards
+    // tells whether any of them was non-ASCII.
+    UChar seenBits = 0;
+    bool matches = true;
+    for (unsigned i = 0; i != length; ++i) {
+        char bc = b[i];
+        // A terminator inside the range means b is shorter than a.
+        if (!bc)
+            return false;
+        UChar ac = as[i];
+        seenBits |= ac;
+        if (toASCIILower(ac) != toASCIILower(bc))
+            matches = false;
+    }
+
+    // Do a slower implementation for cases that include non-ASCII characters.
+    if (seenBits & ~0x7F) {
+        matches = true;
+        for (unsigned i = 0; i != length; ++i) {
+            unsigned char bc = b[i];
+            if (foldCase(as[i]) != foldCase(bc))
+                matches = false;
+        }
+    }
+
+    // Equal only if b ends exactly where a does.
+    return matches && !b[length];
+}
+
+// Like StringHash::equal, except that a null string and an empty string
+// also compare equal.
+bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
+{
+    if (StringHash::equal(a, b))
+        return true;
+
+    // From here on a match is possible only as null versus empty.
+    if (!a)
+        return b && !b->length();
+    if (!b)
+        return !a->length();
+    return false;
+}
+
+// Returns a null-terminated 8-bit rendering of the string. Characters in the
+// range 0x20..0x7E and the value 0x00 are kept; every other character
+// becomes '?'.
+Vector<char> StringImpl::ascii()
+{
+    Vector<char> buffer(m_length + 1);
+    for (unsigned i = 0; i != m_length; ++i) {
+        const UChar c = m_data[i];
+        const bool kept = !c || (c >= 0x20 && c < 0x7F);
+        if (kept)
+            buffer[i] = c;
+        else
+            buffer[i] = '?';
+    }
+    buffer[m_length] = '\0';
+    return buffer;
+}
+
+// Returns the direction of the first character whose direction is
+// LeftToRight, RightToLeft or RightToLeftArabic (the last reported as
+// RightToLeft). Returns LeftToRight when no such character exists.
+WTF::Unicode::Direction StringImpl::defaultWritingDirection()
+{
+    for (unsigned i = 0; i < m_length; ++i) {
+        switch (WTF::Unicode::direction(m_data[i])) {
+        case WTF::Unicode::LeftToRight:
+            return WTF::Unicode::LeftToRight;
+        case WTF::Unicode::RightToLeft:
+        case WTF::Unicode::RightToLeftArabic:
+            return WTF::Unicode::RightToLeft;
+        default:
+            // Any other direction does not decide; keep scanning.
+            break;
+        }
+    }
+    return WTF::Unicode::LeftToRight;
+}
+
+// This is a hot function because it's used when parsing HTML.
+// Creates a string from |characters| with every zero code unit left out.
+// Asserts that at least one code unit was actually dropped.
+PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
+{
+    StringBuffer strippedCopy(length);
+    unsigned strippedLength = 0;
+    for (unsigned i = 0; i < length; i++) {
+        const int c = characters[i];
+        if (!c)
+            continue;
+        strippedCopy[strippedLength++] = c;
+    }
+    ASSERT(strippedLength < length); // Only take the slow case when stripping.
+    strippedCopy.shrink(strippedLength);
+    return adopt(strippedCopy);
+}
+
+// Creates a string that takes over |buffer|'s storage via release(). An
+// empty buffer yields the shared empty string and is not released.
+PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
+{
+    const unsigned adoptedLength = buffer.length();
+    if (!adoptedLength)
+        return empty();
+    return adoptRef(new StringImpl(buffer.release(), adoptedLength));
+}
+
+// Creates a copy of |string| whose buffer has a 0 code unit stored just past
+// the reported length, and sets the flag recording that.
+PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
+{
+ // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer
+ // get allocated in a single malloc block.
+ UChar* data;
+ int length = string.m_length;
+ RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data);
+ memcpy(data, string.m_data, length * sizeof(UChar));
+ data[length] = 0;
+ // The buffer was created one unit longer; take the terminator back out of the length.
+ terminatedString->m_length--;
+ // The source string's stored hash value is copied over as-is.
+ terminatedString->m_hash = string.m_hash;
+ terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter;
+ return terminatedString.release();
+}
+
+// Returns a new string created from this string's characters via create().
+PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const
+{
+ return create(m_data, m_length);
+}
+
+// Returns a string for use on another thread. When sharedBuffer() provides a
+// buffer, the new string refers to the same characters together with that
+// buffer's crossThreadCopy(); otherwise the characters are copied.
+PassRefPtr<StringImpl> StringImpl::crossThreadString()
+{
+ if (SharedUChar* sharedBuffer = this->sharedBuffer())
+ return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy()));
+
+ // If no shared buffer is available, create a copy.
+ return threadsafeCopy();
+}
+
+} // namespace WebCore
diff --git a/JavaScriptCore/wtf/text/StringImpl.h b/JavaScriptCore/wtf/text/StringImpl.h
new file mode 100644
index 0000000..6ac9e40
--- /dev/null
+++ b/JavaScriptCore/wtf/text/StringImpl.h
@@ -0,0 +1,389 @@
+/*
+ * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
+ * Copyright (C) 2009 Google Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef StringImpl_h
+#define StringImpl_h
+
+#include <limits.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/CrossThreadRefCounted.h>
+#include <wtf/OwnFastMallocPtr.h>
+#include <wtf/StringHashFunctions.h>
+#include <wtf/Vector.h>
+#include <wtf/text/StringImplBase.h>
+#include <wtf/unicode/Unicode.h>
+
+#if PLATFORM(CF)
+typedef const struct __CFString * CFStringRef;
+#endif
+
+#ifdef __OBJC__
+@class NSString;
+#endif
+
+// FIXME: This is a temporary layering violation while we move string code to WTF.
+// Landing the file moves in one patch, will follow on with patches to change the namespaces.
+namespace JSC {
+
+struct IdentifierCStringTranslator;
+struct IdentifierUCharBufferTranslator;
+
+}
+
+// FIXME: This is a temporary layering violation while we move string code to WTF.
+// Landing the file moves in one patch, will follow on with patches to change the namespaces.
+namespace WebCore {
+
+class StringBuffer;
+
+struct CStringTranslator;
+struct HashAndCharactersTranslator;
+struct StringHash;
+struct UCharBufferTranslator;
+
+enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
+
+typedef OwnFastMallocPtr<const UChar> SharableUChar;
+typedef CrossThreadRefCounted<SharableUChar> SharedUChar;
+typedef bool (*CharacterMatchFunctionPtr)(UChar);
+
+// Reference-counted string of UChar code units. The character data is reached through
+// m_data; how that data is owned (stored inline after the object, owned separately,
+// borrowed from a base string, or shared) is recorded in the BufferOwnership bits of
+// m_refCountAndFlags inherited from StringImplBase.
+class StringImpl : public StringImplBase {
+ friend struct JSC::IdentifierCStringTranslator;
+ friend struct JSC::IdentifierUCharBufferTranslator;
+ friend struct CStringTranslator;
+ friend struct HashAndCharactersTranslator;
+ friend struct UCharBufferTranslator;
+private:
+ // Used to construct static strings, which have a special refCount that can never hit zero.
+ // This means that the static string will never be destroyed, which is important because
+ // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
+ StringImpl(const UChar* characters, unsigned length, StaticStringConstructType)
+ : StringImplBase(length, ConstructStaticString)
+ , m_data(characters)
+ , m_buffer(0)
+ , m_hash(0)
+ {
+ // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
+ // with impunity. The empty string is special because it is never entered into
+ // AtomicString's HashKey, but still needs to compare correctly.
+ hash();
+ }
+
+ // Create a normal string with internal storage (BufferInternal)
+ // The characters live immediately after the object (this + 1), so the caller must
+ // have allocated sizeof(StringImpl) plus room for the characters.
+ StringImpl(unsigned length)
+ : StringImplBase(length, BufferInternal)
+ , m_data(reinterpret_cast<const UChar*>(this + 1))
+ , m_buffer(0)
+ , m_hash(0)
+ {
+ ASSERT(m_data);
+ ASSERT(m_length);
+ }
+
+ // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
+ StringImpl(const UChar* characters, unsigned length)
+ : StringImplBase(length, BufferOwned)
+ , m_data(characters)
+ , m_buffer(0)
+ , m_hash(0)
+ {
+ ASSERT(m_data);
+ ASSERT(m_length);
+ }
+
+ // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
+ // Takes over the reference carried by |base|; the base must not itself be a substring.
+ StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
+ : StringImplBase(length, BufferSubstring)
+ , m_data(characters)
+ , m_substringBuffer(base.releaseRef())
+ , m_hash(0)
+ {
+ ASSERT(m_data);
+ ASSERT(m_length);
+ ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
+ }
+
+ // Used to construct new strings sharing an existing SharedUChar (BufferShared)
+ StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
+ : StringImplBase(length, BufferShared)
+ , m_data(characters)
+ , m_sharedBuffer(sharedBuffer.releaseRef())
+ , m_hash(0)
+ {
+ ASSERT(m_data);
+ ASSERT(m_length);
+ }
+
+ // For use only by AtomicString's XXXTranslator helpers.
+ // Stores a precomputed hash; asserts that no hash was cached yet and that the value
+ // matches computeHash() for the current characters.
+ void setHash(unsigned hash)
+ {
+ ASSERT(!isStatic());
+ ASSERT(!m_hash);
+ ASSERT(hash == computeHash(m_data, m_length));
+ m_hash = hash;
+ }
+
+public:
+ ~StringImpl();
+
+ static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
+ static PassRefPtr<StringImpl> create(const char*, unsigned length);
+ static PassRefPtr<StringImpl> create(const char*);
+ static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer);
+ // Creates a substring view of |rep| starting at |offset|. If |rep| is itself a substring,
+ // the new string references rep's base string rather than rep, so substrings never chain.
+ // NOTE(review): only |length| is asserted against rep->length(); the caller is presumably
+ // responsible for offset + length staying in range - confirm at call sites.
+ static PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
+ {
+ ASSERT(rep);
+ ASSERT(length <= rep->length());
+
+ if (!length)
+ return empty();
+
+ StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
+ return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep));
+ }
+
+ static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
+ // Allocates the object and |length| characters in a single tryFastMalloc block and
+ // points |output| at the (uninitialized) characters. Returns 0 if the size computation
+ // would overflow or the allocation fails. For a zero length, returns empty() and sets
+ // |output| to 0.
+ static PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
+ {
+ if (!length) {
+ output = 0;
+ return empty();
+ }
+
+ if (length > ((std::numeric_limits<size_t>::max() - sizeof(StringImpl)) / sizeof(UChar)))
+ return 0;
+ StringImpl* resultImpl;
+ if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl))
+ return 0;
+ output = reinterpret_cast<UChar*>(resultImpl + 1);
+ return adoptRef(new(resultImpl) StringImpl(length));
+ }
+
+ static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
+ static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
+
+ // Adopts the buffer released by |vector|; an empty vector yields empty().
+ template<size_t inlineCapacity>
+ static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
+ {
+ if (size_t size = vector.size()) {
+ ASSERT(vector.data());
+ return adoptRef(new StringImpl(vector.releaseBuffer(), size));
+ }
+ return empty();
+ }
+ static PassRefPtr<StringImpl> adopt(StringBuffer&);
+
+ SharedUChar* sharedBuffer();
+ const UChar* characters() const { return m_data; }
+
+ // Returns m_length the first time it is called (while the "should report cost" flag is
+ // still set), clearing the flag, and 0 on every later call.
+ size_t cost()
+ {
+ // For substrings, return the cost of the base string.
+ if (bufferOwnership() == BufferSubstring)
+ return m_substringBuffer->cost();
+
+ if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) {
+ m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost;
+ return m_length;
+ }
+ return 0;
+ }
+
+ bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; }
+ void setIsIdentifier(bool isIdentifier)
+ {
+ ASSERT(!isStatic());
+ if (isIdentifier)
+ m_refCountAndFlags |= s_refCountFlagIsIdentifier;
+ else
+ m_refCountAndFlags &= ~s_refCountFlagIsIdentifier;
+ }
+
+ bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; }
+
+ bool inTable() const { return m_refCountAndFlags & s_refCountFlagInTable; }
+ void setInTable() { m_refCountAndFlags |= s_refCountFlagInTable; }
+
+ // hash() computes and caches the hash on first use (a cached value of 0 means "not
+ // computed"); existingHash() asserts that a hash has already been cached.
+ unsigned hash() const { if (!m_hash) m_hash = computeHash(m_data, m_length); return m_hash; }
+ unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
+ static unsigned computeHash(const UChar* data, unsigned length) { return WTF::stringHash(data, length); }
+ static unsigned computeHash(const char* data, unsigned length) { return WTF::stringHash(data, length); }
+ static unsigned computeHash(const char* data) { return WTF::stringHash(data); }
+
+ // deref() drops one reference and deletes the object once both the ref-count bits and
+ // the static flag are zero. hasOneRef() is true only for a non-static string with
+ // exactly one reference.
+ ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; }
+ ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; }
+
+ static StringImpl* empty();
+
+ // Copies |numCharacters| code units: an element-by-element loop for runs up to
+ // s_copyCharsInlineCutOff, memcpy for longer ones.
+ static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
+ {
+ if (numCharacters <= s_copyCharsInlineCutOff) {
+ for (unsigned i = 0; i < numCharacters; ++i)
+ destination[i] = source[i];
+ } else
+ memcpy(destination, source, numCharacters * sizeof(UChar));
+ }
+
+ // Returns a StringImpl suitable for use on another thread.
+ PassRefPtr<StringImpl> crossThreadString();
+ // Makes a deep copy. Helpful only if you need to use a String on another thread
+ // (use crossThreadString if the method call doesn't need to be threadsafe).
+ // Since StringImpl objects are immutable, there's no other reason to make a copy.
+ PassRefPtr<StringImpl> threadsafeCopy() const;
+
+ PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
+
+ UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
+ UChar32 characterStartingAt(unsigned);
+
+ bool containsOnlyWhitespace();
+
+ int toIntStrict(bool* ok = 0, int base = 10);
+ unsigned toUIntStrict(bool* ok = 0, int base = 10);
+ int64_t toInt64Strict(bool* ok = 0, int base = 10);
+ uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
+ intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
+
+ int toInt(bool* ok = 0); // ignores trailing garbage
+ unsigned toUInt(bool* ok = 0); // ignores trailing garbage
+ int64_t toInt64(bool* ok = 0); // ignores trailing garbage
+ uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
+ intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
+
+ double toDouble(bool* ok = 0);
+ float toFloat(bool* ok = 0);
+
+ PassRefPtr<StringImpl> lower();
+ PassRefPtr<StringImpl> upper();
+ PassRefPtr<StringImpl> secure(UChar aChar);
+ PassRefPtr<StringImpl> foldCase();
+
+ PassRefPtr<StringImpl> stripWhiteSpace();
+ PassRefPtr<StringImpl> simplifyWhiteSpace();
+
+ PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
+
+ int find(const char*, int index = 0, bool caseSensitive = true);
+ int find(UChar, int index = 0);
+ int find(CharacterMatchFunctionPtr, int index = 0);
+ int find(StringImpl*, int index, bool caseSensitive = true);
+
+ int reverseFind(UChar, int index);
+ int reverseFind(StringImpl*, int index, bool caseSensitive = true);
+
+ // Implemented as a reverseFind() from index 0 that must report a match at position 0.
+ bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; }
+ bool endsWith(StringImpl*, bool caseSensitive = true);
+
+ PassRefPtr<StringImpl> replace(UChar, UChar);
+ PassRefPtr<StringImpl> replace(UChar, StringImpl*);
+ PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
+ PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
+
+ Vector<char> ascii();
+
+ WTF::Unicode::Direction defaultWritingDirection();
+
+#if PLATFORM(CF)
+ CFStringRef createCFString();
+#endif
+#ifdef __OBJC__
+ operator NSString*();
+#endif
+
+private:
+ // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
+ static const unsigned s_copyCharsInlineCutOff = 20;
+
+ static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
+
+ BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); }
+ bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; }
+
+ const UChar* m_data;
+ // Which member is meaningful depends on bufferOwnership(): m_substringBuffer for
+ // BufferSubstring, m_sharedBuffer for BufferShared; the other constructors set m_buffer to 0.
+ union {
+ void* m_buffer;
+ StringImpl* m_substringBuffer;
+ SharedUChar* m_sharedBuffer;
+ };
+ // Lazily computed hash; 0 means "not computed yet" (see hash()).
+ mutable unsigned m_hash;
+};
+
+// Equality helpers for StringImpl; the out-of-line overloads are defined elsewhere.
+bool equal(const StringImpl*, const StringImpl*);
+bool equal(const StringImpl*, const char*);
+// Argument-order convenience overload; forwards to equal(StringImpl*, const char*).
+inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
+
+bool equalIgnoringCase(StringImpl*, StringImpl*);
+bool equalIgnoringCase(StringImpl*, const char*);
+// Argument-order convenience overload; forwards to equalIgnoringCase(StringImpl*, const char*).
+inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
+bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
+// Argument-order convenience overload; forwards to the (const UChar*, const char*, length) form.
+inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
+
+bool equalIgnoringNullity(StringImpl*, StringImpl*);
+
+// Reports whether |c| counts as a space or newline. Code units up to 0x7F are
+// classified by isASCIISpace() (which covers newlines, unlike the Unicode
+// white-space direction class); everything else is classified by its
+// Unicode direction being WhiteSpaceNeutral.
+static inline bool isSpaceOrNewline(UChar c)
+{
+    if (c <= 0x7F)
+        return WTF::isASCIISpace(c);
+    return WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
+}
+
+// This is a hot function because it's used when parsing HTML.
+// Creates a StringImpl from |characters| with all null (0) code units removed.
+// Both |characters| and |length| are asserted to be non-zero.
+inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
+{
+ ASSERT(characters);
+ ASSERT(length);
+
+ // Optimize for the case where there are no Null characters by quickly
+ // searching for nulls, and then using StringImpl::create, which will
+ // memcpy the whole buffer. This is faster than assigning character by
+ // character during the loop.
+
+ // Fast case.
+ // The loop stops at the first null found; foundNull is then non-zero.
+ int foundNull = 0;
+ for (unsigned i = 0; !foundNull && i < length; i++) {
+ int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
+ foundNull |= !c;
+ }
+ if (!foundNull)
+ return StringImpl::create(characters, length);
+
+ // At least one null is present: take the copying path that filters them out.
+ return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
+}
+
+}
+
+using WebCore::equal;
+
+namespace WTF {
+
+ // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
+ // The primary DefaultHash template is only declared here; the two explicit
+ // specializations below select StringHash for those key types.
+ template<typename T> struct DefaultHash;
+ template<> struct DefaultHash<WebCore::StringImpl*> {
+ typedef WebCore::StringHash Hash;
+ };
+ template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
+ typedef WebCore::StringHash Hash;
+ };
+
+}
+
+#endif
diff --git a/JavaScriptCore/wtf/text/StringImplBase.h b/JavaScriptCore/wtf/text/StringImplBase.h
new file mode 100644
index 0000000..a8e3385
--- /dev/null
+++ b/JavaScriptCore/wtf/text/StringImplBase.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef StringImplBase_h
+#define StringImplBase_h
+
+#include <wtf/Noncopyable.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WTF {
+
+// Base holding the two fields common to string implementations: a combined
+// ref-count/flags word and the length. The ref count occupies the bits covered by
+// s_refCountMask; the low bits carry flags and the buffer-ownership kind.
+class StringImplBase : public Noncopyable {
+public:
+ // False only for objects built with the ConstructNonStringImpl constructor, which
+ // sets the otherwise-invalid flag combination s_refCountInvalidForStringImpl.
+ bool isStringImpl() { return (m_refCountAndFlags & s_refCountInvalidForStringImpl) != s_refCountInvalidForStringImpl; }
+ unsigned length() const { return m_length; }
+ // Adds one reference (the count is stored in units of s_refCountIncrement).
+ void ref() { m_refCountAndFlags += s_refCountIncrement; }
+
+protected:
+ // Stored in the bits selected by s_refCountMaskBufferOwnership.
+ enum BufferOwnership {
+ BufferInternal,
+ BufferOwned,
+ BufferSubstring,
+ BufferShared,
+ };
+
+ using Noncopyable::operator new;
+ // Placement form: constructs into caller-provided storage.
+ void* operator new(size_t, void* inPlace) { ASSERT(inPlace); return inPlace; }
+
+ // For SmallStringStorage, which allocates an array and uses an in-place new.
+ // Leaves both members uninitialized.
+ StringImplBase() { }
+
+ // Regular strings start with one reference and the "should report cost" flag set.
+ StringImplBase(unsigned length, BufferOwnership ownership)
+ : m_refCountAndFlags(s_refCountIncrement | s_refCountFlagShouldReportedCost | ownership)
+ , m_length(length)
+ {
+ ASSERT(isStringImpl());
+ }
+
+ // Static strings carry the static and identifier flags, BufferOwned ownership,
+ // and a ref count of zero.
+ enum StaticStringConstructType { ConstructStaticString };
+ StringImplBase(unsigned length, StaticStringConstructType)
+ : m_refCountAndFlags(s_refCountFlagStatic | s_refCountFlagIsIdentifier | BufferOwned)
+ , m_length(length)
+ {
+ ASSERT(isStringImpl());
+ }
+
+ // This constructor is not used when creating StringImpl objects,
+ // and sets the flags into a state marking the object as such.
+ enum NonStringImplConstructType { ConstructNonStringImpl };
+ StringImplBase(NonStringImplConstructType)
+ : m_refCountAndFlags(s_refCountIncrement | s_refCountInvalidForStringImpl)
+ , m_length(0)
+ {
+ ASSERT(!isStringImpl());
+ }
+
+ // The bottom 7 bits hold flags, the top 25 bits hold the ref count.
+ // When dereferencing StringImpls we check for the ref count AND the
+ // static bit both being zero - static strings are never deleted.
+ static const unsigned s_refCountMask = 0xFFFFFF80;
+ static const unsigned s_refCountIncrement = 0x80;
+ static const unsigned s_refCountFlagStatic = 0x40;
+ static const unsigned s_refCountFlagHasTerminatingNullCharacter = 0x20;
+ static const unsigned s_refCountFlagInTable = 0x10;
+ static const unsigned s_refCountFlagShouldReportedCost = 0x8;
+ static const unsigned s_refCountFlagIsIdentifier = 0x4;
+ static const unsigned s_refCountMaskBufferOwnership = 0x3;
+ // An invalid permutation of flags (static & shouldReportedCost - static strings do not
+ // set shouldReportedCost in the constructor, and this bit is only ever cleared, not set).
+ // Used by "ConstructNonStringImpl" constructor, above.
+ static const unsigned s_refCountInvalidForStringImpl = s_refCountFlagStatic | s_refCountFlagShouldReportedCost;
+
+ unsigned m_refCountAndFlags;
+ unsigned m_length;
+};
+
+} // namespace WTF
+
+using WTF::StringImplBase;
+
+#endif
diff --git a/JavaScriptCore/wtf/text/WTFString.cpp b/JavaScriptCore/wtf/text/WTFString.cpp
new file mode 100644
index 0000000..a683e3d
--- /dev/null
+++ b/JavaScriptCore/wtf/text/WTFString.cpp
@@ -0,0 +1,960 @@
+/*
+ * (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "WTFString.h"
+
+#include <limits>
+#include <stdarg.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/text/CString.h>
+#include <wtf/StringExtras.h>
+#include <wtf/Vector.h>
+#include <wtf/dtoa.h>
+#include <wtf/unicode/UTF8.h>
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF;
+using namespace WTF::Unicode;
+
+namespace WebCore {
+
+// Builds a string from |len| UTF-16 code units; a null pointer yields a null String.
+String::String(const UChar* str, unsigned len)
+{
+    if (str)
+        m_impl = StringImpl::create(str, len);
+}
+
+// Builds a string from a 0-terminated UChar sequence; a null pointer yields a null String.
+String::String(const UChar* str)
+{
+    if (!str)
+        return;
+
+    // Count code units up to the terminator. The counter is unsigned to match
+    // StringImpl::create()'s length parameter and to avoid signed overflow on
+    // very long inputs.
+    unsigned len = 0;
+    while (str[len] != UChar(0))
+        ++len;
+
+    m_impl = StringImpl::create(str, len);
+}
+
+// Builds a string from a C string; a null pointer yields a null String.
+String::String(const char* str)
+{
+    if (str)
+        m_impl = StringImpl::create(str);
+}
+
+// Builds a string from |length| chars; a null pointer yields a null String.
+String::String(const char* str, unsigned length)
+{
+    if (str)
+        m_impl = StringImpl::create(str, length);
+}
+
+// Appends |str| to this string. Appending a null or empty string is a no-op; appending
+// to a null String makes it share |str|'s implementation. Otherwise a new buffer
+// holding both halves replaces the current implementation.
+void String::append(const String& str)
+{
+    // isEmpty() is true for a null impl as well, so str.m_impl is non-null below.
+    if (str.isEmpty())
+        return;
+
+    // FIXME: This is extremely inefficient. So much so that we might want to take this
+    // out of String's API. We can make it better by optimizing the case where exactly
+    // one String is pointing at this StringImpl, but even then it's going to require a
+    // call to fastMalloc every single time.
+    if (!m_impl) {
+        m_impl = str.m_impl;
+        return;
+    }
+
+    // The combined length is computed in unsigned arithmetic; refuse to continue if it
+    // would wrap, since the memcpy calls below would then overrun the new buffer.
+    if (str.length() > std::numeric_limits<unsigned>::max() - m_impl->length())
+        CRASH();
+
+    UChar* data;
+    RefPtr<StringImpl> newImpl =
+        StringImpl::createUninitialized(m_impl->length() + str.length(), data);
+    memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
+    memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar));
+    m_impl = newImpl.release();
+}
+
+// Appends a single 8-bit character. The char is widened through unsigned char so that
+// bytes 0x80-0xFF become U+0080-U+00FF (the inverse of latin1()) instead of being
+// sign-extended where char is signed.
+void String::append(char c)
+{
+    // FIXME: This is extremely inefficient. So much so that we might want to take this
+    // out of String's API. We can make it better by optimizing the case where exactly
+    // one String is pointing at this StringImpl, but even then it's going to require a
+    // call to fastMalloc every single time.
+    if (m_impl) {
+        // length() + 1 must not wrap to 0, or the memcpy below would overrun the buffer.
+        if (m_impl->length() == std::numeric_limits<unsigned>::max())
+            CRASH();
+        UChar* data;
+        RefPtr<StringImpl> newImpl =
+            StringImpl::createUninitialized(m_impl->length() + 1, data);
+        memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
+        data[m_impl->length()] = static_cast<unsigned char>(c);
+        m_impl = newImpl.release();
+    } else
+        m_impl = StringImpl::create(&c, 1);
+}
+
+// Appends a single UTF-16 code unit; a null String becomes a one-character string.
+void String::append(UChar c)
+{
+    // FIXME: This is extremely inefficient. So much so that we might want to take this
+    // out of String's API. We can make it better by optimizing the case where exactly
+    // one String is pointing at this StringImpl, but even then it's going to require a
+    // call to fastMalloc every single time.
+    if (m_impl) {
+        // length() + 1 must not wrap to 0, or the memcpy below would overrun the buffer.
+        if (m_impl->length() == std::numeric_limits<unsigned>::max())
+            CRASH();
+        UChar* data;
+        RefPtr<StringImpl> newImpl =
+            StringImpl::createUninitialized(m_impl->length() + 1, data);
+        memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
+        data[m_impl->length()] = c;
+        m_impl = newImpl.release();
+    } else
+        m_impl = StringImpl::create(&c, 1);
+}
+
+// Concatenates two strings. When either side is empty (or null) the other
+// operand is returned unchanged.
+String operator+(const String& a, const String& b)
+{
+    if (a.isEmpty())
+        return b;
+    if (b.isEmpty())
+        return a;
+    String joined(a);
+    joined += b;
+    return joined;
+}
+
+// Concatenates a String with a C string by first wrapping the C string in a String.
+String operator+(const String& s, const char* cs)
+{
+    String rhs(cs);
+    return s + rhs;
+}
+
+// Concatenates a C string with a String by first wrapping the C string in a String.
+String operator+(const char* cs, const String& s)
+{
+    String lhs(cs);
+    return lhs + s;
+}
+
+// Inserts |str| at |pos|. A null |str| changes nothing; an empty, non-null |str|
+// only turns a null receiver into the empty string.
+void String::insert(const String& str, unsigned pos)
+{
+    if (!str.isEmpty()) {
+        insert(str.characters(), str.length(), pos);
+        return;
+    }
+
+    if (!str.isNull() && isNull())
+        m_impl = str.impl();
+}
+
+// Appends |lengthToAppend| code units. For a null String the result is a fresh string
+// created from the input (or nothing at all if the pointer is null). Appending zero
+// characters to a non-null String is a no-op.
+void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
+{
+    if (!m_impl) {
+        if (!charactersToAppend)
+            return;
+        m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
+        return;
+    }
+
+    if (!lengthToAppend)
+        return;
+
+    ASSERT(charactersToAppend);
+    // The combined length is computed in unsigned arithmetic; refuse to continue if it
+    // would wrap, since the memcpy calls below would then overrun the new buffer.
+    if (lengthToAppend > std::numeric_limits<unsigned>::max() - length())
+        CRASH();
+    UChar* data;
+    RefPtr<StringImpl> newImpl =
+        StringImpl::createUninitialized(length() + lengthToAppend, data);
+    memcpy(data, characters(), length() * sizeof(UChar));
+    memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar));
+    m_impl = newImpl.release();
+}
+
+// Inserts |lengthToInsert| code units before index |position|. A position at or past
+// the end degrades to append(). Inserting zero characters is a no-op.
+void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
+{
+    if (position >= length()) {
+        append(charactersToInsert, lengthToInsert);
+        return;
+    }
+
+    // position < length() implies a non-null, non-empty implementation.
+    ASSERT(m_impl);
+
+    if (!lengthToInsert)
+        return;
+
+    ASSERT(charactersToInsert);
+    // The combined length is computed in unsigned arithmetic; refuse to continue if it
+    // would wrap, since the memcpy calls below would then overrun the new buffer.
+    if (lengthToInsert > std::numeric_limits<unsigned>::max() - length())
+        CRASH();
+    UChar* data;
+    RefPtr<StringImpl> newImpl =
+        StringImpl::createUninitialized(length() + lengthToInsert, data);
+    // Head, inserted run, then the tail that used to start at |position|.
+    memcpy(data, characters(), position * sizeof(UChar));
+    memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));
+    memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));
+    m_impl = newImpl.release();
+}
+
+// Bounds-checked element access: returns 0 for a null String or an index past the end.
+UChar String::operator[](unsigned i) const
+{
+    if (m_impl && i < m_impl->length())
+        return m_impl->characters()[i];
+    return 0;
+}
+
+// Forwards to StringImpl::characterStartingAt(); returns 0 for a null String or
+// an index past the end.
+UChar32 String::characterStartingAt(unsigned i) const
+{
+    if (m_impl && i < m_impl->length())
+        return m_impl->characterStartingAt(i);
+    return 0;
+}
+
+// Number of code units; 0 for a null String.
+unsigned String::length() const
+{
+    return m_impl ? m_impl->length() : 0;
+}
+
+// Shortens the string to its first |position| code units by copying them into a
+// new implementation. Does nothing when |position| is not inside the string.
+void String::truncate(unsigned position)
+{
+    if (position < length()) {
+        UChar* data;
+        RefPtr<StringImpl> truncated = StringImpl::createUninitialized(position, data);
+        memcpy(data, characters(), position * sizeof(UChar));
+        m_impl = truncated.release();
+    }
+}
+
+// Removes up to |lengthToRemove| code units starting at |position|. Non-positive
+// lengths and out-of-range positions are ignored; the length is clamped to what
+// remains after |position|.
+void String::remove(unsigned position, int lengthToRemove)
+{
+    if (lengthToRemove <= 0 || position >= length())
+        return;
+
+    const unsigned available = length() - position;
+    if (static_cast<unsigned>(lengthToRemove) > available)
+        lengthToRemove = available;
+
+    const unsigned newLength = length() - lengthToRemove;
+    UChar* data;
+    RefPtr<StringImpl> shortened = StringImpl::createUninitialized(newLength, data);
+    // Keep everything before the gap, then everything after it.
+    memcpy(data, characters(), position * sizeof(UChar));
+    memcpy(data + position, characters() + position + lengthToRemove,
+        (newLength - position) * sizeof(UChar));
+    m_impl = shortened.release();
+}
+
+// Forwards to StringImpl::substring(); a null String yields a null String.
+String String::substring(unsigned pos, unsigned len) const
+{
+    if (m_impl)
+        return m_impl->substring(pos, len);
+    return String();
+}
+
+// Forwards to StringImpl::lower(); a null String yields a null String.
+String String::lower() const
+{
+    if (m_impl)
+        return m_impl->lower();
+    return String();
+}
+
+// Forwards to StringImpl::upper(); a null String yields a null String.
+String String::upper() const
+{
+    if (m_impl)
+        return m_impl->upper();
+    return String();
+}
+
+// Forwards to StringImpl::stripWhiteSpace(); a null String yields a null String.
+String String::stripWhiteSpace() const
+{
+    if (m_impl)
+        return m_impl->stripWhiteSpace();
+    return String();
+}
+
+// Forwards to StringImpl::simplifyWhiteSpace(); a null String yields a null String.
+String String::simplifyWhiteSpace() const
+{
+    if (m_impl)
+        return m_impl->simplifyWhiteSpace();
+    return String();
+}
+
+// Forwards to StringImpl::removeCharacters() with the given predicate;
+// a null String yields a null String.
+String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
+{
+    if (m_impl)
+        return m_impl->removeCharacters(findMatch);
+    return String();
+}
+
+// Forwards to StringImpl::foldCase(); a null String yields a null String.
+String String::foldCase() const
+{
+    if (m_impl)
+        return m_impl->foldCase();
+    return String();
+}
+
+// If the string ends in '%', stores the strict integer conversion of everything before
+// the '%' in |result| and returns true; otherwise returns false and leaves |result|
+// untouched. The success flag of the conversion itself is not consulted.
+bool String::percentage(int& result) const
+{
+    if (!m_impl)
+        return false;
+
+    const unsigned implLength = m_impl->length();
+    if (!implLength || (*m_impl)[implLength - 1] != '%')
+        return false;
+
+    result = charactersToIntStrict(m_impl->characters(), implLength - 1);
+    return true;
+}
+
+// Pointer to the code units, or 0 for a null String.
+const UChar* String::characters() const
+{
+    return m_impl ? m_impl->characters() : 0;
+}
+
+// Returns the code units followed by a 0 terminator, replacing the implementation
+// with a terminated copy first if the current one is not already terminated.
+// Returns 0 for a null String.
+const UChar* String::charactersWithNullTermination()
+{
+    if (!m_impl)
+        return 0;
+    if (!m_impl->hasTerminatingNullCharacter())
+        m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);
+    return m_impl->characters();
+}
+
+// printf-style formatting into a String.
+//  - Qt builds delegate to QString::vsprintf.
+//  - WinCE builds retry vsnprintf with a doubling buffer until it reports success.
+//  - Other builds measure the output first, then format into an exactly sized buffer.
+// In the generic branch an empty result yields the empty string and a formatting
+// error (negative length) yields a null String.
+String String::format(const char *format, ...)
+{
+#if PLATFORM(QT)
+    // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
+    // https://bugs.webkit.org/show_bug.cgi?id=18994
+    va_list args;
+    va_start(args, format);
+
+    QString buffer;
+    buffer.vsprintf(format, args);
+
+    va_end(args);
+
+    return buffer;
+
+#elif OS(WINCE)
+    va_list args;
+    va_start(args, format);
+
+    Vector<char, 256> buffer;
+
+    int bufferSize = 256;
+    buffer.resize(bufferSize);
+    for (;;) {
+        int written = vsnprintf(buffer.data(), bufferSize, format, args);
+        va_end(args);
+
+        if (written == 0)
+            return String("");
+        if (written > 0)
+            return StringImpl::create(buffer.data(), written);
+
+        // A negative result is treated as "buffer too small": double it and retry.
+        bufferSize <<= 1;
+        buffer.resize(bufferSize);
+        va_start(args, format);
+    }
+
+#else
+    va_list args;
+    va_start(args, format);
+
+    Vector<char, 256> buffer;
+
+    // Do the format once to get the length.
+#if COMPILER(MSVC)
+    int result = _vscprintf(format, args);
+#else
+    char ch;
+    int result = vsnprintf(&ch, 1, format, args);
+    // We need to call va_end() and then va_start() again here, as the
+    // contents of args is undefined after the call to vsnprintf
+    // according to http://man.cx/snprintf(3)
+    //
+    // Not calling va_end/va_start here happens to work on lots of
+    // systems, but fails e.g. on 64bit Linux.
+    va_end(args);
+    va_start(args, format);
+#endif
+
+    // Every va_start must be paired with va_end before returning, including on
+    // the early-exit paths below.
+    if (result == 0) {
+        va_end(args);
+        return String("");
+    }
+    if (result < 0) {
+        va_end(args);
+        return String();
+    }
+    unsigned len = result;
+    buffer.grow(len + 1);
+
+    // Now do the formatting again, guaranteed to fit.
+    vsnprintf(buffer.data(), buffer.size(), format, args);
+
+    va_end(args);
+
+    return StringImpl::create(buffer.data(), len);
+#endif
+}
+
+// Decimal text for a short, produced with the "%hd" conversion.
+String String::number(short n)
+{
+    return format("%hd", n);
+}
+
+// Decimal text for an unsigned short, produced with the "%hu" conversion.
+String String::number(unsigned short n)
+{
+    return format("%hu", n);
+}
+
+// Decimal text for an int, produced with the "%d" conversion.
+String String::number(int n)
+{
+    return format("%d", n);
+}
+
+// Decimal text for an unsigned int, produced with the "%u" conversion.
+String String::number(unsigned n)
+{
+    return format("%u", n);
+}
+
+// Decimal text for a long, produced with the "%ld" conversion.
+String String::number(long n)
+{
+    return format("%ld", n);
+}
+
+// Decimal text for an unsigned long, produced with the "%lu" conversion.
+String String::number(unsigned long n)
+{
+    return format("%lu", n);
+}
+
+// Decimal text for a long long. Non-Qt Windows builds use the "%I64i"
+// length modifier; everything else uses "%lli".
+String String::number(long long n)
+{
+#if OS(WINDOWS) && !PLATFORM(QT)
+    return format("%I64i", n);
+#else
+    return format("%lli", n);
+#endif
+}
+
+// Decimal text for an unsigned long long. Non-Qt Windows builds use the
+// "%I64u" length modifier; everything else uses "%llu".
+String String::number(unsigned long long n)
+{
+#if OS(WINDOWS) && !PLATFORM(QT)
+    return format("%I64u", n);
+#else
+    return format("%llu", n);
+#endif
+}
+
+// Text for a double, produced with the "%.6lg" conversion (six significant digits).
+String String::number(double n)
+{
+    return format("%.6lg", n);
+}
+
+// Forwards to StringImpl::toIntStrict(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+int String::toIntStrict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toIntStrict(ok, base);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toUIntStrict(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+unsigned String::toUIntStrict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toUIntStrict(ok, base);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toInt64Strict(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+int64_t String::toInt64Strict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toInt64Strict(ok, base);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toUInt64Strict(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+uint64_t String::toUInt64Strict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toUInt64Strict(ok, base);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toIntPtrStrict(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+intptr_t String::toIntPtrStrict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toIntPtrStrict(ok, base);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+
+// Forwards to StringImpl::toInt(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+int String::toInt(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toInt(ok);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toUInt(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+unsigned String::toUInt(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toUInt(ok);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toInt64(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+int64_t String::toInt64(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toInt64(ok);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toUInt64(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+uint64_t String::toUInt64(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toUInt64(ok);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toIntPtr(). A null String reports failure through |ok|
+// (when provided) and returns 0.
+intptr_t String::toIntPtr(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toIntPtr(ok);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toDouble(). A null String reports failure through |ok|
+// (when provided) and returns 0.0.
+double String::toDouble(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toDouble(ok);
+    if (ok)
+        *ok = false;
+    return 0.0;
+}
+
+// Forwards to StringImpl::toFloat(). A null String reports failure through |ok|
+// (when provided) and returns 0.0f.
+float String::toFloat(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toFloat(ok);
+    if (ok)
+        *ok = false;
+    return 0.0f;
+}
+
+// Forwards to StringImpl::threadsafeCopy(); a null String yields a null String.
+String String::threadsafeCopy() const
+{
+    if (m_impl)
+        return m_impl->threadsafeCopy();
+    return String();
+}
+
+// Forwards to StringImpl::crossThreadString(); a null String yields a null String.
+String String::crossThreadString() const
+{
+    if (m_impl)
+        return m_impl->crossThreadString();
+    return String();
+}
+
+// True for a null String as well as for a zero-length one.
+bool String::isEmpty() const
+{
+    return !(m_impl && m_impl->length());
+}
+
+// Splits this string on every occurrence of |separator|, replacing the contents of
+// |result|. Empty pieces are kept only when |allowEmptyEntries| is true.
+//
+// An empty (or null) separator is treated as "no separator": the whole string becomes
+// the single candidate entry. Without this guard, a match reported for an empty
+// separator would leave startPos unchanged (endPos + 0) and the loop could not
+// terminate.
+void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
+{
+    result.clear();
+
+    int startPos = 0;
+    if (!separator.isEmpty()) {
+        int endPos;
+        while ((endPos = find(separator, startPos)) != -1) {
+            if (allowEmptyEntries || startPos != endPos)
+                result.append(substring(startPos, endPos - startPos));
+            // Resume searching just past the separator that was found.
+            startPos = endPos + separator.length();
+        }
+    }
+    // Whatever follows the last separator (possibly the whole string).
+    if (allowEmptyEntries || startPos != static_cast<int>(length()))
+        result.append(substring(startPos));
+}
+
+// Convenience overload: splits on |separator| and drops empty pieces.
+void String::split(const String& separator, Vector<String>& result) const
+{
+    const bool allowEmptyEntries = false;
+    split(separator, allowEmptyEntries, result);
+}
+
+// Splits this string on every occurrence of the single character |separator|,
+// replacing the contents of |result| with the pieces. Zero-length pieces are
+// kept only when |allowEmptyEntries| is true.
+void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
+{
+    result.clear();
+
+    int pieceStart = 0;
+    for (;;) {
+        const int matchPos = find(separator, pieceStart);
+        if (matchPos == -1)
+            break;
+
+        if (allowEmptyEntries || matchPos != pieceStart)
+            result.append(substring(pieceStart, matchPos - pieceStart));
+        pieceStart = matchPos + 1;
+    }
+
+    // Whatever follows the last separator forms the final piece.
+    if (allowEmptyEntries || pieceStart != static_cast<int>(length()))
+        result.append(substring(pieceStart));
+}
+
+// Convenience overload: splits on the single character |separator| and drops
+// empty pieces. Forwards to the UChar overload directly rather than building
+// a temporary one-character String just to reach the String overload; the
+// resulting pieces are the same.
+void String::split(UChar separator, Vector<String>& result) const
+{
+    split(separator, false, result);
+}
+
+// Returns StringImpl::ascii() for a non-null string. For a null string the
+// result is the NUL-terminated placeholder text "(null impl)".
+Vector<char> String::ascii() const
+{
+    if (m_impl)
+        return m_impl->ascii();
+
+    Vector<char, 2048> buffer;
+    const char* cursor = "(null impl)";
+    while (*cursor) {
+        buffer.append(*cursor);
+        ++cursor;
+    }
+    buffer.append('\0');
+
+    return buffer;
+}
+
+// Basic Latin-1 (ISO) encoding: code units 0..255 are kept as-is, anything
+// above that range becomes '?'.
+CString String::latin1() const
+{
+    const unsigned unitCount = this->length();
+    const UChar* source = this->characters();
+
+    char* destination;
+    CString result = CString::newUninitialized(unitCount, destination);
+
+    for (unsigned i = 0; i < unitCount; ++i) {
+        const UChar unit = source[i];
+        if (unit > 255)
+            destination[i] = '?';
+        else
+            destination[i] = static_cast<char>(unit);
+    }
+
+    return result;
+}
+
+// Writes |ch| as a three-byte UTF-8 sequence at |buffer| and advances |buffer|
+// past it. The caller must already have checked that three bytes are available.
+static inline void putUTF8Triple(char*& buffer, UChar ch)
+{
+    ASSERT(ch >= 0x0800);
+
+    const int leadByte = 0xE0 | ((ch >> 12) & 0x0F);
+    const int middleByte = 0x80 | ((ch >> 6) & 0x3F);
+    const int lastByte = 0x80 | (ch & 0x3F);
+
+    buffer[0] = static_cast<char>(leadByte);
+    buffer[1] = static_cast<char>(middleByte);
+    buffer[2] = static_cast<char>(lastByte);
+    buffer += 3;
+}
+
+// Encodes the string as UTF-8 using non-strict conversion. A trailing unpaired
+// high surrogate is encoded as a three-byte sequence instead of being dropped.
+// Returns a default-constructed CString when the worst-case output size
+// (3 bytes per UTF-16 code unit) cannot be represented in an unsigned.
+CString String::utf8() const
+{
+    unsigned length = this->length();
+    const UChar* characters = this->characters();
+    const UChar* const charactersEnd = characters + length;
+
+    // The scratch buffer is sized length * 3. Refuse lengths for which that
+    // product would wrap around: a wrapped (too small) buffer could be
+    // overrun by the conversion below.
+    if (length > std::numeric_limits<unsigned>::max() / 3)
+        return CString();
+
+    // Allocate a buffer big enough to hold all the characters
+    // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
+    // Optimization ideas, if we find this function is hot:
+    //  * We could speculatively create a CStringBuffer to contain 'length'
+    //    characters, and resize if necessary (i.e. if the buffer contains
+    //    non-ascii characters). (Alternatively, scan the buffer first for
+    //    ascii characters, so we know this will be sufficient).
+    //  * We could allocate a CStringBuffer with an appropriate size to
+    //    have a good chance of being able to write the string into the
+    //    buffer without reallocing (say, 1.5 x length).
+    Vector<char, 1024> bufferVector(length * 3);
+    char* const bufferEnd = bufferVector.data() + bufferVector.size();
+
+    // convertUTF16ToUTF8 advances both |characters| and |buffer|.
+    char* buffer = bufferVector.data();
+    ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, false);
+    ASSERT(result != sourceIllegal); // Only produced from strict conversion.
+    ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
+
+    // If a high surrogate is left unconverted, treat it the same way as an unpaired high surrogate
+    // would have been handled in the middle of a string with non-strict conversion - which is to say,
+    // simply encode it to UTF-8.
+    if (result == sourceExhausted) {
+        // This should be one unpaired high surrogate: exactly one code unit
+        // remains between the advanced |characters| and the end of the input.
+        ASSERT((characters + 1) == charactersEnd);
+        ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
+        // There should be room left, since one UChar hasn't been converted.
+        ASSERT((buffer + 3) <= bufferEnd);
+        putUTF8Triple(buffer, *characters);
+    }
+
+    return CString(bufferVector.data(), buffer - bufferVector.data());
+}
+
+// Decodes |length| bytes of UTF-8 starting at |stringStart| into a String.
+// Returns a null String when |stringStart| is null, when the conversion does
+// not report conversionOK, or when |length| does not fit in an unsigned.
+String String::fromUTF8(const char* stringStart, size_t length)
+{
+    if (!stringStart)
+        return String();
+
+    // createUninitialized() takes an unsigned length. Without this check a
+    // larger size_t would be truncated for the allocation while bufferEnd
+    // below is still computed from the full value, letting the conversion
+    // write past the end of the allocation.
+    if (length > std::numeric_limits<unsigned>::max())
+        return String();
+
+    // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be
+    // the right length, if there are any multi-byte sequences this buffer will be too large.
+    UChar* buffer;
+    String stringBuffer(StringImpl::createUninitialized(static_cast<unsigned>(length), buffer));
+    UChar* bufferEnd = buffer + length;
+
+    // Try converting into the buffer.
+    const char* stringCurrent = stringStart;
+    if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK)
+        return String();
+
+    // stringBuffer is full (the input must have been all ascii) so just return it!
+    if (buffer == bufferEnd)
+        return stringBuffer;
+
+    // stringBuffer served its purpose as a buffer, copy the contents out into a new string.
+    unsigned utf16Length = buffer - stringBuffer.characters();
+    ASSERT(utf16Length < length);
+    return String(stringBuffer.characters(), utf16Length);
+}
+
+// Decodes a NUL-terminated UTF-8 C string; a null pointer gives a null String.
+String String::fromUTF8(const char* string)
+{
+    return string ? fromUTF8(string, strlen(string)) : String();
+}
+
+// Decodes |size| bytes as UTF-8; if fromUTF8() yields a null String, builds
+// the result with the String(const char*, unsigned) constructor instead.
+String String::fromUTF8WithLatin1Fallback(const char* string, size_t size)
+{
+    String decoded = fromUTF8(string, size);
+    if (decoded.isNull())
+        return String(string, size);
+    return decoded;
+}
+
+// String Operations
+
+// Reports whether |c| is a valid digit in |base|. Decimal digits count when
+// their value is below |base|; letters (either case) stand for 10 and up, with
+// |base| clamped to 36 for the letter range. Non-ASCII characters never match.
+static bool isCharacterAllowedInBase(UChar c, int base)
+{
+    if (c > 0x7F)
+        return false;
+
+    if (isASCIIDigit(c))
+        return c - '0' < base;
+
+    if (!isASCIIAlpha(c))
+        return false;
+
+    // Number of letters usable as digits.
+    const int letterCount = (base > 36 ? 36 : base) - 10;
+    const bool isLowerDigit = c >= 'a' && c < 'a' + letterCount;
+    const bool isUpperDigit = c >= 'A' && c < 'A' + letterCount;
+    return isLowerDigit || isUpperDigit;
+}
+
+template <typename IntegralType>
+static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base) // Parses [whitespace][sign]digits[whitespace] from data[0..length) in |base|; returns 0 and reports !ok on any failure.
+{
+ static const IntegralType integralMax = std::numeric_limits<IntegralType>::max();
+ static const bool isSigned = std::numeric_limits<IntegralType>::is_signed;
+ const IntegralType maxMultiplier = integralMax / base; // NOTE(review): divides by |base|; a base of 0 would fault here - confirm callers never pass it
+
+ IntegralType value = 0;
+ bool isOk = false;
+ bool isNegative = false;
+
+ if (!data)
+ goto bye;
+
+ // Skip leading whitespace.
+ while (length && isSpaceOrNewline(*data)) {
+ length--;
+ data++;
+ }
+
+ if (isSigned && length && *data == '-') { // '-' is only accepted for signed result types
+ length--;
+ data++;
+ isNegative = true;
+ } else if (length && *data == '+') {
+ length--;
+ data++;
+ }
+
+ if (!length || !isCharacterAllowedInBase(*data, base)) // at least one digit is required
+ goto bye;
+
+ while (length && isCharacterAllowedInBase(*data, base)) {
+ length--;
+ IntegralType digitValue;
+ UChar c = *data;
+ if (isASCIIDigit(c))
+ digitValue = c - '0';
+ else if (c >= 'a')
+ digitValue = c - 'a' + 10;
+ else
+ digitValue = c - 'A' + 10;
+
+ if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative)) // overflow check done before accumulating; a negative number is allowed one extra unit of magnitude
+ goto bye;
+
+ value = base * value + digitValue;
+ data++;
+ }
+
+#if COMPILER(MSVC)
+#pragma warning(push, 0)
+#pragma warning(disable:4146)
+#endif
+
+ if (isNegative) // only ever true for signed types (see the sign handling above)
+ value = -value;
+
+#if COMPILER(MSVC)
+#pragma warning(pop)
+#endif
+
+ // Skip trailing whitespace.
+ while (length && isSpaceOrNewline(*data)) {
+ length--;
+ data++;
+ }
+
+ if (!length) // success only if the whole input was consumed
+ isOk = true;
+bye:
+ if (ok)
+ *ok = isOk;
+ return isOk ? value : 0;
+}
+
+// Returns the length of the longest prefix of data[0..length) made up of
+// optional whitespace, an optional '+' or '-', and a run of ASCII digits.
+static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length)
+{
+    size_t i = 0;
+
+    // Leading whitespace.
+    while (i != length && isSpaceOrNewline(data[i]))
+        ++i;
+
+    // Optional sign.
+    if (i != length && (data[i] == '+' || data[i] == '-'))
+        ++i;
+
+    // Digits.
+    while (i != length && isASCIIDigit(data[i]))
+        ++i;
+
+    return i;
+}
+
+// Parses the whole of data[0..length) as an int in |base| via toIntegralType().
+int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+    const int parsed = toIntegralType<int>(data, length, ok, base);
+    return parsed;
+}
+
+// Parses the whole of data[0..length) as an unsigned in |base| via toIntegralType().
+unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+    const unsigned parsed = toIntegralType<unsigned>(data, length, ok, base);
+    return parsed;
+}
+
+// Parses the whole of data[0..length) as an int64_t in |base| via toIntegralType().
+int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+    const int64_t parsed = toIntegralType<int64_t>(data, length, ok, base);
+    return parsed;
+}
+
+// Parses the whole of data[0..length) as a uint64_t in |base| via toIntegralType().
+uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+    const uint64_t parsed = toIntegralType<uint64_t>(data, length, ok, base);
+    return parsed;
+}
+
+// Parses the whole of data[0..length) as an intptr_t in |base| via toIntegralType().
+intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+    const intptr_t parsed = toIntegralType<intptr_t>(data, length, ok, base);
+    return parsed;
+}
+
+// Parses the leading base-10 integer prefix of data[0..length) as an int;
+// anything after that prefix is ignored.
+int charactersToInt(const UChar* data, size_t length, bool* ok)
+{
+    const size_t prefixLength = lengthOfCharactersAsInteger(data, length);
+    return toIntegralType<int>(data, prefixLength, ok, 10);
+}
+
+// Parses the leading base-10 integer prefix of data[0..length) as an unsigned;
+// anything after that prefix is ignored.
+unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
+{
+    const size_t prefixLength = lengthOfCharactersAsInteger(data, length);
+    return toIntegralType<unsigned>(data, prefixLength, ok, 10);
+}
+
+// Parses the leading base-10 integer prefix of data[0..length) as an int64_t;
+// anything after that prefix is ignored.
+int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
+{
+    const size_t prefixLength = lengthOfCharactersAsInteger(data, length);
+    return toIntegralType<int64_t>(data, prefixLength, ok, 10);
+}
+
+// Parses the leading base-10 integer prefix of data[0..length) as a uint64_t;
+// anything after that prefix is ignored.
+uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
+{
+    const size_t prefixLength = lengthOfCharactersAsInteger(data, length);
+    return toIntegralType<uint64_t>(data, prefixLength, ok, 10);
+}
+
+// Parses the leading base-10 integer prefix of data[0..length) as an intptr_t;
+// anything after that prefix is ignored.
+intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
+{
+    const size_t prefixLength = lengthOfCharactersAsInteger(data, length);
+    return toIntegralType<intptr_t>(data, prefixLength, ok, 10);
+}
+
+// Converts data[0..length) to a double with WTF::strtod(). Code units at or
+// above 0x7F are replaced by '?' before parsing. Empty input fails (returns
+// 0.0, *ok false); otherwise *ok reports whether strtod consumed everything.
+double charactersToDouble(const UChar* data, size_t length, bool* ok)
+{
+    if (!length) {
+        if (ok)
+            *ok = false;
+        return 0.0;
+    }
+
+    // Narrow to a NUL-terminated byte string for strtod.
+    Vector<char, 256> narrowed(length + 1);
+    for (unsigned i = 0; i < length; ++i) {
+        const UChar unit = data[i];
+        narrowed[i] = unit < 0x7F ? unit : '?';
+    }
+    narrowed[length] = '\0';
+
+    char* parseEnd;
+    const double parsed = WTF::strtod(narrowed.data(), &parseEnd);
+    if (ok)
+        *ok = !parseEnd || *parseEnd == '\0';
+    return parsed;
+}
+
+// Parses via charactersToDouble() and narrows the result to float.
+float charactersToFloat(const UChar* data, size_t length, bool* ok)
+{
+    // FIXME: This will return ok even when the string fits into a double but not a float.
+    const double wideValue = charactersToDouble(data, length, ok);
+    return static_cast<float>(wideValue);
+}
+
+} // namespace WebCore
+
+#ifndef NDEBUG
+// For use in the debugger (only built when NDEBUG is not defined): heap-allocates a String from a C string. Nothing here frees it, so each call leaks.
+WebCore::String* string(const char*); // prototype for the definition just below
+
+WebCore::String* string(const char* s)
+{
+ return new WebCore::String(s);
+}
+#endif
diff --git a/JavaScriptCore/wtf/text/WTFString.h b/JavaScriptCore/wtf/text/WTFString.h
new file mode 100644
index 0000000..7c3c2dd
--- /dev/null
+++ b/JavaScriptCore/wtf/text/WTFString.h
@@ -0,0 +1,398 @@
+/*
+ * (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef WTFString_h
+#define WTFString_h
+
+// This file would be called String.h, but that conflicts with <string.h>
+// on systems without case-sensitive file systems.
+
+#include "StringImpl.h"
+
+#ifdef __OBJC__
+#include <objc/objc.h>
+#endif
+
+#if PLATFORM(CF)
+typedef const struct __CFString * CFStringRef;
+#endif
+
+#if PLATFORM(QT)
+QT_BEGIN_NAMESPACE
+class QString;
+QT_END_NAMESPACE
+#include <QDataStream>
+#endif
+
+#if PLATFORM(WX)
+class wxString;
+#endif
+
+#if PLATFORM(HAIKU)
+class BString;
+#endif
+
+namespace WTF {
+class CString;
+}
+using WTF::CString;
+
+// FIXME: This is a temporary layering violation while we move string code to WTF.
+// Landing the file moves in one patch, will follow on with patches to change the namespaces.
+namespace WebCore {
+
+class SharedBuffer;
+struct StringHash;
+
+// Declarations of string operations
+
+bool charactersAreAllASCII(const UChar*, size_t); // true if no code unit has a bit set above 0x7F
+int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); // strict: fails unless the whole input parses
+unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); // strict: fails unless the whole input parses
+int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); // strict: fails unless the whole input parses
+uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); // strict: fails unless the whole input parses
+intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10); // strict: fails unless the whole input parses
+
+int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+
+double charactersToDouble(const UChar*, size_t, bool* ok = 0);
+float charactersToFloat(const UChar*, size_t, bool* ok = 0); // parsed as a double, then narrowed to float
+
+int find(const UChar*, size_t, UChar, int startPosition = 0); // index of the first match at or after startPosition, or -1
+int reverseFind(const UChar*, size_t, UChar, int startPosition = -1); // index of the last match at or before startPosition (negative counts from the end), or -1
+
+class String { // Handle around a RefPtr<StringImpl>; a null handle (no impl) is distinct from an empty string.
+public:
+ String() { } // gives null string, distinguishable from an empty string
+ String(const UChar*, unsigned length);
+ String(const UChar*); // Specifically for null terminated UTF-16
+ String(const char*);
+ String(const char*, unsigned length);
+ String(StringImpl* i) : m_impl(i) { }
+ String(PassRefPtr<StringImpl> i) : m_impl(i) { }
+ String(RefPtr<StringImpl> i) : m_impl(i) { }
+
+ void swap(String& o) { m_impl.swap(o.m_impl); }
+
+ // Hash table deleted values, which are only constructed and never copied or destroyed.
+ String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { }
+ bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); }
+
+ static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); }
+ static String adopt(Vector<UChar>& vector) { return StringImpl::adopt(vector); }
+
+ unsigned length() const;
+ const UChar* characters() const;
+ const UChar* charactersWithNullTermination();
+
+ UChar operator[](unsigned i) const; // if i >= length(), returns 0
+ UChar32 characterStartingAt(unsigned) const; // Ditto.
+
+ bool contains(UChar c) const { return find(c) != -1; }
+ bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
+ bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
+
+ int find(UChar c, int start = 0) const // every find()/reverseFind() overload below returns -1 for a null string
+ { return m_impl ? m_impl->find(c, start) : -1; }
+ int find(CharacterMatchFunctionPtr matchFunction, int start = 0) const
+ { return m_impl ? m_impl->find(matchFunction, start) : -1; }
+ int find(const char* str, int start = 0, bool caseSensitive = true) const
+ { return m_impl ? m_impl->find(str, start, caseSensitive) : -1; }
+ int find(const String& str, int start = 0, bool caseSensitive = true) const
+ { return m_impl ? m_impl->find(str.impl(), start, caseSensitive) : -1; }
+
+ int reverseFind(UChar c, int start = -1) const
+ { return m_impl ? m_impl->reverseFind(c, start) : -1; }
+ int reverseFind(const String& str, int start = -1, bool caseSensitive = true) const
+ { return m_impl ? m_impl->reverseFind(str.impl(), start, caseSensitive) : -1; }
+
+ bool startsWith(const String& s, bool caseSensitive = true) const
+ { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); } // a null string only starts with an empty s
+ bool endsWith(const String& s, bool caseSensitive = true) const
+ { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); } // a null string only ends with an empty s
+
+ void append(const String&);
+ void append(char);
+ void append(UChar);
+ void append(const UChar*, unsigned length);
+ void insert(const String&, unsigned pos);
+ void insert(const UChar*, unsigned length, unsigned pos);
+
+ String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; } // the replace() and make*() helpers leave a null string untouched
+ String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; }
+ String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; }
+ String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; }
+
+ void makeLower() { if (m_impl) m_impl = m_impl->lower(); }
+ void makeUpper() { if (m_impl) m_impl = m_impl->upper(); }
+ void makeSecure(UChar aChar) { if (m_impl) m_impl = m_impl->secure(aChar); }
+
+ void truncate(unsigned len);
+ void remove(unsigned pos, int len = 1);
+
+ String substring(unsigned pos, unsigned len = UINT_MAX) const;
+ String left(unsigned len) const { return substring(0, len); }
+ String right(unsigned len) const { return substring(length() - len, len); } // NOTE(review): length() - len wraps when len > length(); the result then depends on how substring() treats an out-of-range pos - confirm
+
+ // Returns a lowercase/uppercase version of the string
+ String lower() const;
+ String upper() const;
+
+ String stripWhiteSpace() const;
+ String simplifyWhiteSpace() const;
+
+ String removeCharacters(CharacterMatchFunctionPtr) const;
+
+ // Return the string with case folded for case insensitive comparison.
+ String foldCase() const;
+
+ static String number(short);
+ static String number(unsigned short);
+ static String number(int);
+ static String number(unsigned);
+ static String number(long);
+ static String number(unsigned long);
+ static String number(long long);
+ static String number(unsigned long long);
+ static String number(double);
+
+ static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
+
+ // Returns an uninitialized string. The characters need to be written
+ // into the buffer returned in data before the returned string is used.
+ // Failure to do this will have unpredictable results.
+ static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); }
+
+ void split(const String& separator, Vector<String>& result) const; // the overloads without allowEmptyEntries drop empty pieces
+ void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const;
+ void split(UChar separator, Vector<String>& result) const;
+ void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const;
+
+ int toIntStrict(bool* ok = 0, int base = 10) const; // all to*() conversions return 0 and report !ok for a null string
+ unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
+ int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
+ uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
+ intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const;
+
+ int toInt(bool* ok = 0) const;
+ unsigned toUInt(bool* ok = 0) const;
+ int64_t toInt64(bool* ok = 0) const;
+ uint64_t toUInt64(bool* ok = 0) const;
+ intptr_t toIntPtr(bool* ok = 0) const;
+ double toDouble(bool* ok = 0) const;
+ float toFloat(bool* ok = 0) const;
+
+ bool percentage(int& percentage) const;
+
+ // Returns a String suitable for use on another thread.
+ String crossThreadString() const;
+ // Makes a deep copy. Helpful only if you need to use a String on another thread
+ // (use crossThreadString if the method call doesn't need to be threadsafe).
+ // Since the underlying StringImpl objects are immutable, there's no other reason
+ // to ever prefer threadsafeCopy() over plain old assignment.
+ String threadsafeCopy() const;
+
+ bool isNull() const { return !m_impl; }
+ bool isEmpty() const;
+
+ StringImpl* impl() const { return m_impl.get(); }
+
+#if PLATFORM(CF)
+ String(CFStringRef);
+ CFStringRef createCFString() const;
+#endif
+
+#ifdef __OBJC__
+ String(NSString*);
+
+ // This conversion maps NULL to "", which loses the meaning of NULL, but we
+ // need this mapping because AppKit crashes when passed nil NSStrings.
+ operator NSString*() const { if (!m_impl) return @""; return *m_impl; }
+#endif
+
+#if PLATFORM(QT)
+ String(const QString&);
+ String(const QStringRef&);
+ operator QString() const;
+#endif
+
+#if PLATFORM(WX)
+ String(const wxString&);
+ operator wxString() const;
+#endif
+
+#if PLATFORM(HAIKU)
+ String(const BString&);
+ operator BString() const;
+#endif
+
+ Vector<char> ascii() const;
+
+ CString latin1() const;
+ CString utf8() const;
+
+ static String fromUTF8(const char*, size_t);
+ static String fromUTF8(const char*);
+
+ // Tries to decode the passed in bytes as UTF-8, but will fall back to Latin-1 if they are not valid UTF-8.
+ static String fromUTF8WithLatin1Fallback(const char*, size_t);
+
+ // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
+ WTF::Unicode::Direction defaultWritingDirection() const { return m_impl ? m_impl->defaultWritingDirection() : WTF::Unicode::LeftToRight; }
+
+ bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); }
+
+private:
+ RefPtr<StringImpl> m_impl; // null for a null String
+};
+
+#if PLATFORM(QT)
+QDataStream& operator<<(QDataStream& stream, const String& str);
+QDataStream& operator>>(QDataStream& stream, String& str);
+#endif
+
+// Concatenation; the operator+ overloads are defined out of line.
+String operator+(const String&, const String&);
+String operator+(const String&, const char*);
+String operator+(const char*, const String&);
+
+// Appends |b| to |a| in place and returns |a|.
+inline String& operator+=(String& a, const String& b)
+{
+    a.append(b);
+    return a;
+}
+
+// Equality between Strings and/or C strings. Every overload defers to the
+// equal() overloads that take the underlying StringImpl* / const char*.
+inline bool operator==(const String& a, const String& b)
+{
+    return equal(a.impl(), b.impl());
+}
+
+inline bool operator==(const String& a, const char* b)
+{
+    return equal(a.impl(), b);
+}
+
+inline bool operator==(const char* a, const String& b)
+{
+    return equal(a, b.impl());
+}
+
+// Inequality: the negation of the matching equal() call.
+inline bool operator!=(const String& a, const String& b)
+{
+    return !equal(a.impl(), b.impl());
+}
+
+inline bool operator!=(const String& a, const char* b)
+{
+    return !equal(a.impl(), b);
+}
+
+inline bool operator!=(const char* a, const String& b)
+{
+    return !equal(a, b.impl());
+}
+
+// Case-insensitive equality; each overload defers to the equalIgnoringCase()
+// overload that takes the underlying StringImpl* / const char*.
+inline bool equalIgnoringCase(const String& a, const String& b)
+{
+    return equalIgnoringCase(a.impl(), b.impl());
+}
+
+inline bool equalIgnoringCase(const String& a, const char* b)
+{
+    return equalIgnoringCase(a.impl(), b);
+}
+
+inline bool equalIgnoringCase(const char* a, const String& b)
+{
+    return equalIgnoringCase(a, b.impl());
+}
+
+// Compares two strings, case-insensitively when |ignoreCase| is set and with
+// operator== otherwise.
+inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase)
+{
+    if (ignoreCase)
+        return equalIgnoringCase(a, b);
+    return a == b;
+}
+
+// Defers to equalIgnoringNullity() on the underlying StringImpl pointers.
+inline bool equalIgnoringNullity(const String& a, const String& b)
+{
+    return equalIgnoringNullity(a.impl(), b.impl());
+}
+
+// !str is true exactly when str is the null string (not merely empty).
+inline bool operator!(const String& str)
+{
+    return str.isNull();
+}
+
+// Free-function swap; exchanges the two strings via String::swap().
+inline void swap(String& a, String& b)
+{
+    a.swap(b);
+}
+
+// Definitions of string operations
+
+#ifdef __OBJC__
+// Some long-standing WebKit call sites expect "nil if empty"; this helper
+// keeps that behavior for entrenched clients. Null and empty strings both
+// map to nil, everything else goes through String's NSString* conversion.
+inline NSString* nsStringNilIfEmpty(const String& str)
+{
+    if (str.isEmpty())
+        return nil;
+    return (NSString*)str;
+}
+#endif
+
+// True when none of the |length| code units has a bit set outside the 7-bit
+// ASCII range. All units are OR-ed together and the mask is tested once.
+inline bool charactersAreAllASCII(const UChar* characters, size_t length)
+{
+    UChar combinedBits = 0;
+    size_t index = 0;
+    while (index < length) {
+        combinedBits |= characters[index];
+        ++index;
+    }
+    return (combinedBits & 0xFF80) == 0;
+}
+
+// Returns the index of the first occurrence of |character| at or after
+// |startPosition|, or -1 if there is none. A negative |startPosition| converts
+// to a very large unsigned index, so the scan does not run and -1 is returned.
+inline int find(const UChar* characters, size_t length, UChar character, int startPosition)
+{
+    if (static_cast<int>(length) <= startPosition)
+        return -1;
+
+    size_t index = startPosition;
+    while (index < length) {
+        if (characters[index] == character)
+            return static_cast<int>(index);
+        ++index;
+    }
+    return -1;
+}
+
+// Returns the index of the first code unit at or after |startPosition| for
+// which |matchFunction| returns true, or -1 if there is none. A negative
+// |startPosition| converts to a very large unsigned index, so the scan does
+// not run and -1 is returned.
+inline int find(const UChar* characters, size_t length, CharacterMatchFunctionPtr matchFunction, int startPosition)
+{
+    if (static_cast<int>(length) <= startPosition)
+        return -1;
+
+    size_t index = startPosition;
+    while (index < length) {
+        if (matchFunction(characters[index]))
+            return static_cast<int>(index);
+        ++index;
+    }
+    return -1;
+}
+
+// Searches backwards from |startPosition| for |character| and returns the
+// index of the match, or -1 if there is none. A negative |startPosition|
+// counts from the end of the buffer (-1 is the last code unit). Positions at
+// or past |length|, and negative positions that reach before the start of the
+// buffer, yield -1.
+inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition)
+{
+    const int signedLength = static_cast<int>(length);
+    if (!length || startPosition >= signedLength)
+        return -1;
+
+    if (startPosition < 0) {
+        startPosition += signedLength;
+        // An offset further back than the buffer is long would otherwise be
+        // used as a negative array index below.
+        if (startPosition < 0)
+            return -1;
+    }
+
+    for (int index = startPosition; index >= 0; --index) {
+        if (characters[index] == character)
+            return index;
+    }
+    return -1;
+}
+
+// Appends all of |string|'s code units to |vector|.
+inline void append(Vector<UChar>& vector, const String& string)
+{
+    const UChar* source = string.characters();
+    const unsigned sourceLength = string.length();
+    vector.append(source, sourceLength);
+}
+
+// Appends the decimal representation of |number| (one to three digits, no
+// padding) to |vector|.
+inline void appendNumber(Vector<UChar>& vector, unsigned char number)
+{
+    int digitCount = 1;
+    if (number > 99)
+        digitCount = 3;
+    else if (number > 9)
+        digitCount = 2;
+
+    const size_t firstDigitIndex = vector.size();
+    vector.grow(firstDigitIndex + digitCount);
+
+    // Fill in the digits from least to most significant.
+    for (int offset = digitCount - 1; offset >= 0; --offset) {
+        vector[firstDigitIndex + offset] = number % 10 + '0';
+        number /= 10;
+    }
+}
+
+} // namespace WebCore
+
+namespace WTF {
+
+ // Makes StringHash the DefaultHash for WebCore::String (StringHash is only forward-declared in this header).
+ template<typename T> struct DefaultHash; // declaration of the primary template
+ template<> struct DefaultHash<WebCore::String> {
+ typedef WebCore::StringHash Hash;
+ };
+
+}
+
+#endif