summaryrefslogtreecommitdiffstats
path: root/JavaScriptCore/wtf/text
diff options
context:
space:
mode:
Diffstat (limited to 'JavaScriptCore/wtf/text')
-rw-r--r--JavaScriptCore/wtf/text/AtomicString.h14
-rw-r--r--JavaScriptCore/wtf/text/AtomicStringHash.h62
-rw-r--r--JavaScriptCore/wtf/text/StringImpl.cpp409
-rw-r--r--JavaScriptCore/wtf/text/StringImpl.h23
-rw-r--r--JavaScriptCore/wtf/text/WTFString.cpp97
-rw-r--r--JavaScriptCore/wtf/text/WTFString.h226
6 files changed, 506 insertions, 325 deletions
diff --git a/JavaScriptCore/wtf/text/AtomicString.h b/JavaScriptCore/wtf/text/AtomicString.h
index d29981a..cfabde7 100644
--- a/JavaScriptCore/wtf/text/AtomicString.h
+++ b/JavaScriptCore/wtf/text/AtomicString.h
@@ -32,14 +32,10 @@
#define ATOMICSTRING_CONVERSION
#endif
-// FIXME: this should be in WTF, too!
-namespace WebCore {
-struct AtomicStringHash;
-}
-using WebCore::AtomicStringHash;
-
namespace WTF {
+struct AtomicStringHash;
+
class AtomicString {
public:
static void init();
@@ -75,10 +71,10 @@ public:
bool contains(const String& s, bool caseSensitive = true) const
{ return m_string.contains(s, caseSensitive); }
- int find(UChar c, int start = 0) const { return m_string.find(c, start); }
- int find(const char* s, int start = 0, bool caseSentitive = true) const
+ size_t find(UChar c, size_t start = 0) const { return m_string.find(c, start); }
+ size_t find(const char* s, size_t start = 0, bool caseSentitive = true) const
{ return m_string.find(s, start, caseSentitive); }
- int find(const String& s, int start = 0, bool caseSentitive = true) const
+ size_t find(const String& s, size_t start = 0, bool caseSentitive = true) const
{ return m_string.find(s, start, caseSentitive); }
bool startsWith(const String& s, bool caseSensitive = true) const
diff --git a/JavaScriptCore/wtf/text/AtomicStringHash.h b/JavaScriptCore/wtf/text/AtomicStringHash.h
new file mode 100644
index 0000000..f6e4ad1
--- /dev/null
+++ b/JavaScriptCore/wtf/text/AtomicStringHash.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * its contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef AtomicStringHash_h
+#define AtomicStringHash_h
+
+#include <wtf/text/AtomicString.h>
+#include <wtf/HashTraits.h>
+
+namespace WTF {
+
+ struct AtomicStringHash {
+ static unsigned hash(const AtomicString& key)
+ {
+ return key.impl()->existingHash();
+ }
+
+ static bool equal(const AtomicString& a, const AtomicString& b)
+ {
+ return a == b;
+ }
+
+ static const bool safeToCompareToEmptyOrDeleted = false;
+ };
+
+ // AtomicStringHash is the default hash for AtomicString
+ template<> struct HashTraits<WTF::AtomicString> : GenericHashTraits<WTF::AtomicString> {
+ static const bool emptyValueIsZero = true;
+ static void constructDeletedValue(WTF::AtomicString& slot) { new (&slot) WTF::AtomicString(HashTableDeletedValue); }
+ static bool isDeletedValue(const WTF::AtomicString& slot) { return slot.isHashTableDeletedValue(); }
+ };
+
+}
+
+using WTF::AtomicStringHash;
+
+#endif
diff --git a/JavaScriptCore/wtf/text/StringImpl.cpp b/JavaScriptCore/wtf/text/StringImpl.cpp
index 3669628..ab0f009 100644
--- a/JavaScriptCore/wtf/text/StringImpl.cpp
+++ b/JavaScriptCore/wtf/text/StringImpl.cpp
@@ -498,175 +498,250 @@ int codePointCompare(const StringImpl* s1, const StringImpl* s2)
return (l1 > l2) ? 1 : -1;
}
-int StringImpl::find(const char* chs, int index, bool caseSensitive)
+size_t StringImpl::find(UChar c, unsigned start)
{
- if (!chs || index < 0)
- return -1;
+ return WTF::find(m_data, m_length, c, start);
+}
- int chsLength = strlen(chs);
- int n = m_length - index;
- if (n < 0)
- return -1;
- n -= chsLength - 1;
- if (n <= 0)
- return -1;
+size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
+{
+ return WTF::find(m_data, m_length, matchFunction, start);
+}
- const char* chsPlusOne = chs + 1;
- int chsLengthMinusOne = chsLength - 1;
-
- const UChar* ptr = m_data + index - 1;
- if (caseSensitive) {
- UChar c = *chs;
- do {
- if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
- return m_length - chsLength - n + 1;
- } while (--n);
- } else {
- UChar lc = Unicode::foldCase(*chs);
- do {
- if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
- return m_length - chsLength - n + 1;
- } while (--n);
+size_t StringImpl::find(const char* matchString, unsigned index)
+{
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = strlen(matchString);
+ if (!matchLength)
+ return min(index, length());
+
+ // Optimization 1: fast case for strings of length 1.
+ if (matchLength == 1)
+ return WTF::find(characters(), length(), *(const unsigned char*)matchString, index);
+
+ // Check index & matchLength are in range.
+ if (index > length())
+ return notFound;
+ unsigned searchLength = length() - index;
+ if (matchLength > searchLength)
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = searchLength - matchLength;
+
+ const UChar* searchCharacters = characters() + index;
+ const unsigned char* matchCharacters = (const unsigned char*)matchString;
+
+ // Optimization 2: keep a running hash of the strings,
+ // only call memcmp if the hashes match.
+ unsigned searchHash = 0;
+ unsigned matchHash = 0;
+ for (unsigned i = 0; i < matchLength; ++i) {
+ searchHash += searchCharacters[i];
+ matchHash += matchCharacters[i];
}
- return -1;
+ unsigned i = 0;
+ // keep looping until we match
+ while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
+ if (i == delta)
+ return notFound;
+ searchHash += searchCharacters[i + matchLength];
+ searchHash -= searchCharacters[i];
+ ++i;
+ }
+ return index + i;
}
-int StringImpl::find(UChar c, int start)
+size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index)
{
- return WTF::find(m_data, m_length, c, start);
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = strlen(matchString);
+ if (!matchLength)
+ return min(index, length());
+
+ // Check index & matchLength are in range.
+ if (index > length())
+ return notFound;
+ unsigned searchLength = length() - index;
+ if (matchLength > searchLength)
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = searchLength - matchLength;
+
+ const UChar* searchCharacters = characters() + index;
+
+ unsigned i = 0;
+ // keep looping until we match
+ while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
+ if (i == delta)
+ return notFound;
+ ++i;
+ }
+ return index + i;
}
-int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start)
+size_t StringImpl::find(StringImpl* matchString, unsigned index)
{
- return WTF::find(m_data, m_length, matchFunction, start);
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = matchString->length();
+ if (!matchLength)
+ return min(index, length());
+
+ // Optimization 1: fast case for strings of length 1.
+ if (matchLength == 1)
+ return WTF::find(characters(), length(), matchString->characters()[0], index);
+
+ // Check index & matchLength are in range.
+ if (index > length())
+ return notFound;
+ unsigned searchLength = length() - index;
+ if (matchLength > searchLength)
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = searchLength - matchLength;
+
+ const UChar* searchCharacters = characters() + index;
+ const UChar* matchCharacters = matchString->characters();
+
+ // Optimization 2: keep a running hash of the strings,
+ // only call memcmp if the hashes match.
+ unsigned searchHash = 0;
+ unsigned matchHash = 0;
+ for (unsigned i = 0; i < matchLength; ++i) {
+ searchHash += searchCharacters[i];
+ matchHash += matchCharacters[i];
+ }
+
+ unsigned i = 0;
+ // keep looping until we match
+ while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) {
+ if (i == delta)
+ return notFound;
+ searchHash += searchCharacters[i + matchLength];
+ searchHash -= searchCharacters[i];
+ ++i;
+ }
+ return index + i;
}
-int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
-{
- /*
- We use a simple trick for efficiency's sake. Instead of
- comparing strings, we compare the sum of str with that of
- a part of this string. Only if that matches, we call memcmp
- or ucstrnicmp.
- */
- ASSERT(str);
- if (index < 0)
- index += m_length;
- int lstr = str->m_length;
- int lthis = m_length - index;
- if ((unsigned)lthis > m_length)
- return -1;
- int delta = lthis - lstr;
- if (delta < 0)
- return -1;
-
- const UChar* uthis = m_data + index;
- const UChar* ustr = str->m_data;
- unsigned hthis = 0;
- unsigned hstr = 0;
- if (caseSensitive) {
- for (int i = 0; i < lstr; i++) {
- hthis += uthis[i];
- hstr += ustr[i];
- }
- int i = 0;
- while (1) {
- if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
- return index + i;
- if (i == delta)
- return -1;
- hthis += uthis[i + lstr];
- hthis -= uthis[i];
- i++;
- }
- } else {
- for (int i = 0; i < lstr; i++ ) {
- hthis += toASCIILower(uthis[i]);
- hstr += toASCIILower(ustr[i]);
- }
- int i = 0;
- while (1) {
- if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
- return index + i;
- if (i == delta)
- return -1;
- hthis += toASCIILower(uthis[i + lstr]);
- hthis -= toASCIILower(uthis[i]);
- i++;
- }
+size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
+{
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = matchString->length();
+ if (!matchLength)
+ return min(index, length());
+
+ // Check index & matchLength are in range.
+ if (index > length())
+ return notFound;
+ unsigned searchLength = length() - index;
+ if (matchLength > searchLength)
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = searchLength - matchLength;
+
+ const UChar* searchCharacters = characters() + index;
+ const UChar* matchCharacters = matchString->characters();
+
+ unsigned i = 0;
+ // keep looping until we match
+ while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
+ if (i == delta)
+ return notFound;
+ ++i;
}
+ return index + i;
}
-int StringImpl::reverseFind(UChar c, int index)
+size_t StringImpl::reverseFind(UChar c, unsigned index)
{
return WTF::reverseFind(m_data, m_length, c, index);
}
-int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
+size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
{
- /*
- See StringImpl::find() for explanations.
- */
- ASSERT(str);
- int lthis = m_length;
- if (index < 0)
- index += lthis;
-
- int lstr = str->m_length;
- int delta = lthis - lstr;
- if ( index < 0 || index > lthis || delta < 0 )
- return -1;
- if ( index > delta )
- index = delta;
-
- const UChar *uthis = m_data;
- const UChar *ustr = str->m_data;
- unsigned hthis = 0;
- unsigned hstr = 0;
- int i;
- if (caseSensitive) {
- for ( i = 0; i < lstr; i++ ) {
- hthis += uthis[index + i];
- hstr += ustr[i];
- }
- i = index;
- while (1) {
- if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
- return i;
- if (i == 0)
- return -1;
- i--;
- hthis -= uthis[i + lstr];
- hthis += uthis[i];
- }
- } else {
- for (i = 0; i < lstr; i++) {
- hthis += toASCIILower(uthis[index + i]);
- hstr += toASCIILower(ustr[i]);
- }
- i = index;
- while (1) {
- if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
- return i;
- if (i == 0)
- return -1;
- i--;
- hthis -= toASCIILower(uthis[i + lstr]);
- hthis += toASCIILower(uthis[i]);
- }
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = matchString->length();
+ if (!matchLength)
+ return min(index, length());
+
+ // Optimization 1: fast case for strings of length 1.
+ if (matchLength == 1)
+ return WTF::reverseFind(characters(), length(), matchString->characters()[0], index);
+
+ // Check index & matchLength are in range.
+ if (matchLength > length())
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = min(index, length() - matchLength);
+
+ const UChar *searchCharacters = characters();
+ const UChar *matchCharacters = matchString->characters();
+
+ // Optimization 2: keep a running hash of the strings,
+ // only call memcmp if the hashes match.
+ unsigned searchHash = 0;
+ unsigned matchHash = 0;
+ for (unsigned i = 0; i < matchLength; ++i) {
+ searchHash += searchCharacters[delta + i];
+ matchHash += matchCharacters[i];
+ }
+
+ // keep looping until we match
+ while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) {
+ if (!delta)
+ return notFound;
+ delta--;
+ searchHash -= searchCharacters[delta + matchLength];
+ searchHash += searchCharacters[delta];
}
+ return delta;
+}
+
+size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
+{
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = matchString->length();
+ if (!matchLength)
+ return min(index, length());
+
+ // Check index & matchLength are in range.
+ if (matchLength > length())
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = min(index, length() - matchLength);
- // Should never get here.
- return -1;
+ const UChar *searchCharacters = characters();
+ const UChar *matchCharacters = matchString->characters();
+
+ // keep looping until we match
+ while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
+ if (!delta)
+ return notFound;
+ delta--;
+ }
+ return delta;
}
bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
{
ASSERT(m_data);
- int start = m_length - m_data->m_length;
- if (start >= 0)
- return (find(m_data, start, caseSensitive) == start);
+ if (m_length >= m_data->m_length) {
+ unsigned start = m_length - m_data->m_length;
+ return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start;
+ }
return false;
}
@@ -716,12 +791,12 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
if (!replacement)
return this;
- int repStrLength = replacement->length();
- int srcSegmentStart = 0;
- int matchCount = 0;
+ unsigned repStrLength = replacement->length();
+ size_t srcSegmentStart = 0;
+ unsigned matchCount = 0;
// Count the matches
- while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+ while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
++matchCount;
++srcSegmentStart;
}
@@ -735,12 +810,12 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
createUninitialized(m_length - matchCount + (matchCount * repStrLength), data);
// Construct the new data
- int srcSegmentEnd;
- int srcSegmentLength;
+ size_t srcSegmentEnd;
+ unsigned srcSegmentLength;
srcSegmentStart = 0;
- int dstOffset = 0;
+ unsigned dstOffset = 0;
- while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+ while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
srcSegmentLength = srcSegmentEnd - srcSegmentStart;
memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
dstOffset += srcSegmentLength;
@@ -752,7 +827,7 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
srcSegmentLength = m_length - srcSegmentStart;
memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
- ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
+ ASSERT(dstOffset + srcSegmentLength == newImpl->length());
return newImpl;
}
@@ -762,16 +837,16 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
if (!pattern || !replacement)
return this;
- int patternLength = pattern->length();
+ unsigned patternLength = pattern->length();
if (!patternLength)
return this;
- int repStrLength = replacement->length();
- int srcSegmentStart = 0;
- int matchCount = 0;
+ unsigned repStrLength = replacement->length();
+ size_t srcSegmentStart = 0;
+ unsigned matchCount = 0;
// Count the matches
- while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+ while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
++matchCount;
srcSegmentStart += patternLength;
}
@@ -785,12 +860,12 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
createUninitialized(m_length + matchCount * (repStrLength - patternLength), data);
// Construct the new data
- int srcSegmentEnd;
- int srcSegmentLength;
+ size_t srcSegmentEnd;
+ unsigned srcSegmentLength;
srcSegmentStart = 0;
- int dstOffset = 0;
+ unsigned dstOffset = 0;
- while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+ while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
srcSegmentLength = srcSegmentEnd - srcSegmentStart;
memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
dstOffset += srcSegmentLength;
@@ -802,7 +877,7 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
srcSegmentLength = m_length - srcSegmentStart;
memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
- ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
+ ASSERT(dstOffset + srcSegmentLength == newImpl->length());
return newImpl;
}
@@ -883,20 +958,6 @@ bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
return false;
}
-Vector<char> StringImpl::ascii()
-{
- Vector<char> buffer(m_length + 1);
- for (unsigned i = 0; i != m_length; ++i) {
- UChar c = m_data[i];
- if ((c >= 0x20 && c < 0x7F) || c == 0x00)
- buffer[i] = static_cast<char>(c);
- else
- buffer[i] = '?';
- }
- buffer[m_length] = '\0';
- return buffer;
-}
-
WTF::Unicode::Direction StringImpl::defaultWritingDirection()
{
for (unsigned i = 0; i < m_length; ++i) {
diff --git a/JavaScriptCore/wtf/text/StringImpl.h b/JavaScriptCore/wtf/text/StringImpl.h
index 6080474..cec0b80 100644
--- a/JavaScriptCore/wtf/text/StringImpl.h
+++ b/JavaScriptCore/wtf/text/StringImpl.h
@@ -290,15 +290,18 @@ public:
PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
- int find(const char*, int index = 0, bool caseSensitive = true);
- int find(UChar, int index = 0);
- int find(CharacterMatchFunctionPtr, int index = 0);
- int find(StringImpl*, int index, bool caseSensitive = true);
-
- int reverseFind(UChar, int index);
- int reverseFind(StringImpl*, int index, bool caseSensitive = true);
-
- bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; }
+ size_t find(UChar, unsigned index = 0);
+ size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
+ size_t find(const char*, unsigned index = 0);
+ size_t find(StringImpl*, unsigned index = 0);
+ size_t findIgnoringCase(const char*, unsigned index = 0);
+ size_t findIgnoringCase(StringImpl*, unsigned index = 0);
+
+ size_t reverseFind(UChar, unsigned index = UINT_MAX);
+ size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
+ size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
+
+ bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; }
bool endsWith(StringImpl*, bool caseSensitive = true);
PassRefPtr<StringImpl> replace(UChar, UChar);
@@ -306,8 +309,6 @@ public:
PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
- Vector<char> ascii();
-
WTF::Unicode::Direction defaultWritingDirection();
#if PLATFORM(CF)
diff --git a/JavaScriptCore/wtf/text/WTFString.cpp b/JavaScriptCore/wtf/text/WTFString.cpp
index 6c4de6e..7d44d21 100644
--- a/JavaScriptCore/wtf/text/WTFString.cpp
+++ b/JavaScriptCore/wtf/text/WTFString.cpp
@@ -36,6 +36,13 @@ namespace WTF {
using namespace Unicode;
+// Construct a string with UTF-16 data.
+String::String(const UChar* characters, unsigned length)
+ : m_impl(characters ? StringImpl::create(characters, length) : 0)
+{
+}
+
+// Construct a string with UTF-16 data, from a null-terminated source.
String::String(const UChar* str)
{
if (!str)
@@ -48,6 +55,18 @@ String::String(const UChar* str)
m_impl = StringImpl::create(str, len);
}
+// Construct a string with latin1 data.
+String::String(const char* characters, unsigned length)
+ : m_impl(characters ? StringImpl::create(characters, length) : 0)
+{
+}
+
+// Construct a string with latin1 data, from a null-terminated source.
+String::String(const char* characters)
+ : m_impl(characters ? StringImpl::create(characters) : 0)
+{
+}
+
void String::append(const String& str)
{
if (str.isEmpty())
@@ -226,6 +245,19 @@ String String::substring(unsigned pos, unsigned len) const
return m_impl->substring(pos, len);
}
+String String::substringSharingImpl(unsigned offset, unsigned length) const
+{
+ // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
+
+ unsigned stringLength = this->length();
+ offset = min(offset, stringLength);
+ length = min(length, stringLength - offset);
+
+ if (!offset && length == stringLength)
+ return *this;
+ return String(StringImpl::create(m_impl, offset, length));
+}
+
String String::lower() const
{
if (!m_impl)
@@ -557,14 +589,14 @@ void String::split(const String& separator, bool allowEmptyEntries, Vector<Strin
{
result.clear();
- int startPos = 0;
- int endPos;
- while ((endPos = find(separator, startPos)) != -1) {
+ unsigned startPos = 0;
+ size_t endPos;
+ while ((endPos = find(separator, startPos)) != notFound) {
if (allowEmptyEntries || startPos != endPos)
result.append(substring(startPos, endPos - startPos));
startPos = endPos + separator.length();
}
- if (allowEmptyEntries || startPos != static_cast<int>(length()))
+ if (allowEmptyEntries || startPos != length())
result.append(substring(startPos));
}
@@ -577,14 +609,14 @@ void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& resu
{
result.clear();
- int startPos = 0;
- int endPos;
- while ((endPos = find(separator, startPos)) != -1) {
+ unsigned startPos = 0;
+ size_t endPos;
+ while ((endPos = find(separator, startPos)) != notFound) {
if (allowEmptyEntries || startPos != endPos)
result.append(substring(startPos, endPos - startPos));
startPos = endPos + 1;
}
- if (allowEmptyEntries || startPos != static_cast<int>(length()))
+ if (allowEmptyEntries || startPos != length())
result.append(substring(startPos));
}
@@ -593,18 +625,23 @@ void String::split(UChar separator, Vector<String>& result) const
return split(String(&separator, 1), false, result);
}
-Vector<char> String::ascii() const
+CString String::ascii() const
{
- if (m_impl)
- return m_impl->ascii();
-
- const char* nullMsg = "(null impl)";
- Vector<char, 2048> buffer;
- for (int i = 0; nullMsg[i]; ++i)
- buffer.append(nullMsg[i]);
-
- buffer.append('\0');
- return buffer;
+ // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
+ // preserved, characters outside of this range are converted to '?'.
+
+ unsigned length = this->length();
+ const UChar* characters = this->characters();
+
+ char* characterBuffer;
+ CString result = CString::newUninitialized(length, characterBuffer);
+
+ for (unsigned i = 0; i < length; ++i) {
+ UChar ch = characters[i];
+ characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
+ }
+
+ return result;
}
CString String::latin1() const
@@ -620,7 +657,7 @@ CString String::latin1() const
for (unsigned i = 0; i < length; ++i) {
UChar ch = characters[i];
- characterBuffer[i] = ch > 255 ? '?' : ch;
+ characterBuffer[i] = ch > 0xff ? '?' : ch;
}
return result;
@@ -635,7 +672,7 @@ static inline void putUTF8Triple(char*& buffer, UChar ch)
*buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
}
-CString String::utf8() const
+CString String::utf8(bool strict) const
{
unsigned length = this->length();
const UChar* characters = this->characters();
@@ -653,15 +690,21 @@ CString String::utf8() const
Vector<char, 1024> bufferVector(length * 3);
char* buffer = bufferVector.data();
- ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), false);
- ASSERT(result != sourceIllegal); // Only produced from strict conversion.
+ ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
- // If a high surrogate is left unconverted, treat it the same was as an unpaired high surrogate
- // would have been handled in the middle of a string with non-strict conversion - which is to say,
- // simply encode it to UTF-8.
+ // Only produced from strict conversion.
+ if (result == sourceIllegal)
+ return CString();
+
+ // Check for an unconverted high surrogate.
if (result == sourceExhausted) {
- // This should be one unpaired high surrogate.
+ if (strict)
+ return CString();
+ // This should be one unpaired high surrogate. Treat it the same
+ // was as an unpaired high surrogate would have been handled in
+ // the middle of a string with non-strict conversion - which is
+ // to say, simply encode it to UTF-8.
ASSERT((characters + 1) == (this->characters() + length));
ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
// There should be room left, since one UChar hasn't been converted.
diff --git a/JavaScriptCore/wtf/text/WTFString.h b/JavaScriptCore/wtf/text/WTFString.h
index 6af519c..fafef12 100644
--- a/JavaScriptCore/wtf/text/WTFString.h
+++ b/JavaScriptCore/wtf/text/WTFString.h
@@ -1,6 +1,6 @@
/*
* (C) 1999 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -72,45 +72,43 @@ intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trai
double charactersToDouble(const UChar*, size_t, bool* ok = 0);
float charactersToFloat(const UChar*, size_t, bool* ok = 0);
-int find(const UChar*, size_t, UChar, int startPosition = 0);
-int reverseFind(const UChar*, size_t, UChar, int startPosition = -1);
-
class String {
public:
- String() { } // gives null string, distinguishable from an empty string
- String(const UChar* str, unsigned len)
- {
- if (!str)
- return;
- m_impl = StringImpl::create(str, len);
- }
- String(const char* str)
- {
- if (!str)
- return;
- m_impl = StringImpl::create(str);
- }
- String(const char* str, unsigned length)
- {
- if (!str)
- return;
- m_impl = StringImpl::create(str, length);
- }
- String(const UChar*); // Specifically for null terminated UTF-16
- String(StringImpl* i) : m_impl(i) { }
- String(PassRefPtr<StringImpl> i) : m_impl(i) { }
- String(RefPtr<StringImpl> i) : m_impl(i) { }
+ // Construct a null string, distinguishable from an empty string.
+ String() { }
- void swap(String& o) { m_impl.swap(o.m_impl); }
+ // Construct a string with UTF-16 data.
+ String(const UChar* characters, unsigned length);
- // Hash table deleted values, which are only constructed and never copied or destroyed.
- String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { }
- bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); }
+ // Construct a string with UTF-16 data, from a null-terminated source.
+ String(const UChar*);
+
+ // Construct a string with latin1 data.
+ String(const char* characters, unsigned length);
+
+ // Construct a string with latin1 data, from a null-terminated source.
+ String(const char* characters);
+
+ // Construct a string referencing an existing StringImpl.
+ String(StringImpl* impl) : m_impl(impl) { }
+ String(PassRefPtr<StringImpl> impl) : m_impl(impl) { }
+ String(RefPtr<StringImpl> impl) : m_impl(impl) { }
+
+ // Inline the destructor.
+ ALWAYS_INLINE ~String() { }
+
+ void swap(String& o) { m_impl.swap(o.m_impl); }
static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); }
- static String adopt(Vector<UChar>& vector) { return StringImpl::adopt(vector); }
+ template<size_t inlineCapacity>
+ static String adopt(Vector<UChar, inlineCapacity>& vector) { return StringImpl::adopt(vector); }
- ALWAYS_INLINE unsigned length() const
+ bool isNull() const { return !m_impl; }
+ bool isEmpty() const { return !m_impl || !m_impl->length(); }
+
+ StringImpl* impl() const { return m_impl.get(); }
+
+ unsigned length() const
{
if (!m_impl)
return 0;
@@ -124,34 +122,67 @@ public:
return m_impl->characters();
}
- const UChar* charactersWithNullTermination();
-
- UChar operator[](unsigned i) const // if i >= length(), returns 0
+ CString ascii() const;
+ CString latin1() const;
+ CString utf8(bool strict = false) const;
+
+ UChar operator[](unsigned index) const
{
- if (!m_impl || i >= m_impl->length())
+ if (!m_impl || index >= m_impl->length())
return 0;
- return m_impl->characters()[i];
+ return m_impl->characters()[index];
}
- UChar32 characterStartingAt(unsigned) const; // Ditto.
+
+ static String number(short);
+ static String number(unsigned short);
+ static String number(int);
+ static String number(unsigned);
+ static String number(long);
+ static String number(unsigned long);
+ static String number(long long);
+ static String number(unsigned long long);
+ static String number(double);
+
+ // Find a single character or string, also with match function & latin1 forms.
+ size_t find(UChar c, unsigned start = 0) const
+ { return m_impl ? m_impl->find(c, start) : notFound; }
+ size_t find(const String& str, unsigned start = 0) const
+ { return m_impl ? m_impl->find(str.impl(), start) : notFound; }
+ size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const
+ { return m_impl ? m_impl->find(matchFunction, start) : notFound; }
+ size_t find(const char* str, unsigned start = 0) const
+ { return m_impl ? m_impl->find(str, start) : notFound; }
+
+ // Find the last instance of a single character or string.
+ size_t reverseFind(UChar c, unsigned start = UINT_MAX) const
+ { return m_impl ? m_impl->reverseFind(c, start) : notFound; }
+ size_t reverseFind(const String& str, unsigned start = UINT_MAX) const
+ { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; }
+
+ // Case insensitive string matching.
+ size_t findIgnoringCase(const char* str, unsigned start = 0) const
+ { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; }
+ size_t findIgnoringCase(const String& str, unsigned start = 0) const
+ { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; }
+ size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const
+ { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; }
+
+ // Wrappers for find & reverseFind adding dynamic sensitivity check.
+ size_t find(const char* str, unsigned start, bool caseSensitive) const
+ { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
+ size_t find(const String& str, unsigned start, bool caseSensitive) const
+ { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
+ size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const
+ { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); }
+
+ const UChar* charactersWithNullTermination();
- bool contains(UChar c) const { return find(c) != -1; }
- bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
- bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
-
- int find(UChar c, int start = 0) const
- { return m_impl ? m_impl->find(c, start) : -1; }
- int find(CharacterMatchFunctionPtr matchFunction, int start = 0) const
- { return m_impl ? m_impl->find(matchFunction, start) : -1; }
- int find(const char* str, int start = 0, bool caseSensitive = true) const
- { return m_impl ? m_impl->find(str, start, caseSensitive) : -1; }
- int find(const String& str, int start = 0, bool caseSensitive = true) const
- { return m_impl ? m_impl->find(str.impl(), start, caseSensitive) : -1; }
-
- int reverseFind(UChar c, int start = -1) const
- { return m_impl ? m_impl->reverseFind(c, start) : -1; }
- int reverseFind(const String& str, int start = -1, bool caseSensitive = true) const
- { return m_impl ? m_impl->reverseFind(str.impl(), start, caseSensitive) : -1; }
+ UChar32 characterStartingAt(unsigned) const; // Ditto.
+ bool contains(UChar c) const { return find(c) != notFound; }
+ bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
+ bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
+
bool startsWith(const String& s, bool caseSensitive = true) const
{ return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); }
bool endsWith(const String& s, bool caseSensitive = true) const
@@ -177,6 +208,7 @@ public:
void remove(unsigned pos, int len = 1);
String substring(unsigned pos, unsigned len = UINT_MAX) const;
+ String substringSharingImpl(unsigned pos, unsigned len = UINT_MAX) const;
String left(unsigned len) const { return substring(0, len); }
String right(unsigned len) const { return substring(length() - len, len); }
@@ -192,17 +224,11 @@ public:
// Return the string with case folded for case insensitive comparison.
String foldCase() const;
- static String number(short);
- static String number(unsigned short);
- static String number(int);
- static String number(unsigned);
- static String number(long);
- static String number(unsigned long);
- static String number(long long);
- static String number(unsigned long long);
- static String number(double);
-
+#if !PLATFORM(QT)
static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
+#else
+ static String format(const char *, ...);
+#endif
// Returns an uninitialized string. The characters needs to be written
// into the buffer returned in data before the returned string is used.
@@ -238,11 +264,6 @@ public:
// to ever prefer copy() over plain old assignment.
String threadsafeCopy() const;
- bool isNull() const { return !m_impl; }
- ALWAYS_INLINE bool isEmpty() const { return !m_impl || !m_impl->length(); }
-
- StringImpl* impl() const { return m_impl.get(); }
-
#if PLATFORM(CF)
String(CFStringRef);
CFStringRef createCFString() const;
@@ -272,11 +293,6 @@ public:
operator BString() const;
#endif
- Vector<char> ascii() const;
-
- CString latin1() const;
- CString utf8() const;
-
static String fromUTF8(const char*, size_t);
static String fromUTF8(const char*);
@@ -288,6 +304,10 @@ public:
bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); }
+ // Hash table deleted values, which are only constructed and never copied or destroyed.
+ String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { }
+ bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); }
+
private:
RefPtr<StringImpl> m_impl;
};
@@ -345,43 +365,37 @@ inline bool charactersAreAllASCII(const UChar* characters, size_t length)
int codePointCompare(const String&, const String&);
-inline int find(const UChar* characters, size_t length, UChar character, int startPosition)
+inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0)
{
- if (startPosition >= static_cast<int>(length))
- return -1;
- for (size_t i = startPosition; i < length; ++i) {
- if (characters[i] == character)
- return static_cast<int>(i);
+ while (index < length) {
+ if (characters[index] == matchCharacter)
+ return index;
+ ++index;
}
- return -1;
+ return notFound;
}
-inline int find(const UChar* characters, size_t length, CharacterMatchFunctionPtr matchFunction, int startPosition)
+inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
{
- if (startPosition >= static_cast<int>(length))
- return -1;
- for (size_t i = startPosition; i < length; ++i) {
- if (matchFunction(characters[i]))
- return static_cast<int>(i);
+ while (index < length) {
+ if (matchFunction(characters[index]))
+ return index;
+ ++index;
}
- return -1;
+ return notFound;
}
-inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition)
+inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX)
{
- if (startPosition >= static_cast<int>(length) || !length)
- return -1;
- if (startPosition < 0)
- startPosition += static_cast<int>(length);
- while (true) {
- if (characters[startPosition] == character)
- return startPosition;
- if (!startPosition)
- return -1;
- startPosition--;
+ if (!length)
+ return notFound;
+ if (index >= length)
+ index = length - 1;
+ while (characters[index] != matchCharacter) {
+ if (!index--)
+ return notFound;
}
- ASSERT_NOT_REACHED();
- return -1;
+ return index;
}
inline void append(Vector<UChar>& vector, const String& string)
@@ -417,6 +431,11 @@ template<> struct DefaultHash<String> {
typedef StringHash Hash;
};
+template <> struct VectorTraits<String> : SimpleClassVectorTraits
+{
+ static const bool canInitializeWithMemset = true;
+};
+
}
using WTF::CString;
@@ -433,6 +452,5 @@ using WTF::charactersAreAllASCII;
using WTF::charactersToInt;
using WTF::charactersToFloat;
using WTF::charactersToDouble;
-using WTF::operator+;
#endif