summaryrefslogtreecommitdiffstats
path: root/JavaScriptCore/wtf/text/StringImpl.cpp
diff options
context:
space:
mode:
authorIain Merrick <husky@google.com>2010-08-19 17:55:56 +0100
committerIain Merrick <husky@google.com>2010-08-23 11:05:40 +0100
commitf486d19d62f1bc33246748b14b14a9dfa617b57f (patch)
tree195485454c93125455a30e553a73981c3816144d /JavaScriptCore/wtf/text/StringImpl.cpp
parent6ba0b43722d16bc295606bec39f396f596e4fef1 (diff)
downloadexternal_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.zip
external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.tar.gz
external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.tar.bz2
Merge WebKit at r65615 : Initial merge by git.
Change-Id: Ifbf384f4531e3b58475a662e38195c2d9152ae79
Diffstat (limited to 'JavaScriptCore/wtf/text/StringImpl.cpp')
-rw-r--r--JavaScriptCore/wtf/text/StringImpl.cpp409
1 files changed, 235 insertions, 174 deletions
diff --git a/JavaScriptCore/wtf/text/StringImpl.cpp b/JavaScriptCore/wtf/text/StringImpl.cpp
index 3669628..ab0f009 100644
--- a/JavaScriptCore/wtf/text/StringImpl.cpp
+++ b/JavaScriptCore/wtf/text/StringImpl.cpp
@@ -498,175 +498,250 @@ int codePointCompare(const StringImpl* s1, const StringImpl* s2)
return (l1 > l2) ? 1 : -1;
}
-int StringImpl::find(const char* chs, int index, bool caseSensitive)
+size_t StringImpl::find(UChar c, unsigned start)
{
- if (!chs || index < 0)
- return -1;
+ return WTF::find(m_data, m_length, c, start);
+}
- int chsLength = strlen(chs);
- int n = m_length - index;
- if (n < 0)
- return -1;
- n -= chsLength - 1;
- if (n <= 0)
- return -1;
+size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
+{
+ return WTF::find(m_data, m_length, matchFunction, start);
+}
- const char* chsPlusOne = chs + 1;
- int chsLengthMinusOne = chsLength - 1;
-
- const UChar* ptr = m_data + index - 1;
- if (caseSensitive) {
- UChar c = *chs;
- do {
- if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
- return m_length - chsLength - n + 1;
- } while (--n);
- } else {
- UChar lc = Unicode::foldCase(*chs);
- do {
- if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
- return m_length - chsLength - n + 1;
- } while (--n);
+size_t StringImpl::find(const char* matchString, unsigned index)
+{
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = strlen(matchString);
+ if (!matchLength)
+ return min(index, length());
+
+ // Optimization 1: fast case for strings of length 1.
+ if (matchLength == 1)
+ return WTF::find(characters(), length(), *(const unsigned char*)matchString, index);
+
+ // Check index & matchLength are in range.
+ if (index > length())
+ return notFound;
+ unsigned searchLength = length() - index;
+ if (matchLength > searchLength)
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = searchLength - matchLength;
+
+ const UChar* searchCharacters = characters() + index;
+ const unsigned char* matchCharacters = (const unsigned char*)matchString;
+
+ // Optimization 2: keep a running hash of the strings,
+ // only call memcmp if the hashes match.
+ unsigned searchHash = 0;
+ unsigned matchHash = 0;
+ for (unsigned i = 0; i < matchLength; ++i) {
+ searchHash += searchCharacters[i];
+ matchHash += matchCharacters[i];
}
- return -1;
+ unsigned i = 0;
+ // keep looping until we match
+ while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
+ if (i == delta)
+ return notFound;
+ searchHash += searchCharacters[i + matchLength];
+ searchHash -= searchCharacters[i];
+ ++i;
+ }
+ return index + i;
}
-int StringImpl::find(UChar c, int start)
+size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index)
{
- return WTF::find(m_data, m_length, c, start);
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = strlen(matchString);
+ if (!matchLength)
+ return min(index, length());
+
+ // Check index & matchLength are in range.
+ if (index > length())
+ return notFound;
+ unsigned searchLength = length() - index;
+ if (matchLength > searchLength)
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = searchLength - matchLength;
+
+ const UChar* searchCharacters = characters() + index;
+
+ unsigned i = 0;
+ // keep looping until we match
+ while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
+ if (i == delta)
+ return notFound;
+ ++i;
+ }
+ return index + i;
}
-int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start)
+size_t StringImpl::find(StringImpl* matchString, unsigned index)
{
- return WTF::find(m_data, m_length, matchFunction, start);
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = matchString->length();
+ if (!matchLength)
+ return min(index, length());
+
+ // Optimization 1: fast case for strings of length 1.
+ if (matchLength == 1)
+ return WTF::find(characters(), length(), matchString->characters()[0], index);
+
+ // Check index & matchLength are in range.
+ if (index > length())
+ return notFound;
+ unsigned searchLength = length() - index;
+ if (matchLength > searchLength)
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = searchLength - matchLength;
+
+ const UChar* searchCharacters = characters() + index;
+ const UChar* matchCharacters = matchString->characters();
+
+ // Optimization 2: keep a running hash of the strings,
+ // only call memcmp if the hashes match.
+ unsigned searchHash = 0;
+ unsigned matchHash = 0;
+ for (unsigned i = 0; i < matchLength; ++i) {
+ searchHash += searchCharacters[i];
+ matchHash += matchCharacters[i];
+ }
+
+ unsigned i = 0;
+ // keep looping until we match
+ while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) {
+ if (i == delta)
+ return notFound;
+ searchHash += searchCharacters[i + matchLength];
+ searchHash -= searchCharacters[i];
+ ++i;
+ }
+ return index + i;
}
-int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
-{
- /*
- We use a simple trick for efficiency's sake. Instead of
- comparing strings, we compare the sum of str with that of
- a part of this string. Only if that matches, we call memcmp
- or ucstrnicmp.
- */
- ASSERT(str);
- if (index < 0)
- index += m_length;
- int lstr = str->m_length;
- int lthis = m_length - index;
- if ((unsigned)lthis > m_length)
- return -1;
- int delta = lthis - lstr;
- if (delta < 0)
- return -1;
-
- const UChar* uthis = m_data + index;
- const UChar* ustr = str->m_data;
- unsigned hthis = 0;
- unsigned hstr = 0;
- if (caseSensitive) {
- for (int i = 0; i < lstr; i++) {
- hthis += uthis[i];
- hstr += ustr[i];
- }
- int i = 0;
- while (1) {
- if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
- return index + i;
- if (i == delta)
- return -1;
- hthis += uthis[i + lstr];
- hthis -= uthis[i];
- i++;
- }
- } else {
- for (int i = 0; i < lstr; i++ ) {
- hthis += toASCIILower(uthis[i]);
- hstr += toASCIILower(ustr[i]);
- }
- int i = 0;
- while (1) {
- if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
- return index + i;
- if (i == delta)
- return -1;
- hthis += toASCIILower(uthis[i + lstr]);
- hthis -= toASCIILower(uthis[i]);
- i++;
- }
+size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
+{
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = matchString->length();
+ if (!matchLength)
+ return min(index, length());
+
+ // Check index & matchLength are in range.
+ if (index > length())
+ return notFound;
+ unsigned searchLength = length() - index;
+ if (matchLength > searchLength)
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = searchLength - matchLength;
+
+ const UChar* searchCharacters = characters() + index;
+ const UChar* matchCharacters = matchString->characters();
+
+ unsigned i = 0;
+ // keep looping until we match
+ while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
+ if (i == delta)
+ return notFound;
+ ++i;
}
+ return index + i;
}
-int StringImpl::reverseFind(UChar c, int index)
+size_t StringImpl::reverseFind(UChar c, unsigned index)
{
return WTF::reverseFind(m_data, m_length, c, index);
}
-int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
+size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
{
- /*
- See StringImpl::find() for explanations.
- */
- ASSERT(str);
- int lthis = m_length;
- if (index < 0)
- index += lthis;
-
- int lstr = str->m_length;
- int delta = lthis - lstr;
- if ( index < 0 || index > lthis || delta < 0 )
- return -1;
- if ( index > delta )
- index = delta;
-
- const UChar *uthis = m_data;
- const UChar *ustr = str->m_data;
- unsigned hthis = 0;
- unsigned hstr = 0;
- int i;
- if (caseSensitive) {
- for ( i = 0; i < lstr; i++ ) {
- hthis += uthis[index + i];
- hstr += ustr[i];
- }
- i = index;
- while (1) {
- if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
- return i;
- if (i == 0)
- return -1;
- i--;
- hthis -= uthis[i + lstr];
- hthis += uthis[i];
- }
- } else {
- for (i = 0; i < lstr; i++) {
- hthis += toASCIILower(uthis[index + i]);
- hstr += toASCIILower(ustr[i]);
- }
- i = index;
- while (1) {
- if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
- return i;
- if (i == 0)
- return -1;
- i--;
- hthis -= toASCIILower(uthis[i + lstr]);
- hthis += toASCIILower(uthis[i]);
- }
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = matchString->length();
+ if (!matchLength)
+ return min(index, length());
+
+ // Optimization 1: fast case for strings of length 1.
+ if (matchLength == 1)
+ return WTF::reverseFind(characters(), length(), matchString->characters()[0], index);
+
+ // Check index & matchLength are in range.
+ if (matchLength > length())
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = min(index, length() - matchLength);
+
+ const UChar *searchCharacters = characters();
+ const UChar *matchCharacters = matchString->characters();
+
+ // Optimization 2: keep a running hash of the strings,
+ // only call memcmp if the hashes match.
+ unsigned searchHash = 0;
+ unsigned matchHash = 0;
+ for (unsigned i = 0; i < matchLength; ++i) {
+ searchHash += searchCharacters[delta + i];
+ matchHash += matchCharacters[i];
+ }
+
+ // keep looping until we match
+ while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) {
+ if (!delta)
+ return notFound;
+ delta--;
+ searchHash -= searchCharacters[delta + matchLength];
+ searchHash += searchCharacters[delta];
}
+ return delta;
+}
+
+size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
+{
+ // Check for null or empty string to match against
+ if (!matchString)
+ return notFound;
+ unsigned matchLength = matchString->length();
+ if (!matchLength)
+ return min(index, length());
+
+ // Check index & matchLength are in range.
+ if (matchLength > length())
+ return notFound;
+ // delta is the number of additional times to test; delta == 0 means test only once.
+ unsigned delta = min(index, length() - matchLength);
- // Should never get here.
- return -1;
+ const UChar *searchCharacters = characters();
+ const UChar *matchCharacters = matchString->characters();
+
+ // keep looping until we match
+ while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
+ if (!delta)
+ return notFound;
+ delta--;
+ }
+ return delta;
}
bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
{
ASSERT(m_data);
- int start = m_length - m_data->m_length;
- if (start >= 0)
- return (find(m_data, start, caseSensitive) == start);
+ if (m_length >= m_data->m_length) {
+ unsigned start = m_length - m_data->m_length;
+ return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start;
+ }
return false;
}
@@ -716,12 +791,12 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
if (!replacement)
return this;
- int repStrLength = replacement->length();
- int srcSegmentStart = 0;
- int matchCount = 0;
+ unsigned repStrLength = replacement->length();
+ size_t srcSegmentStart = 0;
+ unsigned matchCount = 0;
// Count the matches
- while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+ while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
++matchCount;
++srcSegmentStart;
}
@@ -735,12 +810,12 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
createUninitialized(m_length - matchCount + (matchCount * repStrLength), data);
// Construct the new data
- int srcSegmentEnd;
- int srcSegmentLength;
+ size_t srcSegmentEnd;
+ unsigned srcSegmentLength;
srcSegmentStart = 0;
- int dstOffset = 0;
+ unsigned dstOffset = 0;
- while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+ while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
srcSegmentLength = srcSegmentEnd - srcSegmentStart;
memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
dstOffset += srcSegmentLength;
@@ -752,7 +827,7 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen
srcSegmentLength = m_length - srcSegmentStart;
memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
- ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
+ ASSERT(dstOffset + srcSegmentLength == newImpl->length());
return newImpl;
}
@@ -762,16 +837,16 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
if (!pattern || !replacement)
return this;
- int patternLength = pattern->length();
+ unsigned patternLength = pattern->length();
if (!patternLength)
return this;
- int repStrLength = replacement->length();
- int srcSegmentStart = 0;
- int matchCount = 0;
+ unsigned repStrLength = replacement->length();
+ size_t srcSegmentStart = 0;
+ unsigned matchCount = 0;
// Count the matches
- while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+ while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
++matchCount;
srcSegmentStart += patternLength;
}
@@ -785,12 +860,12 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
createUninitialized(m_length + matchCount * (repStrLength - patternLength), data);
// Construct the new data
- int srcSegmentEnd;
- int srcSegmentLength;
+ size_t srcSegmentEnd;
+ unsigned srcSegmentLength;
srcSegmentStart = 0;
- int dstOffset = 0;
+ unsigned dstOffset = 0;
- while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+ while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
srcSegmentLength = srcSegmentEnd - srcSegmentStart;
memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
dstOffset += srcSegmentLength;
@@ -802,7 +877,7 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl
srcSegmentLength = m_length - srcSegmentStart;
memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
- ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
+ ASSERT(dstOffset + srcSegmentLength == newImpl->length());
return newImpl;
}
@@ -883,20 +958,6 @@ bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
return false;
}
-Vector<char> StringImpl::ascii()
-{
- Vector<char> buffer(m_length + 1);
- for (unsigned i = 0; i != m_length; ++i) {
- UChar c = m_data[i];
- if ((c >= 0x20 && c < 0x7F) || c == 0x00)
- buffer[i] = static_cast<char>(c);
- else
- buffer[i] = '?';
- }
- buffer[m_length] = '\0';
- return buffer;
-}
-
WTF::Unicode::Direction StringImpl::defaultWritingDirection()
{
for (unsigned i = 0; i < m_length; ++i) {