diff options
author | Iain Merrick <husky@google.com> | 2010-08-19 17:55:56 +0100 |
---|---|---|
committer | Iain Merrick <husky@google.com> | 2010-08-23 11:05:40 +0100 |
commit | f486d19d62f1bc33246748b14b14a9dfa617b57f (patch) | |
tree | 195485454c93125455a30e553a73981c3816144d /JavaScriptCore/wtf/text/StringImpl.cpp | |
parent | 6ba0b43722d16bc295606bec39f396f596e4fef1 (diff) | |
download | external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.zip external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.tar.gz external_webkit-f486d19d62f1bc33246748b14b14a9dfa617b57f.tar.bz2 |
Merge WebKit at r65615 : Initial merge by git.
Change-Id: Ifbf384f4531e3b58475a662e38195c2d9152ae79
Diffstat (limited to 'JavaScriptCore/wtf/text/StringImpl.cpp')
-rw-r--r-- | JavaScriptCore/wtf/text/StringImpl.cpp | 409 |
1 files changed, 235 insertions, 174 deletions
diff --git a/JavaScriptCore/wtf/text/StringImpl.cpp b/JavaScriptCore/wtf/text/StringImpl.cpp index 3669628..ab0f009 100644 --- a/JavaScriptCore/wtf/text/StringImpl.cpp +++ b/JavaScriptCore/wtf/text/StringImpl.cpp @@ -498,175 +498,250 @@ int codePointCompare(const StringImpl* s1, const StringImpl* s2) return (l1 > l2) ? 1 : -1; } -int StringImpl::find(const char* chs, int index, bool caseSensitive) +size_t StringImpl::find(UChar c, unsigned start) { - if (!chs || index < 0) - return -1; + return WTF::find(m_data, m_length, c, start); +} - int chsLength = strlen(chs); - int n = m_length - index; - if (n < 0) - return -1; - n -= chsLength - 1; - if (n <= 0) - return -1; +size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) +{ + return WTF::find(m_data, m_length, matchFunction, start); +} - const char* chsPlusOne = chs + 1; - int chsLengthMinusOne = chsLength - 1; - - const UChar* ptr = m_data + index - 1; - if (caseSensitive) { - UChar c = *chs; - do { - if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne)) - return m_length - chsLength - n + 1; - } while (--n); - } else { - UChar lc = Unicode::foldCase(*chs); - do { - if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne)) - return m_length - chsLength - n + 1; - } while (--n); +size_t StringImpl::find(const char* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = strlen(matchString); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. + if (matchLength == 1) + return WTF::find(characters(), length(), *(const unsigned char*)matchString, index); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const unsigned char* matchCharacters = (const unsigned char*)matchString; + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchCharacters[i]; } - return -1; + unsigned i = 0; + // keep looping until we match + while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; } -int StringImpl::find(UChar c, int start) +size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index) { - return WTF::find(m_data, m_length, c, start); + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = strlen(matchString); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + + unsigned i = 0; + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + ++i; + } + return index + i; } -int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start) +size_t StringImpl::find(StringImpl* matchString, unsigned index) { - return WTF::find(m_data, m_length, matchFunction, start); + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. + if (matchLength == 1) + return WTF::find(characters(), length(), matchString->characters()[0], index); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const UChar* matchCharacters = matchString->characters(); + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchCharacters[i]; + } + + unsigned i = 0; + // keep looping until we match + while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; } -int StringImpl::find(StringImpl* str, int index, bool caseSensitive) -{ - /* - We use a simple trick for efficiency's sake. Instead of - comparing strings, we compare the sum of str with that of - a part of this string. Only if that matches, we call memcmp - or ucstrnicmp. - */ - ASSERT(str); - if (index < 0) - index += m_length; - int lstr = str->m_length; - int lthis = m_length - index; - if ((unsigned)lthis > m_length) - return -1; - int delta = lthis - lstr; - if (delta < 0) - return -1; - - const UChar* uthis = m_data + index; - const UChar* ustr = str->m_data; - unsigned hthis = 0; - unsigned hstr = 0; - if (caseSensitive) { - for (int i = 0; i < lstr; i++) { - hthis += uthis[i]; - hstr += ustr[i]; - } - int i = 0; - while (1) { - if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) - return index + i; - if (i == delta) - return -1; - hthis += uthis[i + lstr]; - hthis -= uthis[i]; - i++; - } - } else { - for (int i = 0; i < lstr; i++ ) { - hthis += toASCIILower(uthis[i]); - hstr += toASCIILower(ustr[i]); - } - int i = 0; - while (1) { - if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr)) - return index + i; - if (i == delta) - return -1; - hthis += toASCIILower(uthis[i + lstr]); - hthis -= toASCIILower(uthis[i]); - i++; - } +size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const UChar* matchCharacters = matchString->characters(); + + unsigned i = 0; + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { + if (i == delta) + return notFound; + ++i; } + return index + i; } -int StringImpl::reverseFind(UChar c, int index) +size_t StringImpl::reverseFind(UChar c, unsigned index) { return WTF::reverseFind(m_data, m_length, c, index); } -int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive) +size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) { - /* - See StringImpl::find() for explanations. - */ - ASSERT(str); - int lthis = m_length; - if (index < 0) - index += lthis; - - int lstr = str->m_length; - int delta = lthis - lstr; - if ( index < 0 || index > lthis || delta < 0 ) - return -1; - if ( index > delta ) - index = delta; - - const UChar *uthis = m_data; - const UChar *ustr = str->m_data; - unsigned hthis = 0; - unsigned hstr = 0; - int i; - if (caseSensitive) { - for ( i = 0; i < lstr; i++ ) { - hthis += uthis[index + i]; - hstr += ustr[i]; - } - i = index; - while (1) { - if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) - return i; - if (i == 0) - return -1; - i--; - hthis -= uthis[i + lstr]; - hthis += uthis[i]; - } - } else { - for (i = 0; i < lstr; i++) { - hthis += toASCIILower(uthis[index + i]); - hstr += toASCIILower(ustr[i]); - } - i = index; - while (1) { - if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) ) - return i; - if (i == 0) - return -1; - i--; - hthis -= toASCIILower(uthis[i + lstr]); - hthis += toASCIILower(uthis[i]); - } + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. + if (matchLength == 1) + return WTF::reverseFind(characters(), length(), matchString->characters()[0], index); + + // Check index & matchLength are in range. + if (matchLength > length()) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = min(index, length() - matchLength); + + const UChar *searchCharacters = characters(); + const UChar *matchCharacters = matchString->characters(); + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[delta + i]; + matchHash += matchCharacters[i]; + } + + // keep looping until we match + while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) { + if (!delta) + return notFound; + delta--; + searchHash -= searchCharacters[delta + matchLength]; + searchHash += searchCharacters[delta]; } + return delta; +} + +size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (matchLength > length()) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = min(index, length() - matchLength); - // Should never get here. - return -1; + const UChar *searchCharacters = characters(); + const UChar *matchCharacters = matchString->characters(); + + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { + if (!delta) + return notFound; + delta--; + } + return delta; } bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive) { ASSERT(m_data); - int start = m_length - m_data->m_length; - if (start >= 0) - return (find(m_data, start, caseSensitive) == start); + if (m_length >= m_data->m_length) { + unsigned start = m_length - m_data->m_length; + return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start; + } return false; } @@ -716,12 +791,12 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen if (!replacement) return this; - int repStrLength = replacement->length(); - int srcSegmentStart = 0; - int matchCount = 0; + unsigned repStrLength = replacement->length(); + size_t srcSegmentStart = 0; + unsigned matchCount = 0; // Count the matches - while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { + while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { ++matchCount; ++srcSegmentStart; } @@ -735,12 +810,12 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen createUninitialized(m_length - matchCount + (matchCount * repStrLength), data); // Construct the new data - int srcSegmentEnd; - int srcSegmentLength; + size_t srcSegmentEnd; + unsigned srcSegmentLength; srcSegmentStart = 0; - int dstOffset = 0; + unsigned dstOffset = 0; - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { srcSegmentLength = srcSegmentEnd - srcSegmentStart; memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); dstOffset += srcSegmentLength; @@ -752,7 +827,7 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen srcSegmentLength = m_length - srcSegmentStart; memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); - ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); + ASSERT(dstOffset + srcSegmentLength == newImpl->length()); return newImpl; } @@ -762,16 +837,16 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl if (!pattern || !replacement) return this; - int patternLength = pattern->length(); + unsigned patternLength = pattern->length(); if (!patternLength) return this; - int repStrLength = replacement->length(); - int srcSegmentStart = 0; - int matchCount = 0; + unsigned repStrLength = replacement->length(); + size_t srcSegmentStart = 0; + unsigned matchCount = 0; // Count the matches - while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { + while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { ++matchCount; srcSegmentStart += patternLength; } @@ -785,12 +860,12 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl createUninitialized(m_length + matchCount * (repStrLength - patternLength), data); // Construct the new data - int srcSegmentEnd; - int srcSegmentLength; + size_t srcSegmentEnd; + unsigned srcSegmentLength; srcSegmentStart = 0; - int dstOffset = 0; + unsigned dstOffset = 0; - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { srcSegmentLength = srcSegmentEnd - srcSegmentStart; memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); dstOffset += srcSegmentLength; @@ -802,7 +877,7 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl srcSegmentLength = m_length - srcSegmentStart; memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); - ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); + ASSERT(dstOffset + srcSegmentLength == newImpl->length()); return newImpl; } @@ -883,20 +958,6 @@ bool equalIgnoringNullity(StringImpl* a, StringImpl* b) return false; } -Vector<char> StringImpl::ascii() -{ - Vector<char> buffer(m_length + 1); - for (unsigned i = 0; i != m_length; ++i) { - UChar c = m_data[i]; - if ((c >= 0x20 && c < 0x7F) || c == 0x00) - buffer[i] = static_cast<char>(c); - else - buffer[i] = '?'; - } - buffer[m_length] = '\0'; - return buffer; -} - WTF::Unicode::Direction StringImpl::defaultWritingDirection() { for (unsigned i = 0; i < m_length; ++i) { |