diff options
Diffstat (limited to 'JavaScriptCore/wtf/text')
-rw-r--r-- | JavaScriptCore/wtf/text/AtomicString.h | 14 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/AtomicStringHash.h | 62 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/StringImpl.cpp | 409 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/StringImpl.h | 23 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/WTFString.cpp | 97 | ||||
-rw-r--r-- | JavaScriptCore/wtf/text/WTFString.h | 226 |
6 files changed, 506 insertions, 325 deletions
diff --git a/JavaScriptCore/wtf/text/AtomicString.h b/JavaScriptCore/wtf/text/AtomicString.h index d29981a..cfabde7 100644 --- a/JavaScriptCore/wtf/text/AtomicString.h +++ b/JavaScriptCore/wtf/text/AtomicString.h @@ -32,14 +32,10 @@ #define ATOMICSTRING_CONVERSION #endif -// FIXME: this should be in WTF, too! -namespace WebCore { -struct AtomicStringHash; -} -using WebCore::AtomicStringHash; - namespace WTF { +struct AtomicStringHash; + class AtomicString { public: static void init(); @@ -75,10 +71,10 @@ public: bool contains(const String& s, bool caseSensitive = true) const { return m_string.contains(s, caseSensitive); } - int find(UChar c, int start = 0) const { return m_string.find(c, start); } - int find(const char* s, int start = 0, bool caseSentitive = true) const + size_t find(UChar c, size_t start = 0) const { return m_string.find(c, start); } + size_t find(const char* s, size_t start = 0, bool caseSentitive = true) const { return m_string.find(s, start, caseSentitive); } - int find(const String& s, int start = 0, bool caseSentitive = true) const + size_t find(const String& s, size_t start = 0, bool caseSentitive = true) const { return m_string.find(s, start, caseSentitive); } bool startsWith(const String& s, bool caseSensitive = true) const diff --git a/JavaScriptCore/wtf/text/AtomicStringHash.h b/JavaScriptCore/wtf/text/AtomicStringHash.h new file mode 100644 index 0000000..f6e4ad1 --- /dev/null +++ b/JavaScriptCore/wtf/text/AtomicStringHash.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef AtomicStringHash_h +#define AtomicStringHash_h + +#include <wtf/text/AtomicString.h> +#include <wtf/HashTraits.h> + +namespace WTF { + + struct AtomicStringHash { + static unsigned hash(const AtomicString& key) + { + return key.impl()->existingHash(); + } + + static bool equal(const AtomicString& a, const AtomicString& b) + { + return a == b; + } + + static const bool safeToCompareToEmptyOrDeleted = false; + }; + + // AtomicStringHash is the default hash for AtomicString + template<> struct HashTraits<WTF::AtomicString> : GenericHashTraits<WTF::AtomicString> { + static const bool emptyValueIsZero = true; + static void constructDeletedValue(WTF::AtomicString& slot) { new (&slot) WTF::AtomicString(HashTableDeletedValue); } + static bool isDeletedValue(const WTF::AtomicString& slot) { return slot.isHashTableDeletedValue(); } + }; + +} + +using WTF::AtomicStringHash; + +#endif diff --git a/JavaScriptCore/wtf/text/StringImpl.cpp b/JavaScriptCore/wtf/text/StringImpl.cpp index 3669628..ab0f009 100644 --- a/JavaScriptCore/wtf/text/StringImpl.cpp +++ b/JavaScriptCore/wtf/text/StringImpl.cpp @@ -498,175 +498,250 @@ int codePointCompare(const StringImpl* s1, const StringImpl* s2) return (l1 > l2) ? 
1 : -1; } -int StringImpl::find(const char* chs, int index, bool caseSensitive) +size_t StringImpl::find(UChar c, unsigned start) { - if (!chs || index < 0) - return -1; + return WTF::find(m_data, m_length, c, start); +} - int chsLength = strlen(chs); - int n = m_length - index; - if (n < 0) - return -1; - n -= chsLength - 1; - if (n <= 0) - return -1; +size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) +{ + return WTF::find(m_data, m_length, matchFunction, start); +} - const char* chsPlusOne = chs + 1; - int chsLengthMinusOne = chsLength - 1; - - const UChar* ptr = m_data + index - 1; - if (caseSensitive) { - UChar c = *chs; - do { - if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne)) - return m_length - chsLength - n + 1; - } while (--n); - } else { - UChar lc = Unicode::foldCase(*chs); - do { - if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne)) - return m_length - chsLength - n + 1; - } while (--n); +size_t StringImpl::find(const char* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = strlen(matchString); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. + if (matchLength == 1) + return WTF::find(characters(), length(), *(const unsigned char*)matchString, index); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const unsigned char* matchCharacters = (const unsigned char*)matchString; + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. 
+ unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchCharacters[i]; } - return -1; + unsigned i = 0; + // keep looping until we match + while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; } -int StringImpl::find(UChar c, int start) +size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index) { - return WTF::find(m_data, m_length, c, start); + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = strlen(matchString); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + + unsigned i = 0; + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { + if (i == delta) + return notFound; + ++i; + } + return index + i; } -int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start) +size_t StringImpl::find(StringImpl* matchString, unsigned index) { - return WTF::find(m_data, m_length, matchFunction, start); + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. 
+ if (matchLength == 1) + return WTF::find(characters(), length(), matchString->characters()[0], index); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const UChar* matchCharacters = matchString->characters(); + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[i]; + matchHash += matchCharacters[i]; + } + + unsigned i = 0; + // keep looping until we match + while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) { + if (i == delta) + return notFound; + searchHash += searchCharacters[i + matchLength]; + searchHash -= searchCharacters[i]; + ++i; + } + return index + i; } -int StringImpl::find(StringImpl* str, int index, bool caseSensitive) -{ - /* - We use a simple trick for efficiency's sake. Instead of - comparing strings, we compare the sum of str with that of - a part of this string. Only if that matches, we call memcmp - or ucstrnicmp. 
- */ - ASSERT(str); - if (index < 0) - index += m_length; - int lstr = str->m_length; - int lthis = m_length - index; - if ((unsigned)lthis > m_length) - return -1; - int delta = lthis - lstr; - if (delta < 0) - return -1; - - const UChar* uthis = m_data + index; - const UChar* ustr = str->m_data; - unsigned hthis = 0; - unsigned hstr = 0; - if (caseSensitive) { - for (int i = 0; i < lstr; i++) { - hthis += uthis[i]; - hstr += ustr[i]; - } - int i = 0; - while (1) { - if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) - return index + i; - if (i == delta) - return -1; - hthis += uthis[i + lstr]; - hthis -= uthis[i]; - i++; - } - } else { - for (int i = 0; i < lstr; i++ ) { - hthis += toASCIILower(uthis[i]); - hstr += toASCIILower(ustr[i]); - } - int i = 0; - while (1) { - if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr)) - return index + i; - if (i == delta) - return -1; - hthis += toASCIILower(uthis[i + lstr]); - hthis -= toASCIILower(uthis[i]); - i++; - } +size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (index > length()) + return notFound; + unsigned searchLength = length() - index; + if (matchLength > searchLength) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. 
+ unsigned delta = searchLength - matchLength; + + const UChar* searchCharacters = characters() + index; + const UChar* matchCharacters = matchString->characters(); + + unsigned i = 0; + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { + if (i == delta) + return notFound; + ++i; } + return index + i; } -int StringImpl::reverseFind(UChar c, int index) +size_t StringImpl::reverseFind(UChar c, unsigned index) { return WTF::reverseFind(m_data, m_length, c, index); } -int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive) +size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) { - /* - See StringImpl::find() for explanations. - */ - ASSERT(str); - int lthis = m_length; - if (index < 0) - index += lthis; - - int lstr = str->m_length; - int delta = lthis - lstr; - if ( index < 0 || index > lthis || delta < 0 ) - return -1; - if ( index > delta ) - index = delta; - - const UChar *uthis = m_data; - const UChar *ustr = str->m_data; - unsigned hthis = 0; - unsigned hstr = 0; - int i; - if (caseSensitive) { - for ( i = 0; i < lstr; i++ ) { - hthis += uthis[index + i]; - hstr += ustr[i]; - } - i = index; - while (1) { - if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) - return i; - if (i == 0) - return -1; - i--; - hthis -= uthis[i + lstr]; - hthis += uthis[i]; - } - } else { - for (i = 0; i < lstr; i++) { - hthis += toASCIILower(uthis[index + i]); - hstr += toASCIILower(ustr[i]); - } - i = index; - while (1) { - if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) ) - return i; - if (i == 0) - return -1; - i--; - hthis -= toASCIILower(uthis[i + lstr]); - hthis += toASCIILower(uthis[i]); - } + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Optimization 1: fast case for strings of length 1. 
+ if (matchLength == 1) + return WTF::reverseFind(characters(), length(), matchString->characters()[0], index); + + // Check index & matchLength are in range. + if (matchLength > length()) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = min(index, length() - matchLength); + + const UChar *searchCharacters = characters(); + const UChar *matchCharacters = matchString->characters(); + + // Optimization 2: keep a running hash of the strings, + // only call memcmp if the hashes match. + unsigned searchHash = 0; + unsigned matchHash = 0; + for (unsigned i = 0; i < matchLength; ++i) { + searchHash += searchCharacters[delta + i]; + matchHash += matchCharacters[i]; + } + + // keep looping until we match + while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) { + if (!delta) + return notFound; + delta--; + searchHash -= searchCharacters[delta + matchLength]; + searchHash += searchCharacters[delta]; } + return delta; +} + +size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index) +{ + // Check for null or empty string to match against + if (!matchString) + return notFound; + unsigned matchLength = matchString->length(); + if (!matchLength) + return min(index, length()); + + // Check index & matchLength are in range. + if (matchLength > length()) + return notFound; + // delta is the number of additional times to test; delta == 0 means test only once. + unsigned delta = min(index, length() - matchLength); - // Should never get here. 
- return -1; + const UChar *searchCharacters = characters(); + const UChar *matchCharacters = matchString->characters(); + + // keep looping until we match + while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { + if (!delta) + return notFound; + delta--; + } + return delta; } bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive) { ASSERT(m_data); - int start = m_length - m_data->m_length; - if (start >= 0) - return (find(m_data, start, caseSensitive) == start); + if (m_length >= m_data->m_length) { + unsigned start = m_length - m_data->m_length; + return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start; + } return false; } @@ -716,12 +791,12 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen if (!replacement) return this; - int repStrLength = replacement->length(); - int srcSegmentStart = 0; - int matchCount = 0; + unsigned repStrLength = replacement->length(); + size_t srcSegmentStart = 0; + unsigned matchCount = 0; // Count the matches - while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { + while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { ++matchCount; ++srcSegmentStart; } @@ -735,12 +810,12 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacemen createUninitialized(m_length - matchCount + (matchCount * repStrLength), data); // Construct the new data - int srcSegmentEnd; - int srcSegmentLength; + size_t srcSegmentEnd; + unsigned srcSegmentLength; srcSegmentStart = 0; - int dstOffset = 0; + unsigned dstOffset = 0; - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { srcSegmentLength = srcSegmentEnd - srcSegmentStart; memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); dstOffset += srcSegmentLength; @@ -752,7 +827,7 @@ PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, 
StringImpl* replacemen srcSegmentLength = m_length - srcSegmentStart; memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); - ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); + ASSERT(dstOffset + srcSegmentLength == newImpl->length()); return newImpl; } @@ -762,16 +837,16 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl if (!pattern || !replacement) return this; - int patternLength = pattern->length(); + unsigned patternLength = pattern->length(); if (!patternLength) return this; - int repStrLength = replacement->length(); - int srcSegmentStart = 0; - int matchCount = 0; + unsigned repStrLength = replacement->length(); + size_t srcSegmentStart = 0; + unsigned matchCount = 0; // Count the matches - while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { + while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) { ++matchCount; srcSegmentStart += patternLength; } @@ -785,12 +860,12 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl createUninitialized(m_length + matchCount * (repStrLength - patternLength), data); // Construct the new data - int srcSegmentEnd; - int srcSegmentLength; + size_t srcSegmentEnd; + unsigned srcSegmentLength; srcSegmentStart = 0; - int dstOffset = 0; + unsigned dstOffset = 0; - while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { + while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) { srcSegmentLength = srcSegmentEnd - srcSegmentStart; memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); dstOffset += srcSegmentLength; @@ -802,7 +877,7 @@ PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* repl srcSegmentLength = m_length - srcSegmentStart; memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); - ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); + 
ASSERT(dstOffset + srcSegmentLength == newImpl->length()); return newImpl; } @@ -883,20 +958,6 @@ bool equalIgnoringNullity(StringImpl* a, StringImpl* b) return false; } -Vector<char> StringImpl::ascii() -{ - Vector<char> buffer(m_length + 1); - for (unsigned i = 0; i != m_length; ++i) { - UChar c = m_data[i]; - if ((c >= 0x20 && c < 0x7F) || c == 0x00) - buffer[i] = static_cast<char>(c); - else - buffer[i] = '?'; - } - buffer[m_length] = '\0'; - return buffer; -} - WTF::Unicode::Direction StringImpl::defaultWritingDirection() { for (unsigned i = 0; i < m_length; ++i) { diff --git a/JavaScriptCore/wtf/text/StringImpl.h b/JavaScriptCore/wtf/text/StringImpl.h index 6080474..cec0b80 100644 --- a/JavaScriptCore/wtf/text/StringImpl.h +++ b/JavaScriptCore/wtf/text/StringImpl.h @@ -290,15 +290,18 @@ public: PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr); - int find(const char*, int index = 0, bool caseSensitive = true); - int find(UChar, int index = 0); - int find(CharacterMatchFunctionPtr, int index = 0); - int find(StringImpl*, int index, bool caseSensitive = true); - - int reverseFind(UChar, int index); - int reverseFind(StringImpl*, int index, bool caseSensitive = true); - - bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; } + size_t find(UChar, unsigned index = 0); + size_t find(CharacterMatchFunctionPtr, unsigned index = 0); + size_t find(const char*, unsigned index = 0); + size_t find(StringImpl*, unsigned index = 0); + size_t findIgnoringCase(const char*, unsigned index = 0); + size_t findIgnoringCase(StringImpl*, unsigned index = 0); + + size_t reverseFind(UChar, unsigned index = UINT_MAX); + size_t reverseFind(StringImpl*, unsigned index = UINT_MAX); + size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX); + + bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? 
reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; } bool endsWith(StringImpl*, bool caseSensitive = true); PassRefPtr<StringImpl> replace(UChar, UChar); @@ -306,8 +309,6 @@ public: PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*); PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*); - Vector<char> ascii(); - WTF::Unicode::Direction defaultWritingDirection(); #if PLATFORM(CF) diff --git a/JavaScriptCore/wtf/text/WTFString.cpp b/JavaScriptCore/wtf/text/WTFString.cpp index 6c4de6e..7d44d21 100644 --- a/JavaScriptCore/wtf/text/WTFString.cpp +++ b/JavaScriptCore/wtf/text/WTFString.cpp @@ -36,6 +36,13 @@ namespace WTF { using namespace Unicode; +// Construct a string with UTF-16 data. +String::String(const UChar* characters, unsigned length) + : m_impl(characters ? StringImpl::create(characters, length) : 0) +{ +} + +// Construct a string with UTF-16 data, from a null-terminated source. String::String(const UChar* str) { if (!str) @@ -48,6 +55,18 @@ String::String(const UChar* str) m_impl = StringImpl::create(str, len); } +// Construct a string with latin1 data. +String::String(const char* characters, unsigned length) + : m_impl(characters ? StringImpl::create(characters, length) : 0) +{ +} + +// Construct a string with latin1 data, from a null-terminated source. +String::String(const char* characters) + : m_impl(characters ? StringImpl::create(characters) : 0) +{ +} + void String::append(const String& str) { if (str.isEmpty()) @@ -226,6 +245,19 @@ String String::substring(unsigned pos, unsigned len) const return m_impl->substring(pos, len); } +String String::substringSharingImpl(unsigned offset, unsigned length) const +{ + // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar). 
+ + unsigned stringLength = this->length(); + offset = min(offset, stringLength); + length = min(length, stringLength - offset); + + if (!offset && length == stringLength) + return *this; + return String(StringImpl::create(m_impl, offset, length)); +} + String String::lower() const { if (!m_impl) @@ -557,14 +589,14 @@ void String::split(const String& separator, bool allowEmptyEntries, Vector<Strin { result.clear(); - int startPos = 0; - int endPos; - while ((endPos = find(separator, startPos)) != -1) { + unsigned startPos = 0; + size_t endPos; + while ((endPos = find(separator, startPos)) != notFound) { if (allowEmptyEntries || startPos != endPos) result.append(substring(startPos, endPos - startPos)); startPos = endPos + separator.length(); } - if (allowEmptyEntries || startPos != static_cast<int>(length())) + if (allowEmptyEntries || startPos != length()) result.append(substring(startPos)); } @@ -577,14 +609,14 @@ void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& resu { result.clear(); - int startPos = 0; - int endPos; - while ((endPos = find(separator, startPos)) != -1) { + unsigned startPos = 0; + size_t endPos; + while ((endPos = find(separator, startPos)) != notFound) { if (allowEmptyEntries || startPos != endPos) result.append(substring(startPos, endPos - startPos)); startPos = endPos + 1; } - if (allowEmptyEntries || startPos != static_cast<int>(length())) + if (allowEmptyEntries || startPos != length()) result.append(substring(startPos)); } @@ -593,18 +625,23 @@ void String::split(UChar separator, Vector<String>& result) const return split(String(&separator, 1), false, result); } -Vector<char> String::ascii() const +CString String::ascii() const { - if (m_impl) - return m_impl->ascii(); - - const char* nullMsg = "(null impl)"; - Vector<char, 2048> buffer; - for (int i = 0; nullMsg[i]; ++i) - buffer.append(nullMsg[i]); - - buffer.append('\0'); - return buffer; + // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are + // 
preserved only if they are NUL or in the range 0x20..0x7f; all other characters are converted to '?'. + + unsigned length = this->length(); + const UChar* characters = this->characters(); + + char* characterBuffer; + CString result = CString::newUninitialized(length, characterBuffer); + + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; + } + + return result; } CString String::latin1() const @@ -620,7 +657,7 @@ CString String::latin1() const for (unsigned i = 0; i < length; ++i) { UChar ch = characters[i]; - characterBuffer[i] = ch > 255 ? '?' : ch; + characterBuffer[i] = ch > 0xff ? '?' : ch; } return result; @@ -635,7 +672,7 @@ static inline void putUTF8Triple(char*& buffer, UChar ch) *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); } -CString String::utf8() const +CString String::utf8(bool strict) const { unsigned length = this->length(); const UChar* characters = this->characters(); @@ -653,15 +690,21 @@ CString String::utf8() const Vector<char, 1024> bufferVector(length * 3); char* buffer = bufferVector.data(); - ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), false); - ASSERT(result != sourceIllegal); // Only produced from strict conversion. + ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict); ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion - // If a high surrogate is left unconverted, treat it the same was as an unpaired high surrogate - // would have been handled in the middle of a string with non-strict conversion - which is to say, - // simply encode it to UTF-8. + // Only produced from strict conversion. + if (result == sourceIllegal) + return CString(); + + // Check for an unconverted high surrogate. if (result == sourceExhausted) { - // This should be one unpaired high surrogate. 
+ if (strict) + return CString(); + // This should be one unpaired high surrogate. Treat it the same + // way as an unpaired high surrogate would have been handled in + // the middle of a string with non-strict conversion - which is + // to say, simply encode it to UTF-8. ASSERT((characters + 1) == (this->characters() + length)); ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); // There should be room left, since one UChar hasn't been converted. diff --git a/JavaScriptCore/wtf/text/WTFString.h b/JavaScriptCore/wtf/text/WTFString.h index 6af519c..fafef12 100644 --- a/JavaScriptCore/wtf/text/WTFString.h +++ b/JavaScriptCore/wtf/text/WTFString.h @@ -1,6 +1,6 @@ /* * (C) 1999 Lars Knoll (knoll@kde.org) - * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -72,45 +72,43 @@ intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trai double charactersToDouble(const UChar*, size_t, bool* ok = 0); float charactersToFloat(const UChar*, size_t, bool* ok = 0); -int find(const UChar*, size_t, UChar, int startPosition = 0); -int reverseFind(const UChar*, size_t, UChar, int startPosition = -1); - class String { public: - String() { } // gives null string, distinguishable from an empty string - String(const UChar* str, unsigned len) - { - if (!str) - return; - m_impl = StringImpl::create(str, len); - } - String(const char* str) - { - if (!str) - return; - m_impl = StringImpl::create(str); - } - String(const char* str, unsigned length) - { - if (!str) - return; - m_impl = StringImpl::create(str, length); - } - String(const UChar*); // Specifically for null terminated UTF-16 - String(StringImpl* i) : m_impl(i) { } - String(PassRefPtr<StringImpl> i) : m_impl(i) { } - String(RefPtr<StringImpl> i) : m_impl(i) { 
} + // Construct a null string, distinguishable from an empty string. + String() { } - void swap(String& o) { m_impl.swap(o.m_impl); } + // Construct a string with UTF-16 data. + String(const UChar* characters, unsigned length); - // Hash table deleted values, which are only constructed and never copied or destroyed. - String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { } - bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); } + // Construct a string with UTF-16 data, from a null-terminated source. + String(const UChar*); + + // Construct a string with latin1 data. + String(const char* characters, unsigned length); + + // Construct a string with latin1 data, from a null-terminated source. + String(const char* characters); + + // Construct a string referencing an existing StringImpl. + String(StringImpl* impl) : m_impl(impl) { } + String(PassRefPtr<StringImpl> impl) : m_impl(impl) { } + String(RefPtr<StringImpl> impl) : m_impl(impl) { } + + // Inline the destructor. 
+ ALWAYS_INLINE ~String() { } + + void swap(String& o) { m_impl.swap(o.m_impl); } static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); } - static String adopt(Vector<UChar>& vector) { return StringImpl::adopt(vector); } + template<size_t inlineCapacity> + static String adopt(Vector<UChar, inlineCapacity>& vector) { return StringImpl::adopt(vector); } - ALWAYS_INLINE unsigned length() const + bool isNull() const { return !m_impl; } + bool isEmpty() const { return !m_impl || !m_impl->length(); } + + StringImpl* impl() const { return m_impl.get(); } + + unsigned length() const { if (!m_impl) return 0; @@ -124,34 +122,67 @@ public: return m_impl->characters(); } - const UChar* charactersWithNullTermination(); - - UChar operator[](unsigned i) const // if i >= length(), returns 0 + CString ascii() const; + CString latin1() const; + CString utf8(bool strict = false) const; + + UChar operator[](unsigned index) const { - if (!m_impl || i >= m_impl->length()) + if (!m_impl || index >= m_impl->length()) return 0; - return m_impl->characters()[i]; + return m_impl->characters()[index]; } - UChar32 characterStartingAt(unsigned) const; // Ditto. + + static String number(short); + static String number(unsigned short); + static String number(int); + static String number(unsigned); + static String number(long); + static String number(unsigned long); + static String number(long long); + static String number(unsigned long long); + static String number(double); + + // Find a single character or string, also with match function & latin1 forms. + size_t find(UChar c, unsigned start = 0) const + { return m_impl ? m_impl->find(c, start) : notFound; } + size_t find(const String& str, unsigned start = 0) const + { return m_impl ? m_impl->find(str.impl(), start) : notFound; } + size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const + { return m_impl ? 
m_impl->find(matchFunction, start) : notFound; } + size_t find(const char* str, unsigned start = 0) const + { return m_impl ? m_impl->find(str, start) : notFound; } + + // Find the last instance of a single character or string. + size_t reverseFind(UChar c, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFind(c, start) : notFound; } + size_t reverseFind(const String& str, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; } + + // Case insensitive string matching. + size_t findIgnoringCase(const char* str, unsigned start = 0) const + { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; } + size_t findIgnoringCase(const String& str, unsigned start = 0) const + { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; } + size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; } + + // Wrappers for find & reverseFind adding dynamic sensitivity check. + size_t find(const char* str, unsigned start, bool caseSensitive) const + { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); } + size_t find(const String& str, unsigned start, bool caseSensitive) const + { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); } + size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const + { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); } + + const UChar* charactersWithNullTermination(); - bool contains(UChar c) const { return find(c) != -1; } - bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; } - bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; } - - int find(UChar c, int start = 0) const - { return m_impl ? 
m_impl->find(c, start) : -1; } - int find(CharacterMatchFunctionPtr matchFunction, int start = 0) const - { return m_impl ? m_impl->find(matchFunction, start) : -1; } - int find(const char* str, int start = 0, bool caseSensitive = true) const - { return m_impl ? m_impl->find(str, start, caseSensitive) : -1; } - int find(const String& str, int start = 0, bool caseSensitive = true) const - { return m_impl ? m_impl->find(str.impl(), start, caseSensitive) : -1; } - - int reverseFind(UChar c, int start = -1) const - { return m_impl ? m_impl->reverseFind(c, start) : -1; } - int reverseFind(const String& str, int start = -1, bool caseSensitive = true) const - { return m_impl ? m_impl->reverseFind(str.impl(), start, caseSensitive) : -1; } + UChar32 characterStartingAt(unsigned) const; // Ditto. + bool contains(UChar c) const { return find(c) != notFound; } + bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } + bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } + bool startsWith(const String& s, bool caseSensitive = true) const { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); } bool endsWith(const String& s, bool caseSensitive = true) const @@ -177,6 +208,7 @@ public: void remove(unsigned pos, int len = 1); String substring(unsigned pos, unsigned len = UINT_MAX) const; + String substringSharingImpl(unsigned pos, unsigned len = UINT_MAX) const; String left(unsigned len) const { return substring(0, len); } String right(unsigned len) const { return substring(length() - len, len); } @@ -192,17 +224,11 @@ public: // Return the string with case folded for case insensitive comparison. 
String foldCase() const; - static String number(short); - static String number(unsigned short); - static String number(int); - static String number(unsigned); - static String number(long); - static String number(unsigned long); - static String number(long long); - static String number(unsigned long long); - static String number(double); - +#if !PLATFORM(QT) static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2); +#else + static String format(const char *, ...); +#endif // Returns an uninitialized string. The characters needs to be written // into the buffer returned in data before the returned string is used. @@ -238,11 +264,6 @@ public: // to ever prefer copy() over plain old assignment. String threadsafeCopy() const; - bool isNull() const { return !m_impl; } - ALWAYS_INLINE bool isEmpty() const { return !m_impl || !m_impl->length(); } - - StringImpl* impl() const { return m_impl.get(); } - #if PLATFORM(CF) String(CFStringRef); CFStringRef createCFString() const; @@ -272,11 +293,6 @@ public: operator BString() const; #endif - Vector<char> ascii() const; - - CString latin1() const; - CString utf8() const; - static String fromUTF8(const char*, size_t); static String fromUTF8(const char*); @@ -288,6 +304,10 @@ public: bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); } + // Hash table deleted values, which are only constructed and never copied or destroyed. 
+ String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { } + bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); } + private: RefPtr<StringImpl> m_impl; }; @@ -345,43 +365,37 @@ inline bool charactersAreAllASCII(const UChar* characters, size_t length) int codePointCompare(const String&, const String&); -inline int find(const UChar* characters, size_t length, UChar character, int startPosition) +inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) { - if (startPosition >= static_cast<int>(length)) - return -1; - for (size_t i = startPosition; i < length; ++i) { - if (characters[i] == character) - return static_cast<int>(i); + while (index < length) { + if (characters[index] == matchCharacter) + return index; + ++index; } - return -1; + return notFound; } -inline int find(const UChar* characters, size_t length, CharacterMatchFunctionPtr matchFunction, int startPosition) +inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) { - if (startPosition >= static_cast<int>(length)) - return -1; - for (size_t i = startPosition; i < length; ++i) { - if (matchFunction(characters[i])) - return static_cast<int>(i); + while (index < length) { + if (matchFunction(characters[index])) + return index; + ++index; } - return -1; + return notFound; } -inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition) +inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX) { - if (startPosition >= static_cast<int>(length) || !length) - return -1; - if (startPosition < 0) - startPosition += static_cast<int>(length); - while (true) { - if (characters[startPosition] == character) - return startPosition; - if (!startPosition) - return -1; - startPosition--; + if (!length) + return notFound; + if (index >= length) + index = length - 1; 
+ while (characters[index] != matchCharacter) { + if (!index--) + return notFound; } - ASSERT_NOT_REACHED(); - return -1; + return index; } inline void append(Vector<UChar>& vector, const String& string) @@ -417,6 +431,11 @@ template<> struct DefaultHash<String> { typedef StringHash Hash; }; +template <> struct VectorTraits<String> : SimpleClassVectorTraits +{ + static const bool canInitializeWithMemset = true; +}; + } using WTF::CString; @@ -433,6 +452,5 @@ using WTF::charactersAreAllASCII; using WTF::charactersToInt; using WTF::charactersToFloat; using WTF::charactersToDouble; -using WTF::operator+; #endif |