diff options
Diffstat (limited to 'WebCore/platform/text/StringImpl.cpp')
-rw-r--r-- | WebCore/platform/text/StringImpl.cpp | 317 |
1 files changed, 78 insertions, 239 deletions
diff --git a/WebCore/platform/text/StringImpl.cpp b/WebCore/platform/text/StringImpl.cpp index 0643de6..911c0dc 100644 --- a/WebCore/platform/text/StringImpl.cpp +++ b/WebCore/platform/text/StringImpl.cpp @@ -28,24 +28,18 @@ #include "AtomicString.h" #include "CString.h" #include "CharacterNames.h" -#include "DeprecatedString.h" #include "FloatConversion.h" -#include "Length.h" #include "StringBuffer.h" #include "StringHash.h" #include "TextBreakIterator.h" #include "TextEncoding.h" #include <kjs/dtoa.h> -#include <kjs/identifier.h> #include <wtf/Assertions.h> #include <wtf/unicode/Unicode.h> using namespace WTF; using namespace Unicode; -using KJS::Identifier; -using KJS::UString; - namespace WebCore { static inline UChar* newUCharVector(unsigned n) @@ -60,21 +54,23 @@ static inline void deleteUCharVector(const UChar* p) // This constructor is used only to create the empty string. StringImpl::StringImpl() - : RefCounted<StringImpl>(1) - , m_length(0) + : m_length(0) , m_data(0) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) { + // Ensure that the hash is computed so that AtomicStringHash can call existingHash() + // with impunity. The empty string is special because it is never entered into + // AtomicString's HashKey, but still needs to compare correctly. + hash(); } // This is one of the most common constructors, but it's also used for the copy() // operation. Because of that, it's the one constructor that doesn't assert the // length is non-zero, since we support copying the empty string. inline StringImpl::StringImpl(const UChar* characters, unsigned length) - : RefCounted<StringImpl>(1) - , m_length(length) + : m_length(length) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) @@ -85,8 +81,7 @@ inline StringImpl::StringImpl(const UChar* characters, unsigned length) } inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacter) - : RefCounted<StringImpl>(1) - , m_length(str.m_length) + : m_length(str.m_length) , m_hash(str.m_hash) , m_inTable(false) , m_hasTerminatingNullCharacter(true) @@ -98,8 +93,7 @@ inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacte } inline StringImpl::StringImpl(const char* characters, unsigned length) - : RefCounted<StringImpl>(1) - , m_length(length) + : m_length(length) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) @@ -116,8 +110,7 @@ inline StringImpl::StringImpl(const char* characters, unsigned length) } inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer) - : RefCounted<StringImpl>(1) - , m_length(length) + : m_length(length) , m_data(characters) , m_hash(0) , m_inTable(false) @@ -127,15 +120,9 @@ inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer) ASSERT(length); } -// FIXME: These AtomicString constructors return objects with a refCount of 0, -// even though the others return objects with a refCount of 1. That preserves -// the historical behavior for the hash map translator call sites inside the -// AtomicString code, but is it correct? - // This constructor is only for use by AtomicString. StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash) - : RefCounted<StringImpl>(0) - , m_length(length) + : m_length(length) , m_hash(hash) , m_inTable(true) , m_hasTerminatingNullCharacter(false) @@ -151,8 +138,7 @@ StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash) // This constructor is only for use by AtomicString. StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash) - : RefCounted<StringImpl>(0) - , m_length(length) + : m_length(length) , m_hash(hash) , m_inTable(true) , m_hasTerminatingNullCharacter(false) @@ -178,8 +164,8 @@ StringImpl::~StringImpl() StringImpl* StringImpl::empty() { - static StringImpl e; - return &e; + static StringImpl* e = new StringImpl; + return e; } bool StringImpl::containsOnlyWhitespace() @@ -211,128 +197,6 @@ UChar32 StringImpl::characterStartingAt(unsigned i) return 0; } -static Length parseLength(const UChar* data, unsigned length) -{ - if (length == 0) - return Length(1, Relative); - - unsigned i = 0; - while (i < length && isSpaceOrNewline(data[i])) - ++i; - if (i < length && (data[i] == '+' || data[i] == '-')) - ++i; - while (i < length && Unicode::isDigit(data[i])) - ++i; - - bool ok; - int r = DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(data), i).string().toInt(&ok); - - /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */ - while (i < length && (Unicode::isDigit(data[i]) || data[i] == '.')) - ++i; - - /* IE Quirk: Skip any whitespace (20 % => 20%) */ - while (i < length && isSpaceOrNewline(data[i])) - ++i; - - if (ok) { - if (i < length) { - UChar next = data[i]; - if (next == '%') - return Length(static_cast<double>(r), Percent); - if (next == '*') - return Length(r, Relative); - } - return Length(r, Fixed); - } else { - if (i < length) { - UChar next = data[i]; - if (next == '*') - return Length(1, Relative); - if (next == '%') - return Length(1, Relative); - } - } - return Length(0, Relative); -} - -Length StringImpl::toLength() -{ - return parseLength(m_data, m_length); -} - -static int countCharacter(StringImpl* string, UChar character) -{ - int count = 0; - int length = string->length(); - for (int i = 0; i < length; ++i) - count += (*string)[i] == character; - return count; -} - -Length* StringImpl::toCoordsArray(int& len) -{ - StringBuffer spacified(m_length); - for (unsigned i = 0; i < m_length; i++) { - UChar cc = m_data[i]; - if (cc > '9' || (cc < '0' && cc != '-' && cc != '*' && cc != '.')) - spacified[i] = ' '; - else - spacified[i] = cc; - } - RefPtr<StringImpl> str = adopt(spacified); - - str = str->simplifyWhiteSpace(); - - len = countCharacter(str.get(), ' ') + 1; - Length* r = new Length[len]; - - int i = 0; - int pos = 0; - int pos2; - - while ((pos2 = str->find(' ', pos)) != -1) { - r[i++] = parseLength(str->characters() + pos, pos2 - pos); - pos = pos2+1; - } - r[i] = parseLength(str->characters() + pos, str->length() - pos); - - ASSERT(i == len - 1); - - return r; -} - -Length* StringImpl::toLengthArray(int& len) -{ - RefPtr<StringImpl> str = simplifyWhiteSpace(); - if (!str->length()) { - len = 1; - return 0; - } - - len = countCharacter(str.get(), ',') + 1; - Length* r = new Length[len]; - - int i = 0; - int pos = 0; - int pos2; - - while ((pos2 = str->find(',', pos)) != -1) { - r[i++] = parseLength(str->characters() + pos, pos2 - pos); - pos = pos2+1; - } - - ASSERT(i == len - 1); - - /* IE Quirk: If the last comma is the last char skip it and reduce len by one */ - if (str->length()-pos > 0) - r[i] = parseLength(str->characters() + pos, str->length() - pos); - else - len--; - - return r; -} - bool StringImpl::isLower() { // Do a faster loop for the case where all the characters are ASCII. @@ -378,7 +242,7 @@ PassRefPtr<StringImpl> StringImpl::lower() if (!error && realLength == length) return adopt(data); data.resize(realLength); - Unicode::toLower(data.characters(), length, m_data, m_length, &error); + Unicode::toLower(data.characters(), realLength, m_data, m_length, &error); if (error) return this; return adopt(data); @@ -386,10 +250,26 @@ PassRefPtr<StringImpl> StringImpl::lower() PassRefPtr<StringImpl> StringImpl::upper() { + StringBuffer data(m_length); + int32_t length = m_length; + + // Do a faster loop for the case where all the characters are ASCII. + UChar ored = 0; + for (int i = 0; i < length; i++) { + UChar c = m_data[i]; + ored |= c; + data[i] = toASCIIUpper(c); + } + if (!(ored & ~0x7F)) + return adopt(data); + + // Do a slower implementation for cases that include non-ASCII characters. bool error; - int32_t length = Unicode::toUpper(0, 0, m_data, m_length, &error); - StringBuffer data(length); - Unicode::toUpper(data.characters(), length, m_data, m_length, &error); + int32_t realLength = Unicode::toUpper(data.characters(), length, m_data, m_length, &error); + if (!error && realLength == length) + return adopt(data); + data.resize(realLength); + Unicode::toUpper(data.characters(), realLength, m_data, m_length, &error); if (error) return this; return adopt(data); @@ -425,7 +305,7 @@ PassRefPtr<StringImpl> StringImpl::foldCase() if (!error && realLength == length) return adopt(data); data.resize(realLength); - Unicode::foldCase(data.characters(), length, m_data, m_length, &error); + Unicode::foldCase(data.characters(), realLength, m_data, m_length, &error); if (error) return this; return adopt(data); @@ -513,84 +393,54 @@ PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous) return adopt(data); } -int StringImpl::toInt(bool* ok) +int StringImpl::toIntStrict(bool* ok, int base) { - unsigned i = 0; + return charactersToIntStrict(m_data, m_length, ok, base); +} - // Allow leading spaces. - for (; i != m_length; ++i) - if (!isSpaceOrNewline(m_data[i])) - break; - - // Allow sign. - if (i != m_length && (m_data[i] == '+' || m_data[i] == '-')) - ++i; - - // Allow digits. - for (; i != m_length; ++i) - if (!Unicode::isDigit(m_data[i])) - break; - - return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt(ok); +unsigned StringImpl::toUIntStrict(bool* ok, int base) +{ + return charactersToUIntStrict(m_data, m_length, ok, base); } -int64_t StringImpl::toInt64(bool* ok) +int64_t StringImpl::toInt64Strict(bool* ok, int base) { - unsigned i = 0; + return charactersToInt64Strict(m_data, m_length, ok, base); +} - // Allow leading spaces. - for (; i != m_length; ++i) - if (!isSpaceOrNewline(m_data[i])) - break; - - // Allow sign. - if (i != m_length && (m_data[i] == '+' || m_data[i] == '-')) - ++i; - - // Allow digits. - for (; i != m_length; ++i) - if (!Unicode::isDigit(m_data[i])) - break; - - return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt64(ok); +uint64_t StringImpl::toUInt64Strict(bool* ok, int base) +{ + return charactersToUInt64Strict(m_data, m_length, ok, base); } -uint64_t StringImpl::toUInt64(bool* ok) +int StringImpl::toInt(bool* ok) { - unsigned i = 0; + return charactersToInt(m_data, m_length, ok); +} - // Allow leading spaces. - for (; i != m_length; ++i) - if (!isSpaceOrNewline(m_data[i])) - break; +unsigned StringImpl::toUInt(bool* ok) +{ + return charactersToUInt(m_data, m_length, ok); +} - // Allow digits. - for (; i != m_length; ++i) - if (!Unicode::isDigit(m_data[i])) - break; - - return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toUInt64(ok); +int64_t StringImpl::toInt64(bool* ok) +{ + return charactersToInt64(m_data, m_length, ok); +} + +uint64_t StringImpl::toUInt64(bool* ok) +{ + return charactersToUInt64(m_data, m_length, ok); } double StringImpl::toDouble(bool* ok) { - if (!m_length) { - if (ok) - *ok = false; - return 0; - } - char *end; - CString latin1String = Latin1Encoding().encode(characters(), length()); - double val = kjs_strtod(latin1String.data(), &end); - if (ok) - *ok = end == 0 || *end == '\0'; - return val; + return charactersToDouble(m_data, m_length, ok); } float StringImpl::toFloat(bool* ok) { - // FIXME: This will return ok even when the string fits into a double but not a float. - return narrowPrecisionToFloat(toDouble(ok)); + return charactersToFloat(m_data, m_length, ok); } static bool equal(const UChar* a, const char* b, int length) @@ -657,15 +507,7 @@ int StringImpl::find(const char* chs, int index, bool caseSensitive) int StringImpl::find(UChar c, int start) { - unsigned index = start; - if (index >= m_length ) - return -1; - while(index < m_length) { - if (m_data[index] == c) - return index; - index++; - } - return -1; + return WebCore::find(m_data, m_length, c, start); } int StringImpl::find(StringImpl* str, int index, bool caseSensitive) @@ -726,18 +568,7 @@ int StringImpl::find(StringImpl* str, int index, bool caseSensitive) int StringImpl::reverseFind(UChar c, int index) { - if (index >= (int)m_length || m_length == 0) - return -1; - - if (index < 0) - index += m_length; - while (1) { - if (m_data[index] == c) - return index; - if (index == 0) - return -1; - index--; - } + return WebCore::reverseFind(m_data, m_length, c, index); } int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive) @@ -1032,20 +863,28 @@ PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* ch ASSERT(characters); ASSERT(length); - StringBuffer strippedCopy(length); + // Optimize for the case where there are no Null characters by quickly + // searching for nulls, and then using StringImpl::create, which will + // memcpy the whole buffer. This is faster than assigning character by + // character during the loop. + + // Fast case. int foundNull = 0; - for (unsigned i = 0; i < length; i++) { + for (unsigned i = 0; !foundNull && i < length; i++) { int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS) - strippedCopy[i] = c; - foundNull |= ~c; + foundNull |= !c; } if (!foundNull) - return adoptRef(new StringImpl(strippedCopy.release(), length, AdoptBuffer())); + return StringImpl::create(characters, length); + + // Slow case. + StringBuffer strippedCopy(length); unsigned strippedLength = 0; for (unsigned i = 0; i < length; i++) { if (int c = characters[i]) strippedCopy[strippedLength++] = c; } + ASSERT(strippedLength < length); // Only take the slow case when stripping. strippedCopy.shrink(strippedLength); return adopt(strippedCopy); } |