1 files changed, 78 insertions, 239 deletions
diff --git a/WebCore/platform/text/StringImpl.cpp b/WebCore/platform/text/StringImpl.cpp
index 0643de6..911c0dc 100644
--- a/WebCore/platform/text/StringImpl.cpp
+++ b/WebCore/platform/text/StringImpl.cpp
@@ -28,24 +28,18 @@
 #include "AtomicString.h"
 #include "CString.h"
 #include "CharacterNames.h"
-#include "DeprecatedString.h"
 #include "FloatConversion.h"
-#include "Length.h"
 #include "StringBuffer.h"
 #include "StringHash.h"
 #include "TextBreakIterator.h"
 #include "TextEncoding.h"
 #include <kjs/dtoa.h>
-#include <kjs/identifier.h>
 #include <wtf/Assertions.h>
 #include <wtf/unicode/Unicode.h>
 
 using namespace WTF;
 using namespace Unicode;
 
-using KJS::Identifier;
-using KJS::UString;
-
 namespace WebCore {
 
 static inline UChar* newUCharVector(unsigned n)
@@ -60,21 +54,23 @@ static inline void deleteUCharVector(const UChar* p)
 
 // This constructor is used only to create the empty string.
 StringImpl::StringImpl()
-    : RefCounted<StringImpl>(1)
-    , m_length(0)
+    : m_length(0)
     , m_data(0)
     , m_hash(0)
     , m_inTable(false)
     , m_hasTerminatingNullCharacter(false)
 {
+    // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
+    // with impunity. The empty string is special because it is never entered into
+    // AtomicString's HashKey, but still needs to compare correctly.
+    hash();
 }
 
 // This is one of the most common constructors, but it's also used for the copy()
 // operation. Because of that, it's the one constructor that doesn't assert the
 // length is non-zero, since we support copying the empty string.
 inline StringImpl::StringImpl(const UChar* characters, unsigned length)
-    : RefCounted<StringImpl>(1)
-    , m_length(length)
+    : m_length(length)
     , m_hash(0)
     , m_inTable(false)
     , m_hasTerminatingNullCharacter(false)
@@ -85,8 +81,7 @@ inline StringImpl::StringImpl(const UChar* characters, unsigned length)
 }
 
 inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacter)
-    : RefCounted<StringImpl>(1)
-    , m_length(str.m_length)
+    : m_length(str.m_length)
     , m_hash(str.m_hash)
     , m_inTable(false)
     , m_hasTerminatingNullCharacter(true)
@@ -98,8 +93,7 @@ inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacte
 }
 
 inline StringImpl::StringImpl(const char* characters, unsigned length)
-    : RefCounted<StringImpl>(1)
-    , m_length(length)
+    : m_length(length)
     , m_hash(0)
     , m_inTable(false)
     , m_hasTerminatingNullCharacter(false)
@@ -116,8 +110,7 @@ inline StringImpl::StringImpl(const char* characters, unsigned length)
 }
 
 inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
-    : RefCounted<StringImpl>(1)
-    , m_length(length)
+    : m_length(length)
     , m_data(characters)
     , m_hash(0)
     , m_inTable(false)
@@ -127,15 +120,9 @@ inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
     ASSERT(length);
 }
 
-// FIXME: These AtomicString constructors return objects with a refCount of 0,
-// even though the others return objects with a refCount of 1. That preserves
-// the historical behavior for the hash map translator call sites inside the
-// AtomicString code, but is it correct?
-
 // This constructor is only for use by AtomicString.
 StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
-    : RefCounted<StringImpl>(0)
-    , m_length(length)
+    : m_length(length)
     , m_hash(hash)
     , m_inTable(true)
     , m_hasTerminatingNullCharacter(false)
@@ -151,8 +138,7 @@ StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
 
 // This constructor is only for use by AtomicString.
 StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash)
-    : RefCounted<StringImpl>(0)
-    , m_length(length)
+    : m_length(length)
     , m_hash(hash)
     , m_inTable(true)
     , m_hasTerminatingNullCharacter(false)
@@ -178,8 +164,8 @@ StringImpl::~StringImpl()
 
 StringImpl* StringImpl::empty()
 {
-    static StringImpl e;
-    return &e;
+    static StringImpl* e = new StringImpl;
+    return e;
 }
 
 bool StringImpl::containsOnlyWhitespace()
@@ -211,128 +197,6 @@ UChar32 StringImpl::characterStartingAt(unsigned i)
     return 0;
 }
 
-static Length parseLength(const UChar* data, unsigned length)
-{
-    if (length == 0)
-        return Length(1, Relative);
-
-    unsigned i = 0;
-    while (i < length && isSpaceOrNewline(data[i]))
-        ++i;
-    if (i < length && (data[i] == '+' || data[i] == '-'))
-        ++i;
-    while (i < length && Unicode::isDigit(data[i]))
-        ++i;
-
-    bool ok;
-    int r = DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(data), i).string().toInt(&ok);
-
-    /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
-    while (i < length && (Unicode::isDigit(data[i]) || data[i] == '.'))
-        ++i;
-
-    /* IE Quirk: Skip any whitespace (20 % => 20%) */
-    while (i < length && isSpaceOrNewline(data[i]))
-        ++i;
-
-    if (ok) {
-        if (i < length) {
-            UChar next = data[i];
-            if (next == '%')
-                return Length(static_cast<double>(r), Percent);
-            if (next == '*')
-                return Length(r, Relative);
-        }
-        return Length(r, Fixed);
-    } else {
-        if (i < length) {
-            UChar next = data[i];
-            if (next == '*')
-                return Length(1, Relative);
-            if (next == '%')
-                return Length(1, Relative);
-        }
-    }
-    return Length(0, Relative);
-}
-
-Length StringImpl::toLength()
-{
-    return parseLength(m_data, m_length);
-}
-
-static int countCharacter(StringImpl* string, UChar character)
-{
-    int count = 0;
-    int length = string->length();
-    for (int i = 0; i < length; ++i)
-            count += (*string)[i] == character;
-    return count;
-}
-
-Length* StringImpl::toCoordsArray(int& len)
-{
-    StringBuffer spacified(m_length);
-    for (unsigned i = 0; i < m_length; i++) {
-        UChar cc = m_data[i];
-        if (cc > '9' || (cc < '0' && cc != '-' && cc != '*' && cc != '.'))
-            spacified[i] = ' ';
-        else
-            spacified[i] = cc;
-    }
-    RefPtr<StringImpl> str = adopt(spacified);
-
-    str = str->simplifyWhiteSpace();
-
-    len = countCharacter(str.get(), ' ') + 1;
-    Length* r = new Length[len];
-
-    int i = 0;
-    int pos = 0;
-    int pos2;
-
-    while ((pos2 = str->find(' ', pos)) != -1) {
-        r[i++] = parseLength(str->characters() + pos, pos2 - pos);
-        pos = pos2+1;
-    }
-    r[i] = parseLength(str->characters() + pos, str->length() - pos);
-
-    ASSERT(i == len - 1);
-
-    return r;
-}
-
-Length* StringImpl::toLengthArray(int& len)
-{
-    RefPtr<StringImpl> str = simplifyWhiteSpace();
-    if (!str->length()) {
-        len = 1;
-        return 0;
-    }
-
-    len = countCharacter(str.get(), ',') + 1;
-    Length* r = new Length[len];
-
-    int i = 0;
-    int pos = 0;
-    int pos2;
-
-    while ((pos2 = str->find(',', pos)) != -1) {
-        r[i++] = parseLength(str->characters() + pos, pos2 - pos);
-        pos = pos2+1;
-    }
-
-    ASSERT(i == len - 1);
-
-    /* IE Quirk: If the last comma is the last char skip it and reduce len by one */
-    if (str->length()-pos > 0)
-        r[i] = parseLength(str->characters() + pos, str->length() - pos);
-    else
-        len--;
-
-    return r;
-}
-
 bool StringImpl::isLower()
 {
     // Do a faster loop for the case where all the characters are ASCII.
@@ -378,7 +242,7 @@ PassRefPtr<StringImpl> StringImpl::lower()
     if (!error && realLength == length)
         return adopt(data);
     data.resize(realLength);
-    Unicode::toLower(data.characters(), length, m_data, m_length, &error);
+    Unicode::toLower(data.characters(), realLength, m_data, m_length, &error);
     if (error)
         return this;
     return adopt(data);
@@ -386,10 +250,26 @@ PassRefPtr<StringImpl> StringImpl::lower()
 
 PassRefPtr<StringImpl> StringImpl::upper()
 {
+    StringBuffer data(m_length);
+    int32_t length = m_length;
+
+    // Do a faster loop for the case where all the characters are ASCII.
+    UChar ored = 0;
+    for (int i = 0; i < length; i++) {
+        UChar c = m_data[i];
+        ored |= c;
+        data[i] = toASCIIUpper(c);
+    }
+    if (!(ored & ~0x7F))
+        return adopt(data);
+
+    // Do a slower implementation for cases that include non-ASCII characters.
     bool error;
-    int32_t length = Unicode::toUpper(0, 0, m_data, m_length, &error);
-    StringBuffer data(length);
-    Unicode::toUpper(data.characters(), length, m_data, m_length, &error);
+    int32_t realLength = Unicode::toUpper(data.characters(), length, m_data, m_length, &error);
+    if (!error && realLength == length)
+        return adopt(data);
+    data.resize(realLength);
+    Unicode::toUpper(data.characters(), realLength, m_data, m_length, &error);
     if (error)
         return this;
     return adopt(data);
@@ -425,7 +305,7 @@ PassRefPtr<StringImpl> StringImpl::foldCase()
     if (!error && realLength == length)
         return adopt(data);
     data.resize(realLength);
-    Unicode::foldCase(data.characters(), length, m_data, m_length, &error);
+    Unicode::foldCase(data.characters(), realLength, m_data, m_length, &error);
     if (error)
         return this;
     return adopt(data);
@@ -513,84 +393,54 @@ PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous)
     return adopt(data);
 }
 
-int StringImpl::toInt(bool* ok)
+int StringImpl::toIntStrict(bool* ok, int base)
 {
-    unsigned i = 0;
+    return charactersToIntStrict(m_data, m_length, ok, base);
+}
 
-    // Allow leading spaces.
-    for (; i != m_length; ++i)
-        if (!isSpaceOrNewline(m_data[i]))
-            break;
-    
-    // Allow sign.
-    if (i != m_length && (m_data[i] == '+' || m_data[i] == '-'))
-        ++i;
-    
-    // Allow digits.
-    for (; i != m_length; ++i)
-        if (!Unicode::isDigit(m_data[i]))
-            break;
-    
-    return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt(ok);
+unsigned StringImpl::toUIntStrict(bool* ok, int base)
+{
+    return charactersToUIntStrict(m_data, m_length, ok, base);
 }
 
-int64_t StringImpl::toInt64(bool* ok)
+int64_t StringImpl::toInt64Strict(bool* ok, int base)
 {
-    unsigned i = 0;
+    return charactersToInt64Strict(m_data, m_length, ok, base);
+}
 
-    // Allow leading spaces.
-    for (; i != m_length; ++i)
-        if (!isSpaceOrNewline(m_data[i]))
-            break;
-    
-    // Allow sign.
-    if (i != m_length && (m_data[i] == '+' || m_data[i] == '-'))
-        ++i;
-    
-    // Allow digits.
-    for (; i != m_length; ++i)
-        if (!Unicode::isDigit(m_data[i]))
-            break;
-    
-    return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt64(ok);
+uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
+{
+    return charactersToUInt64Strict(m_data, m_length, ok, base);
 }
 
-uint64_t StringImpl::toUInt64(bool* ok)
+int StringImpl::toInt(bool* ok)
 {
-    unsigned i = 0;
+    return charactersToInt(m_data, m_length, ok);
+}
 
-    // Allow leading spaces.
-    for (; i != m_length; ++i)
-        if (!isSpaceOrNewline(m_data[i]))
-            break;
+unsigned StringImpl::toUInt(bool* ok)
+{
+    return charactersToUInt(m_data, m_length, ok);
+}
 
-    // Allow digits.
-    for (; i != m_length; ++i)
-        if (!Unicode::isDigit(m_data[i]))
-            break;
-    
-    return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toUInt64(ok);
+int64_t StringImpl::toInt64(bool* ok)
+{
+    return charactersToInt64(m_data, m_length, ok);
+}
+
+uint64_t StringImpl::toUInt64(bool* ok)
+{
+    return charactersToUInt64(m_data, m_length, ok);
 }
 
 double StringImpl::toDouble(bool* ok)
 {
-    if (!m_length) {
-        if (ok)
-            *ok = false;
-        return 0;
-    }
-    char *end;
-    CString latin1String = Latin1Encoding().encode(characters(), length());
-    double val = kjs_strtod(latin1String.data(), &end);
-    if (ok)
-        *ok = end == 0 || *end == '\0';
-    return val;
+    return charactersToDouble(m_data, m_length, ok);
 }
 
 float StringImpl::toFloat(bool* ok)
 {
-    // FIXME: This will return ok even when the string fits into a double but not a float.
-    return narrowPrecisionToFloat(toDouble(ok));
+    return charactersToFloat(m_data, m_length, ok);
 }
 
 static bool equal(const UChar* a, const char* b, int length)
@@ -657,15 +507,7 @@ int StringImpl::find(const char* chs, int index, bool caseSensitive)
 
 int StringImpl::find(UChar c, int start)
 {
-    unsigned index = start;
-    if (index >= m_length )
-        return -1;
-    while(index < m_length) {
-        if (m_data[index] == c)
-            return index;
-        index++;
-    }
-    return -1;
+    return WebCore::find(m_data, m_length, c, start);
 }
 
 int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
@@ -726,18 +568,7 @@ int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
 
 int StringImpl::reverseFind(UChar c, int index)
 {
-    if (index >= (int)m_length || m_length == 0)
-        return -1;
-
-    if (index < 0)
-        index += m_length;
-    while (1) {
-        if (m_data[index] == c)
-            return index;
-        if (index == 0)
-            return -1;
-        index--;
-    }
+    return WebCore::reverseFind(m_data, m_length, c, index);
 }
 
 int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
@@ -1032,20 +863,28 @@ PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* ch
     ASSERT(characters);
     ASSERT(length);
 
-    StringBuffer strippedCopy(length);
+    // Optimize for the case where there are no Null characters by quickly
+    // searching for nulls, and then using StringImpl::create, which will
+    // memcpy the whole buffer.  This is faster than assigning character by
+    // character during the loop. 
+
+    // Fast case.
     int foundNull = 0;
-    for (unsigned i = 0; i < length; i++) {
+    for (unsigned i = 0; !foundNull && i < length; i++) {
         int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
-        strippedCopy[i] = c;
-        foundNull |= ~c;
+        foundNull |= !c;
     }
     if (!foundNull)
-        return adoptRef(new StringImpl(strippedCopy.release(), length, AdoptBuffer()));
+        return StringImpl::create(characters, length);
+    
+    // Slow case.
+    StringBuffer strippedCopy(length);
     unsigned strippedLength = 0;
     for (unsigned i = 0; i < length; i++) {
         if (int c = characters[i])
             strippedCopy[strippedLength++] = c;
     }
+    ASSERT(strippedLength < length);  // Only take the slow case when stripping.
     strippedCopy.shrink(strippedLength);
     return adopt(strippedCopy);
 }