summaryrefslogtreecommitdiffstats
path: root/WebCore/platform/text/StringImpl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'WebCore/platform/text/StringImpl.cpp')
-rw-r--r--WebCore/platform/text/StringImpl.cpp317
1 files changed, 78 insertions, 239 deletions
diff --git a/WebCore/platform/text/StringImpl.cpp b/WebCore/platform/text/StringImpl.cpp
index 0643de6..911c0dc 100644
--- a/WebCore/platform/text/StringImpl.cpp
+++ b/WebCore/platform/text/StringImpl.cpp
@@ -28,24 +28,18 @@
#include "AtomicString.h"
#include "CString.h"
#include "CharacterNames.h"
-#include "DeprecatedString.h"
#include "FloatConversion.h"
-#include "Length.h"
#include "StringBuffer.h"
#include "StringHash.h"
#include "TextBreakIterator.h"
#include "TextEncoding.h"
#include <kjs/dtoa.h>
-#include <kjs/identifier.h>
#include <wtf/Assertions.h>
#include <wtf/unicode/Unicode.h>
using namespace WTF;
using namespace Unicode;
-using KJS::Identifier;
-using KJS::UString;
-
namespace WebCore {
static inline UChar* newUCharVector(unsigned n)
@@ -60,21 +54,23 @@ static inline void deleteUCharVector(const UChar* p)
// This constructor is used only to create the empty string.
StringImpl::StringImpl()
- : RefCounted<StringImpl>(1)
- , m_length(0)
+ : m_length(0)
, m_data(0)
, m_hash(0)
, m_inTable(false)
, m_hasTerminatingNullCharacter(false)
{
+ // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
+ // with impunity. The empty string is special because it is never entered into
+ // AtomicString's HashKey, but still needs to compare correctly.
+ hash();
}
// This is one of the most common constructors, but it's also used for the copy()
// operation. Because of that, it's the one constructor that doesn't assert the
// length is non-zero, since we support copying the empty string.
inline StringImpl::StringImpl(const UChar* characters, unsigned length)
- : RefCounted<StringImpl>(1)
- , m_length(length)
+ : m_length(length)
, m_hash(0)
, m_inTable(false)
, m_hasTerminatingNullCharacter(false)
@@ -85,8 +81,7 @@ inline StringImpl::StringImpl(const UChar* characters, unsigned length)
}
inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacter)
- : RefCounted<StringImpl>(1)
- , m_length(str.m_length)
+ : m_length(str.m_length)
, m_hash(str.m_hash)
, m_inTable(false)
, m_hasTerminatingNullCharacter(true)
@@ -98,8 +93,7 @@ inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacte
}
inline StringImpl::StringImpl(const char* characters, unsigned length)
- : RefCounted<StringImpl>(1)
- , m_length(length)
+ : m_length(length)
, m_hash(0)
, m_inTable(false)
, m_hasTerminatingNullCharacter(false)
@@ -116,8 +110,7 @@ inline StringImpl::StringImpl(const char* characters, unsigned length)
}
inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
- : RefCounted<StringImpl>(1)
- , m_length(length)
+ : m_length(length)
, m_data(characters)
, m_hash(0)
, m_inTable(false)
@@ -127,15 +120,9 @@ inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
ASSERT(length);
}
-// FIXME: These AtomicString constructors return objects with a refCount of 0,
-// even though the others return objects with a refCount of 1. That preserves
-// the historical behavior for the hash map translator call sites inside the
-// AtomicString code, but is it correct?
-
// This constructor is only for use by AtomicString.
StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
- : RefCounted<StringImpl>(0)
- , m_length(length)
+ : m_length(length)
, m_hash(hash)
, m_inTable(true)
, m_hasTerminatingNullCharacter(false)
@@ -151,8 +138,7 @@ StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
// This constructor is only for use by AtomicString.
StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash)
- : RefCounted<StringImpl>(0)
- , m_length(length)
+ : m_length(length)
, m_hash(hash)
, m_inTable(true)
, m_hasTerminatingNullCharacter(false)
@@ -178,8 +164,8 @@ StringImpl::~StringImpl()
StringImpl* StringImpl::empty()
{
- static StringImpl e;
- return &e;
+ static StringImpl* e = new StringImpl;
+ return e;
}
bool StringImpl::containsOnlyWhitespace()
@@ -211,128 +197,6 @@ UChar32 StringImpl::characterStartingAt(unsigned i)
return 0;
}
-static Length parseLength(const UChar* data, unsigned length)
-{
- if (length == 0)
- return Length(1, Relative);
-
- unsigned i = 0;
- while (i < length && isSpaceOrNewline(data[i]))
- ++i;
- if (i < length && (data[i] == '+' || data[i] == '-'))
- ++i;
- while (i < length && Unicode::isDigit(data[i]))
- ++i;
-
- bool ok;
- int r = DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(data), i).string().toInt(&ok);
-
- /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
- while (i < length && (Unicode::isDigit(data[i]) || data[i] == '.'))
- ++i;
-
- /* IE Quirk: Skip any whitespace (20 % => 20%) */
- while (i < length && isSpaceOrNewline(data[i]))
- ++i;
-
- if (ok) {
- if (i < length) {
- UChar next = data[i];
- if (next == '%')
- return Length(static_cast<double>(r), Percent);
- if (next == '*')
- return Length(r, Relative);
- }
- return Length(r, Fixed);
- } else {
- if (i < length) {
- UChar next = data[i];
- if (next == '*')
- return Length(1, Relative);
- if (next == '%')
- return Length(1, Relative);
- }
- }
- return Length(0, Relative);
-}
-
-Length StringImpl::toLength()
-{
- return parseLength(m_data, m_length);
-}
-
-static int countCharacter(StringImpl* string, UChar character)
-{
- int count = 0;
- int length = string->length();
- for (int i = 0; i < length; ++i)
- count += (*string)[i] == character;
- return count;
-}
-
-Length* StringImpl::toCoordsArray(int& len)
-{
- StringBuffer spacified(m_length);
- for (unsigned i = 0; i < m_length; i++) {
- UChar cc = m_data[i];
- if (cc > '9' || (cc < '0' && cc != '-' && cc != '*' && cc != '.'))
- spacified[i] = ' ';
- else
- spacified[i] = cc;
- }
- RefPtr<StringImpl> str = adopt(spacified);
-
- str = str->simplifyWhiteSpace();
-
- len = countCharacter(str.get(), ' ') + 1;
- Length* r = new Length[len];
-
- int i = 0;
- int pos = 0;
- int pos2;
-
- while ((pos2 = str->find(' ', pos)) != -1) {
- r[i++] = parseLength(str->characters() + pos, pos2 - pos);
- pos = pos2+1;
- }
- r[i] = parseLength(str->characters() + pos, str->length() - pos);
-
- ASSERT(i == len - 1);
-
- return r;
-}
-
-Length* StringImpl::toLengthArray(int& len)
-{
- RefPtr<StringImpl> str = simplifyWhiteSpace();
- if (!str->length()) {
- len = 1;
- return 0;
- }
-
- len = countCharacter(str.get(), ',') + 1;
- Length* r = new Length[len];
-
- int i = 0;
- int pos = 0;
- int pos2;
-
- while ((pos2 = str->find(',', pos)) != -1) {
- r[i++] = parseLength(str->characters() + pos, pos2 - pos);
- pos = pos2+1;
- }
-
- ASSERT(i == len - 1);
-
- /* IE Quirk: If the last comma is the last char skip it and reduce len by one */
- if (str->length()-pos > 0)
- r[i] = parseLength(str->characters() + pos, str->length() - pos);
- else
- len--;
-
- return r;
-}
-
bool StringImpl::isLower()
{
// Do a faster loop for the case where all the characters are ASCII.
@@ -378,7 +242,7 @@ PassRefPtr<StringImpl> StringImpl::lower()
if (!error && realLength == length)
return adopt(data);
data.resize(realLength);
- Unicode::toLower(data.characters(), length, m_data, m_length, &error);
+ Unicode::toLower(data.characters(), realLength, m_data, m_length, &error);
if (error)
return this;
return adopt(data);
@@ -386,10 +250,26 @@ PassRefPtr<StringImpl> StringImpl::lower()
PassRefPtr<StringImpl> StringImpl::upper()
{
+ StringBuffer data(m_length);
+ int32_t length = m_length;
+
+ // Do a faster loop for the case where all the characters are ASCII.
+ UChar ored = 0;
+ for (int i = 0; i < length; i++) {
+ UChar c = m_data[i];
+ ored |= c;
+ data[i] = toASCIIUpper(c);
+ }
+ if (!(ored & ~0x7F))
+ return adopt(data);
+
+ // Do a slower implementation for cases that include non-ASCII characters.
bool error;
- int32_t length = Unicode::toUpper(0, 0, m_data, m_length, &error);
- StringBuffer data(length);
- Unicode::toUpper(data.characters(), length, m_data, m_length, &error);
+ int32_t realLength = Unicode::toUpper(data.characters(), length, m_data, m_length, &error);
+ if (!error && realLength == length)
+ return adopt(data);
+ data.resize(realLength);
+ Unicode::toUpper(data.characters(), realLength, m_data, m_length, &error);
if (error)
return this;
return adopt(data);
@@ -425,7 +305,7 @@ PassRefPtr<StringImpl> StringImpl::foldCase()
if (!error && realLength == length)
return adopt(data);
data.resize(realLength);
- Unicode::foldCase(data.characters(), length, m_data, m_length, &error);
+ Unicode::foldCase(data.characters(), realLength, m_data, m_length, &error);
if (error)
return this;
return adopt(data);
@@ -513,84 +393,54 @@ PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous)
return adopt(data);
}
-int StringImpl::toInt(bool* ok)
+int StringImpl::toIntStrict(bool* ok, int base)
{
- unsigned i = 0;
+ return charactersToIntStrict(m_data, m_length, ok, base);
+}
- // Allow leading spaces.
- for (; i != m_length; ++i)
- if (!isSpaceOrNewline(m_data[i]))
- break;
-
- // Allow sign.
- if (i != m_length && (m_data[i] == '+' || m_data[i] == '-'))
- ++i;
-
- // Allow digits.
- for (; i != m_length; ++i)
- if (!Unicode::isDigit(m_data[i]))
- break;
-
- return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt(ok);
+unsigned StringImpl::toUIntStrict(bool* ok, int base)
+{
+ return charactersToUIntStrict(m_data, m_length, ok, base);
}
-int64_t StringImpl::toInt64(bool* ok)
+int64_t StringImpl::toInt64Strict(bool* ok, int base)
{
- unsigned i = 0;
+ return charactersToInt64Strict(m_data, m_length, ok, base);
+}
- // Allow leading spaces.
- for (; i != m_length; ++i)
- if (!isSpaceOrNewline(m_data[i]))
- break;
-
- // Allow sign.
- if (i != m_length && (m_data[i] == '+' || m_data[i] == '-'))
- ++i;
-
- // Allow digits.
- for (; i != m_length; ++i)
- if (!Unicode::isDigit(m_data[i]))
- break;
-
- return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toInt64(ok);
+uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
+{
+ return charactersToUInt64Strict(m_data, m_length, ok, base);
}
-uint64_t StringImpl::toUInt64(bool* ok)
+int StringImpl::toInt(bool* ok)
{
- unsigned i = 0;
+ return charactersToInt(m_data, m_length, ok);
+}
- // Allow leading spaces.
- for (; i != m_length; ++i)
- if (!isSpaceOrNewline(m_data[i]))
- break;
+unsigned StringImpl::toUInt(bool* ok)
+{
+ return charactersToUInt(m_data, m_length, ok);
+}
- // Allow digits.
- for (; i != m_length; ++i)
- if (!Unicode::isDigit(m_data[i]))
- break;
-
- return DeprecatedConstString(reinterpret_cast<const DeprecatedChar*>(m_data), i).string().toUInt64(ok);
+int64_t StringImpl::toInt64(bool* ok)
+{
+ return charactersToInt64(m_data, m_length, ok);
+}
+
+uint64_t StringImpl::toUInt64(bool* ok)
+{
+ return charactersToUInt64(m_data, m_length, ok);
}
double StringImpl::toDouble(bool* ok)
{
- if (!m_length) {
- if (ok)
- *ok = false;
- return 0;
- }
- char *end;
- CString latin1String = Latin1Encoding().encode(characters(), length());
- double val = kjs_strtod(latin1String.data(), &end);
- if (ok)
- *ok = end == 0 || *end == '\0';
- return val;
+ return charactersToDouble(m_data, m_length, ok);
}
float StringImpl::toFloat(bool* ok)
{
- // FIXME: This will return ok even when the string fits into a double but not a float.
- return narrowPrecisionToFloat(toDouble(ok));
+ return charactersToFloat(m_data, m_length, ok);
}
static bool equal(const UChar* a, const char* b, int length)
@@ -657,15 +507,7 @@ int StringImpl::find(const char* chs, int index, bool caseSensitive)
int StringImpl::find(UChar c, int start)
{
- unsigned index = start;
- if (index >= m_length )
- return -1;
- while(index < m_length) {
- if (m_data[index] == c)
- return index;
- index++;
- }
- return -1;
+ return WebCore::find(m_data, m_length, c, start);
}
int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
@@ -726,18 +568,7 @@ int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
int StringImpl::reverseFind(UChar c, int index)
{
- if (index >= (int)m_length || m_length == 0)
- return -1;
-
- if (index < 0)
- index += m_length;
- while (1) {
- if (m_data[index] == c)
- return index;
- if (index == 0)
- return -1;
- index--;
- }
+ return WebCore::reverseFind(m_data, m_length, c, index);
}
int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
@@ -1032,20 +863,28 @@ PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* ch
ASSERT(characters);
ASSERT(length);
- StringBuffer strippedCopy(length);
+ // Optimize for the case where there are no Null characters by quickly
+ // searching for nulls, and then using StringImpl::create, which will
+ // memcpy the whole buffer. This is faster than assigning character by
+ // character during the loop.
+
+ // Fast case.
int foundNull = 0;
- for (unsigned i = 0; i < length; i++) {
+ for (unsigned i = 0; !foundNull && i < length; i++) {
int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
- strippedCopy[i] = c;
- foundNull |= ~c;
+ foundNull |= !c;
}
if (!foundNull)
- return adoptRef(new StringImpl(strippedCopy.release(), length, AdoptBuffer()));
+ return StringImpl::create(characters, length);
+
+ // Slow case.
+ StringBuffer strippedCopy(length);
unsigned strippedLength = 0;
for (unsigned i = 0; i < length; i++) {
if (int c = characters[i])
strippedCopy[strippedLength++] = c;
}
+ ASSERT(strippedLength < length); // Only take the slow case when stripping.
strippedCopy.shrink(strippedLength);
return adopt(strippedCopy);
}