19 files changed, 88 insertions, 125 deletions
diff --git a/WebCore/platform/text/AtomicString.cpp b/WebCore/platform/text/AtomicString.cpp
index 17d7832..64c03cb 100644
--- a/WebCore/platform/text/AtomicString.cpp
+++ b/WebCore/platform/text/AtomicString.cpp
@@ -103,7 +103,9 @@ static inline bool equal(StringImpl* string, const UChar* characters, unsigned l
     if (string->length() != length)
         return false;
 
-#if PLATFORM(ARM) || PLATFORM(SH4)
+    // FIXME: Perhaps we should have a more abstract macro that indicates when
+    // going 4 bytes at a time is unsafe.
+#if CPU(ARM) || CPU(SH4)
     const UChar* stringCharacters = string->characters();
     for (unsigned i = 0; i != length; ++i) {
         if (*stringCharacters++ != *characters++)
@@ -250,7 +252,7 @@ PassRefPtr<StringImpl> AtomicString::add(const JSC::Identifier& identifier)
     if (!length)
         return StringImpl::empty();
 
-    HashAndCharacters buffer = { string->computedHash(), string->data(), length }; 
+    HashAndCharacters buffer = { string->existingHash(), string->data(), length }; 
     pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndCharacters, HashAndCharactersTranslator>(buffer);
     if (!addResult.second)
         return *addResult.first;
@@ -284,7 +286,7 @@ AtomicStringImpl* AtomicString::find(const JSC::Identifier& identifier)
     if (!length)
         return static_cast<AtomicStringImpl*>(StringImpl::empty());
 
-    HashAndCharacters buffer = { string->computedHash(), string->data(), length }; 
+    HashAndCharacters buffer = { string->existingHash(), string->data(), length }; 
     HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer);
     if (iterator == stringTable().end())
         return 0;
@@ -302,6 +304,8 @@ DEFINE_GLOBAL(AtomicString, emptyAtom, "")
 DEFINE_GLOBAL(AtomicString, textAtom, "#text")
 DEFINE_GLOBAL(AtomicString, commentAtom, "#comment")
 DEFINE_GLOBAL(AtomicString, starAtom, "*")
+DEFINE_GLOBAL(AtomicString, xmlAtom, "xml")
+DEFINE_GLOBAL(AtomicString, xmlnsAtom, "xmlns")
 
 void AtomicString::init()
 {
@@ -316,6 +320,8 @@ void AtomicString::init()
         new ((void*)&textAtom) AtomicString("#text");
         new ((void*)&commentAtom) AtomicString("#comment");
         new ((void*)&starAtom) AtomicString("*");
+        new ((void*)&xmlAtom) AtomicString("xml");
+        new ((void*)&xmlnsAtom) AtomicString("xmlns");
 
         initialized = true;
     }
diff --git a/WebCore/platform/text/AtomicString.h b/WebCore/platform/text/AtomicString.h
index 47d07c5..64a8bfe 100644
--- a/WebCore/platform/text/AtomicString.h
+++ b/WebCore/platform/text/AtomicString.h
@@ -156,6 +156,8 @@ inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return e
     extern const AtomicString textAtom;
     extern const AtomicString commentAtom;
     extern const AtomicString starAtom;
+    extern const AtomicString xmlAtom;
+    extern const AtomicString xmlnsAtom;
 #endif
 
 } // namespace WebCore
diff --git a/WebCore/platform/text/CharacterNames.h b/WebCore/platform/text/CharacterNames.h
index cd09447..ebaa1f1 100644
--- a/WebCore/platform/text/CharacterNames.h
+++ b/WebCore/platform/text/CharacterNames.h
@@ -37,6 +37,7 @@ namespace WebCore {
 
     const UChar blackSquare = 0x25A0;
     const UChar bullet = 0x2022;
+    const UChar ethiopicPrefaceColon = 0x1366;
     const UChar hebrewPunctuationGeresh = 0x05F3;
     const UChar hebrewPunctuationGershayim = 0x05F4;
     const UChar horizontalEllipsis = 0x2026;
@@ -59,6 +60,7 @@ namespace WebCore {
     const UChar rightToLeftMark = 0x200F;
     const UChar rightToLeftOverride = 0x202E;
     const UChar softHyphen = 0x00AD;
+    const UChar space = 0x0020;
     const UChar whiteBullet = 0x25E6;
     const UChar zeroWidthSpace = 0x200B;
 
diff --git a/WebCore/platform/text/PlatformString.h b/WebCore/platform/text/PlatformString.h
index 247536a..8a379be 100644
--- a/WebCore/platform/text/PlatformString.h
+++ b/WebCore/platform/text/PlatformString.h
@@ -31,16 +31,6 @@
 #include <objc/objc.h>
 #endif
 
-#if USE(JSC)
-#include <runtime/Identifier.h>
-#else
-// runtime/Identifier.h brings in a variety of wtf headers.  We explicitly
-// include them in the case of non-JSC builds to keep things consistent.
-#include <wtf/HashMap.h>
-#include <wtf/HashSet.h>
-#include <wtf/OwnPtr.h>
-#endif
-
 #if PLATFORM(CF)
 typedef const struct __CFString * CFStringRef;
 #endif
@@ -60,6 +50,13 @@ class wxString;
 class BString;
 #endif
 
+#if USE(JSC)
+namespace JSC {
+class Identifier;
+class UString;
+}
+#endif
+
 namespace WebCore {
 
 class CString;
diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp
index 24659a4..04b04ab 100644
--- a/WebCore/platform/text/String.cpp
+++ b/WebCore/platform/text/String.cpp
@@ -37,6 +37,8 @@
 #include <wtf/unicode/UTF8.h>
 
 #if USE(JSC)
+#include <runtime/Identifier.h>
+
 using JSC::Identifier;
 using JSC::UString;
 #endif
@@ -352,7 +354,7 @@ String String::format(const char *format, ...)
 
     return buffer;
 
-#elif PLATFORM(WINCE)
+#elif OS(WINCE)
     va_list args;
     va_start(args, format);
 
@@ -444,7 +446,7 @@ String String::number(unsigned long n)
 
 String String::number(long long n)
 {
-#if PLATFORM(WIN_OS) && !PLATFORM(QT)
+#if OS(WINDOWS) && !PLATFORM(QT)
     return String::format("%I64i", n);
 #else
     return String::format("%lli", n);
@@ -453,7 +455,7 @@ String String::number(long long n)
 
 String String::number(unsigned long long n)
 {
-#if PLATFORM(WIN_OS) && !PLATFORM(QT)
+#if OS(WINDOWS) && !PLATFORM(QT)
     return String::format("%I64u", n);
 #else
     return String::format("%llu", n);
diff --git a/WebCore/platform/text/StringBuilder.cpp b/WebCore/platform/text/StringBuilder.cpp
index c21e366..3e34981 100644
--- a/WebCore/platform/text/StringBuilder.cpp
+++ b/WebCore/platform/text/StringBuilder.cpp
@@ -95,4 +95,17 @@ String StringBuilder::toString() const
     return result;
 }
 
+void StringBuilder::clear()
+{
+    m_strings.clear();
+    m_totalLength = UINT_MAX; // Sentinel value; length() reports it as 0.
+}
+
+unsigned StringBuilder::length() const
+{
+    // UINT_MAX is the sentinel stored by clear(); it is reported as zero length.
+    const bool hasSentinelLength = m_totalLength == UINT_MAX;
+    return hasSentinelLength ? 0 : m_totalLength;
+}
+
 }
diff --git a/WebCore/platform/text/StringBuilder.h b/WebCore/platform/text/StringBuilder.h
index 8d76b9c..7f72fbf 100644
--- a/WebCore/platform/text/StringBuilder.h
+++ b/WebCore/platform/text/StringBuilder.h
@@ -42,6 +42,9 @@ namespace WebCore {
         void append(const String&);
         void append(UChar);
         void append(char);
+        
+        void clear();
+        unsigned length() const;
 
         String toString() const;
 
diff --git a/WebCore/platform/text/StringHash.h b/WebCore/platform/text/StringHash.h
index 21a478e..e6c548a 100644
--- a/WebCore/platform/text/StringHash.h
+++ b/WebCore/platform/text/StringHash.h
@@ -24,8 +24,8 @@
 
 #include "AtomicString.h"
 #include "PlatformString.h"
-#include <wtf/HashFunctions.h>
 #include <wtf/HashTraits.h>
+#include <wtf/StringHashFunctions.h>
 #include <wtf/unicode/Unicode.h>
 
 namespace WebCore {
@@ -52,7 +52,9 @@ namespace WebCore {
             if (aLength != bLength)
                 return false;
 
-#if PLATFORM(ARM) || PLATFORM(SH4)
+            // FIXME: Perhaps we should have a more abstract macro that indicates when
+            // going 4 bytes at a time is unsafe.
+#if CPU(ARM) || CPU(SH4)
             const UChar* aChars = a->characters();
             const UChar* bChars = b->characters();
             for (unsigned i = 0; i != aLength; ++i) {
diff --git a/WebCore/platform/text/StringImpl.cpp b/WebCore/platform/text/StringImpl.cpp
index 5cf4ced..3b61a0b 100644
--- a/WebCore/platform/text/StringImpl.cpp
+++ b/WebCore/platform/text/StringImpl.cpp
@@ -34,6 +34,7 @@
 #include "TextBreakIterator.h"
 #include "TextEncoding.h"
 #include "ThreadGlobalData.h"
+#include <runtime/UString.h>
 #include <wtf/dtoa.h>
 #include <wtf/Assertions.h>
 #include <wtf/Threading.h>
@@ -979,7 +980,7 @@ JSC::UString StringImpl::ustring()
 {
     SharedUChar* sharedBuffer = this->sharedBuffer();
     if (sharedBuffer)
-        return JSC::UString::Rep::create(const_cast<UChar*>(m_data), m_length, sharedBuffer);
+        return JSC::UString::Rep::create(sharedBuffer, const_cast<UChar*>(m_data), m_length);
 
     return JSC::UString(m_data, m_length);
 }
diff --git a/WebCore/platform/text/StringImpl.h b/WebCore/platform/text/StringImpl.h
index 5155fa5..f7a9d06 100644
--- a/WebCore/platform/text/StringImpl.h
+++ b/WebCore/platform/text/StringImpl.h
@@ -27,16 +27,12 @@
 #include <wtf/ASCIICType.h>
 #include <wtf/CrossThreadRefCounted.h>
 #include <wtf/OwnFastMallocPtr.h>
-#include <wtf/PassRefPtr.h>
 #include <wtf/PtrAndFlags.h>
 #include <wtf/RefCounted.h>
+#include <wtf/StringHashFunctions.h>
 #include <wtf/Vector.h>
 #include <wtf/unicode/Unicode.h>
 
-#if USE(JSC)
-#include <runtime/UString.h>
-#endif
-
 #if PLATFORM(CF)
 typedef const struct __CFString * CFStringRef;
 #endif
@@ -45,6 +41,10 @@ typedef const struct __CFString * CFStringRef;
 @class NSString;
 #endif
 
+namespace JSC {
+class UString;
+}
+
 namespace WebCore {
 
 class StringBuffer;
@@ -103,8 +103,8 @@ public:
 
     unsigned hash() { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; }
     unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
-    static unsigned computeHash(const UChar*, unsigned len);
-    static unsigned computeHash(const char*);
+    inline static unsigned computeHash(const UChar* data, unsigned length) { return WTF::stringHash(data, length); }
+    inline static unsigned computeHash(const char* data) { return WTF::stringHash(data); }
     
     // Returns a StringImpl suitable for use on another thread.
     PassRefPtr<StringImpl> crossThreadString();
@@ -214,91 +214,6 @@ inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) {
 
 bool equalIgnoringNullity(StringImpl*, StringImpl*);
 
-// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
-// or anything like that.
-const unsigned phi = 0x9e3779b9U;
-
-// Paul Hsieh's SuperFastHash
-// http://www.azillionmonkeys.com/qed/hash.html
-inline unsigned StringImpl::computeHash(const UChar* data, unsigned length)
-{
-    unsigned hash = phi;
-    
-    // Main loop.
-    for (unsigned pairCount = length >> 1; pairCount; pairCount--) {
-        hash += data[0];
-        unsigned tmp = (data[1] << 11) ^ hash;
-        hash = (hash << 16) ^ tmp;
-        data += 2;
-        hash += hash >> 11;
-    }
-    
-    // Handle end case.
-    if (length & 1) {
-        hash += data[0];
-        hash ^= hash << 11;
-        hash += hash >> 17;
-    }
-
-    // Force "avalanching" of final 127 bits.
-    hash ^= hash << 3;
-    hash += hash >> 5;
-    hash ^= hash << 2;
-    hash += hash >> 15;
-    hash ^= hash << 10;
-
-    // This avoids ever returning a hash code of 0, since that is used to
-    // signal "hash not computed yet", using a value that is likely to be
-    // effectively the same as 0 when the low bits are masked.
-    hash |= !hash << 31;
-    
-    return hash;
-}
-
-// Paul Hsieh's SuperFastHash
-// http://www.azillionmonkeys.com/qed/hash.html
-inline unsigned StringImpl::computeHash(const char* data)
-{
-    // This hash is designed to work on 16-bit chunks at a time. But since the normal case
-    // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
-    // were 16-bit chunks, which should give matching results
-
-    unsigned hash = phi;
-    
-    // Main loop
-    for (;;) {
-        unsigned char b0 = data[0];
-        if (!b0)
-            break;
-        unsigned char b1 = data[1];
-        if (!b1) {
-            hash += b0;
-            hash ^= hash << 11;
-            hash += hash >> 17;
-            break;
-        }
-        hash += b0;
-        unsigned tmp = (b1 << 11) ^ hash;
-        hash = (hash << 16) ^ tmp;
-        data += 2;
-        hash += hash >> 11;
-    }
-    
-    // Force "avalanching" of final 127 bits.
-    hash ^= hash << 3;
-    hash += hash >> 5;
-    hash ^= hash << 2;
-    hash += hash >> 15;
-    hash ^= hash << 10;
-
-    // This avoids ever returning a hash code of 0, since that is used to
-    // signal "hash not computed yet", using a value that is likely to be
-    // effectively the same as 0 when the low bits are masked.
-    hash |= !hash << 31;
-    
-    return hash;
-}
-
 static inline bool isSpaceOrNewline(UChar c)
 {
     // Use isASCIISpace() for basic Latin-1.
diff --git a/WebCore/platform/text/TextCodecICU.cpp b/WebCore/platform/text/TextCodecICU.cpp
index 7ebce2c..a8a817f 100644
--- a/WebCore/platform/text/TextCodecICU.cpp
+++ b/WebCore/platform/text/TextCodecICU.cpp
@@ -87,7 +87,7 @@ void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar
         const char* name = ucnv_getAvailableName(i);
         UErrorCode error = U_ZERO_ERROR;
         // Try MIME before trying IANA to pick up commonly used names like
-        // 'EUC-JP' instead of horrendeously long names like 
+        // 'EUC-JP' instead of horrendously long names like 
         // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. 
         const char* standardName = ucnv_getStandardName(name, "MIME", &error);
         if (!U_SUCCESS(error) || !standardName) {
diff --git a/WebCore/platform/text/TextEncoding.cpp b/WebCore/platform/text/TextEncoding.cpp
index ec9a8b0..4a30d62 100644
--- a/WebCore/platform/text/TextEncoding.cpp
+++ b/WebCore/platform/text/TextEncoding.cpp
@@ -129,7 +129,7 @@ CString TextEncoding::encode(const UChar* characters, size_t length, Unencodable
     UTF16Normalized.set(g_utf8_to_utf16(UTF8Normalized.get(), -1, 0, &UTF16Length, 0));
 
     return newTextCodec(*this)->encode(UTF16Normalized.get(), UTF16Length, handling);
-#elif PLATFORM(WINCE)
+#elif OS(WINCE)
     // normalization will be done by Windows CE API
     OwnPtr<TextCodec> textCodec = newTextCodec(*this);
     return textCodec.get() ? textCodec->encode(characters, length, handling) : CString();
diff --git a/WebCore/platform/text/TextEncodingDetectorICU.cpp b/WebCore/platform/text/TextEncodingDetectorICU.cpp
index fcb2aa9..c0d11de 100644
--- a/WebCore/platform/text/TextEncodingDetectorICU.cpp
+++ b/WebCore/platform/text/TextEncodingDetectorICU.cpp
@@ -69,7 +69,7 @@ bool detectTextEncoding(const char* data, size_t len,
     // "the context" (parent-encoding, referrer encoding, etc).
     // 2. 'Emulate' Firefox/IE's non-Universal detectors (e.g.
     // Chinese, Japanese, Russian, Korean and Hebrew) by picking the 
-    // encoding with a highest confidence among the detetctor-specific
+    // encoding with a highest confidence among the detector-specific
     // limited set of candidate encodings.
     // Below is a partial implementation of the first part of what's outlined
     // above.
diff --git a/WebCore/platform/text/TextEncodingRegistry.cpp b/WebCore/platform/text/TextEncodingRegistry.cpp
index a4be520..00ad2c9 100644
--- a/WebCore/platform/text/TextEncodingRegistry.cpp
+++ b/WebCore/platform/text/TextEncodingRegistry.cpp
@@ -51,7 +51,7 @@
 #if USE(GLIB_UNICODE)
 #include "gtk/TextCodecGtk.h"
 #endif
-#if PLATFORM(WINCE) && !PLATFORM(QT)
+#if OS(WINCE) && !PLATFORM(QT)
 #include "TextCodecWince.h"
 #endif
 
@@ -230,7 +230,7 @@ static void buildBaseTextCodecMaps()
     TextCodecGtk::registerBaseCodecs(addToTextCodecMap);
 #endif
 
-#if PLATFORM(WINCE) && !PLATFORM(QT)
+#if OS(WINCE) && !PLATFORM(QT)
     TextCodecWince::registerBaseEncodingNames(addToTextEncodingNameMap);
     TextCodecWince::registerBaseCodecs(addToTextCodecMap);
 #endif
@@ -258,7 +258,7 @@ static void extendTextCodecMaps()
     TextCodecGtk::registerExtendedCodecs(addToTextCodecMap);
 #endif
 
-#if PLATFORM(WINCE) && !PLATFORM(QT)
+#if OS(WINCE) && !PLATFORM(QT)
     TextCodecWince::registerExtendedEncodingNames(addToTextEncodingNameMap);
     TextCodecWince::registerExtendedCodecs(addToTextCodecMap);
 #endif
diff --git a/WebCore/platform/text/TextStream.cpp b/WebCore/platform/text/TextStream.cpp
index eb4bae7..baaa8b9 100644
--- a/WebCore/platform/text/TextStream.cpp
+++ b/WebCore/platform/text/TextStream.cpp
@@ -90,6 +90,13 @@ TextStream& TextStream::operator<<(const char* string)
     return *this;
 }
 
+TextStream& TextStream::operator<<(void* p)
+{
+    char formatted[printBufferSize]; // Scratch space for the "%p" rendering of p.
+    snprintf(formatted, sizeof(formatted) - 1, "%p", p);
+    return operator<<(static_cast<const char*>(formatted));
+}
+
 TextStream& TextStream::operator<<(const String& string)
 {
     append(m_text, string);
@@ -101,7 +108,7 @@ String TextStream::release()
     return String::adopt(m_text);
 }
 
-#if PLATFORM(WIN_OS) && PLATFORM(X86_64) && COMPILER(MSVC)
+#if OS(WINDOWS) && CPU(X86_64) && COMPILER(MSVC)
 TextStream& TextStream::operator<<(__int64 i)
 {
     char buffer[printBufferSize];
diff --git a/WebCore/platform/text/TextStream.h b/WebCore/platform/text/TextStream.h
index 71034f3..dfaa048 100644
--- a/WebCore/platform/text/TextStream.h
+++ b/WebCore/platform/text/TextStream.h
@@ -43,8 +43,9 @@ public:
     TextStream& operator<<(float);
     TextStream& operator<<(double);
     TextStream& operator<<(const char*);
+    TextStream& operator<<(void*);
     TextStream& operator<<(const String&);
-#if PLATFORM(WIN_OS) && PLATFORM(X86_64) && COMPILER(MSVC)
+#if OS(WINDOWS) && CPU(X86_64) && COMPILER(MSVC)
     TextStream& operator<<(unsigned __int64);
     TextStream& operator<<(__int64);
 #endif
diff --git a/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp b/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp
index 4e2aceb..9adb999 100644
--- a/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp
+++ b/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp
@@ -22,18 +22,29 @@
 #include "config.h"
 #include "TextBreakIteratorInternalICU.h"
 
+#include "CString.h"
+#include "Language.h"
+#include "PlatformString.h"
+#include <wtf/StdLibExtras.h>
+
 namespace WebCore {
 
+static const char* UILanguage()
+{
+    // Chrome's UI language can be different from the OS UI language on Windows.
+    // We want to return Chrome's UI language here.
+    DEFINE_STATIC_LOCAL(CString, locale, (defaultLanguage().latin1()));
+    return locale.data();
+}
+
 const char* currentSearchLocaleID()
 {
-    // FIXME: Should use system locale.
-    return "";
+    return UILanguage();
 }
 
 const char* currentTextBreakLocaleID()
 {
-    // FIXME: Should use system locale.
-    return "en_us";
+    return UILanguage();
 }
 
 } // namespace WebCore
diff --git a/WebCore/platform/text/qt/TextCodecQt.cpp b/WebCore/platform/text/qt/TextCodecQt.cpp
index b3f75cc..21e6e12 100644
--- a/WebCore/platform/text/qt/TextCodecQt.cpp
+++ b/WebCore/platform/text/qt/TextCodecQt.cpp
@@ -97,7 +97,7 @@ String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*
     // We chop input buffer to smaller buffers to avoid excessive memory consumption
     // when the input buffer is big.  This helps reduce peak memory consumption in
     // mobile devices where system RAM is limited.
-#if PLATFORM(SYMBIAN)
+#if OS(SYMBIAN)
     static const int MaxInputChunkSize = 32 * 1024;
 #else
     static const int MaxInputChunkSize = 1024 * 1024;
diff --git a/WebCore/platform/text/wince/TextBreakIteratorWince.cpp b/WebCore/platform/text/wince/TextBreakIteratorWince.cpp
index 26a5be2..7f46e4f 100644
--- a/WebCore/platform/text/wince/TextBreakIteratorWince.cpp
+++ b/WebCore/platform/text/wince/TextBreakIteratorWince.cpp
@@ -23,6 +23,7 @@
 #include "TextBreakIterator.h"
 
 #include "PlatformString.h"
+#include <wtf/StdLibExtras.h>
 #include <wtf/unicode/Unicode.h>
 
 using namespace WTF::Unicode;
@@ -308,4 +309,4 @@ TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
     return characterBreakIterator(string, length);
 }
 
-}
+} // namespace WebCore