26 files changed, 714 insertions, 286 deletions
diff --git a/WebCore/platform/text/AtomicString.cpp b/WebCore/platform/text/AtomicString.cpp
index 5f9abfd..d85f5ee 100644
--- a/WebCore/platform/text/AtomicString.cpp
+++ b/WebCore/platform/text/AtomicString.cpp
@@ -101,7 +101,7 @@ static inline bool equal(StringImpl* string, const UChar* characters, unsigned l
     if (string->length() != length)
         return false;
 
-#if PLATFORM(ARM)
+#if PLATFORM(ARM) || PLATFORM(SH4)
     const UChar* stringCharacters = string->characters();
     for (unsigned i = 0; i != length; ++i) {
         if (*stringCharacters++ != *characters++)
diff --git a/WebCore/platform/text/Base64.cpp b/WebCore/platform/text/Base64.cpp
index 920fa89..be19164 100644
--- a/WebCore/platform/text/Base64.cpp
+++ b/WebCore/platform/text/Base64.cpp
@@ -97,8 +97,8 @@ void base64Encode(const Vector<char>& in, Vector<char>& out, bool insertLFs)
                 count += 4;
             }
             out[didx++] = base64EncMap[(data[sidx] >> 2) & 077];
-            out[didx++] = base64EncMap[(data[sidx + 1] >> 4) & 017 | (data[sidx] << 4) & 077];
-            out[didx++] = base64EncMap[(data[sidx + 2] >> 6) & 003 | (data[sidx + 1] << 2) & 077];
+            out[didx++] = base64EncMap[((data[sidx + 1] >> 4) & 017) | ((data[sidx] << 4) & 077)];
+            out[didx++] = base64EncMap[((data[sidx + 2] >> 6) & 003) | ((data[sidx + 1] << 2) & 077)];
             out[didx++] = base64EncMap[data[sidx + 2] & 077];
             sidx += 3;
         }
@@ -110,7 +110,7 @@ void base64Encode(const Vector<char>& in, Vector<char>& out, bool insertLFs)
 
         out[didx++] = base64EncMap[(data[sidx] >> 2) & 077];
         if (sidx < len - 1) {
-            out[didx++] = base64EncMap[(data[sidx + 1] >> 4) & 017 | (data[sidx] << 4) & 077];
+            out[didx++] = base64EncMap[((data[sidx + 1] >> 4) & 017) | ((data[sidx] << 4) & 077)];
             out[didx++] = base64EncMap[(data[sidx + 1] << 2) & 077];
         } else
             out[didx++] = base64EncMap[(data[sidx] << 4) & 077];
diff --git a/WebCore/platform/text/BidiResolver.h b/WebCore/platform/text/BidiResolver.h
index ffd3d51..8288be4 100644
--- a/WebCore/platform/text/BidiResolver.h
+++ b/WebCore/platform/text/BidiResolver.h
@@ -254,7 +254,16 @@ template <class Iterator, class Run>
 void BidiResolver<Iterator, Run>::appendRun()
 {
     if (!emptyRun && !eor.atEnd()) {
-        addRun(new Run(sor.offset(), eor.offset() + 1, context(), m_direction));
+        unsigned startOffset = sor.offset();
+        unsigned endOffset = eor.offset();
+
+        if (!endOfLine.atEnd() && endOffset >= endOfLine.offset()) {
+            reachedEndOfLine = true;
+            endOffset = endOfLine.offset();
+        }
+
+        if (endOffset >= startOffset)
+            addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
 
         eor.increment();
         sor = eor;
@@ -352,8 +361,8 @@ void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Dire
                 m_direction = LeftToRight;
             }
         } else if (m_status.eor == ArabicNumber
-            || m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft)
-            || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft) {
+            || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft))
+            || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) {
             appendRun();
             m_direction = RightToLeft;
         }
@@ -722,8 +731,8 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, boo
                 case WhiteSpaceNeutral:
                 case OtherNeutral:
                     if (m_status.eor == ArabicNumber
-                        || m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft)
-                        || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft) {
+                        || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft))
+                        || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) {
                         // Terminate the run before the neutrals.
                         appendRun();
                         // Begin an R run for the neutrals.
diff --git a/WebCore/platform/text/CString.cpp b/WebCore/platform/text/CString.cpp
index 8e68628..90990f8 100644
--- a/WebCore/platform/text/CString.cpp
+++ b/WebCore/platform/text/CString.cpp
@@ -47,8 +47,8 @@ void CString::init(const char* str, unsigned length)
         return;
     
     m_buffer = CStringBuffer::create(length + 1);
-    memcpy(m_buffer->data(), str, length); 
-    m_buffer->data()[length] = '\0';
+    memcpy(m_buffer->mutableData(), str, length); 
+    m_buffer->mutableData()[length] = '\0';
 }
 
 const char* CString::data() const
@@ -61,7 +61,7 @@ char* CString::mutableData()
     copyBufferIfNeeded();
     if (!m_buffer)
         return 0;
-    return m_buffer->data();
+    return m_buffer->mutableData();
 }
     
 unsigned CString::length() const
@@ -73,7 +73,7 @@ CString CString::newUninitialized(size_t length, char*& characterBuffer)
 {
     CString result;
     result.m_buffer = CStringBuffer::create(length + 1);
-    char* bytes = result.m_buffer->data();
+    char* bytes = result.m_buffer->mutableData();
     bytes[length] = '\0';
     characterBuffer = bytes;
     return result;
@@ -87,7 +87,7 @@ void CString::copyBufferIfNeeded()
     int len = m_buffer->length();
     RefPtr<CStringBuffer> m_temp = m_buffer;
     m_buffer = CStringBuffer::create(len);
-    memcpy(m_buffer->data(), m_temp->data(), len);
+    memcpy(m_buffer->mutableData(), m_temp->data(), len);
 }
 
 bool operator==(const CString& a, const CString& b)
@@ -99,17 +99,4 @@ bool operator==(const CString& a, const CString& b)
     return !strncmp(a.data(), b.data(), min(a.length(), b.length()));
 }
 
-PassRefPtr<SharedBuffer> CString::releaseBuffer()
-{
-    if (!m_buffer)
-        return 0;
-
-    copyBufferIfNeeded();
-        
-    RefPtr<SharedBuffer> result = m_buffer->releaseBuffer();
-    m_buffer = 0;
-    return result.release();
-}
-
-
-}
+} // namespace WebCore
diff --git a/WebCore/platform/text/CString.h b/WebCore/platform/text/CString.h
index 09f112f..f084ddf 100644
--- a/WebCore/platform/text/CString.h
+++ b/WebCore/platform/text/CString.h
@@ -36,15 +36,15 @@ namespace WebCore {
 
     class CStringBuffer : public RefCounted<CStringBuffer> {
     public:
-        static PassRefPtr<CStringBuffer> create(unsigned length) { return adoptRef(new CStringBuffer(length)); }
-
-        char* data() { return m_vector.data(); }
-        size_t length() const { return m_vector.size(); }
+        const char* data() { return m_vector.data(); }
+        size_t length() { return m_vector.size(); }
         
-        PassRefPtr<SharedBuffer> releaseBuffer() { return SharedBuffer::adoptVector(m_vector); }
-
     private:
+        friend class CString;
+
+        static PassRefPtr<CStringBuffer> create(unsigned length) { return adoptRef(new CStringBuffer(length)); }
         CStringBuffer(unsigned length) : m_vector(length) { }
+        char* mutableData() { return m_vector.data(); }
 
         Vector<char> m_vector;
     };
@@ -56,6 +56,7 @@ namespace WebCore {
         CString() { }
         CString(const char*);
         CString(const char*, unsigned length);
+        CString(CStringBuffer* buffer) : m_buffer(buffer) { }
         static CString newUninitialized(size_t length, char*& characterBuffer);
 
         const char* data() const;
@@ -63,8 +64,8 @@ namespace WebCore {
         unsigned length() const;
 
         bool isNull() const { return !m_buffer; }
-        
-        PassRefPtr<SharedBuffer> releaseBuffer();
+
+        CStringBuffer* buffer() const { return m_buffer.get(); }
 
     private:
         void copyBufferIfNeeded();
diff --git a/WebCore/platform/text/PlatformString.h b/WebCore/platform/text/PlatformString.h
index 35d3079..a1541d2 100644
--- a/WebCore/platform/text/PlatformString.h
+++ b/WebCore/platform/text/PlatformString.h
@@ -27,15 +27,18 @@
 
 #include "StringImpl.h"
 
-#include <wtf/PassRefPtr.h>
+#ifdef __OBJC__
+#include <objc/objc.h>
+#endif
 
 #if USE(JSC)
 #include <runtime/Identifier.h>
 #else
-// runtime/Identifier.h includes HashMap.h and HashSet.h. We explicitly include 
-// them in the case of non-JSC builds to keep things consistent.
+// runtime/Identifier.h brings in a variety of wtf headers.  We explicitly
+// include them in the case of non-JSC builds to keep things consistent.
 #include <wtf/HashMap.h>
 #include <wtf/HashSet.h>
+#include <wtf/OwnPtr.h>
 #endif
 
 #if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
@@ -228,6 +231,9 @@ public:
     static String fromUTF8(const char*, size_t);
     static String fromUTF8(const char*);
 
+    // Tries to decode the passed-in bytes as UTF-8, but will fall back to Latin-1 if they are not valid UTF-8.
+    static String fromUTF8WithLatin1Fallback(const char*, size_t);
+    
     // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
     WTF::Unicode::Direction defaultWritingDirection() const { return m_impl ? m_impl->defaultWritingDirection() : WTF::Unicode::LeftToRight; }
 
diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp
index 638e45f..733b661 100644
--- a/WebCore/platform/text/String.cpp
+++ b/WebCore/platform/text/String.cpp
@@ -623,6 +623,15 @@ String String::fromUTF8(const char* string)
     return UTF8Encoding().decode(string, strlen(string));
 }
 
+String String::fromUTF8WithLatin1Fallback(const char* string, size_t size)
+{
+    String result = fromUTF8(string, size);
+    if (!result)
+        result = String(string, size);
+    
+    return result;
+}
+
 #if USE(JSC)
 String::String(const Identifier& str)
 {
diff --git a/WebCore/platform/text/StringImpl.cpp b/WebCore/platform/text/StringImpl.cpp
index 0556f8e..6bba990 100644
--- a/WebCore/platform/text/StringImpl.cpp
+++ b/WebCore/platform/text/StringImpl.cpp
@@ -2,7 +2,7 @@
  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
  *           (C) 2001 Dirk Mueller ( mueller@kde.org )
- * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
  * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
  *
  * This library is free software; you can redistribute it and/or
@@ -54,6 +54,27 @@ static inline void deleteUCharVector(const UChar* p)
     fastFree(const_cast<UChar*>(p));
 }
 
+// Some of the factory methods create buffers using fastMalloc.
+// We must ensure that all allocations of StringImpl are allocated using
+// fastMalloc so that we don't have mis-matched frees. We accomplish 
+// this by overriding the new and delete operators.
+void* StringImpl::operator new(size_t size, void* address)
+{
+    if (address)
+        return address;  // Allocating using an internal buffer
+    return fastMalloc(size);
+}
+
+void* StringImpl::operator new(size_t size)
+{
+    return fastMalloc(size);
+}
+
+void StringImpl::operator delete(void* address)
+{
+    fastFree(address);
+}
+
 // This constructor is used only to create the empty string.
 StringImpl::StringImpl()
     : m_length(0)
@@ -61,6 +82,7 @@ StringImpl::StringImpl()
     , m_hash(0)
     , m_inTable(false)
     , m_hasTerminatingNullCharacter(false)
+    , m_bufferIsInternal(false)
 {
     // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
     // with impunity. The empty string is special because it is never entered into
@@ -76,6 +98,7 @@ inline StringImpl::StringImpl(const UChar* characters, unsigned length)
     , m_hash(0)
     , m_inTable(false)
     , m_hasTerminatingNullCharacter(false)
+    , m_bufferIsInternal(false)
 {
     UChar* data = newUCharVector(length);
     memcpy(data, characters, length * sizeof(UChar));
@@ -87,6 +110,7 @@ inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacte
     , m_hash(str.m_hash)
     , m_inTable(false)
     , m_hasTerminatingNullCharacter(true)
+    , m_bufferIsInternal(false)
 {
     UChar* data = newUCharVector(str.m_length + 1);
     memcpy(data, str.m_data, str.m_length * sizeof(UChar));
@@ -99,6 +123,7 @@ inline StringImpl::StringImpl(const char* characters, unsigned length)
     , m_hash(0)
     , m_inTable(false)
     , m_hasTerminatingNullCharacter(false)
+    , m_bufferIsInternal(false)
 {
     ASSERT(characters);
     ASSERT(length);
@@ -117,6 +142,7 @@ inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
     , m_hash(0)
     , m_inTable(false)
     , m_hasTerminatingNullCharacter(false)
+    , m_bufferIsInternal(false)
 {
     ASSERT(characters);
     ASSERT(length);
@@ -128,6 +154,7 @@ StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
     , m_hash(hash)
     , m_inTable(true)
     , m_hasTerminatingNullCharacter(false)
+    , m_bufferIsInternal(false)
 {
     ASSERT(hash);
     ASSERT(characters);
@@ -144,6 +171,7 @@ StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash)
     , m_hash(hash)
     , m_inTable(true)
     , m_hasTerminatingNullCharacter(false)
+    , m_bufferIsInternal(false)
 {
     ASSERT(hash);
     ASSERT(characters);
@@ -161,7 +189,8 @@ StringImpl::~StringImpl()
 {
     if (m_inTable)
         AtomicString::remove(this);
-    deleteUCharVector(m_data);
+    if (!m_bufferIsInternal)
+        deleteUCharVector(m_data);
 }
 
 StringImpl* StringImpl::empty()
@@ -907,26 +936,8 @@ WTF::Unicode::Direction StringImpl::defaultWritingDirection()
 }
 
 // This is a hot function because it's used when parsing HTML.
-PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
+PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
 {
-    ASSERT(characters);
-    ASSERT(length);
-
-    // Optimize for the case where there are no Null characters by quickly
-    // searching for nulls, and then using StringImpl::create, which will
-    // memcpy the whole buffer.  This is faster than assigning character by
-    // character during the loop. 
-
-    // Fast case.
-    int foundNull = 0;
-    for (unsigned i = 0; !foundNull && i < length; i++) {
-        int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
-        foundNull |= !c;
-    }
-    if (!foundNull)
-        return StringImpl::create(characters, length);
-    
-    // Slow case.
     StringBuffer strippedCopy(length);
     unsigned strippedLength = 0;
     for (unsigned i = 0; i < length; i++) {
@@ -958,24 +969,44 @@ PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned leng
 {
     if (!characters || !length)
         return empty();
-    return adoptRef(new StringImpl(characters, length));
+
+    // Allocate a single buffer large enough to contain the StringImpl
+    // struct as well as the data which it contains. This removes one 
+    // heap allocation from this call.
+    size_t size = sizeof(StringImpl) + length * sizeof(UChar);
+    char* buffer = static_cast<char*>(fastMalloc(size));
+    UChar* data = reinterpret_cast<UChar*>(buffer + sizeof(StringImpl));
+    memcpy(data, characters, length * sizeof(UChar));
+    StringImpl* string = new (buffer) StringImpl(data, length, AdoptBuffer());
+    string->m_bufferIsInternal = true;
+    return adoptRef(string);
 }
 
 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
 {
     if (!characters || !length)
         return empty();
-    return adoptRef(new StringImpl(characters, length));
+
+    // Allocate a single buffer large enough to contain the StringImpl
+    // struct as well as the data which it contains. This removes one 
+    // heap allocation from this call.
+    size_t size = sizeof(StringImpl) + length * sizeof(UChar);
+    char* buffer = static_cast<char*>(fastMalloc(size));
+    UChar* data = reinterpret_cast<UChar*>(buffer + sizeof(StringImpl));
+    for (unsigned i = 0; i != length; ++i) {
+        unsigned char c = characters[i];
+        data[i] = c;
+    }
+    StringImpl* string = new (buffer) StringImpl(data, length, AdoptBuffer());
+    string->m_bufferIsInternal = true;
+    return adoptRef(string);
 }
 
 PassRefPtr<StringImpl> StringImpl::create(const char* string)
 {
     if (!string)
         return empty();
-    unsigned length = strlen(string);
-    if (!length)
-        return empty();
-    return adoptRef(new StringImpl(string, length));
+    return create(string, strlen(string));
 }
 
 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
@@ -985,7 +1016,7 @@ PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const Stri
 
 PassRefPtr<StringImpl> StringImpl::copy()
 {
-    return adoptRef(new StringImpl(m_data, m_length));
+    return create(m_data, m_length);
 }
 
 } // namespace WebCore
diff --git a/WebCore/platform/text/StringImpl.h b/WebCore/platform/text/StringImpl.h
index 281aa37..1242f27 100644
--- a/WebCore/platform/text/StringImpl.h
+++ b/WebCore/platform/text/StringImpl.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -24,7 +24,7 @@
 
 #include <limits.h>
 #include <wtf/ASCIICType.h>
-#include <wtf/Forward.h>
+#include <wtf/PassRefPtr.h>
 #include <wtf/RefCounted.h>
 #include <wtf/Vector.h>
 #include <wtf/unicode/Unicode.h>
@@ -166,12 +166,25 @@ public:
     operator NSString*();
 #endif
 
+    void operator delete(void*);
+
 private:
+    // Allocation from a custom buffer is only allowed internally to avoid
+    // mismatched allocators. Callers should use create().
+    void* operator new(size_t size);
+    void* operator new(size_t size, void* address);
+
+    static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
+
     unsigned m_length;
     const UChar* m_data;
     mutable unsigned m_hash;
     bool m_inTable;
     bool m_hasTerminatingNullCharacter;
+    // In some cases, we allocate the StringImpl struct and its data
+    // within a single heap buffer. In this case, the m_data pointer
+    // is an "internal buffer", and does not need to be deallocated.
+    bool m_bufferIsInternal;
 };
 
 bool equal(StringImpl*, StringImpl*);
@@ -274,6 +287,29 @@ static inline bool isSpaceOrNewline(UChar c)
     return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
 }
 
+// This is a hot function because it's used when parsing HTML.
+inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
+{
+    ASSERT(characters);
+    ASSERT(length);
+
+    // Optimize for the case where there are no Null characters by quickly
+    // searching for nulls, and then using StringImpl::create, which will
+    // memcpy the whole buffer.  This is faster than assigning character by
+    // character during the loop. 
+
+    // Fast case.
+    int foundNull = 0;
+    for (unsigned i = 0; !foundNull && i < length; i++) {
+        int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
+        foundNull |= !c;
+    }
+    if (!foundNull)
+        return StringImpl::create(characters, length);
+
+    return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
+}
+
 }
 
 namespace WTF {
diff --git a/WebCore/platform/text/TextBreakIterator.h b/WebCore/platform/text/TextBreakIterator.h
index 64717a4..7b3b963 100644
--- a/WebCore/platform/text/TextBreakIterator.h
+++ b/WebCore/platform/text/TextBreakIterator.h
@@ -29,7 +29,19 @@ namespace WebCore {
     class TextBreakIterator;
 
     // Note: The returned iterator is good only until you get another iterator.
+
+    // Iterates over "extended grapheme clusters", as defined in UAX #29.
+    // Note that platform implementations may be less sophisticated - e.g. ICU prior to
+    // version 4.0 only supports "legacy grapheme clusters".
+    // Use this for general text processing, e.g. string truncation.
     TextBreakIterator* characterBreakIterator(const UChar*, int length);
+
+    // This is similar to character break iterator in most cases, but is subject to
+    // platform UI conventions. One notable example where this can be different
+    // from character break iterator is Thai prepend characters, see bug 24342.
+    // Use this for insertion point and selection manipulations.
+    TextBreakIterator* cursorMovementIterator(const UChar*, int length);
+
     TextBreakIterator* wordBreakIterator(const UChar*, int length);
     TextBreakIterator* lineBreakIterator(const UChar*, int length);
     TextBreakIterator* sentenceBreakIterator(const UChar*, int length);
diff --git a/WebCore/platform/text/TextBreakIteratorICU.cpp b/WebCore/platform/text/TextBreakIteratorICU.cpp
index 9941f58..c4fc1b0 100644
--- a/WebCore/platform/text/TextBreakIteratorICU.cpp
+++ b/WebCore/platform/text/TextBreakIteratorICU.cpp
@@ -22,6 +22,7 @@
 #include "config.h"
 #include "TextBreakIterator.h"
 
+#include "PlatformString.h"
 #include "TextBreakIteratorInternalICU.h"
 
 #include <unicode/ubrk.h>
@@ -114,4 +115,119 @@ bool isTextBreak(TextBreakIterator* bi, int pos)
     return ubrk_isBoundary(bi, pos);
 }
 
+#ifndef BUILDING_ON_TIGER
+static TextBreakIterator* setUpIteratorWithRules(bool& createdIterator, TextBreakIterator*& iterator,
+    const char* breakRules, const UChar* string, int length)
+{
+    if (!string)
+        return 0;
+
+    if (!createdIterator) {
+        UParseError parseStatus;
+        UErrorCode openStatus = U_ZERO_ERROR;
+        String rules(breakRules);
+        iterator = static_cast<TextBreakIterator*>(ubrk_openRules(rules.characters(), rules.length(), 0, 0, &parseStatus, &openStatus));
+        createdIterator = true;
+        ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
+    }
+    if (!iterator)
+        return 0;
+
+    UErrorCode setTextStatus = U_ZERO_ERROR;
+    ubrk_setText(iterator, string, length, &setTextStatus);
+    if (U_FAILURE(setTextStatus))
+        return 0;
+
+    return iterator;
+}
+#endif // BUILDING_ON_TIGER
+
+TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+{
+#ifdef BUILDING_ON_TIGER
+    // ICU 3.2 cannot compile the below rules.
+    return characterBreakIterator(string, length);
+#else
+    // This rule set is based on character-break iterator rules of ICU 4.0
+    // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
+    // The major differences from the original ones are listed below:
+    // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
+    // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
+    // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
+    // * Added rules that prevent a cursor from moving before Japanese half-width katakana voiced marks.
+    static const char* kRules =
+        "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
+        "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
+        "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
+        "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced marks
+        "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks];"
+        "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
+        "$L       = [\\p{Grapheme_Cluster_Break = L}];"
+        "$V       = [\\p{Grapheme_Cluster_Break = V}];"
+        "$T       = [\\p{Grapheme_Cluster_Break = T}];"
+        "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
+        "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
+        "$Hin0    = [\\u0905-\\u0939];"    // Devanagari Letter A,...,Ha
+        "$HinV    = \\u094D;"              // Devanagari Sign Virama
+        "$Hin1    = [\\u0915-\\u0939];"    // Devanagari Letter Ka,...,Ha
+        "$Ben0    = [\\u0985-\\u09B9];"    // Bengali Letter A,...,Ha
+        "$BenV    = \\u09CD;"              // Bengali Sign Virama
+        "$Ben1    = [\\u0995-\\u09B9];"    // Bengali Letter Ka,...,Ha
+        "$Pan0    = [\\u0A05-\\u0A39];"    // Gurmukhi Letter A,...,Ha
+        "$PanV    = \\u0A4D;"              // Gurmukhi Sign Virama
+        "$Pan1    = [\\u0A15-\\u0A39];"    // Gurmukhi Letter Ka,...,Ha
+        "$Guj0    = [\\u0A85-\\u0AB9];"    // Gujarati Letter A,...,Ha
+        "$GujV    = \\u0ACD;"              // Gujarati Sign Virama
+        "$Guj1    = [\\u0A95-\\u0AB9];"    // Gujarati Letter Ka,...,Ha
+        "$Ori0    = [\\u0B05-\\u0B39];"    // Oriya Letter A,...,Ha
+        "$OriV    = \\u0B4D;"              // Oriya Sign Virama
+        "$Ori1    = [\\u0B15-\\u0B39];"    // Oriya Letter Ka,...,Ha
+        "$Tel0    = [\\u0C05-\\u0C39];"    // Telugu Letter A,...,Ha
+        "$TelV    = \\u0C4D;"              // Telugu Sign Virama
+        "$Tel1    = [\\u0C14-\\u0C39];"    // Telugu Letter Ka,...,Ha (NOTE(review): range starts at U+0C14, one below Ka at U+0C15 - confirm)
+        "$Kan0    = [\\u0C85-\\u0CB9];"    // Kannada Letter A,...,Ha
+        "$KanV    = \\u0CCD;"              // Kannada Sign Virama
+        "$Kan1    = [\\u0C95-\\u0CB9];"    // Kannada Letter Ka,...,Ha
+        "$Mal0    = [\\u0D05-\\u0D39];"    // Malayalam Letter A,...,Ha
+        "$MalV    = \\u0D4D;"              // Malayalam Sign Virama
+        "$Mal1    = [\\u0D15-\\u0D39];"    // Malayalam Letter Ka,...,Ha
+        "!!chain;"
+        "!!forward;"
+        "$CR $LF;"
+        "$L ($L | $V | $LV | $LVT);"
+        "($LV | $V) ($V | $T);"
+        "($LVT | $T) $T;"
+        "[^$Control $CR $LF] $Extend;"
+        "[^$Control $CR $LF] $SpacingMark;"
+        "$Hin0 $HinV $Hin1;"               // Devanagari Virama (forward)
+        "$Ben0 $BenV $Ben1;"               // Bengali Virama (forward)
+        "$Pan0 $PanV $Pan1;"               // Gurmukhi Virama (forward)
+        "$Guj0 $GujV $Guj1;"               // Gujarati Virama (forward)
+        "$Ori0 $OriV $Ori1;"               // Oriya Virama (forward)
+        "$Tel0 $TelV $Tel1;"               // Telugu Virama (forward)
+        "$Kan0 $KanV $Kan1;"               // Kannada Virama (forward)
+        "$Mal0 $MalV $Mal1;"               // Malayalam Virama (forward)
+        "!!reverse;"
+        "$LF $CR;"
+        "($L | $V | $LV | $LVT) $L;"
+        "($V | $T) ($LV | $V);"
+        "$T ($LVT | $T);"
+        "$Extend      [^$Control $CR $LF];"
+        "$SpacingMark [^$Control $CR $LF];"
+        "$Hin1 $HinV $Hin0;"               // Devanagari Virama (backward)
+        "$Ben1 $BenV $Ben0;"               // Bengali Virama (backward)
+        "$Pan1 $PanV $Pan0;"               // Gurmukhi Virama (backward)
+        "$Guj1 $GujV $Guj0;"               // Gujarati Virama (backward)
+        "$Ori1 $OriV $Ori0;"               // Oriya Virama (backward)
+        "$Tel1 $TelV $Tel0;"               // Telugu Virama (backward)
+        "$Kan1 $KanV $Kan0;"               // Kannada Virama (backward)
+        "$Mal1 $MalV $Mal0;"               // Malayalam Virama (backward)
+        "!!safe_reverse;"
+        "!!safe_forward;";
+    static bool createdCursorMovementIterator = false;
+    static TextBreakIterator* staticCursorMovementIterator;
+    return setUpIteratorWithRules(createdCursorMovementIterator, staticCursorMovementIterator, kRules, string, length);
+#endif // BUILDING_ON_TIGER
+}
+
 }
diff --git a/WebCore/platform/text/TextCodecICU.cpp b/WebCore/platform/text/TextCodecICU.cpp
index 72d45ad..72054fa 100644
--- a/WebCore/platform/text/TextCodecICU.cpp
+++ b/WebCore/platform/text/TextCodecICU.cpp
@@ -334,7 +334,7 @@ String TextCodecICU::decode(const char* bytes, size_t length, bool flush, bool s
 
     // <http://bugs.webkit.org/show_bug.cgi?id=17014>
     // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
-    if (m_encoding == "GBK" || m_encoding == "gb18030")
+    if (strcmp(m_encoding.name(), "GBK") == 0 || strcasecmp(m_encoding.name(), "gb18030") == 0)
         resultString.replace(0xE5E5, ideographicSpace);
 
     return resultString;
diff --git a/WebCore/platform/text/TextDecoder.cpp b/WebCore/platform/text/TextDecoder.cpp
deleted file mode 100644
index e39a6b7..0000000
--- a/WebCore/platform/text/TextDecoder.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */
-
-#include "config.h"
-#include "TextDecoder.h"
-
-#include "TextEncodingRegistry.h"
-
-// FIXME: Would be nice to also handle BOM for UTF-7 and UTF-32.
-
-namespace WebCore {
-
-TextDecoder::TextDecoder(const TextEncoding& encoding)
-    : m_encoding(encoding)
-    , m_checkedForBOM(false)
-    , m_numBufferedBytes(0)
-{
-}
-
-void TextDecoder::reset(const TextEncoding& encoding)
-{
-    m_encoding = encoding;
-    m_codec.clear();
-    m_checkedForBOM = false;
-    m_numBufferedBytes = 0;
-}
-
-String TextDecoder::checkForBOM(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
-{
-    ASSERT(!m_checkedForBOM);
-
-    // Check to see if we found a BOM.
-    size_t numBufferedBytes = m_numBufferedBytes;
-    size_t buf1Len = numBufferedBytes;
-    size_t buf2Len = length;
-    const unsigned char* buf1 = m_bufferedBytes;
-    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);
-    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
-    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
-    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
-    unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0;
-
-    const TextEncoding* encodingConsideringBOM = &m_encoding;
-    bool foundBOM = true;
-    size_t lengthOfBOM = 0;
-    if (c1 == 0xFF && c2 == 0xFE) {
-        if (c3 != 0 || c4 != 0)  {
-            encodingConsideringBOM = &UTF16LittleEndianEncoding();
-            lengthOfBOM = 2;
-        } else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) {
-            encodingConsideringBOM = &UTF32LittleEndianEncoding();
-            lengthOfBOM = 4;
-        } else
-            foundBOM = false;
-    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
-        encodingConsideringBOM = &UTF8Encoding();
-        lengthOfBOM = 3;
-    } else if (c1 == 0xFE && c2 == 0xFF) {
-        encodingConsideringBOM = &UTF16BigEndianEncoding();
-        lengthOfBOM = 2;
-    } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) {
-        encodingConsideringBOM = &UTF32BigEndianEncoding();
-        lengthOfBOM = 4;
-    } else
-        foundBOM = false;
-
-    if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
-        // Continue to look for the BOM.
-        memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
-        m_numBufferedBytes += length;
-        return "";
-    }
-
-    // Done checking for BOM.
-    m_codec.set(newTextCodec(*encodingConsideringBOM).release());
-    if (!m_codec)
-        return String();
-    m_checkedForBOM = true;
-
-    // Skip the BOM.
-    if (foundBOM) {
-        ASSERT(numBufferedBytes < lengthOfBOM);
-        size_t numUnbufferedBOMBytes = lengthOfBOM - numBufferedBytes;
-        ASSERT(numUnbufferedBOMBytes <= length);
-
-        data += numUnbufferedBOMBytes;
-        length -= numUnbufferedBOMBytes;
-        numBufferedBytes = 0;
-        m_numBufferedBytes = 0;
-    }
-
-    // Handle case where we have some buffered bytes to deal with.
-    if (numBufferedBytes) {
-        char bufferedBytes[sizeof(m_bufferedBytes)];
-        memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
-        m_numBufferedBytes = 0;
-
-        String bufferedResult = m_codec->decode(bufferedBytes, numBufferedBytes, false, stopOnError, sawError);
-        if (stopOnError && sawError)
-            return bufferedResult;
-        return bufferedResult + m_codec->decode(data, length, flush, stopOnError, sawError);
-    }
-
-    return m_codec->decode(data, length, flush, stopOnError, sawError);
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/TextDecoder.h b/WebCore/platform/text/TextDecoder.h
deleted file mode 100644
index 171cb59..0000000
--- a/WebCore/platform/text/TextDecoder.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */
-
-#ifndef TextDecoder_h
-#define TextDecoder_h
-
-#include "PlatformString.h"
-#include "TextCodec.h"
-#include "TextEncoding.h"
-#include <wtf/OwnPtr.h>
-
-namespace WebCore {
-
-    class TextCodec;
-
-    class TextDecoder {
-    public:
-        TextDecoder(const TextEncoding&);
-        void reset(const TextEncoding&);
-        const TextEncoding& encoding() const { return m_encoding; };
-
-        String decode(const char* data, size_t length, bool flush, bool stopOnError, bool& sawError)
-        {
-            if (!m_checkedForBOM)
-                return checkForBOM(data, length, flush, stopOnError, sawError);
-            return m_codec->decode(data, length, flush, stopOnError, sawError);
-        }
-
-    private:
-        String checkForBOM(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
-
-        TextEncoding m_encoding;
-        OwnPtr<TextCodec> m_codec;
-
-        bool m_checkedForBOM;
-        unsigned char m_numBufferedBytes;
-        unsigned char m_bufferedBytes[3];
-    };
-
-} // namespace WebCore
-
-#endif // TextDecoder_h
diff --git a/WebCore/platform/text/TextEncoding.cpp b/WebCore/platform/text/TextEncoding.cpp
index 063d96b..ed58412 100644
--- a/WebCore/platform/text/TextEncoding.cpp
+++ b/WebCore/platform/text/TextEncoding.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
  * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
  *
  * Redistribution and use in source and binary forms, with or without
@@ -30,7 +30,6 @@
 #include "CString.h"
 #include "PlatformString.h"
 #include "TextCodec.h"
-#include "TextDecoder.h"
 #include "TextEncodingRegistry.h"
 #if USE(ICU_UNICODE)
 #include <unicode/unorm.h>
@@ -73,7 +72,7 @@ String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b
     if (!m_name)
         return String();
 
-    return TextDecoder(*this).decode(data, length, true, stopOnError, sawError);
+    return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError);
 }
 
 CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const
@@ -165,10 +164,23 @@ UChar TextEncoding::backslashAsCurrencySymbol() const
 
 bool TextEncoding::isNonByteBasedEncoding() const
 {
+    if (noExtendedTextEncodingNameUsed()) { // Short path: only the two UTF-16 encodings are compared.
+        return *this == UTF16LittleEndianEncoding()
+            || *this == UTF16BigEndianEncoding();
+    }
+    // Otherwise the UTF-32 encodings are compared as well (presumably they are extended-only names; confirm).
     return *this == UTF16LittleEndianEncoding()
-           || *this == UTF16BigEndianEncoding()
-           || *this == UTF32BigEndianEncoding()
-           || *this == UTF32LittleEndianEncoding();
+        || *this == UTF16BigEndianEncoding()
+        || *this == UTF32BigEndianEncoding()
+        || *this == UTF32LittleEndianEncoding();
+}
+
+bool TextEncoding::isUTF7Encoding() const
+{
+    // Short-circuits to false while noExtendedTextEncodingNameUsed() holds, so
+    // UTF7Encoding() is only consulted once that check fails (presumably UTF-7
+    // is one of the extended encoding names -- TODO confirm).
+    return !noExtendedTextEncodingNameUsed() && *this == UTF7Encoding();
 }
 
 const TextEncoding& TextEncoding::closestByteBasedEquivalent() const
@@ -185,7 +197,7 @@ const TextEncoding& TextEncoding::closestByteBasedEquivalent() const
 // but it's fraught with problems and we'd rather steer clear of it.
 const TextEncoding& TextEncoding::encodingForFormSubmission() const
 {
-    if (isNonByteBasedEncoding() || *this == UTF7Encoding())
+    if (isNonByteBasedEncoding() || isUTF7Encoding())
         return UTF8Encoding();
     return *this;
 }
diff --git a/WebCore/platform/text/TextEncoding.h b/WebCore/platform/text/TextEncoding.h
index b2bb816..b3909f7 100644
--- a/WebCore/platform/text/TextEncoding.h
+++ b/WebCore/platform/text/TextEncoding.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -45,12 +45,14 @@ namespace WebCore {
         bool usesVisualOrdering() const;
         bool isJapanese() const;
         
-        PassRefPtr<StringImpl> displayString(PassRefPtr<StringImpl> str) const {
+        PassRefPtr<StringImpl> displayString(PassRefPtr<StringImpl> str) const
+        {
             if (m_backslashAsCurrencySymbol == '\\' || !str)
                 return str;
             return str->replace('\\', m_backslashAsCurrencySymbol);
         }
-        void displayBuffer(UChar* characters, unsigned len) const {
+        void displayBuffer(UChar* characters, unsigned len) const
+        {
             if (m_backslashAsCurrencySymbol == '\\')
                 return;
             for (unsigned i = 0; i < len; ++i) {
@@ -72,10 +74,11 @@ namespace WebCore {
 
     private:
         UChar backslashAsCurrencySymbol() const;
+        bool isNonByteBasedEncoding() const;
+        bool isUTF7Encoding() const;
 
         const char* m_name;
         UChar m_backslashAsCurrencySymbol;
-        bool isNonByteBasedEncoding() const;
     };
 
     inline bool operator==(const TextEncoding& a, const TextEncoding& b) { return a.name() == b.name(); }
diff --git a/WebCore/platform/text/TextEncodingDetector.h b/WebCore/platform/text/TextEncodingDetector.h
new file mode 100644
index 0000000..9f16ab0
--- /dev/null
+++ b/WebCore/platform/text/TextEncodingDetector.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2009 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextEncodingDetector_h
+#define TextEncodingDetector_h
+
+namespace WebCore {
+
+    class TextEncoding;
+
+    // Given a sequence of bytes in |data| of length |len| and an optional
+    // hintEncodingName, detect the most likely character encoding and store it in
+    // |detectedEncoding|; returns false when detection is unavailable or fails. How the hint
+    // is used is up to an implementation; currently, the only caller sets it to the parent frame encoding.
+    bool detectTextEncoding(const char* data, size_t len,
+                            const char* hintEncodingName,
+                            TextEncoding* detectedEncoding);
+
+} // namespace WebCore
+
+#endif
diff --git a/WebCore/platform/text/TextEncodingDetectorICU.cpp b/WebCore/platform/text/TextEncodingDetectorICU.cpp
new file mode 100644
index 0000000..26c997e
--- /dev/null
+++ b/WebCore/platform/text/TextEncodingDetectorICU.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2008, 2009 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextEncodingDetector.h"
+
+#include "TextEncoding.h"
+#include "UnusedParam.h"
+
+#ifndef BUILDING_ON_TIGER
+#include "unicode/ucnv.h"
+#include "unicode/ucsdet.h"
+#endif
+
+namespace WebCore {
+
+bool detectTextEncoding(const char* data, size_t len,
+                        const char* hintEncodingName,
+                        TextEncoding* detectedEncoding)
+{
+    // Start from a default-constructed encoding so that every failure path
+    // leaves |detectedEncoding| in a well-defined state.
+    *detectedEncoding = TextEncoding();
+#ifdef BUILDING_ON_TIGER
+    // Tiger came with ICU 3.2 and does not have the encoding detector.
+    UNUSED_PARAM(data);
+    UNUSED_PARAM(len);
+    UNUSED_PARAM(hintEncodingName);
+    return false;
+#else
+    int matchesCount = 0;
+    UErrorCode status = U_ZERO_ERROR;
+    UCharsetDetector* detector = ucsdet_open(&status);
+    if (U_FAILURE(status))
+        return false;
+    ucsdet_enableInputFilter(detector, true);
+    ucsdet_setText(detector, data, static_cast<int32_t>(len), &status);
+    if (U_FAILURE(status)) {
+        ucsdet_close(detector); // The detector is open at this point; don't leak it.
+        return false;
+    }
+
+    // FIXME: A few things we can do other than improving the ICU detector itself.
+    // 1. Use ucsdet_detectAll and pick the most likely one given "the context"
+    // (parent-encoding, referrer encoding, etc).
+    // 2. 'Emulate' Firefox/IE's non-Universal detectors (e.g. Chinese, Japanese,
+    // Russian, Korean and Hebrew) by picking the encoding with the highest
+    // confidence among the detector-specific limited set of candidate encodings.
+    // Below is a partial implementation of the first part of what's outlined above.
+    const UCharsetMatch** matches = ucsdet_detectAll(detector, &matchesCount, &status);
+    if (U_FAILURE(status)) {
+        ucsdet_close(detector);
+        return false;
+    }
+
+    const char* encoding = 0;
+    if (hintEncodingName) {
+        TextEncoding hintEncoding(hintEncodingName);
+        // 10 is the minimum confidence value consistent with the codepoint
+        // allocation in a given encoding. The size of a chunk passed to us varies
+        // even for the same html file (apparently depending on the network load).
+        // When we're given a rather short chunk, we don't have a sufficiently
+        // reliable signal other than the fact that the chunk is consistent with a
+        // set of encodings. So, instead of setting an arbitrary threshold, we have
+        // to scan all the encodings consistent with the data.
+        const int32_t kThreshold = 10;
+        for (int i = 0; i < matchesCount; ++i) {
+            int32_t confidence = ucsdet_getConfidence(matches[i], &status);
+            if (U_FAILURE(status)) {
+                status = U_ZERO_ERROR;
+                continue;
+            }
+            // ICU returns the matches best-first, so no later match can reach the threshold.
+            if (confidence < kThreshold)
+                break;
+            const char* matchEncoding = ucsdet_getName(matches[i], &status);
+            if (U_FAILURE(status)) {
+                status = U_ZERO_ERROR;
+                continue;
+            }
+            if (TextEncoding(matchEncoding) == hintEncoding) {
+                encoding = hintEncodingName;
+                break;
+            }
+        }
+    }
+    // If no match is found so far, just pick the top match. 
+    // This can happen, say, when a parent frame in EUC-JP refers to
+    // a child frame in Shift_JIS and both frames do NOT specify the encoding
+    // making us resort to auto-detection (when it IS turned on).
+    if (!encoding && matchesCount > 0)
+        encoding = ucsdet_getName(matches[0], &status);
+    // Succeed only if an encoding name was actually obtained; the detector is closed on every path.
+    const bool detected = U_SUCCESS(status) && encoding;
+    if (detected)
+        *detectedEncoding = TextEncoding(encoding);
+    ucsdet_close(detector);
+    return detected;
+#endif
+}
+
+}
diff --git a/WebCore/platform/text/TextEncodingDetectorNone.cpp b/WebCore/platform/text/TextEncodingDetectorNone.cpp
new file mode 100644
index 0000000..2655f08
--- /dev/null
+++ b/WebCore/platform/text/TextEncodingDetectorNone.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2009 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextEncodingDetector.h"
+
+#include "TextEncoding.h"
+#include "UnusedParam.h"
+
+namespace WebCore {
+
+bool detectTextEncoding(const char* data, size_t len,
+                        const char* hintEncodingName,
+                        TextEncoding* detectedEncoding)
+{
+    UNUSED_PARAM(data);
+    UNUSED_PARAM(len);
+    UNUSED_PARAM(hintEncodingName);
+    // No detector is available in this build: hand back a default encoding and report failure.
+    *detectedEncoding = TextEncoding();
+    return false;
+}
+
+}
diff --git a/WebCore/platform/text/TextEncodingRegistry.h b/WebCore/platform/text/TextEncodingRegistry.h
index 5ca2039..d204734 100644
--- a/WebCore/platform/text/TextEncodingRegistry.h
+++ b/WebCore/platform/text/TextEncodingRegistry.h
@@ -34,11 +34,8 @@ namespace WebCore {
     class TextCodec;
     class TextEncoding;
 
-    // Only TextEncoding and TextDecoder should use this function directly.
-    // - Use TextDecoder::decode to decode, since it handles BOMs.
-    // - Use TextEncoding::decode to decode if you have all the data at once.
-    //   It's implemented by calling TextDecoder::decode so works just as well.
-    // - Use TextEncoding::encode to encode, since it takes care of normalization.
+    // Use TextResourceDecoder::decode to decode resources, since it handles BOMs.
+    // Use TextEncoding::encode to encode, since it takes care of normalization.
     std::auto_ptr<TextCodec> newTextCodec(const TextEncoding&);
 
     // Only TextEncoding should use this function directly.
diff --git a/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp b/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp
new file mode 100644
index 0000000..9bebe74
--- /dev/null
+++ b/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2007, The Android Open Source Project
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+const char* currentTextBreakLocaleID()
+{
+    return "en_us"; // The locale ID is hard-coded here; nothing is queried at runtime.
+}
+
+}
diff --git a/WebCore/platform/text/cf/StringImplCF.cpp b/WebCore/platform/text/cf/StringImplCF.cpp
index ff595a5..8a2ae79 100644
--- a/WebCore/platform/text/cf/StringImplCF.cpp
+++ b/WebCore/platform/text/cf/StringImplCF.cpp
@@ -1,5 +1,5 @@
-/**
- * Copyright (C) 2006 Apple Computer, Inc.
+/*
+ * Copyright (C) 2006, 2009 Apple Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -24,14 +24,139 @@
 #if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
 
 #include <CoreFoundation/CoreFoundation.h>
+#include <wtf/MainThread.h>
+#include <wtf/PassRefPtr.h>
+#include <wtf/Threading.h>
+
+#if PLATFORM(MAC) && !defined(BUILDING_ON_TIGER)
+#include <objc/objc-auto.h>
+#endif
 
 namespace WebCore {
 
+namespace StringWrapperCFAllocator {
+
+    static StringImpl* currentString; // One-shot hand-off slot: set by createCFString(), claimed by allocate().
+
+    static const void* retain(const void* info)
+    {
+        return info; // Nothing to retain: create() passes 0 as the context info.
+    }
+
+    static void release(const void*)
+    {
+        ASSERT_NOT_REACHED(); // NOTE(review): presumably because the allocator is never released -- confirm.
+    }
+
+    static CFStringRef copyDescription(const void*)
+    {
+        return CFSTR("WebCore::String-based allocator");
+    }
+
+    static void* allocate(CFIndex size, CFOptionFlags, void*)
+    {
+        // Claim the StringImpl published through currentString, if any. The slot is
+        // only consulted on the main thread; elsewhere the block gets a null owner.
+        StringImpl* owner = isMainThread() ? currentString : 0;
+        if (owner) {
+            currentString = 0;
+            owner->ref(); // Balanced by call to deref in deallocate below.
+        }
+        // Block layout: [owner pointer][size bytes handed back to CF].
+        StringImpl** block = static_cast<StringImpl**>(fastMalloc(sizeof(StringImpl*) + size));
+        *block = owner;
+        return block + 1;
+    }
+
+    static void* reallocate(void* pointer, CFIndex newSize, CFOptionFlags, void*)
+    {
+        // Step back over the owner slot that allocate() placed in front of the payload.
+        StringImpl** oldBlock = static_cast<StringImpl**>(pointer) - 1;
+        ASSERT(!*oldBlock); // Only blocks without an owning StringImpl are expected to be resized.
+        void* newBlock = fastRealloc(oldBlock, sizeof(StringImpl*) + newSize);
+        return static_cast<StringImpl**>(newBlock) + 1;
+    }
+
+    static void deallocateOnMainThread(void* headerPointer)
+    {
+        // Main-thread half of deallocate(): receives the block start, not the payload pointer.
+        StringImpl** block = static_cast<StringImpl**>(headerPointer);
+        ASSERT(*block);
+        (*block)->deref(); // Balanced by call to ref in allocate above.
+        fastFree(block);
+    }
+
+    static void deallocate(void* pointer, void*)
+    {
+        StringImpl** block = static_cast<StringImpl**>(pointer) - 1;
+        StringImpl* owner = *block;
+        if (owner && !isMainThread()) {
+            // The owner is only dereferenced on the main thread, so hand the
+            // whole teardown (deref + free) over to it.
+            callOnMainThread(deallocateOnMainThread, block);
+            return;
+        }
+        // Either there is no owner to release, or we are already on the main thread.
+        if (owner)
+            owner->deref(); // Balanced by call to ref in allocate above.
+        fastFree(block);
+    }
+
+    static CFIndex preferredSize(CFIndex size, CFOptionFlags, void*)
+    {
+        // FIXME: If FastMalloc provided a "good size" callback, we'd want to use it here;
+        // for now the requested size is returned unchanged.
+        // Such an optimization would help performance for mutable strings created with
+        // this allocator, which typically only come from callers that make a new string
+        // using the old string's allocator, such as some of the call sites in CFURL.
+        return size;
+    }
+
+    static CFAllocatorRef create()
+    {
+#if PLATFORM(MAC) && !defined(BUILDING_ON_TIGER)
+        // Custom allocators aren't compatible with garbage collection, so no allocator is created when it is active.
+        if (objc_collectingEnabled())
+            return 0;
+#endif
+        CFAllocatorContext callbacks = { 0, 0, retain, release, copyDescription, allocate, reallocate, deallocate, preferredSize };
+        return CFAllocatorCreate(0, &callbacks);
+    }
+
+    static CFAllocatorRef allocator()
+    {
+        static CFAllocatorRef sharedAllocator = create(); // Created on first use; may be 0 (see create()).
+        return sharedAllocator;
+    }
+
+}
+
 CFStringRef StringImpl::createCFString()
 {
-    return CFStringCreateWithCharacters(NULL, reinterpret_cast<const UniChar*>(m_data), m_length);
+    CFAllocatorRef allocator = (m_length && isMainThread()) ? StringWrapperCFAllocator::allocator() : 0;
+    if (!allocator) // Empty string, off the main thread, or no wrapper allocator: use the default allocator.
+        return CFStringCreateWithCharacters(0, reinterpret_cast<const UniChar*>(m_data), m_length);
+
+    // Put pointer to the StringImpl in a global so the allocator can store it with the CFString.
+    ASSERT(!StringWrapperCFAllocator::currentString);
+    StringWrapperCFAllocator::currentString = this;
+    // kCFAllocatorNull as the contents deallocator: CF is asked to use m_data in place and never to free it.
+    CFStringRef string = CFStringCreateWithCharactersNoCopy(allocator, reinterpret_cast<const UniChar*>(m_data), m_length, kCFAllocatorNull);
+
+    // The allocator cleared the global when it read it, but also clear it here just in case.
+    ASSERT(!StringWrapperCFAllocator::currentString);
+    StringWrapperCFAllocator::currentString = 0;
+
+    return string;
 }
 
+// On StringImpl creation we could check if the allocator is the StringWrapperCFAllocator.
+// If it is, then we could find the original StringImpl and just return that. But to
+// do that we'd have to compute the offset from CFStringRef to the allocated block;
+// the CFStringRef is *not* at the start of an allocated block. Testing shows 1000x
+// more calls to createCFString than calls to the create functions with the appropriate
+// allocator, so it's probably not urgent to optimize that case.
+
 }
 
 #endif // PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
diff --git a/WebCore/platform/text/mac/ShapeArabic.c b/WebCore/platform/text/mac/ShapeArabic.c
index 1e0d91b..dd61ce5 100644
--- a/WebCore/platform/text/mac/ShapeArabic.c
+++ b/WebCore/platform/text/mac/ShapeArabic.c
@@ -36,6 +36,8 @@
 
 #include "ShapeArabic.h"
 
+#include <stdbool.h>
+#include <string.h>
 #include <unicode/utypes.h>
 #include <unicode/uchar.h>
 #include <unicode/ustring.h>
diff --git a/WebCore/platform/text/mac/StringImplMac.mm b/WebCore/platform/text/mac/StringImplMac.mm
index 3e0731c..d14c6d8 100644
--- a/WebCore/platform/text/mac/StringImplMac.mm
+++ b/WebCore/platform/text/mac/StringImplMac.mm
@@ -1,5 +1,5 @@
-/**
- * Copyright (C) 2006 Apple Computer, Inc.
+/*
+ * Copyright (C) 2006, 2009 Apple Inc. All rights reserved.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -21,13 +21,13 @@
 #include "config.h"
 #include "StringImpl.h"
 
-#include <Foundation/Foundation.h>
+#include "FoundationExtras.h"
 
 namespace WebCore {
 
 StringImpl::operator NSString *()
 {
-    return [NSString stringWithCharacters:m_data length:m_length];
+    return HardAutorelease(createCFString());
 }
 
 }
diff --git a/WebCore/platform/text/mac/StringMac.mm b/WebCore/platform/text/mac/StringMac.mm
index 77942ea..758ae1d 100644
--- a/WebCore/platform/text/mac/StringMac.mm
+++ b/WebCore/platform/text/mac/StringMac.mm
@@ -20,6 +20,7 @@
 
 #include "config.h"
 #include "PlatformString.h"
+#include <CoreFoundation/CFString.h>
 
 namespace WebCore {
 
diff --git a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
index 88b9680..4dc23ee 100644
--- a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
+++ b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
@@ -63,6 +63,11 @@ namespace WebCore {
         return static_cast<TextBreakIterator*>(iterator);
     }
 
+    TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+    {
+        return characterBreakIterator(string, length); // No separate cursor-movement rules here: reuse the character break iterator.
+    }
+
     TextBreakIterator* lineBreakIterator(const UChar* string, int length)
     {
         static QTextBoundaryFinder *iterator = 0;
@@ -250,6 +255,11 @@ TextBreakIterator* characterBreakIterator(const UChar* string, int length)
     return iterator;
 }
 
+TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+{
+    return characterBreakIterator(string, length); // No separate cursor-movement rules here: reuse the character break iterator.
+}
+
 TextBreakIterator* lineBreakIterator(const UChar*, int)
 {
     // not yet implemented