Merge WebKit at r75315: Initial merge by git.

Change-Id: I570314b346ce101c935ed22a626b48c2af266b84
author: Steve Block <steveblock@google.com> 2011-05-06 11:45:16 +0100
committer: Steve Block <steveblock@google.com> 2011-05-12 13:44:10 +0100
commit: cad810f21b803229eb11403f9209855525a25d57 (patch)
tree: 29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /Source/WebCore/platform/text
parent: 121b0cf4517156d0ac5111caf9830c51b69bae8f (diff)
download: external_webkit-cad810f21b803229eb11403f9209855525a25d57.zip
external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz
external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2
85 files changed, 13885 insertions, 0 deletions
diff --git a/Source/WebCore/platform/text/AtomicStringKeyedMRUCache.h b/Source/WebCore/platform/text/AtomicStringKeyedMRUCache.h
new file mode 100644
index 0000000..b3004f7
--- /dev/null
+++ b/Source/WebCore/platform/text/AtomicStringKeyedMRUCache.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef AtomicStringKeyedMRUCache_h
+#define AtomicStringKeyedMRUCache_h
+
+#include <wtf/text/AtomicString.h>
+
+namespace WebCore {
+
+template<typename T, size_t capacity = 4>
+class AtomicStringKeyedMRUCache {
+public:
+    // Returns the value cached for |key|, creating and caching it on a miss. m_cache is ordered from least to most recently used.
+    T get(const AtomicString& key)
+    {
+        if (key.isNull()) { // The null key is served from its own static value and never enters m_cache.
+            DEFINE_STATIC_LOCAL(T, valueForNull, (createValueForNullKey()));
+            return valueForNull;
+        }
+
+        const size_t entryCount = m_cache.size();
+        for (size_t index = 0; index < entryCount; ++index) {
+            if (!(m_cache[index].first == key))
+                continue;
+            if (index + 1 == entryCount) // Already the most recently used entry.
+                return m_cache[index].second;
+            Entry hit = m_cache[index]; // Copy the entry out before removing its slot.
+            m_cache.remove(index);
+            m_cache.append(hit);
+            return m_cache.last().second;
+        }
+
+        if (entryCount == capacity) // Full: drop the entry at the front (least recently used).
+            m_cache.remove(0);
+        m_cache.append(std::make_pair(key, createValueForKey(key)));
+        return m_cache.last().second;
+    }
+
+private:
+    T createValueForNullKey();
+    T createValueForKey(const AtomicString&);
+
+    typedef pair<AtomicString, T> Entry;
+    typedef Vector<Entry, capacity> Cache;
+    Cache m_cache;
+};
+
+}
+
+#endif // AtomicStringKeyedMRUCache_h
diff --git a/Source/WebCore/platform/text/Base64.cpp b/Source/WebCore/platform/text/Base64.cpp
new file mode 100644
index 0000000..98b537a
--- /dev/null
+++ b/Source/WebCore/platform/text/Base64.cpp
@@ -0,0 +1,210 @@
+/*
+   Copyright (C) 2000-2001 Dawit Alemayehu <adawit@kde.org>
+   Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
+   Copyright (C) 2007, 2008 Apple Inc. All rights reserved.
+   Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License (LGPL)
+   version 2 as published by the Free Software Foundation.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+   This code is based on the java implementation in HTTPClient
+   package by Ronald Tschalär Copyright (C) 1996-1999.
+*/
+
+#include "config.h"
+#include "Base64.h"
+
+#include <limits.h>
+#include <wtf/StringExtras.h>
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+static const char base64EncMap[64] = { // Indexed by a 6-bit value (0-63); yields the ASCII code of 'A'-'Z', 'a'-'z', '0'-'9', '+', '/' in that order.
+    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+    0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+    0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+    0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
+    0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E,
+    0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
+    0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33,
+    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F
+};
+
+static const char base64DecMap[128] = { // Inverse of base64EncMap, indexed by ASCII code (0-127). Codes outside the alphabet hold 0x00, the same as 'A', so callers must validate first.
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3F, // '+' -> 62, '/' -> 63
+    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, // '0'-'7' -> 52-59
+    0x3C, 0x3D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // '8', '9' -> 60, 61
+    0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 'A' (second entry) -> 0 onwards
+    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
+    0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
+    0x17, 0x18, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, // ... 'Z' -> 25
+    0x00, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, // 'a' (second entry) -> 26 onwards
+    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+    0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,
+    0x31, 0x32, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00 // ... 'z' -> 51
+};
+
+void base64Encode(const Vector<char>& in, Vector<char>& out, bool insertLFs) // Convenience overload: encodes the whole of |in| into |out|.
+{
+    base64Encode(in.data(), in.size(), out, insertLFs); // The pointer overload takes an unsigned length; in.size() is converted implicitly.
+}
+
+void base64Encode(const char* data, unsigned len, Vector<char>& out, bool insertLFs)
+{
+    // Base64-encodes |len| bytes at |data| into |out|. |out| is cleared first and stays empty for empty or oversized input. With |insertLFs|, a '\n' separates each 76 output characters from the next.
+    out.clear();
+    if (!len)
+        return;
+
+    // If the input string is pathologically large, just return nothing.
+    // Note: Keep this in sync with the "outLength" computation below.
+    // Rather than being perfectly precise, this is a bit conservative.
+    const unsigned maxInputBufferSize = UINT_MAX / 77 * 76 / 4 * 3 - 2;
+    if (len > maxInputBufferSize)
+        return;
+
+    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(data); // Unsigned view: no shift below ever sees a negative value.
+    unsigned sidx = 0;
+    unsigned didx = 0;
+
+    unsigned outLength = ((len + 2) / 3) * 4;
+
+    // Deal with the 76 character per line limit specified in RFC 2045.
+    insertLFs = (insertLFs && outLength > 76);
+    if (insertLFs)
+        outLength += ((outLength - 1) / 76);
+
+    int count = 0;
+    out.grow(outLength);
+
+    // 3-byte to 4-byte conversion + 0-63 to ascii printable conversion
+    if (len > 1) {
+        while (sidx < len - 2) {
+            if (insertLFs) {
+                if (count && !(count % 76))
+                    out[didx++] = '\n';
+                count += 4;
+            }
+            out[didx++] = base64EncMap[(bytes[sidx] >> 2) & 077];
+            out[didx++] = base64EncMap[((bytes[sidx + 1] >> 4) & 017) | ((bytes[sidx] << 4) & 077)];
+            out[didx++] = base64EncMap[((bytes[sidx + 2] >> 6) & 003) | ((bytes[sidx + 1] << 2) & 077)];
+            out[didx++] = base64EncMap[bytes[sidx + 2] & 077];
+            sidx += 3;
+        }
+    }
+
+    if (sidx < len) { // One or two input bytes are left over after the last full 3-byte group.
+        if (insertLFs && (count > 0) && !(count % 76))
+            out[didx++] = '\n';
+
+        out[didx++] = base64EncMap[(bytes[sidx] >> 2) & 077];
+        if (sidx < len - 1) {
+            out[didx++] = base64EncMap[((bytes[sidx + 1] >> 4) & 017) | ((bytes[sidx] << 4) & 077)];
+            out[didx++] = base64EncMap[(bytes[sidx + 1] << 2) & 077];
+        } else
+            out[didx++] = base64EncMap[(bytes[sidx] << 4) & 077];
+    }
+
+    // Add padding: whatever part of the pre-sized buffer is still unwritten becomes '='.
+    while (didx < out.size())
+        out[didx++] = '=';
+}
+
+bool base64Decode(const Vector<char>& in, Vector<char>& out, Base64DecodePolicy policy)
+{
+    out.clear();
+
+    // The pointer overload takes an unsigned length, so an input whose size
+    // does not fit in one is rejected here with |out| left empty; everything
+    // else is handed to that overload unchanged.
+    const bool fitsInUnsigned = in.size() <= UINT_MAX;
+    return fitsInUnsigned && base64Decode(in.data(), in.size(), out, policy);
+}
+
+template<typename T> // T is the input code unit type; this file instantiates it with char and UChar.
+static inline bool base64DecodeInternal(const T* data, unsigned len, Vector<char>& out, Base64DecodePolicy policy) // Decodes |len| units of base64 at |data| into |out|; returns false on malformed input, in which case |out| may be left holding partial data.
+{
+    out.clear();
+    if (!len)
+        return true; // Empty input decodes to empty output.
+
+    out.grow(len); // Upper bound: the first pass stores at most one 6-bit value per input unit.
+
+    bool sawEqualsSign = false;
+    unsigned outLength = 0;
+    for (unsigned idx = 0; idx < len; idx++) { // Pass 1: validate, and map each alphabet character to its 6-bit value.
+        unsigned ch = data[idx];
+        if (ch == '=')
+            sawEqualsSign = true;
+        else if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ch == '+' || ch == '/') {
+            if (sawEqualsSign)
+                return false; // Alphabet characters may not follow a '='.
+            out[outLength] = base64DecMap[ch];
+            outLength++;
+        } else if (policy == FailOnInvalidCharacter || (policy == IgnoreWhitespace && !isSpaceOrNewline(ch)))
+            return false; // Any other character is skipped unless the policy rejects it.
+    }
+
+    if (!outLength)
+        return !sawEqualsSign; // No data characters at all: valid only if there was no '=' either.
+
+    // Valid data is (n * 4 + [0,2,3]) characters long.
+    if ((outLength % 4) == 1)
+        return false;
+    
+    // 4-byte to 3-byte conversion
+    outLength -= (outLength + 3) / 4; // From here on outLength is the number of decoded bytes.
+    if (!outLength)
+        return false;
+
+    unsigned sidx = 0;
+    unsigned didx = 0;
+    if (outLength > 1) {
+        while (didx < outLength - 2) { // Pass 2, in place: each write index is at or before the 6-bit values still to be read.
+            out[didx] = (((out[sidx] << 2) & 255) | ((out[sidx + 1] >> 4) & 003));
+            out[didx + 1] = (((out[sidx + 1] << 4) & 255) | ((out[sidx + 2] >> 2) & 017));
+            out[didx + 2] = (((out[sidx + 2] << 6) & 255) | (out[sidx + 3] & 077));
+            sidx += 4;
+            didx += 3;
+        }
+    }
+
+    if (didx < outLength) // First byte of a trailing partial group, if any.
+        out[didx] = (((out[sidx] << 2) & 255) | ((out[sidx + 1] >> 4) & 003));
+
+    if (++didx < outLength) // Second byte of a trailing partial group, if any.
+        out[didx] = (((out[sidx + 1] << 4) & 255) | ((out[sidx + 2] >> 2) & 017));
+
+    if (outLength < out.size())
+        out.shrink(outLength); // Drop the scratch space left over from pass 1.
+
+    return true;
+}
+
+bool base64Decode(const char* data, unsigned len, Vector<char>& out, Base64DecodePolicy policy) // Decodes |len| chars of base64 at |data| into |out|; false on malformed input.
+{
+    return base64DecodeInternal<char>(data, len, out, policy);
+}
+
+bool base64Decode(const String& in, Vector<char>& out, Base64DecodePolicy policy) // Same decoding, run over the UChar code units of |in|.
+{
+    return base64DecodeInternal<UChar>(in.characters(), in.length(), out, policy);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/Base64.h b/Source/WebCore/platform/text/Base64.h
new file mode 100644
index 0000000..211bd3c
--- /dev/null
+++ b/Source/WebCore/platform/text/Base64.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
+ * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef Base64_h
+#define Base64_h
+
+#include <wtf/Forward.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+enum Base64DecodePolicy { FailOnInvalidCharacter, IgnoreWhitespace, IgnoreInvalidCharacters }; // For input characters other than the base64 alphabet and '=': reject all / reject all except whitespace / skip all.
+
+void base64Encode(const Vector<char>&, Vector<char>&, bool insertLFs = false); // Encodes the first vector into the second, which is cleared first.
+void base64Encode(const char*, unsigned, Vector<char>&, bool insertLFs = false); // Same, for a pointer and byte count; insertLFs breaks the output into 76-character lines.
+
+bool base64Decode(const String&, Vector<char>&, Base64DecodePolicy = FailOnInvalidCharacter); // The decoders return false when the input is not valid base64 under the given policy.
+bool base64Decode(const Vector<char>&, Vector<char>&, Base64DecodePolicy = FailOnInvalidCharacter);
+bool base64Decode(const char*, unsigned, Vector<char>&, Base64DecodePolicy = FailOnInvalidCharacter);
+
+}
+
+#endif // Base64_h
diff --git a/Source/WebCore/platform/text/BidiContext.cpp b/Source/WebCore/platform/text/BidiContext.cpp
new file mode 100644
index 0000000..fb6b8cf
--- /dev/null
+++ b/Source/WebCore/platform/text/BidiContext.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "BidiContext.h"
+
+namespace WebCore {
+
+using namespace WTF::Unicode;
+
+inline PassRefPtr<BidiContext> BidiContext::createUncached(unsigned char level, Direction direction, bool override, BidiContext* parent) // Always allocates a new context; create() decides when one is needed.
+{
+    return adoptRef(new BidiContext(level, direction, override, parent));
+}
+
+PassRefPtr<BidiContext> BidiContext::create(unsigned char level, Direction direction, bool override, BidiContext* parent)
+{
+    ASSERT(direction == (level % 2 ? RightToLeft : LeftToRight));
+
+    // A context with a parent is always a fresh allocation.
+    if (parent)
+        return createUncached(level, direction, override, parent);
+
+    // Parentless contexts are shared: each of the four (direction, override) combinations is created on first use and kept in a function-local static.
+    ASSERT(level <= 1);
+    if (level) {
+        if (override) {
+            static BidiContext* rtlOverrideContext = createUncached(1, RightToLeft, true, 0).releaseRef();
+            return rtlOverrideContext;
+        }
+        static BidiContext* rtlContext = createUncached(1, RightToLeft, false, 0).releaseRef();
+        return rtlContext;
+    }
+
+    if (override) {
+        static BidiContext* ltrOverrideContext = createUncached(0, LeftToRight, true, 0).releaseRef();
+        return ltrOverrideContext;
+    }
+    static BidiContext* ltrContext = createUncached(0, LeftToRight, false, 0).releaseRef();
+    return ltrContext;
+}
+
+bool operator==(const BidiContext& c1, const BidiContext& c2) // Structural equality: same level, override and direction all the way up both parent chains.
+{
+    for (const BidiContext *lhs = &c1, *rhs = &c2; lhs != rhs; lhs = lhs->parent(), rhs = rhs->parent()) { // Reaching a common context ends the walk as equal.
+        if (lhs->level() != rhs->level() || lhs->override() != rhs->override() || lhs->dir() != rhs->dir())
+            return false;
+        if (!lhs->parent() || !rhs->parent()) // A chain ended: equal only if both ended together.
+            return !lhs->parent() && !rhs->parent();
+    }
+    return true;
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/BidiContext.h b/Source/WebCore/platform/text/BidiContext.h
new file mode 100644
index 0000000..b52815f
--- /dev/null
+++ b/Source/WebCore/platform/text/BidiContext.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef BidiContext_h
+#define BidiContext_h
+
+#include <wtf/Assertions.h>
+#include <wtf/PassRefPtr.h>
+#include <wtf/RefCounted.h>
+#include <wtf/RefPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// Used to keep track of explicit embeddings.
+class BidiContext : public RefCounted<BidiContext> {
+public:
+    static PassRefPtr<BidiContext> create(unsigned char level, WTF::Unicode::Direction direction, bool override = false, BidiContext* parent = 0); // The only public way to obtain a context; defined in BidiContext.cpp.
+
+    BidiContext* parent() const { return m_parent.get(); } // Enclosing context, or 0 when there is none.
+    unsigned char level() const { return m_level; }
+    WTF::Unicode::Direction dir() const { return static_cast<WTF::Unicode::Direction>(m_direction); } // Widens the bit-field back to the enum type.
+    bool override() const { return m_override; }
+
+private:
+    BidiContext(unsigned char level, WTF::Unicode::Direction direction, bool override, BidiContext* parent) // Private: instances come from create()/createUncached().
+        : m_level(level)
+        , m_direction(direction)
+        , m_override(override)
+        , m_parent(parent)
+    {
+    }
+
+    static PassRefPtr<BidiContext> createUncached(unsigned char level, WTF::Unicode::Direction, bool override, BidiContext* parent); // Unconditionally allocates a new context.
+
+    unsigned char m_level;
+    unsigned m_direction : 5; // Direction
+    bool m_override : 1;
+    RefPtr<BidiContext> m_parent; // Holds a reference on the parent context.
+};
+
+bool operator==(const BidiContext&, const BidiContext&);
+
+} // namespace WebCore
+
+#endif // BidiContext_h
diff --git a/Source/WebCore/platform/text/BidiResolver.h b/Source/WebCore/platform/text/BidiResolver.h
new file mode 100644
index 0000000..1f87115
--- /dev/null
+++ b/Source/WebCore/platform/text/BidiResolver.h
@@ -0,0 +1,968 @@
+/*
+ * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc.  All right reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef BidiResolver_h
+#define BidiResolver_h
+
+#include "BidiContext.h"
+#include <wtf/Noncopyable.h>
+#include <wtf/PassRefPtr.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+template <class Iterator> struct MidpointState {
+    MidpointState()
+        : numMidpoints(0)
+        , currentMidpoint(0)
+        , betweenMidpoints(false)
+    {
+    }
+
+    // Rewinds the bookkeeping to "no midpoints"; the |midpoints| storage itself is not touched.
+    void reset()
+    {
+        numMidpoints = currentMidpoint = 0;
+        betweenMidpoints = false;
+    }
+
+    // The array is reused from line to line and never cleared; numMidpoints and currentMidpoint track the item count and the position in it.
+    Vector<Iterator> midpoints;
+    unsigned numMidpoints;
+    unsigned currentMidpoint;
+    bool betweenMidpoints;
+};
+
+// The BidiStatus at a given position (typically the end of a line) can
+// be cached and then used to restart bidi resolution at that position.
+struct BidiStatus {
+    BidiStatus() // All three directions start as OtherNeutral; |context| is left default-constructed.
+        : eor(WTF::Unicode::OtherNeutral)
+        , lastStrong(WTF::Unicode::OtherNeutral)
+        , last(WTF::Unicode::OtherNeutral)
+    {
+    }
+
+    BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext)
+        : eor(eorDir)
+        , lastStrong(lastStrongDir)
+        , last(lastDir)
+        , context(bidiContext)
+    {
+    }
+
+    WTF::Unicode::Direction eor; // Direction recorded for the current end-of-run position.
+    WTF::Unicode::Direction lastStrong; // Most recent strong direction.
+    WTF::Unicode::Direction last; // Most recent direction of any kind.
+    RefPtr<BidiContext> context; // Current embedding context.
+};
+
+inline bool operator==(const BidiStatus& status1, const BidiStatus& status2) // Field-wise equality. Contexts compare by value; a null context (as in a default-constructed status) equals only another null context.
+{
+    return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && (status1.context.get() == status2.context.get() || (status1.context.get() && status2.context.get() && *(status1.context) == *(status2.context)));
+}
+
+inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2) // Exact negation of operator== for BidiStatus.
+{
+    return !(status1 == status2);
+}
+
+struct BidiCharacterRun {
+    BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir)
+        : m_start(start)
+        , m_stop(stop)
+        , m_override(context->override())
+        , m_next(0)
+    {
+        // A neutral run takes the direction of its embedding context.
+        if (dir == WTF::Unicode::OtherNeutral)
+            dir = context->dir();
+
+        const bool isNumber = dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber;
+        m_level = context->level();
+
+        // Add the level of the run itself (cases I1 & I2): the adjustment depends on the parity of the context level.
+        if (m_level % 2) {
+            if (isNumber || dir == WTF::Unicode::LeftToRight)
+                ++m_level;
+        } else if (dir == WTF::Unicode::RightToLeft)
+            ++m_level;
+        else if (isNumber)
+            m_level += 2;
+    }
+
+    void destroy() { delete this; }
+
+    int start() const { return m_start; }
+    int stop() const { return m_stop; }
+    unsigned char level() const { return m_level; }
+    bool reversed(bool visuallyOrdered) { return !visuallyOrdered && m_level % 2; } // Odd-level runs are reversed unless the text is visually ordered.
+    bool dirOverride(bool visuallyOrdered) { return visuallyOrdered || m_override; }
+
+    BidiCharacterRun* next() const { return m_next; }
+
+    unsigned char m_level;
+    int m_start;
+    int m_stop;
+    bool m_override;
+    BidiCharacterRun* m_next; // Next run in the singly linked list; 0 at the tail.
+};
+
+template <class Iterator, class Run> class BidiResolver : public Noncopyable { // Resolves bidi runs over text walked by |Iterator| and collects them in a singly linked list of |Run| objects.
+public:
+    BidiResolver() // Starts with no runs, an empty pending run and a neutral direction.
+        : m_direction(WTF::Unicode::OtherNeutral)
+        , reachedEndOfLine(false)
+        , emptyRun(true)
+        , m_firstRun(0)
+        , m_lastRun(0)
+        , m_logicallyLastRun(0)
+        , m_runCount(0)
+    {
+    }
+
+    const Iterator& position() const { return current; }
+    void setPosition(const Iterator& position) { current = position; }
+
+    void increment() { current.increment(); } // Advances the current position by one step of the iterator.
+
+    BidiContext* context() const { return m_status.context.get(); } // The embedding context lives inside m_status.
+    void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
+
+    void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
+    void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
+    void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
+
+    WTF::Unicode::Direction dir() const { return m_direction; }
+    void setDir(WTF::Unicode::Direction d) { m_direction = d; }
+
+    const BidiStatus& status() const { return m_status; }
+    void setStatus(const BidiStatus& s) { m_status = s; } // Taken by reference so the status (and its context reference) is copied once, not twice.
+
+    MidpointState<Iterator>& midpointState() { return m_midpointState; }
+
+    void embed(WTF::Unicode::Direction); // Queues an explicit embedding code ...
+    void commitExplicitEmbedding(); // ... and this applies the queued codes to the context.
+
+    void createBidiRunsForLine(const Iterator& end, bool visualOrder = false, bool hardLineBreak = false);
+
+    Run* firstRun() const { return m_firstRun; }
+    Run* lastRun() const { return m_lastRun; }
+    Run* logicallyLastRun() const { return m_logicallyLastRun; }
+    unsigned runCount() const { return m_runCount; }
+
+    void addRun(Run*); // Appends to the run list.
+    void prependRun(Run*); // Inserts at the head of the run list.
+
+    void moveRunToEnd(Run*); // Relinks an existing run as the tail.
+    void moveRunToBeginning(Run*); // Relinks an existing run as the head.
+
+    void deleteRuns();
+
+protected:
+    void appendRun(); // Turns the pending sor..eor range into a Run and resets the direction state.
+    void reverseRuns(unsigned start, unsigned end);
+
+    Iterator current; // Position returned by position().
+    Iterator sor; // Start of the pending run.
+    Iterator eor; // End of the pending run.
+    Iterator last;
+    BidiStatus m_status;
+    WTF::Unicode::Direction m_direction; // Direction passed to the next Run that appendRun() creates.
+    Iterator endOfLine; // appendRun() clamps runs to this position.
+    bool reachedEndOfLine; // Set by appendRun() once a run had to be clamped to endOfLine.
+    Iterator lastBeforeET;
+    bool emptyRun; // True while there is no pending text to turn into a run.
+
+    Run* m_firstRun; // Head of the run list (0 when empty).
+    Run* m_lastRun; // Tail of the run list (0 when empty).
+    Run* m_logicallyLastRun;
+    unsigned m_runCount; // Number of runs added through addRun()/prependRun().
+    MidpointState<Iterator> m_midpointState;
+
+private:
+    void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to);
+    void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from);
+    void checkDirectionInLowerRaiseEmbeddingLevel(); // Shared preamble of the two functions above.
+
+    Vector<WTF::Unicode::Direction, 8> m_currentExplicitEmbeddingSequence; // Codes queued by embed() until commitExplicitEmbedding() runs.
+};
+
+template <class Iterator, class Run>
+inline void BidiResolver<Iterator, Run>::addRun(Run* run)
+{
+    if (m_firstRun)
+        m_lastRun->m_next = run; // Non-empty list: link |run| after the current tail.
+    else
+        m_firstRun = run; // Empty list: |run| is also the head.
+    m_lastRun = run;
+    ++m_runCount;
+}
+
+template <class Iterator, class Run>
+inline void BidiResolver<Iterator, Run>::prependRun(Run* run)
+{
+    ASSERT(!run->m_next);
+
+    if (m_lastRun)
+        run->m_next = m_firstRun; // Non-empty list: the old head follows |run|.
+    else
+        m_lastRun = run; // Empty list: |run| is also the tail.
+    m_firstRun = run;
+    ++m_runCount;
+}
+
+template <class Iterator, class Run>
+inline void BidiResolver<Iterator, Run>::moveRunToEnd(Run* run)
+{
+    ASSERT(m_firstRun);
+    ASSERT(m_lastRun);
+    ASSERT(run->m_next);
+
+    // Find the run linked just before |run|; |previous| stays null when |run| is the head itself.
+    Run* previous = 0;
+    for (Run* walker = m_firstRun; walker != run; walker = walker->next())
+        previous = walker;
+
+    // Unlink |run| from its current position.
+    if (previous)
+        previous->m_next = run->m_next;
+    else
+        m_firstRun = run->next();
+
+    // Relink it after the current tail.
+    run->m_next = 0;
+    m_lastRun->m_next = run;
+    m_lastRun = run;
+}
+
+template <class Iterator, class Run>
+inline void BidiResolver<Iterator, Run>::moveRunToBeginning(Run* run)
+{
+    ASSERT(m_firstRun);
+    ASSERT(m_lastRun);
+    ASSERT(run != m_firstRun);
+
+    // Find the run linked just before |run|, starting from the head.
+    Run* previous = m_firstRun;
+    for (Run* following = previous->next(); following != run; following = previous->next())
+        previous = following;
+
+    // Unlink |run|; if it was the tail, its predecessor becomes the new tail.
+    previous->m_next = run->m_next;
+    if (m_lastRun == run)
+        m_lastRun = previous;
+
+    // Relink it in front of the old head.
+    run->m_next = m_firstRun;
+    m_firstRun = run;
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::appendRun()
+{
+    const bool hasPendingRun = !emptyRun && !eor.atEnd();
+    if (hasPendingRun) {
+        unsigned runStart = sor.offset();
+        unsigned runEnd = eor.offset();
+        if (!endOfLine.atEnd() && runEnd >= endOfLine.offset()) { // Clamp the run to the end of the line and remember that it was hit.
+            reachedEndOfLine = true;
+            runEnd = endOfLine.offset();
+        }
+
+        if (runStart <= runEnd)
+            addRun(new Run(runStart, runEnd + 1, context(), m_direction));
+
+        eor.increment(); // The next run starts one step past the old end-of-run position.
+        sor = eor;
+    }
+
+    m_direction = WTF::Unicode::OtherNeutral;
+    m_status.eor = WTF::Unicode::OtherNeutral;
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction d)
+{
+    // Only queues the explicit embedding code; commitExplicitEmbedding() is what later walks the queued sequence.
+    ASSERT(d == WTF::Unicode::PopDirectionalFormat || d == WTF::Unicode::LeftToRightEmbedding || d == WTF::Unicode::LeftToRightOverride || d == WTF::Unicode::RightToLeftEmbedding || d == WTF::Unicode::RightToLeftOverride);
+    Vector<WTF::Unicode::Direction, 8>& pendingCodes = m_currentExplicitEmbeddingSequence;
+    pendingCodes.append(d);
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
+{
+    using namespace WTF::Unicode;
+
+    // Shared preamble of lowerExplicitEmbeddingLevel() and raiseExplicitEmbeddingLevel():
+    // two sanity checks, then a fallback for a still-unresolved m_direction.
+    ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
+
+    // State here is: bidi.sor ... bidi.eor ... bidi.last eor, i.e. the caller must append the
+    // bidi.sor-bidi.eor run or extend it through bidi.last. Because bidi control characters
+    // are part of a BidiRun, the last direction may itself be an embedding code when
+    // embeddings are nested, for example: "&#x202a;&#x202b;....."
+    ASSERT(m_status.last == EuropeanNumberSeparator || m_status.last == EuropeanNumberTerminator
+        || m_status.last == CommonNumberSeparator || m_status.last == BoundaryNeutral
+        || m_status.last == BlockSeparator || m_status.last == SegmentSeparator
+        || m_status.last == WhiteSpaceNeutral || m_status.last == OtherNeutral
+        || m_status.last == RightToLeftEmbedding || m_status.last == LeftToRightEmbedding
+        || m_status.last == RightToLeftOverride || m_status.last == LeftToRightOverride
+        || m_status.last == PopDirectionalFormat);
+
+    // A direction that is already resolved is kept as is.
+    if (m_direction != OtherNeutral)
+        return;
+    m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from) // Flushes pending text into runs, then restarts with an empty run whose last/lastStrong directions are |from|. NOTE(review): the call site is outside this excerpt; presumably |from| is the direction of the level being left - confirm.
+{
+    using namespace WTF::Unicode;
+
+    if (!emptyRun && eor != last) { // There is pending text and eor has not caught up with last yet.
+        checkDirectionInLowerRaiseEmbeddingLevel();
+        if (from == LeftToRight) {
+            // bidi.sor ... bidi.eor ... bidi.last L
+            if (m_status.eor == EuropeanNumber) {
+                if (m_status.lastStrong != LeftToRight) {
+                    m_direction = EuropeanNumber;
+                    appendRun();
+                }
+            } else if (m_status.eor == ArabicNumber) {
+                m_direction = ArabicNumber;
+                appendRun();
+            } else if (m_status.lastStrong != LeftToRight) {
+                appendRun();
+                m_direction = LeftToRight; // Set after appendRun(), which resets m_direction to OtherNeutral.
+            }
+        } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
+            appendRun();
+            m_direction = RightToLeft; // Set after appendRun(), which resets m_direction to OtherNeutral.
+        }
+        eor = last; // Extend the pending run through |last| before the final appendRun() below.
+    }
+    appendRun();
+    emptyRun = true;
+    // sor for the new run is determined by the higher level (rule X10)
+    setLastDir(from);
+    setLastStrongDir(from);
+    eor = Iterator(); // Reset eor to a default-constructed iterator.
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to) // Flushes pending text into runs, then restarts with an empty run whose last/lastStrong directions are |to|. commitExplicitEmbedding() derives |from| and |to| from the parity of the old and new levels.
+{
+    using namespace WTF::Unicode;
+
+    if (!emptyRun && eor != last) { // There is pending text and eor has not caught up with last yet.
+        checkDirectionInLowerRaiseEmbeddingLevel();
+        if (to == LeftToRight) {
+            // bidi.sor ... bidi.eor ... bidi.last L
+            if (m_status.eor == EuropeanNumber) {
+                if (m_status.lastStrong != LeftToRight) {
+                    m_direction = EuropeanNumber;
+                    appendRun();
+                }
+            } else if (m_status.eor == ArabicNumber) {
+                m_direction = ArabicNumber;
+                appendRun();
+            } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) {
+                appendRun();
+                m_direction = LeftToRight; // Set after appendRun(), which resets m_direction to OtherNeutral.
+            }
+        } else if (m_status.eor == ArabicNumber
+            || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft))
+            || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) {
+            appendRun();
+            m_direction = RightToLeft; // Set after appendRun(), which resets m_direction to OtherNeutral.
+        }
+        eor = last; // Extend the pending run through |last| before the final appendRun() below.
+    }
+    appendRun();
+    emptyRun = true;
+    setLastDir(to);
+    setLastStrongDir(to);
+    eor = Iterator(); // Reset eor to a default-constructed iterator.
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::commitExplicitEmbedding()
+{
+    using namespace WTF::Unicode;
+    // Applies the queued explicit embedding codes to the context chain, then closes the current run if the level changed.
+    unsigned char fromLevel = context()->level();
+    RefPtr<BidiContext> toContext = context();
+    // Replay each queued code: a pop moves to the parent context (if any); any other code pushes a new context.
+    for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
+        Direction embedding = m_currentExplicitEmbeddingSequence[i];
+        if (embedding == PopDirectionalFormat) {
+            if (BidiContext* parentContext = toContext->parent()) // a pop at a context without a parent is ignored
+                toContext = parentContext;
+        } else {
+            Direction direction = (embedding == RightToLeftEmbedding || embedding == RightToLeftOverride) ? RightToLeft : LeftToRight;
+            bool override = embedding == LeftToRightOverride || embedding == RightToLeftOverride;
+            unsigned char level = toContext->level();
+            if (direction == RightToLeft) {
+                // Go to the least greater odd integer
+                level += 1;
+                level |= 1;
+            } else {
+                // Go to the least greater even integer
+                level += 2;
+                level &= ~1;
+            }
+            if (level < 61) // codes that would produce level 61 or higher are ignored
+                toContext = BidiContext::create(level, direction, override, toContext.get());
+        }
+    }
+
+    unsigned char toLevel = toContext->level();
+    // Odd levels are treated as right-to-left, even levels as left-to-right.
+    if (toLevel > fromLevel)
+        raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight);
+    else if (toLevel < fromLevel)
+        lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight);
+
+    setContext(toContext);
+
+    m_currentExplicitEmbeddingSequence.clear();
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::deleteRuns()
+{
+    emptyRun = true; // set even when there are no runs to destroy
+    if (!m_firstRun)
+        return;
+    // Destroy every run in the list, fetching each successor before destroying the current run.
+    Run* curr = m_firstRun;
+    while (curr) {
+        Run* s = curr->next();
+        curr->destroy();
+        curr = s;
+    }
+    // Reset the list bookkeeping to the empty state.
+    m_firstRun = 0;
+    m_lastRun = 0;
+    m_runCount = 0;
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::reverseRuns(unsigned start, unsigned end)
+{
+    if (start >= end)
+        return;
+    // Reverses, in place, the runs at zero-based list positions [start, end] (both inclusive); nothing to do when start >= end.
+    ASSERT(end < m_runCount);
+    
+    // Get the item before the start of the runs to reverse and put it in
+    // |beforeStart|.  |curr| should point to the first run to reverse.
+    Run* curr = m_firstRun;
+    Run* beforeStart = 0;
+    unsigned i = 0;
+    while (i < start) {
+        i++;
+        beforeStart = curr;
+        curr = curr->next();
+    }
+    // Remember the first run of the range, then advance |curr| to the last run of the range.
+    Run* startRun = curr;
+    while (i < end) {
+        i++;
+        curr = curr->next();
+    }
+    Run* endRun = curr;
+    Run* afterEnd = curr->next();
+    // Walk the range again, pointing each run at its predecessor; the first run is pointed at |afterEnd|.
+    i = start;
+    curr = startRun;
+    Run* newNext = afterEnd;
+    while (i <= end) {
+        // Do the reversal.
+        Run* next = curr->next();
+        curr->m_next = newNext;
+        newNext = curr;
+        curr = next;
+        i++;
+    }
+
+    // Now hook up beforeStart and afterEnd to the startRun and endRun.
+    if (beforeStart)
+        beforeStart->m_next = endRun;
+    else
+        m_firstRun = endRun; // the range began at the head of the list
+
+    startRun->m_next = afterEnd;
+    if (!afterEnd)
+        m_lastRun = startRun; // the range ended at the tail of the list
+}
+
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, bool visualOrder, bool hardLineBreak)
+{
+    using namespace WTF::Unicode;
+    // Builds the bidi runs for the text from |current| up to |end|; unless |visualOrder| is set, the runs are then reordered (rule L2).
+    ASSERT(m_direction == OtherNeutral);
+    // |hardLineBreak| makes the state carried past the line reset to the outermost context's direction (see below).
+    emptyRun = true;
+
+    eor = Iterator();
+
+    last = current;
+    bool pastEnd = false;
+    BidiResolver<Iterator, Run> stateAtEnd;
+    // stateAtEnd snapshots the resolver state when |end| is reached; scanning continues past |end| until eor == current, then the snapshot is restored.
+    while (true) {
+        Direction dirCurrent;
+        if (pastEnd && (hardLineBreak || current.atEnd())) {
+            BidiContext* c = context();
+            while (c->parent()) // climb to the outermost context
+                c = c->parent();
+            dirCurrent = c->dir();
+            if (hardLineBreak) {
+                // A deviation from the Unicode Bidi Algorithm in order to match
+                // Mac OS X text and WinIE: a hard line break resets bidi state.
+                stateAtEnd.setContext(c);
+                stateAtEnd.setEorDir(dirCurrent);
+                stateAtEnd.setLastDir(dirCurrent);
+                stateAtEnd.setLastStrongDir(dirCurrent);
+            }
+        } else {
+            dirCurrent = current.direction();
+            if (context()->override()
+                    && dirCurrent != RightToLeftEmbedding
+                    && dirCurrent != LeftToRightEmbedding
+                    && dirCurrent != RightToLeftOverride
+                    && dirCurrent != LeftToRightOverride
+                    && dirCurrent != PopDirectionalFormat)
+                dirCurrent = context()->dir(); // in an override context, everything except embedding/override/pop codes takes the context direction
+            else if (dirCurrent == NonSpacingMark)
+                dirCurrent = m_status.last; // a non-spacing mark takes the type recorded in m_status.last
+        }
+
+        ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
+        switch (dirCurrent) {
+
+        // embedding and overrides (X1-X9 in the Bidi specs)
+        case RightToLeftEmbedding:
+        case LeftToRightEmbedding:
+        case RightToLeftOverride:
+        case LeftToRightOverride:
+        case PopDirectionalFormat:
+            embed(dirCurrent);
+            commitExplicitEmbedding();
+            break;
+
+        // strong types
+        case LeftToRight:
+            switch(m_status.last) {
+                case RightToLeft:
+                case RightToLeftArabic:
+                case EuropeanNumber:
+                case ArabicNumber:
+                    if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
+                        appendRun();
+                    break;
+                case LeftToRight:
+                    break;
+                case EuropeanNumberSeparator:
+                case EuropeanNumberTerminator:
+                case CommonNumberSeparator:
+                case BoundaryNeutral:
+                case BlockSeparator:
+                case SegmentSeparator:
+                case WhiteSpaceNeutral:
+                case OtherNeutral:
+                    if (m_status.eor == EuropeanNumber) {
+                        if (m_status.lastStrong != LeftToRight) {
+                            // the numbers need to be on a higher embedding level, so let's close that run
+                            m_direction = EuropeanNumber;
+                            appendRun();
+                            if (context()->dir() != LeftToRight) {
+                                // the neutrals take the embedding direction, which is R
+                                eor = last;
+                                m_direction = RightToLeft;
+                                appendRun();
+                            }
+                        }
+                    } else if (m_status.eor == ArabicNumber) {
+                        // Arabic numbers are always on a higher embedding level, so let's close that run
+                        m_direction = ArabicNumber;
+                        appendRun();
+                        if (context()->dir() != LeftToRight) {
+                            // the neutrals take the embedding direction, which is R
+                            eor = last;
+                            m_direction = RightToLeft;
+                            appendRun();
+                        }
+                    } else if (m_status.lastStrong != LeftToRight) {
+                        // the text after eor takes the embedding direction when that direction is R
+                        if (context()->dir() == RightToLeft) {
+                            eor = last; 
+                            m_direction = RightToLeft;
+                        }
+                        appendRun();
+                    }
+                default: // the neutral cases above fall through to here
+                    break;
+            }
+            eor = current;
+            m_status.eor = LeftToRight;
+            m_status.lastStrong = LeftToRight;
+            m_direction = LeftToRight;
+            break;
+        case RightToLeftArabic:
+        case RightToLeft:
+            switch (m_status.last) {
+                case LeftToRight:
+                case EuropeanNumber:
+                case ArabicNumber:
+                    appendRun(); // then falls through to the break below
+                case RightToLeft:
+                case RightToLeftArabic:
+                    break;
+                case EuropeanNumberSeparator:
+                case EuropeanNumberTerminator:
+                case CommonNumberSeparator:
+                case BoundaryNeutral:
+                case BlockSeparator:
+                case SegmentSeparator:
+                case WhiteSpaceNeutral:
+                case OtherNeutral:
+                    if (m_status.eor == EuropeanNumber) {
+                        if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
+                            eor = last;
+                        appendRun();
+                    } else if (m_status.eor == ArabicNumber)
+                        appendRun();
+                    else if (m_status.lastStrong == LeftToRight) {
+                        if (context()->dir() == LeftToRight)
+                            eor = last;
+                        appendRun();
+                    }
+                default: // the neutral cases above fall through to here
+                    break;
+            }
+            eor = current;
+            m_status.eor = RightToLeft;
+            m_status.lastStrong = dirCurrent; // keeps the R / AL distinction, which the EuropeanNumber case checks
+            m_direction = RightToLeft;
+            break;
+
+            // weak types:
+
+        case EuropeanNumber:
+            if (m_status.lastStrong != RightToLeftArabic) {
+                // Handled as EN here. If the last strong type was AL, the EN is treated as AN instead: this block is skipped and control falls through to the ArabicNumber case.
+                switch (m_status.last) {
+                    case EuropeanNumber:
+                    case LeftToRight:
+                        break;
+                    case RightToLeft:
+                    case RightToLeftArabic:
+                    case ArabicNumber:
+                        eor = last;
+                        appendRun();
+                        m_direction = EuropeanNumber;
+                        break;
+                    case EuropeanNumberSeparator:
+                    case CommonNumberSeparator:
+                        if (m_status.eor == EuropeanNumber)
+                            break;
+                    case EuropeanNumberTerminator: // ES/CS not following an EN run fall through to here
+                    case BoundaryNeutral:
+                    case BlockSeparator:
+                    case SegmentSeparator:
+                    case WhiteSpaceNeutral:
+                    case OtherNeutral:
+                        if (m_status.eor == EuropeanNumber) {
+                            if (m_status.lastStrong == RightToLeft) {
+                                // ENs on both sides behave like Rs, so the neutrals should be R.
+                                // Terminate the EN run.
+                                appendRun();
+                                // Make an R run.
+                                eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
+                                m_direction = RightToLeft;
+                                appendRun();
+                                // Begin a new EN run.
+                                m_direction = EuropeanNumber;
+                            }
+                        } else if (m_status.eor == ArabicNumber) {
+                            // Terminate the AN run.
+                            appendRun();
+                            if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
+                                // Make an R run.
+                                eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
+                                m_direction = RightToLeft;
+                                appendRun();
+                                // Begin a new EN run.
+                                m_direction = EuropeanNumber;
+                            }
+                        } else if (m_status.lastStrong == RightToLeft) {
+                            // Extend the R run to include the neutrals.
+                            eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
+                            m_direction = RightToLeft;
+                            appendRun();
+                            // Begin a new EN run.
+                            m_direction = EuropeanNumber;
+                        }
+                    default: // the neutral cases above fall through to here
+                        break;
+                }
+                eor = current;
+                m_status.eor = EuropeanNumber;
+                if (m_direction == OtherNeutral)
+                    m_direction = LeftToRight;
+                break;
+            } // last strong type was AL: no break, so control falls through and the EN is handled as ArabicNumber
+        case ArabicNumber:
+            dirCurrent = ArabicNumber;
+            switch (m_status.last) {
+                case LeftToRight:
+                    if (context()->dir() == LeftToRight)
+                        appendRun();
+                    break;
+                case ArabicNumber:
+                    break;
+                case RightToLeft:
+                case RightToLeftArabic:
+                case EuropeanNumber:
+                    eor = last;
+                    appendRun();
+                    break;
+                case CommonNumberSeparator:
+                    if (m_status.eor == ArabicNumber)
+                        break;
+                case EuropeanNumberSeparator: // CS not following an AN run falls through to here
+                case EuropeanNumberTerminator:
+                case BoundaryNeutral:
+                case BlockSeparator:
+                case SegmentSeparator:
+                case WhiteSpaceNeutral:
+                case OtherNeutral:
+                    if (m_status.eor == ArabicNumber
+                        || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft))
+                        || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) {
+                        // Terminate the run before the neutrals.
+                        appendRun();
+                        // Begin an R run for the neutrals.
+                        m_direction = RightToLeft;
+                    } else if (m_direction == OtherNeutral)
+                        m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
+                    eor = last;
+                    appendRun();
+                default: // the neutral cases above fall through to here
+                    break;
+            }
+            eor = current;
+            m_status.eor = ArabicNumber;
+            if (m_direction == OtherNeutral)
+                m_direction = ArabicNumber;
+            break;
+        case EuropeanNumberSeparator:
+        case CommonNumberSeparator:
+            break;
+        case EuropeanNumberTerminator:
+            if (m_status.last == EuropeanNumber) {
+                dirCurrent = EuropeanNumber; // an ET directly after an EN is treated as part of the number
+                eor = current;
+                m_status.eor = dirCurrent;
+            } else if (m_status.last != EuropeanNumberTerminator)
+                lastBeforeET = emptyRun ? eor : last; // remember the position before the first ET of a sequence
+            break;
+
+        // boundary neutrals should be ignored
+        case BoundaryNeutral:
+            if (eor == last)
+                eor = current;
+            break;
+            // neutrals
+        case BlockSeparator:
+            // ### what do we do with newline and paragraph separators that come to here?
+            break;
+        case SegmentSeparator:
+            // ### implement rule L1
+            break;
+        case WhiteSpaceNeutral:
+            break;
+        case OtherNeutral:
+            break;
+        default:
+            break;
+        }
+        // Past |end|, stop once the current character became a run end: flush what belongs to this line, then restore the state saved at |end|.
+        if (pastEnd && eor == current) {
+            if (!reachedEndOfLine) {
+                eor = endOfLine;
+                switch (m_status.eor) {
+                    case LeftToRight:
+                    case RightToLeft:
+                    case ArabicNumber:
+                        m_direction = m_status.eor;
+                        break;
+                    case EuropeanNumber:
+                        m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber;
+                        break;
+                    default:
+                        ASSERT(false);
+                }
+                appendRun();
+            }
+            current = end;
+            m_status = stateAtEnd.m_status;
+            sor = stateAtEnd.sor; 
+            eor = stateAtEnd.eor;
+            last = stateAtEnd.last;
+            reachedEndOfLine = stateAtEnd.reachedEndOfLine;
+            lastBeforeET = stateAtEnd.lastBeforeET;
+            emptyRun = stateAtEnd.emptyRun;
+            m_direction = OtherNeutral;
+            break;
+        }
+
+        // set m_status.last as needed.
+        switch (dirCurrent) {
+            case EuropeanNumberTerminator:
+                if (m_status.last != EuropeanNumber)
+                    m_status.last = EuropeanNumberTerminator;
+                break;
+            case EuropeanNumberSeparator:
+            case CommonNumberSeparator:
+            case SegmentSeparator:
+            case WhiteSpaceNeutral:
+            case OtherNeutral:
+                switch(m_status.last) {
+                    case LeftToRight:
+                    case RightToLeft:
+                    case RightToLeftArabic:
+                    case EuropeanNumber:
+                    case ArabicNumber:
+                        m_status.last = dirCurrent;
+                        break;
+                    default:
+                        m_status.last = OtherNeutral; // a neutral or separator following another one is recorded as OtherNeutral
+                    }
+                break;
+            case NonSpacingMark:
+            case BoundaryNeutral:
+                // ignore these
+                break;
+            case EuropeanNumber:
+                // fall through
+            default:
+                m_status.last = dirCurrent;
+        }
+
+        last = current;
+
+        if (emptyRun) {
+            sor = current;
+            emptyRun = false;
+        }
+        // Advance; if the explicit embedding sequence is non-empty afterwards, commit it before continuing.
+        increment();
+        if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
+            commitExplicitEmbedding();
+            if (pastEnd) {
+                current = end;
+                m_status = stateAtEnd.m_status;
+                sor = stateAtEnd.sor; 
+                eor = stateAtEnd.eor;
+                last = stateAtEnd.last;
+                reachedEndOfLine = stateAtEnd.reachedEndOfLine;
+                lastBeforeET = stateAtEnd.lastBeforeET;
+                emptyRun = stateAtEnd.emptyRun;
+                m_direction = OtherNeutral;
+                break;
+            }
+        }
+        // First time |end| (or the end of the input) is reached: stop if no run is pending, otherwise snapshot the state and keep scanning.
+        if (!pastEnd && (current == end || current.atEnd())) {
+            if (emptyRun)
+                break;
+            stateAtEnd.m_status = m_status;
+            stateAtEnd.sor = sor; 
+            stateAtEnd.eor = eor;
+            stateAtEnd.last = last;
+            stateAtEnd.reachedEndOfLine = reachedEndOfLine;
+            stateAtEnd.lastBeforeET = lastBeforeET;
+            stateAtEnd.emptyRun = emptyRun;
+            endOfLine = last;
+            pastEnd = true;
+        }
+    }
+
+    m_logicallyLastRun = m_lastRun; // recorded before any reordering below
+
+    // reorder line according to run structure...
+    // do not reverse for visually ordered web sites
+    if (!visualOrder) {
+
+        // first find highest and lowest levels
+        unsigned char levelLow = 128;
+        unsigned char levelHigh = 0;
+        Run* r = firstRun();
+        while (r) {
+            if (r->m_level > levelHigh)
+                levelHigh = r->m_level;
+            if (r->m_level < levelLow)
+                levelLow = r->m_level;
+            r = r->next();
+        }
+
+        // implements reordering of the line (L2 according to Bidi spec):
+        // L2. From the highest level found in the text to the lowest odd level on each line,
+        // reverse any contiguous sequence of characters that are at that level or higher.
+
+        // reversing is only done up to the lowest odd level
+        if (!(levelLow % 2))
+            levelLow++;
+        // count is the zero-based index of the last run.
+        unsigned count = runCount() - 1;
+        // For each level from levelHigh down to levelLow, reverse every sequence of consecutive runs at that level or higher.
+        while (levelHigh >= levelLow) {
+            unsigned i = 0;
+            Run* currRun = firstRun();
+            while (i < count) {
+                while (i < count && currRun && currRun->m_level < levelHigh) {
+                    i++;
+                    currRun = currRun->next();
+                }
+                unsigned start = i;
+                while (i <= count && currRun && currRun->m_level >= levelHigh) {
+                    i++;
+                    currRun = currRun->next();
+                }
+                unsigned end = i - 1; // note: this local shadows the |end| parameter
+                reverseRuns(start, end);
+            }
+            levelHigh--;
+        }
+    }
+    endOfLine = Iterator();
+}
+
+} // namespace WebCore
+
+#endif // BidiResolver_h
diff --git a/Source/WebCore/platform/text/CharacterNames.h b/Source/WebCore/platform/text/CharacterNames.h
new file mode 100644
index 0000000..c4b496e
--- /dev/null
+++ b/Source/WebCore/platform/text/CharacterNames.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2007, 2009, 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef CharacterNames_h
+#define CharacterNames_h
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// Names here are taken from the Unicode standard.
+
+// Most of these are UChar constants, not UChar32, which makes them
+// more convenient for WebCore code that mostly uses UTF-16.
+
+const UChar32 aegeanWordSeparatorLine = 0x10100; // UChar32: the value is above 0xFFFF
+const UChar32 aegeanWordSeparatorDot = 0x10101; // UChar32: the value is above 0xFFFF
+const UChar blackCircle = 0x25CF;
+const UChar blackSquare = 0x25A0;
+const UChar blackUpPointingTriangle = 0x25B2;
+const UChar bullet = 0x2022;
+const UChar bullseye = 0x25CE;
+const UChar carriageReturn = 0x000D;
+const UChar ethiopicPrefaceColon = 0x1366;
+const UChar ethiopicWordspace = 0x1361;
+const UChar fisheye = 0x25C9;
+const UChar hebrewPunctuationGeresh = 0x05F3;
+const UChar hebrewPunctuationGershayim = 0x05F4;
+const UChar horizontalEllipsis = 0x2026;
+const UChar hyphen = 0x2010;
+const UChar hyphenMinus = 0x002D;
+const UChar ideographicComma = 0x3001;
+const UChar ideographicFullStop = 0x3002;
+const UChar ideographicSpace = 0x3000;
+const UChar leftDoubleQuotationMark = 0x201C;
+const UChar leftSingleQuotationMark = 0x2018;
+const UChar leftToRightEmbed = 0x202A;
+const UChar leftToRightMark = 0x200E;
+const UChar leftToRightOverride = 0x202D;
+const UChar minusSign = 0x2212;
+const UChar newlineCharacter = 0x000A;
+const UChar noBreakSpace = 0x00A0;
+const UChar objectReplacementCharacter = 0xFFFC;
+const UChar popDirectionalFormatting = 0x202C;
+const UChar replacementCharacter = 0xFFFD;
+const UChar rightDoubleQuotationMark = 0x201D;
+const UChar rightSingleQuotationMark = 0x2019;
+const UChar rightToLeftEmbed = 0x202B;
+const UChar rightToLeftMark = 0x200F;
+const UChar rightToLeftOverride = 0x202E;
+const UChar sesameDot = 0xFE45;
+const UChar softHyphen = 0x00AD;
+const UChar space = 0x0020;
+const UChar tibetanMarkIntersyllabicTsheg = 0x0F0B;
+const UChar tibetanMarkDelimiterTshegBstar = 0x0F0C;
+const UChar32 ugariticWordDivider = 0x1039F; // UChar32: the value is above 0xFFFF
+const UChar whiteBullet = 0x25E6;
+const UChar whiteCircle = 0x25CB;
+const UChar whiteSesameDot = 0xFE46;
+const UChar whiteUpPointingTriangle = 0x25B3;
+const UChar yenSign = 0x00A5;
+const UChar zeroWidthJoiner = 0x200D;
+const UChar zeroWidthNonJoiner = 0x200C;
+const UChar zeroWidthSpace = 0x200B;
+
+}
+
+#endif // CharacterNames_h
diff --git a/Source/WebCore/platform/text/Hyphenation.cpp b/Source/WebCore/platform/text/Hyphenation.cpp
new file mode 100644
index 0000000..89f6438
--- /dev/null
+++ b/Source/WebCore/platform/text/Hyphenation.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "Hyphenation.h"
+
+#include "NotImplemented.h"
+
+namespace WebCore {
+
+bool canHyphenate(const AtomicString& /* localeIdentifier */)
+{
+    return false; // Stub: no locale is reported as hyphenatable; the argument is ignored.
+}
+
+size_t lastHyphenLocation(const UChar* /* characters */, size_t /* length */, size_t /* beforeIndex */, const AtomicString& /* localeIdentifier */)
+{
+    ASSERT_NOT_REACHED(); // Stub that is not expected to be called - presumably because canHyphenate() in this file always returns false.
+    return 0; // Value returned after the assertion; all arguments are ignored.
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/Hyphenation.h b/Source/WebCore/platform/text/Hyphenation.h
new file mode 100644
index 0000000..a99bff0
--- /dev/null
+++ b/Source/WebCore/platform/text/Hyphenation.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef Hyphenation_h
+#define Hyphenation_h
+
+#include <wtf/Forward.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+bool canHyphenate(const AtomicString& localeIdentifier); // NOTE(review): presumably reports whether hyphenation is available for the locale - confirm with the implementation.
+size_t lastHyphenLocation(const UChar*, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier); // NOTE(review): presumably the last hyphenation position before beforeIndex - confirm.
+
+} // namespace WebCore
+
+#endif // Hyphenation_h
diff --git a/Source/WebCore/platform/text/LineEnding.cpp b/Source/WebCore/platform/text/LineEnding.cpp
new file mode 100644
index 0000000..00a90eb
--- /dev/null
+++ b/Source/WebCore/platform/text/LineEnding.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "LineEnding.h"
+
+#include "PlatformString.h"
+#include <wtf/text/CString.h>
+
+namespace {
+
+class OutputBuffer { // Abstract sink that receives the CRLF-normalized bytes.
+public:
+    virtual char* allocate(size_t size) = 0; // Returns writable storage for |size| bytes of output.
+    virtual void copy(const CString&) = 0; // Takes the source as-is; used when nothing had to be rewritten.
+    virtual ~OutputBuffer() { }
+};
+
+class CStringBuffer : public OutputBuffer { // OutputBuffer that keeps its result in a CString.
+public:
+    CStringBuffer(CString& buffer)
+        : m_buffer(buffer) // Copies |buffer|: m_buffer is a value member, not a reference.
+    {
+    }
+    virtual ~CStringBuffer() { }
+
+    virtual char* allocate(size_t size)
+    {
+        char* ptr;
+        m_buffer = CString::newUninitialized(size, ptr); // Replaces the old contents; presumably points |ptr| at the new storage.
+        return ptr;
+    }
+
+    virtual void copy(const CString& source)
+    {
+        m_buffer = source;
+    }
+
+    const CString& buffer() const { return m_buffer; } // The result has to be read back through this accessor.
+
+private:
+    CString m_buffer;
+};
+
+class VectorCharAppendBuffer : public OutputBuffer { // OutputBuffer that appends to a caller-owned Vector<char>.
+public:
+    VectorCharAppendBuffer(Vector<char>& buffer)
+        : m_buffer(buffer) // Binds the reference; the vector itself is not copied.
+    {
+    }
+    virtual ~VectorCharAppendBuffer() { }
+
+    virtual char* allocate(size_t size)
+    {
+        size_t oldSize = m_buffer.size();
+        m_buffer.grow(oldSize + size); // Make room for |size| more bytes after the current contents.
+        return m_buffer.data() + oldSize; // First byte of the newly added region.
+    }
+
+    virtual void copy(const CString& source)
+    {
+        m_buffer.append(source.data(), source.length()); // Appends length() bytes; no terminating NUL is added.
+    }
+
+private:
+    Vector<char>& m_buffer; // Not owned; refers to the vector given to the constructor.
+};
+
+// Hands |buffer| a version of |from| in which every CR, LF and CRLF has become CRLF.
+// Both passes are bounded by from.length() instead of stopping at the first '\0', so a
+// null CString is never dereferenced and bytes after an embedded NUL are not dropped.
+void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffer)
+{
+    const char* const begin = from.data();
+    const char* const end = begin + from.length();
+
+    // Compute the new length.
+    size_t newLen = 0;
+    for (const char* p = begin; p < end; ++p) {
+        if (*p == '\r') {
+            // Only a lone CR grows; the CR of a CRLF pair is covered by the LF that follows.
+            if (p + 1 == end || p[1] != '\n')
+                newLen += 2;
+        } else if (*p == '\n') {
+            // Turn LF into CRLF.
+            newLen += 2;
+        } else {
+            // Leave other characters alone.
+            newLen += 1;
+        }
+    }
+
+    // The length only stays the same when there is no lone CR or LF, i.e. nothing to
+    // rewrite, so the source can be passed through unchanged.
+    if (newLen == from.length()) {
+        buffer.copy(from);
+        return;
+    }
+
+    // Write the normalized copy into storage provided by |buffer|.
+    char* q = buffer.allocate(newLen);
+    for (const char* p = begin; p < end; ++p) {
+        if (*p == '\r') {
+            if (p + 1 == end || p[1] != '\n') {
+                // Turn CR into CRLF.
+                *q++ = '\r';
+                *q++ = '\n';
+            }
+        } else if (*p == '\n') {
+            // Turn LF into CRLF.
+            *q++ = '\r';
+            *q++ = '\n';
+        } else {
+            // Leave other characters alone.
+            *q++ = *p;
+        }
+    }
+}
+
+};
+
+namespace WebCore {
+
+void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR);
+
+// Appends |from| to |result| with every CR, LF and CRLF replaced by a single CR (toCR)
+// or LF (!toCR). Both passes are bounded by from.length() rather than the first '\0':
+// with an embedded NUL the old scan undercounted newLen while the no-fix path still
+// memcpy'd from.length() bytes, overrunning |result|; a null CString was dereferenced.
+void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR)
+{
+    const char* const begin = from.data();
+    const char* const end = begin + from.length();
+    if (begin == end)
+        return; // Nothing to append (this also covers a null CString).
+
+    // Compute the new length.
+    size_t newLen = 0;
+    bool needFix = false;
+    const char fromEndingChar = toCR ? '\n' : '\r';
+    const char toEndingChar = toCR ? '\r' : '\n';
+    for (const char* p = begin; p < end; ++p) {
+        if (*p == '\r' && p + 1 < end && p[1] == '\n') {
+            // Turn CRLF into CR or LF.
+            ++p;
+            needFix = true;
+        } else if (*p == fromEndingChar) {
+            // Turn CR/LF into LF/CR.
+            needFix = true;
+        }
+        newLen += 1;
+    }
+
+    // Grow the result buffer.
+    size_t oldResultSize = result.size();
+    result.grow(oldResultSize + newLen);
+    char* q = result.data() + oldResultSize;
+
+    // If no need to fix the string, just copy the string over (newLen == from.length() here).
+    if (!needFix) {
+        memcpy(q, begin, newLen);
+        return;
+    }
+
+    // Make a copy of the string.
+    for (const char* p = begin; p < end; ++p) {
+        // A CRLF pair yields a single output byte, so step over its LF.
+        const bool isCRLF = *p == '\r' && p + 1 < end && p[1] == '\n';
+        if (isCRLF)
+            ++p;
+        *q++ = (isCRLF || *p == fromEndingChar) ? toEndingChar : *p;
+    }
+}
+
+CString normalizeLineEndingsToCRLF(const CString& from)
+{
+    CString result; // Only seeds |buffer|; CStringBuffer stores its own copy.
+    CStringBuffer buffer(result);
+    internalNormalizeLineEndingsToCRLF(from, buffer);
+    return buffer.buffer(); // Read the output back from the buffer, not from |result|.
+}
+
+void normalizeLineEndingsToCR(const CString& from, Vector<char>& result)
+{
+    normalizeToCROrLF(from, result, true); // toCR = true: CRLF and LF both become CR.
+}
+
+void normalizeLineEndingsToLF(const CString& from, Vector<char>& result)
+{
+    normalizeToCROrLF(from, result, false); // toCR = false: CRLF and CR both become LF.
+}
+
+void normalizeLineEndingsToNative(const CString& from, Vector<char>& result)
+{
+#if OS(WINDOWS)
+    VectorCharAppendBuffer buffer(result); // Lets the CRLF normalizer append straight into |result|.
+    internalNormalizeLineEndingsToCRLF(from, buffer);
+#else
+    normalizeLineEndingsToLF(from, result); // Every non-Windows build normalizes to LF.
+#endif
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/LineEnding.h b/Source/WebCore/platform/text/LineEnding.h
new file mode 100644
index 0000000..4306ce8
--- /dev/null
+++ b/Source/WebCore/platform/text/LineEnding.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LineEnding_h
+#define LineEnding_h
+
+#include <wtf/Forward.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+// Normalize all line-endings in the given string to CRLF.
+CString normalizeLineEndingsToCRLF(const CString& from);
+
+// Normalize all line-endings in the given string to CR and append the result to the given buffer.
+void normalizeLineEndingsToCR(const CString& from, Vector<char>& result);
+
+// Normalize all line-endings in the given string to LF and append the result to the given buffer.
+void normalizeLineEndingsToLF(const CString& from, Vector<char>& result);
+
+// Normalize all line-endings in the given string to the native line-endings and append the result to the given buffer.
+// (Normalize to CRLF on Windows and normalize to LF on all other platforms.)
+void normalizeLineEndingsToNative(const CString& from, Vector<char>& result);
+
+} // namespace WebCore
+
+#endif // LineEnding_h
diff --git a/Source/WebCore/platform/text/ParserUtilities.h b/Source/WebCore/platform/text/ParserUtilities.h
new file mode 100644
index 0000000..3105214
--- /dev/null
+++ b/Source/WebCore/platform/text/ParserUtilities.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2003 The Karbon Developers
+ * Copyright (C) 2006, 2007 Rob Buis <buis@kde.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef ParserUtilities_h
+#define ParserUtilities_h
+
+#include "PlatformString.h"
+
+namespace WebCore {
+
+    // Advances |ptr| past |name| (|length| UChars) if [ptr, end) begins with it; otherwise leaves |ptr| alone.
+    inline bool skipString(const UChar*& ptr, const UChar* end, const UChar* name, int length)
+    {
+        // Succeed only when enough input remains and the code units compare equal.
+        const bool matches = end - ptr >= length && !memcmp(name, ptr, sizeof(UChar) * length);
+        if (matches)
+            ptr += length;
+        return matches;
+    }
+
+    // Advances |ptr| past the NUL-terminated |str| if [ptr, end) begins with it; otherwise leaves |ptr| alone.
+    inline bool skipString(const UChar*& ptr, const UChar* end, const char* str)
+    {
+        const int length = strlen(str);
+        if (end - ptr < length)
+            return false;
+        int matched = 0;
+        while (matched < length && ptr[matched] == str[matched])
+            ++matched;
+        ptr += matched == length ? length : 0;
+        return matched == length;
+    }
+
+} // namespace WebCore
+
+#endif // ParserUtilities_h
diff --git a/Source/WebCore/platform/text/PlatformString.h b/Source/WebCore/platform/text/PlatformString.h
new file mode 100644
index 0000000..e525bd4
--- /dev/null
+++ b/Source/WebCore/platform/text/PlatformString.h
@@ -0,0 +1,45 @@
+/*
+ * (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef PlatformString_h
+#define PlatformString_h
+
+// This file would be called String.h, but that conflicts with <string.h>
+// on systems without case-sensitive file systems.
+
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+class SharedBuffer;
+
+PassRefPtr<SharedBuffer> utf8Buffer(const String&);
+// Counts the number of grapheme clusters. A surrogate pair or a sequence
+// of a non-combining character and following combining characters is
+// counted as 1 grapheme cluster.
+unsigned numGraphemeClusters(const String& s);
+// Returns the number of characters which will be less than or equal to
+// the specified grapheme cluster length.
+unsigned numCharactersInGraphemeClusters(const String& s, unsigned);
+
+} // namespace WebCore
+
+#endif
diff --git a/Source/WebCore/platform/text/RegularExpression.cpp b/Source/WebCore/platform/text/RegularExpression.cpp
new file mode 100644
index 0000000..9b063c9
--- /dev/null
+++ b/Source/WebCore/platform/text/RegularExpression.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2008 Collabora Ltd.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "RegularExpression.h"
+
+#include "Logging.h"
+#include <pcre/pcre.h>
+
+namespace WebCore {
+
+class RegularExpression::Private : public RefCounted<RegularExpression::Private> { // Ref-counted state behind RegularExpression.
+public:
+    static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity);
+    ~Private();
+
+    JSRegExp* regexp() const { return m_regexp; } // Null when the pattern failed to compile.
+    int lastMatchLength; // Starts at -1; written by match() and searchRev().
+
+private:
+    Private(const String& pattern, TextCaseSensitivity);
+    static JSRegExp* compile(const String& pattern, TextCaseSensitivity);
+
+    JSRegExp* m_regexp; // Result of compile(); released with jsRegExpFree() in the destructor.
+};
+
+inline JSRegExp* RegularExpression::Private::compile(const String& pattern, TextCaseSensitivity caseSensitivity)
+{
+    const char* errorMessage; // Only read in the failure branch below.
+    JSRegExp* regexp = jsRegExpCompile(pattern.characters(), pattern.length(),
+        caseSensitivity == TextCaseSensitive ? JSRegExpDoNotIgnoreCase : JSRegExpIgnoreCase, JSRegExpSingleLine,
+        0, &errorMessage);
+    if (!regexp) // Compilation failed: log it and hand the null pointer back to the caller.
+        LOG_ERROR("RegularExpression: pcre_compile failed with '%s'", errorMessage);
+    return regexp;
+}
+
+inline RegularExpression::Private::Private(const String& pattern, TextCaseSensitivity caseSensitivity)
+    : lastMatchLength(-1) // Same "no match" value that match() stores on failure.
+    , m_regexp(compile(pattern, caseSensitivity)) // Null if the pattern does not compile.
+{
+}
+
+inline PassRefPtr<RegularExpression::Private> RegularExpression::Private::create(const String& pattern, TextCaseSensitivity caseSensitivity)
+{
+    return adoptRef(new Private(pattern, caseSensitivity)); // The constructor is private; instances are made here.
+}
+
+RegularExpression::Private::~Private()
+{
+    jsRegExpFree(m_regexp); // NOTE(review): m_regexp is null after a failed compile — presumably jsRegExpFree accepts null; confirm.
+}
+
+RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity)
+    : d(Private::create(pattern, caseSensitivity)) // Compiles |pattern| once; a failure is only logged, not reported.
+{
+}
+
+RegularExpression::RegularExpression(const RegularExpression& re)
+    : d(re.d) // Copies share one Private, including its lastMatchLength.
+{
+}
+
+RegularExpression::~RegularExpression() // Empty body; the only state released is the RefPtr member |d|.
+{
+}
+
+RegularExpression& RegularExpression::operator=(const RegularExpression& re)
+{
+    d = re.d; // Share |re|'s Private instead of recompiling the pattern.
+    return *this;
+}
+
+// Runs the pattern on |str| from |startFrom|; returns the match offset (-1 if none) and reports its length.
+int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
+{
+    // An uncompiled pattern or a null subject never matches; the cached length is left as it was.
+    if (!d->regexp() || str.isNull())
+        return -1;
+
+    // Slots 0 and 1 receive the match start and end; the third is used internally by pcre.
+    static const size_t offsetCount = 3;
+    int matchOffsets[offsetCount];
+    const int status = jsRegExpExecute(d->regexp(), str.characters(), str.length(), startFrom, matchOffsets, offsetCount);
+    if (status < 0) {
+        // JSRegExpErrorNoMatch is the ordinary "not found" outcome; log anything else.
+        if (status != JSRegExpErrorNoMatch)
+            LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", status);
+        d->lastMatchLength = -1;
+        return -1;
+    }
+
+    // 1 means 1 match; 0 means more than one match. First match is recorded in matchOffsets.
+    d->lastMatchLength = matchOffsets[1] - matchOffsets[0];
+    if (matchLength)
+        *matchLength = d->lastMatchLength;
+    return matchOffsets[0];
+}
+
+// Returns the offset of the match that ends last in |str| (-1 if none) and caches its length.
+int RegularExpression::searchRev(const String& str) const
+{
+    // FIXME: This could be faster if it actually searched backwards.
+    // Instead, it just searches forwards, multiple times until it finds the last match.
+    int bestPos = -1;
+    int bestLength = -1;
+    int searchFrom = 0;
+    int pos = 0;
+    while (pos != -1) {
+        int currentLength;
+        pos = match(str, searchFrom, &currentLength);
+        if (pos < 0)
+            continue;
+        // Replace the best match if this one ends later, i.e. it is not a subset of it.
+        if (pos + currentLength > bestPos + bestLength) {
+            bestPos = pos;
+            bestLength = currentLength;
+        }
+        searchFrom = pos + 1; // Retry one character further on.
+    }
+    d->lastMatchLength = bestLength;
+    return bestPos;
+}
+
+int RegularExpression::matchedLength() const
+{
+    return d->lastMatchLength; // -1 until a match succeeds; the value is shared by all copies of this object.
+}
+
+// Replaces each successive match of |target| in |string| with |replacement|, in place.
+void replace(String& string, const RegularExpression& target, const String& replacement)
+{
+    for (int searchFrom = 0; searchFrom < static_cast<int>(string.length());) {
+        int matchedChars;
+        const int matchStart = target.match(string, searchFrom, &matchedChars);
+        if (matchStart < 0)
+            return;
+        string.replace(matchStart, matchedChars, replacement);
+        if (!matchedChars)
+            return; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
+        searchFrom = matchStart + replacement.length(); // Resume right after the inserted text.
+    }
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/RegularExpression.h b/Source/WebCore/platform/text/RegularExpression.h
new file mode 100644
index 0000000..f1611e5
--- /dev/null
+++ b/Source/WebCore/platform/text/RegularExpression.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2003, 2008, 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef RegularExpression_h
+#define RegularExpression_h
+
+#include "PlatformString.h"
+
+namespace WebCore {
+
+class RegularExpression : public FastAllocBase { // Value-type handle to a compiled pattern.
+public:
+    RegularExpression(const String&, TextCaseSensitivity);
+    ~RegularExpression();
+
+    RegularExpression(const RegularExpression&); // Copies share the same underlying Private.
+    RegularExpression& operator=(const RegularExpression&);
+
+    int match(const String&, int startFrom = 0, int* matchLength = 0) const; // Returns the match offset, or -1.
+    int searchRev(const String&) const; // Returns the offset of the match that ends last, or -1.
+
+    int matchedLength() const; // Length recorded by the latest match()/searchRev(); -1 if it found nothing.
+
+private:
+    class Private; // Defined in RegularExpression.cpp.
+    RefPtr<Private> d;
+};
+
+void replace(String&, const RegularExpression&, const String&);
+
+} // namespace WebCore
+
+#endif // RegularExpression_h
diff --git a/Source/WebCore/platform/text/SegmentedString.cpp b/Source/WebCore/platform/text/SegmentedString.cpp
new file mode 100644
index 0000000..a371582
--- /dev/null
+++ b/Source/WebCore/platform/text/SegmentedString.cpp
@@ -0,0 +1,274 @@
+/*
+    Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public License
+    along with this library; see the file COPYING.LIB.  If not, write to
+    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+    Boston, MA 02110-1301, USA.
+*/
+
+#include "config.h"
+#include "SegmentedString.h"
+
+namespace WebCore {
+
+// Copies |other| completely; the three position counters assigned in the body were previously left uninitialized.
+SegmentedString::SegmentedString(const SegmentedString &other)
+    : m_pushedChar1(other.m_pushedChar1)
+    , m_pushedChar2(other.m_pushedChar2)
+    , m_currentString(other.m_currentString)
+    , m_substrings(other.m_substrings)
+    , m_composite(other.m_composite)
+    , m_closed(other.m_closed)
+{
+    m_numberOfCharactersConsumedPriorToCurrentString = other.m_numberOfCharactersConsumedPriorToCurrentString;
+    m_numberOfCharactersConsumedPriorToCurrentLine = other.m_numberOfCharactersConsumedPriorToCurrentLine;
+    m_currentLine = other.m_currentLine;
+    m_currentChar = other.m_currentChar == &other.m_pushedChar1 ? &m_pushedChar1 // Point at our own pushed chars, not |other|'s.
+        : other.m_currentChar == &other.m_pushedChar2 ? &m_pushedChar2 : other.m_currentChar;
+}
+
+// Assigns every piece of |other|'s state, re-targeting the cursor when it refers to a pushed char.
+const SegmentedString& SegmentedString::operator=(const SegmentedString &other)
+{
+    m_pushedChar1 = other.m_pushedChar1;
+    m_pushedChar2 = other.m_pushedChar2;
+    m_currentString = other.m_currentString;
+    m_substrings = other.m_substrings;
+    m_composite = other.m_composite;
+    m_closed = other.m_closed;
+
+    // Position bookkeeping read by currentLine() and currentColumn().
+    m_numberOfCharactersConsumedPriorToCurrentString = other.m_numberOfCharactersConsumedPriorToCurrentString;
+    m_numberOfCharactersConsumedPriorToCurrentLine = other.m_numberOfCharactersConsumedPriorToCurrentLine;
+    m_currentLine = other.m_currentLine;
+
+    // |other|'s cursor may point into its own pushed-char members; ours must point into ours.
+    m_currentChar = other.m_currentChar == &other.m_pushedChar1 ? &m_pushedChar1
+        : other.m_currentChar == &other.m_pushedChar2 ? &m_pushedChar2 : other.m_currentChar;
+    return *this;
+}
+
+// Total characters still unread: pushed-back chars, the current segment, and queued segments.
+unsigned SegmentedString::length() const
+{
+    unsigned total = m_currentString.m_length;
+    // m_pushedChar2 only counts when m_pushedChar1 is also set.
+    if (m_pushedChar1)
+        total += m_pushedChar2 ? 2 : 1;
+
+    if (!m_composite)
+        return total;
+
+    typedef Deque<SegmentedSubstring>::const_iterator SegmentIterator;
+    for (SegmentIterator segment = m_substrings.begin(), last = m_substrings.end(); segment != last; ++segment)
+        total += segment->m_length;
+    return total;
+}
+
+// Marks the current segment and every queued segment as excluded from line counting.
+void SegmentedString::setExcludeLineNumbers()
+{
+    m_currentString.setExcludeLineNumbers();
+    if (!m_composite)
+        return;
+    typedef Deque<SegmentedSubstring>::iterator SegmentIterator;
+    for (SegmentIterator segment = m_substrings.begin(), last = m_substrings.end(); segment != last; ++segment)
+        segment->setExcludeLineNumbers();
+}
+
+void SegmentedString::clear()
+{
+    m_pushedChar1 = 0; // Drop any pushed-back characters.
+    m_pushedChar2 = 0;
+    m_currentChar = 0;
+    m_currentString.clear();
+    m_substrings.clear();
+    m_composite = false; // NOTE(review): the consumed-character counters and m_currentLine are not reset here — confirm intended.
+    m_closed = false; // Re-opens the string, so append() no longer asserts.
+}
+
+void SegmentedString::append(const SegmentedSubstring &s)
+{
+    ASSERT(!m_closed);
+    if (!s.m_length)
+        return; // Empty segments are ignored.
+    if (m_currentString.m_length) {
+        m_substrings.append(s); // Unread input remains, so |s| waits in the queue.
+        m_composite = true;
+        return;
+    }
+    m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
+    m_currentString = s; // The old segment was exhausted; its consumed count was banked above.
+}
+
+void SegmentedString::prepend(const SegmentedSubstring &s)
+{
+    ASSERT(!escaped());
+    ASSERT(!s.numberOfCharactersConsumed());
+    if (!s.m_length)
+        return;
+
+    // FIXME: We're assuming that the prepended characters were originally
+    //        consumed by this SegmentedString.  We're also ASSERTing that s is
+    //        a fresh SegmentedSubstring.  These assumptions are sufficient for
+    //        our current use, but we might need to handle the more elaborate
+    //        cases in the future.
+    m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
+    m_numberOfCharactersConsumedPriorToCurrentString -= s.m_length;
+
+    // If unread input remains, shift our m_currentString into our list before replacing it.
+    if (m_currentString.m_length) {
+        m_substrings.prepend(m_currentString);
+        m_composite = true;
+    }
+    m_currentString = s;
+}
+
+void SegmentedString::close()
+{
+    // Closing a stream twice is likely a coding mistake.
+    ASSERT(!m_closed);
+    m_closed = true; // From here on both append() overloads assert.
+}
+
+// Appends every segment of |s|, in order, then re-syncs the cursor.
+void SegmentedString::append(const SegmentedString &s)
+{
+    ASSERT(!m_closed);
+    ASSERT(!s.escaped());
+    append(s.m_currentString);
+    if (s.m_composite) {
+        typedef Deque<SegmentedSubstring>::const_iterator SegmentIterator;
+        for (SegmentIterator segment = s.m_substrings.begin(), last = s.m_substrings.end(); segment != last; ++segment)
+            append(*segment);
+    }
+    m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
+}
+
+// Prepends |s|'s segments back to front so they keep their original order, then re-syncs the cursor.
+void SegmentedString::prepend(const SegmentedString &s)
+{
+    ASSERT(!escaped());
+    ASSERT(!s.escaped());
+    if (s.m_composite) {
+        typedef Deque<SegmentedSubstring>::const_reverse_iterator ReverseSegmentIterator;
+        for (ReverseSegmentIterator segment = s.m_substrings.rbegin(), first = s.m_substrings.rend(); segment != first; ++segment)
+            prepend(*segment);
+    }
+    prepend(s.m_currentString);
+    m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
+}
+
+// Moves on to the next queued segment, or empties the current one when nothing is queued.
+void SegmentedString::advanceSubstring()
+{
+    if (!m_composite) {
+        m_currentString.clear();
+        return;
+    }
+    m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
+    m_currentString = m_substrings.takeFirst();
+    // Characters already consumed from the new current string are accounted for by
+    // that string itself, so remove them from the "prior to current string" total.
+    m_numberOfCharactersConsumedPriorToCurrentString -= m_currentString.numberOfCharactersConsumed();
+    if (m_substrings.isEmpty())
+        m_composite = false;
+}
+
+// Characters consumed so far; pushed-back characters are counted as not yet consumed.
+int SegmentedString::numberOfCharactersConsumedSlow() const
+{
+    int pushedBack = 0;
+    if (m_pushedChar1)
+        pushedBack = m_pushedChar2 ? 2 : 1;
+
+    const int consumed = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
+    return consumed - pushedBack;
+}
+
+// Builds a String holding all unread content: pushed chars, current segment, queued segments.
+String SegmentedString::toString() const
+{
+    String unread;
+    if (m_pushedChar1)
+        unread.append(m_pushedChar1);
+    if (m_pushedChar1 && m_pushedChar2)
+        unread.append(m_pushedChar2);
+    m_currentString.appendTo(unread);
+    if (!m_composite)
+        return unread;
+
+    typedef Deque<SegmentedSubstring>::const_iterator SegmentIterator;
+    for (SegmentIterator segment = m_substrings.begin(), last = m_substrings.end(); segment != last; ++segment)
+        segment->appendTo(unread);
+    return unread;
+}
+
+// Consumes |count| characters, storing them in order into |consumedCharacters|.
+void SegmentedString::advance(unsigned count, UChar* consumedCharacters)
+{
+    ASSERT(count <= length());
+    UChar* const stop = consumedCharacters + count;
+    for (UChar* out = consumedCharacters; out != stop; ++out, advance())
+        *out = *current();
+}
+
+void SegmentedString::advanceSlowCase()
+{
+    if (m_pushedChar1) { // Pushed-back characters are consumed before the segment data.
+        m_pushedChar1 = m_pushedChar2; // Shift the second pushed char (possibly 0) into the first slot.
+        m_pushedChar2 = 0;
+    } else if (m_currentString.m_current) {
+        ++m_currentString.m_current;
+        if (--m_currentString.m_length == 0) // Segment exhausted: move on to the next one.
+            advanceSubstring();
+    }
+    m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current; // Re-sync the cursor with whatever is now first.
+}
+
+void SegmentedString::advanceSlowCase(int& lineNumber)
+{
+    if (m_pushedChar1) { // Pushed-back characters are consumed first and never touch the line count.
+        m_pushedChar1 = m_pushedChar2;
+        m_pushedChar2 = 0;
+    } else if (m_currentString.m_current) {
+        if (*m_currentString.m_current++ == '\n' && m_currentString.doNotExcludeLineNumbers()) { // Consumed a counted newline.
+            ++lineNumber;
+            ++m_currentLine;
+            m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed(); // NOTE(review): m_length is only decremented below — confirm this offset is the intended one.
+        }
+        if (--m_currentString.m_length == 0) // Segment exhausted: move on to the next one.
+            advanceSubstring();
+    }
+    m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current; // Re-sync the cursor with whatever is now first.
+}
+
+WTF::ZeroBasedNumber SegmentedString::currentLine() const
+{
+    return WTF::ZeroBasedNumber::fromZeroBasedInt(m_currentLine); // m_currentLine is advanced by advanceSlowCase(int&) and set by setCurrentPosition().
+}
+
+WTF::ZeroBasedNumber SegmentedString::currentColumn() const
+{
+    int zeroBasedColumn = numberOfCharactersConsumedSlow() - m_numberOfCharactersConsumedPriorToCurrentLine; // Characters consumed since the recorded line start.
+    return WTF::ZeroBasedNumber::fromZeroBasedInt(zeroBasedColumn);
+}
+
+void SegmentedString::setCurrentPosition(WTF::ZeroBasedNumber line, WTF::ZeroBasedNumber columnAftreProlog, int prologLength) // NOTE(review): "Aftre" is presumably a typo for "After".
+{
+    m_currentLine = line.zeroBasedInt();
+    m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumedSlow() + prologLength - columnAftreProlog.zeroBasedInt(); // Makes currentColumn() return columnAftreProlog - prologLength from here.
+}
+
+}
diff --git a/Source/WebCore/platform/text/SegmentedString.h b/Source/WebCore/platform/text/SegmentedString.h
new file mode 100644
index 0000000..5f548c7
--- /dev/null
+++ b/Source/WebCore/platform/text/SegmentedString.h
@@ -0,0 +1,282 @@
+/*
+    Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public License
+    along with this library; see the file COPYING.LIB.  If not, write to
+    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+    Boston, MA 02110-1301, USA.
+*/
+
+#ifndef SegmentedString_h
+#define SegmentedString_h
+
+#include "PlatformString.h"
+#include <wtf/Deque.h>
+#include <wtf/text/TextPosition.h>
+
+namespace WebCore {
+
+class SegmentedString;
+
+class SegmentedSubstring { // One String segment plus a cursor (m_current) and the remaining length (m_length) within it.
+public:
+    SegmentedSubstring() : m_length(0), m_current(0), m_doNotExcludeLineNumbers(true) {}
+    SegmentedSubstring(const String& str)
+        : m_length(str.length())
+        , m_current(str.isEmpty() ? 0 : str.characters()) // An empty string gets a null cursor.
+        , m_string(str)
+        , m_doNotExcludeLineNumbers(true)
+    {
+    }
+
+    void clear() { m_length = 0; m_current = 0; } // Resets only the cursor and length; m_string is left untouched.
+    
+    bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
+    bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
+
+    void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
+
+    int numberOfCharactersConsumed() const { return m_string.length() - m_length; } // Original length minus what remains.
+
+    void appendTo(String& str) const
+    {
+        if (m_string.characters() == m_current) { // Cursor is at the start of the backing string: use it whole.
+            if (str.isEmpty())
+                str = m_string;
+            else
+                str.append(m_string);
+        } else {
+            str.append(String(m_current, m_length)); // Otherwise only the unconsumed tail is appended.
+        }
+    }
+
+public:
+    int m_length; // Number of characters remaining starting at m_current.
+    const UChar* m_current; // Next unconsumed character; 0 when empty or cleared.
+
+private:
+    String m_string;
+    bool m_doNotExcludeLineNumbers;
+};
+
+class SegmentedString {
+public:
+    SegmentedString()
+        : m_pushedChar1(0)
+        , m_pushedChar2(0)
+        , m_currentChar(0)
+        , m_numberOfCharactersConsumedPriorToCurrentString(0)
+        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
+        , m_currentLine(0)
+        , m_composite(false)
+        , m_closed(false)
+    {
+    }
+
+    SegmentedString(const String& str)
+        : m_pushedChar1(0)
+        , m_pushedChar2(0)
+        , m_currentString(str)
+        , m_currentChar(m_currentString.m_current)
+        , m_numberOfCharactersConsumedPriorToCurrentString(0)
+        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
+        , m_currentLine(0)
+        , m_composite(false)
+        , m_closed(false)
+    {
+    }
+
+    SegmentedString(const SegmentedString&);
+
+    const SegmentedString& operator=(const SegmentedString&);
+
+    void clear();
+    void close();
+
+    void append(const SegmentedString&);
+    void prepend(const SegmentedString&);
+
+    bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
+    void setExcludeLineNumbers();
+
+    void push(UChar c) // Stores c in the first free push-back slot (m_pushedChar1, then m_pushedChar2).
+    {
+        if (!m_pushedChar1) {
+            m_pushedChar1 = c;
+            m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current; // A non-zero pushed character becomes the current character.
+        } else {
+            ASSERT(!m_pushedChar2); // Only two push-back slots exist.
+            m_pushedChar2 = c;
+        }
+    }
+
+    bool isEmpty() const { return !current(); }
+    unsigned length() const;
+
+    bool isClosed() const { return m_closed; }
+
+    enum LookAheadResult {
+        DidNotMatch,
+        DidMatch,
+        NotEnoughCharacters,
+    };
+
+    LookAheadResult lookAhead(const String& string) { return lookAheadInline<SegmentedString::equalsLiterally>(string); }
+    LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline<SegmentedString::equalsIgnoringCase>(string); }
+
+    void advance() // Consumes one character without any line-number tracking.
+    {
+        if (!m_pushedChar1 && m_currentString.m_length > 1) { // Fast path: no pushed-back character and not the last character of the substring.
+            --m_currentString.m_length;
+            m_currentChar = ++m_currentString.m_current;
+            return;
+        }
+        advanceSlowCase();
+    }
+
+    void advanceAndASSERT(UChar expectedCharacter)
+    {
+        ASSERT_UNUSED(expectedCharacter, *current() == expectedCharacter);
+        advance();
+    }
+
+    void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
+    {
+        ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(*current()) == WTF::Unicode::foldCase(expectedCharacter));
+        advance();
+    }
+
+    void advancePastNewline(int& lineNumber) // Consumes a character the caller knows to be '\n', updating the line bookkeeping.
+    {
+        ASSERT(*current() == '\n');
+        if (!m_pushedChar1 && m_currentString.m_length > 1) { // Fast path: no pushed-back character and not the last character of the substring.
+            int newLineFlag = m_currentString.doNotExcludeLineNumbers(); // 1 when this substring counts lines, 0 otherwise.
+            lineNumber += newLineFlag;
+            m_currentLine += newLineFlag;
+            if (newLineFlag)
+                m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; // +1 as in advance(int&): m_length is decremented only below, so the newline is not counted yet.
+            --m_currentString.m_length;
+            m_currentChar = ++m_currentString.m_current;
+            return;
+        }
+        advanceSlowCase(lineNumber);
+    }
+    
+    void advancePastNonNewline()
+    {
+        ASSERT(*current() != '\n');
+        if (!m_pushedChar1 && m_currentString.m_length > 1) {
+            --m_currentString.m_length;
+            m_currentChar = ++m_currentString.m_current;
+            return;
+        }
+        advanceSlowCase();
+    }
+    
+    void advance(int& lineNumber) // Consumes one character, bumping lineNumber and m_currentLine on a counted newline.
+    {
+        if (!m_pushedChar1 && m_currentString.m_length > 1) {
+            int newLineFlag = (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers(); // 1 only for a newline in a substring that counts lines.
+            lineNumber += newLineFlag;
+            m_currentLine += newLineFlag;
+            if (newLineFlag)
+                m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; // +1 because m_length is decremented only on the next line.
+            --m_currentString.m_length;
+            m_currentChar = ++m_currentString.m_current;
+            return;
+        }
+        advanceSlowCase(lineNumber);
+    }
+
+    // Writes the consumed characters into consumedCharacters, which must
+    // have space for at least |count| characters.
+    void advance(unsigned count, UChar* consumedCharacters);
+
+    bool escaped() const { return m_pushedChar1; }
+
+    int numberOfCharactersConsumed() const
+    {
+        // We don't currently handle the case when there are pushed characters.
+        ASSERT(!m_pushedChar1);
+        return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed(); // Earlier substrings plus progress in the current one.
+    }
+
+    int numberOfCharactersConsumedSlow() const;
+
+    String toString() const;
+
+    const UChar& operator*() const { return *current(); }
+    const UChar* operator->() const { return current(); }
+    
+
+    // The method is moderately slow, comparing to currentLine method.
+    WTF::ZeroBasedNumber currentColumn() const;
+    WTF::ZeroBasedNumber currentLine() const;
+    // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
+    // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
+    void setCurrentPosition(WTF::ZeroBasedNumber line, WTF::ZeroBasedNumber columnAftreProlog, int prologLength);
+
+private:
+    void append(const SegmentedSubstring&);
+    void prepend(const SegmentedSubstring&);
+
+    void advanceSlowCase();
+    void advanceSlowCase(int& lineNumber);
+    void advanceSubstring();
+    const UChar* current() const { return m_currentChar; }
+
+    static bool equalsLiterally(const UChar* str1, const UChar* str2, size_t count) { return !memcmp(str1, str2, count * sizeof(UChar)); }
+    static bool equalsIgnoringCase(const UChar* str1, const UChar* str2, size_t count) { return !WTF::Unicode::umemcasecmp(str1, str2, count); }
+
+    template<bool equals(const UChar* str1, const UChar* str2, size_t count)>
+    inline LookAheadResult lookAheadInline(const String& string)
+    {
+        if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) { // Fast path: the current substring alone holds enough characters.
+            if (equals(string.characters(), m_currentString.m_current, string.length()))
+                return DidMatch;
+            return DidNotMatch;
+        }
+        return lookAheadSlowCase<equals>(string); // Otherwise the comparison may span pushed characters or several substrings.
+    }
+
+    template<bool equals(const UChar* str1, const UChar* str2, size_t count)>
+    LookAheadResult lookAheadSlowCase(const String& string)
+    {
+        unsigned count = string.length();
+        if (count > length())
+            return NotEnoughCharacters;
+        UChar* consumedCharacters;
+        String consumedString = String::createUninitialized(count, consumedCharacters);
+        advance(count, consumedCharacters); // Consume count characters into the scratch string...
+        LookAheadResult result = DidNotMatch;
+        if (equals(string.characters(), consumedCharacters, count))
+            result = DidMatch;
+        prepend(SegmentedString(consumedString)); // ...then put them back in front of the remaining input.
+        return result;
+    }
+
+    UChar m_pushedChar1;
+    UChar m_pushedChar2;
+    SegmentedSubstring m_currentString;
+    const UChar* m_currentChar;
+    int m_numberOfCharactersConsumedPriorToCurrentString;
+    int m_numberOfCharactersConsumedPriorToCurrentLine;
+    int m_currentLine;
+    Deque<SegmentedSubstring> m_substrings;
+    bool m_composite;
+    bool m_closed;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/platform/text/String.cpp b/Source/WebCore/platform/text/String.cpp
new file mode 100644
index 0000000..f2f8d2e
--- /dev/null
+++ b/Source/WebCore/platform/text/String.cpp
@@ -0,0 +1,77 @@
+/*
+ * (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+
+#include "SharedBuffer.h"
+#include "TextBreakIterator.h"
+#include <wtf/unicode/UTF8.h>
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF;
+using namespace WTF::Unicode;
+
+namespace WebCore {
+
+PassRefPtr<SharedBuffer> utf8Buffer(const String& string) // Returns the UTF-8 encoding of string in a SharedBuffer, or 0 if the conversion fails.
+{
+    // Allocate a buffer big enough to hold all the characters.
+    const int length = string.length();
+    Vector<char> buffer(length * 3); // Worst case is 3 UTF-8 bytes per UTF-16 code unit.
+
+    // Convert to runs of 8-bit characters.
+    char* p = buffer.data();
+    const UChar* d = string.characters();
+    ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), true);
+    if (result != conversionOK)
+        return 0;
+
+    buffer.shrink(p - buffer.data()); // p now points just past the last byte written.
+    return SharedBuffer::adoptVector(buffer);
+}
+
+unsigned numGraphemeClusters(const String& s)
+{
+    TextBreakIterator* breaker = characterBreakIterator(s.characters(), s.length());
+    if (!breaker)
+        return s.length(); // No iterator available: fall back to the code-unit count.
+
+    // Every successful step to the next boundary closes one cluster.
+    unsigned clusterCount = 0;
+    for (; textBreakNext(breaker) != TextBreakDone; ++clusterCount) { }
+    return clusterCount;
+}
+
+unsigned numCharactersInGraphemeClusters(const String& s, unsigned numGraphemeClusters)
+{
+    TextBreakIterator* breaker = characterBreakIterator(s.characters(), s.length());
+    if (!breaker)
+        return min(s.length(), numGraphemeClusters); // No iterator available: treat each code unit as one cluster.
+
+    for (unsigned remaining = numGraphemeClusters; remaining; --remaining) {
+        if (textBreakNext(breaker) == TextBreakDone)
+            return s.length(); // Fewer clusters than requested: the whole string is covered.
+    }
+    return textBreakCurrent(breaker);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/SuffixTree.h b/Source/WebCore/platform/text/SuffixTree.h
new file mode 100644
index 0000000..f11fd23
--- /dev/null
+++ b/Source/WebCore/platform/text/SuffixTree.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2010 Adam Barth. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SuffixTree_h
+#define SuffixTree_h
+
+#include "PlatformString.h"
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+class UnicodeCodebook { // Codebook with one distinct code word per UChar value.
+public:
+    static int codeWord(UChar c) { return c; }
+    enum { codeSize = 1 << 8 * sizeof(UChar) }; // Number of distinct values a UChar can hold.
+};
+
+class ASCIICodebook { // Codebook that folds every UChar onto its low 7 bits.
+public:
+    static int codeWord(UChar c) { return c & (codeSize - 1); } // Distinct characters may share a code word.
+    enum { codeSize = 1 << (8 * sizeof(char) - 1) }; // 2^(bits in a char - 1), i.e. 128 for 8-bit chars.
+};
+
+template<typename Codebook>
+class SuffixTree { // Tree of the text's substrings, truncated to m_depth characters and indexed by Codebook code words.
+public:
+    SuffixTree(const String& text, unsigned depth)
+        : m_depth(depth)
+        , m_leaf(true)
+    {
+        build(text);
+    }
+
+    bool mightContain(const String& query) // Walks at most m_depth characters of query down the tree; false as soon as a child is missing.
+    {
+        Node* current = &m_root;
+        int limit = std::min(m_depth, query.length());
+        for (int i = 0; i < limit; ++i) {
+            current = current->at(Codebook::codeWord(query[i]));
+            if (!current)
+                return false;
+        }
+        return true;
+    }
+
+private:
+    class Node {
+    public:
+        Node(bool isLeaf = false)
+        {
+            m_children.resize(Codebook::codeSize); // One child slot per code word, all initially empty.
+            m_children.fill(0);
+            m_isLeaf = isLeaf;
+        }
+
+        ~Node()
+        {
+            for (unsigned i = 0; i < m_children.size(); ++i) {
+                Node* child = m_children.at(i);
+                if (child && !child->m_isLeaf) // Leaf children alias the tree's shared m_leaf and are not heap-allocated.
+                    delete child;
+            }
+        }
+
+        Node*& at(int codeWord) { return m_children.at(codeWord); }
+
+    private:
+        typedef Vector<Node*, Codebook::codeSize> ChildrenVector;
+
+        ChildrenVector m_children;
+        bool m_isLeaf;
+    };
+
+    void build(const String& text) // Inserts, for every start offset, the following characters up to m_depth (or the end of text).
+    {
+        for (unsigned base = 0; base < text.length(); ++base) {
+            Node* current = &m_root;
+            unsigned limit = std::min(base + m_depth, text.length());
+            for (unsigned offset = 0; base + offset < limit; ++offset) {
+                ASSERT(current != &m_leaf);
+                Node*& child = current->at(Codebook::codeWord(text[base + offset]));
+                if (!child)
+                    child = base + offset + 1 == limit ? &m_leaf : new Node(); // The last character of a path points at the shared leaf.
+                current = child;
+            }
+        }
+    }
+
+    Node m_root;
+    unsigned m_depth;
+
+    // Instead of allocating a fresh empty leaf node for every leaf in the tree
+    // (there can be a lot of these), we alias all the leaves to this "static"
+    // leaf node.
+    Node m_leaf;
+};
+
+} // namespace WebCore
+
+#endif // SuffixTree_h
diff --git a/Source/WebCore/platform/text/TextBoundaries.cpp b/Source/WebCore/platform/text/TextBoundaries.cpp
new file mode 100644
index 0000000..fbb261b
--- /dev/null
+++ b/Source/WebCore/platform/text/TextBoundaries.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2006, 2007 Apple Inc.  All rights reserved.
+ * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextBoundaries.h"
+
+#include "TextBreakIterator.h"
+#include <wtf/text/StringImpl.h>
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF;
+using namespace Unicode;
+
+namespace WebCore {
+
+int endOfFirstWordBoundaryContext(const UChar* characters, int length) // Offset of the first code point that does not require context for word breaking, or length if there is none.
+{
+    for (int i = 0; i < length; ) {
+        int first = i; // Start offset of the code point about to be decoded.
+        UChar32 ch;
+        U16_NEXT(characters, i, length, ch); // Decodes one code point and advances i past it.
+        if (!requiresContextForWordBoundary(ch))
+            return first;
+    }
+    return length;
+}
+
+int startOfLastWordBoundaryContext(const UChar* characters, int length) // Offset just past the last code point that does not require context for word breaking, or 0 if there is none.
+{
+    for (int i = length; i > 0; ) {
+        int last = i; // End offset of the code point about to be decoded.
+        UChar32 ch;
+        U16_PREV(characters, 0, i, ch); // Decodes the code point before i and moves i back to its start.
+        if (!requiresContextForWordBoundary(ch))
+            return last;
+    }
+    return 0;
+}
+
+#if !PLATFORM(BREWMP) && !PLATFORM(MAC) && !PLATFORM(QT)
+
+int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
+{
+    TextBreakIterator* it = wordBreakIterator(chars, len);
+
+    if (forward) {
+        position = textBreakFollowing(it, position);
+        while (position != TextBreakDone) {
+            // We stop searching when the character preceding the break
+            // is alphanumeric.
+            if (position < len && isAlphanumeric(chars[position - 1]))
+                return position;
+
+            position = textBreakFollowing(it, position);
+        }
+
+        return len; // No suitable break found: return the end of the text.
+    } else {
+        position = textBreakPreceding(it, position);
+        while (position != TextBreakDone) {
+            // We stop searching when the character following the break
+            // is alphanumeric.
+            if (position > 0 && isAlphanumeric(chars[position]))
+                return position;
+
+            position = textBreakPreceding(it, position);
+        }
+
+        return 0; // No suitable break found: return the start of the text.
+    }
+}
+
+void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
+{
+    TextBreakIterator* it = wordBreakIterator(chars, len);
+    *end = textBreakFollowing(it, position);
+    if (*end < 0) // No boundary after position: use the last boundary instead.
+        *end = textBreakLast(it);
+    *start = textBreakPrevious(it); // The boundary before the one the iterator is currently on.
+}
+
+#endif // !PLATFORM(BREWMP) && !PLATFORM(MAC) && !PLATFORM(QT)
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextBoundaries.h b/Source/WebCore/platform/text/TextBoundaries.h
new file mode 100644
index 0000000..870ab62
--- /dev/null
+++ b/Source/WebCore/platform/text/TextBoundaries.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextBoundaries_h
+#define TextBoundaries_h
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+    inline bool requiresContextForWordBoundary(UChar32 ch)
+    {
+        return WTF::Unicode::hasLineBreakingPropertyComplexContext(ch); // Delegates entirely to the WTF Unicode helper.
+    }
+
+    int endOfFirstWordBoundaryContext(const UChar* characters, int length);
+    int startOfLastWordBoundaryContext(const UChar* characters, int length);
+
+    void findWordBoundary(const UChar*, int len, int position, int* start, int* end);
+    int findNextWordFromIndex(const UChar*, int len, int position, bool forward);
+
+}
+
+#endif
diff --git a/Source/WebCore/platform/text/TextBreakIterator.h b/Source/WebCore/platform/text/TextBreakIterator.h
new file mode 100644
index 0000000..17cf5f0
--- /dev/null
+++ b/Source/WebCore/platform/text/TextBreakIterator.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef TextBreakIterator_h
+#define TextBreakIterator_h
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+    class TextBreakIterator;
+
+    // Note: The returned iterator is good only until you get another iterator.
+
+    // Iterates over "extended grapheme clusters", as defined in UAX #29.
+    // Note that platform implementations may be less sophisticated - e.g. ICU prior to
+    // version 4.0 only supports "legacy grapheme clusters".
+    // Use this for general text processing, e.g. string truncation.
+    TextBreakIterator* characterBreakIterator(const UChar*, int length);
+
+    // This is similar to character break iterator in most cases, but is subject to
+    // platform UI conventions. One notable example where this can be different
+    // from character break iterator is Thai prepend characters, see bug 24342.
+    // Use this for insertion point and selection manipulations.
+    TextBreakIterator* cursorMovementIterator(const UChar*, int length);
+
+    TextBreakIterator* wordBreakIterator(const UChar*, int length);
+    TextBreakIterator* lineBreakIterator(const UChar*, int length);
+    TextBreakIterator* sentenceBreakIterator(const UChar*, int length);
+
+    int textBreakFirst(TextBreakIterator*);
+    int textBreakLast(TextBreakIterator*);
+    int textBreakNext(TextBreakIterator*);
+    int textBreakPrevious(TextBreakIterator*);
+    int textBreakCurrent(TextBreakIterator*);
+    int textBreakPreceding(TextBreakIterator*, int);
+    int textBreakFollowing(TextBreakIterator*, int);
+    bool isTextBreak(TextBreakIterator*, int);
+
+    const int TextBreakDone = -1;
+
+}
+
+#endif
diff --git a/Source/WebCore/platform/text/TextBreakIteratorICU.cpp b/Source/WebCore/platform/text/TextBreakIteratorICU.cpp
new file mode 100644
index 0000000..f5575ee
--- /dev/null
+++ b/Source/WebCore/platform/text/TextBreakIteratorICU.cpp
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIterator.h"
+
+#include "PlatformString.h"
+#include "TextBreakIteratorInternalICU.h"
+#include <unicode/ubrk.h>
+#include <wtf/Assertions.h>
+
+namespace WebCore {
+
+static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
+    UBreakIteratorType type, const UChar* string, int length)
+{
+    if (!string)
+        return 0;
+
+    if (!createdIterator) { // Open the ICU iterator only on the first call; callers pass function-local statics.
+        UErrorCode openStatus = U_ZERO_ERROR;
+        iterator = reinterpret_cast<TextBreakIterator*>(ubrk_open(type, currentTextBreakLocaleID(), 0, 0, &openStatus));
+        createdIterator = true; // Set even if opening failed, so a failed open is not retried.
+        ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
+    }
+    if (!iterator)
+        return 0;
+
+    UErrorCode setTextStatus = U_ZERO_ERROR;
+    ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &setTextStatus); // Re-target the shared iterator at the new text.
+    if (U_FAILURE(setTextStatus))
+        return 0;
+
+    return iterator;
+}
+
+TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+{
+    static bool createdCharacterBreakIterator = false;
+    static TextBreakIterator* staticCharacterBreakIterator; // Single shared instance, re-targeted at string on every call.
+    return setUpIterator(createdCharacterBreakIterator,
+        staticCharacterBreakIterator, UBRK_CHARACTER, string, length);
+}
+
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    static bool createdWordBreakIterator = false;
+    static TextBreakIterator* staticWordBreakIterator; // Single shared instance, re-targeted at string on every call.
+    return setUpIterator(createdWordBreakIterator,
+        staticWordBreakIterator, UBRK_WORD, string, length);
+}
+
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+    static bool createdLineBreakIterator = false;
+    static TextBreakIterator* staticLineBreakIterator; // Single shared instance, re-targeted at string on every call.
+    return setUpIterator(createdLineBreakIterator,
+        staticLineBreakIterator, UBRK_LINE, string, length);
+}
+
+TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+{
+    static bool createdSentenceBreakIterator = false;
+    static TextBreakIterator* staticSentenceBreakIterator; // Single shared instance, re-targeted at string on every call.
+    return setUpIterator(createdSentenceBreakIterator,
+        staticSentenceBreakIterator, UBRK_SENTENCE, string, length);
+}
+
+int textBreakFirst(TextBreakIterator* iterator)
+{
+    return ubrk_first(reinterpret_cast<UBreakIterator*>(iterator)); // Forwards to ICU; the iterator is really a UBreakIterator.
+}
+
+int textBreakLast(TextBreakIterator* iterator)
+{
+    return ubrk_last(reinterpret_cast<UBreakIterator*>(iterator)); // Forwards to ICU; the iterator is really a UBreakIterator.
+}
+
+int textBreakNext(TextBreakIterator* iterator)
+{
+    return ubrk_next(reinterpret_cast<UBreakIterator*>(iterator)); // Forwards to ICU; the iterator is really a UBreakIterator.
+}
+
+int textBreakPrevious(TextBreakIterator* iterator)
+{
+    return ubrk_previous(reinterpret_cast<UBreakIterator*>(iterator)); // Forwards to ICU; the iterator is really a UBreakIterator.
+}
+
+int textBreakPreceding(TextBreakIterator* iterator, int pos)
+{
+    return ubrk_preceding(reinterpret_cast<UBreakIterator*>(iterator), pos); // Forwards to ICU; the iterator is really a UBreakIterator.
+}
+
+int textBreakFollowing(TextBreakIterator* iterator, int pos)
+{
+    return ubrk_following(reinterpret_cast<UBreakIterator*>(iterator), pos); // Forwards to ICU; the iterator is really a UBreakIterator.
+}
+
+int textBreakCurrent(TextBreakIterator* iterator)
+{
+    return ubrk_current(reinterpret_cast<UBreakIterator*>(iterator)); // Forwards to ICU; the iterator is really a UBreakIterator.
+}
+
+bool isTextBreak(TextBreakIterator* iterator, int position)
+{
+    return ubrk_isBoundary(reinterpret_cast<UBreakIterator*>(iterator), position); // Forwards to ICU; the iterator is really a UBreakIterator.
+}
+
+#ifndef BUILDING_ON_TIGER
+static TextBreakIterator* setUpIteratorWithRules(bool& createdIterator, TextBreakIterator*& iterator,
+    const char* breakRules, const UChar* string, int length)
+{
+    if (!string)
+        return 0;
+
+    if (!createdIterator) { // Compile the rules into an ICU iterator only on the first call.
+        UParseError parseStatus;
+        UErrorCode openStatus = U_ZERO_ERROR;
+        String rules(breakRules); // ubrk_openRules() takes the rules as UChar text.
+        iterator = reinterpret_cast<TextBreakIterator*>(ubrk_openRules(rules.characters(), rules.length(), 0, 0, &parseStatus, &openStatus));
+        createdIterator = true; // Set even if opening failed, so a failed open is not retried.
+        ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
+    }
+    if (!iterator)
+        return 0;
+
+    UErrorCode setTextStatus = U_ZERO_ERROR;
+    ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &setTextStatus); // Re-target the shared iterator at the new text.
+    if (U_FAILURE(setTextStatus))
+        return 0;
+
+    return iterator;
+}
+#endif // BUILDING_ON_TIGER
+
+TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+{
+#ifdef BUILDING_ON_TIGER
+    // ICU 3.2 cannot compile the below rules.
+    return characterBreakIterator(string, length);
+#else
+    // This rule set is based on character-break iterator rules of ICU 4.0
+    // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
+    // The major differences from the original ones are listed below:
+    // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
+    // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
+    // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
+    // * Added rules that prevent a cursor from moving before Japanese half-width katakana voiced marks.
+    static const char* kRules =
+        "$CR      = [\\p{Grapheme_Cluster_Break = CR}];"
+        "$LF      = [\\p{Grapheme_Cluster_Break = LF}];"
+        "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
+        "$VoiceMarks = [\\uFF9E\\uFF9F];"  // Japanese half-width katakana voiced marks
+        "$Extend  = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
+        "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
+        "$L       = [\\p{Grapheme_Cluster_Break = L}];"
+        "$V       = [\\p{Grapheme_Cluster_Break = V}];"
+        "$T       = [\\p{Grapheme_Cluster_Break = T}];"
+        "$LV      = [\\p{Grapheme_Cluster_Break = LV}];"
+        "$LVT     = [\\p{Grapheme_Cluster_Break = LVT}];"
+        "$Hin0    = [\\u0905-\\u0939];"    // Devanagari Letter A,...,Ha
+        "$HinV    = \\u094D;"              // Devanagari Sign Virama
+        "$Hin1    = [\\u0915-\\u0939];"    // Devanagari Letter Ka,...,Ha
+        "$Ben0    = [\\u0985-\\u09B9];"    // Bengali Letter A,...,Ha
+        "$BenV    = \\u09CD;"              // Bengali Sign Virama
+        "$Ben1    = [\\u0995-\\u09B9];"    // Bengali Letter Ka,...,Ha
+        "$Pan0    = [\\u0A05-\\u0A39];"    // Gurmukhi Letter A,...,Ha
+        "$PanV    = \\u0A4D;"              // Gurmukhi Sign Virama
+        "$Pan1    = [\\u0A15-\\u0A39];"    // Gurmukhi Letter Ka,...,Ha
+        "$Guj0    = [\\u0A85-\\u0AB9];"    // Gujarati Letter A,...,Ha
+        "$GujV    = \\u0ACD;"              // Gujarati Sign Virama
+        "$Guj1    = [\\u0A95-\\u0AB9];"    // Gujarati Letter Ka,...,Ha
+        "$Ori0    = [\\u0B05-\\u0B39];"    // Oriya Letter A,...,Ha
+        "$OriV    = \\u0B4D;"              // Oriya Sign Virama
+        "$Ori1    = [\\u0B15-\\u0B39];"    // Oriya Letter Ka,...,Ha
+        "$Tel0    = [\\u0C05-\\u0C39];"    // Telugu Letter A,...,Ha
+        "$TelV    = \\u0C4D;"              // Telugu Sign Virama
+        "$Tel1    = [\\u0C15-\\u0C39];"    // Telugu Letter Ka,...,Ha (U+0C14 is the vowel Au, so the set starts at U+0C15)
+        "$Kan0    = [\\u0C85-\\u0CB9];"    // Kannada Letter A,...,Ha
+        "$KanV    = \\u0CCD;"              // Kannada Sign Virama
+        "$Kan1    = [\\u0C95-\\u0CB9];"    // Kannada Letter Ka,...,Ha
+        "$Mal0    = [\\u0D05-\\u0D39];"    // Malayalam Letter A,...,Ha
+        "$MalV    = \\u0D4D;"              // Malayalam Sign Virama
+        "$Mal1    = [\\u0D15-\\u0D39];"    // Malayalam Letter Ka,...,Ha
+        "!!chain;"
+        "!!forward;"
+        "$CR $LF;"
+        "$L ($L | $V | $LV | $LVT);"
+        "($LV | $V) ($V | $T);"
+        "($LVT | $T) $T;"
+        "[^$Control $CR $LF] $Extend;"
+        "[^$Control $CR $LF] $SpacingMark;"
+        "$Hin0 $HinV $Hin1;"               // Devanagari Virama (forward)
+        "$Ben0 $BenV $Ben1;"               // Bengali Virama (forward)
+        "$Pan0 $PanV $Pan1;"               // Gurmukhi Virama (forward)
+        "$Guj0 $GujV $Guj1;"               // Gujarati Virama (forward)
+        "$Ori0 $OriV $Ori1;"               // Oriya Virama (forward)
+        "$Tel0 $TelV $Tel1;"               // Telugu Virama (forward)
+        "$Kan0 $KanV $Kan1;"               // Kannada Virama (forward)
+        "$Mal0 $MalV $Mal1;"               // Malayalam Virama (forward)
+        "!!reverse;"
+        "$LF $CR;"
+        "($L | $V | $LV | $LVT) $L;"
+        "($V | $T) ($LV | $V);"
+        "$T ($LVT | $T);"
+        "$Extend      [^$Control $CR $LF];"
+        "$SpacingMark [^$Control $CR $LF];"
+        "$Hin1 $HinV $Hin0;"               // Devanagari Virama (backward)
+        "$Ben1 $BenV $Ben0;"               // Bengali Virama (backward)
+        "$Pan1 $PanV $Pan0;"               // Gurmukhi Virama (backward)
+        "$Guj1 $GujV $Guj0;"               // Gujarati Virama (backward)
+        "$Ori1 $OriV $Ori0;"               // Oriya Virama (backward)
+        "$Tel1 $TelV $Tel0;"               // Telugu Virama (backward)
+        "$Kan1 $KanV $Kan0;"               // Kannada Virama (backward)
+        "$Mal1 $MalV $Mal0;"               // Malayalam Virama (backward)
+        "!!safe_reverse;"
+        "!!safe_forward;";
+    static bool createdCursorMovementIterator = false; // set by setUpIteratorWithRules once it has tried to compile kRules
+    static TextBreakIterator* staticCursorMovementIterator; // the single compiled iterator, re-pointed at "string" on every call
+    return setUpIteratorWithRules(createdCursorMovementIterator, staticCursorMovementIterator, kRules, string, length);
+#endif // BUILDING_ON_TIGER
+}
+
+}
diff --git a/Source/WebCore/platform/text/TextBreakIteratorInternalICU.h b/Source/WebCore/platform/text/TextBreakIteratorInternalICU.h
new file mode 100644
index 0000000..68b7003
--- /dev/null
+++ b/Source/WebCore/platform/text/TextBreakIteratorInternalICU.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef TextBreakIteratorInternalICU_h
+#define TextBreakIteratorInternalICU_h
+
+// FIXME: Now that this handles locales for ICU, not just for text breaking,
+// this file and the various implementation files should be renamed.
+
+namespace WebCore {
+
+    const char* currentSearchLocaleID();
+    const char* currentTextBreakLocaleID();
+
+}
+
+#endif
diff --git a/Source/WebCore/platform/text/TextCodec.cpp b/Source/WebCore/platform/text/TextCodec.cpp
new file mode 100644
index 0000000..4222ee1
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodec.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextCodec.h"
+
+#include "PlatformString.h"
+#include <wtf/StringExtras.h>
+
+namespace WebCore {
+
+TextCodec::~TextCodec() // defined out of line; the body is intentionally empty
+{
+}
+
+int TextCodec::getUnencodableReplacement(unsigned codePoint, UnencodableHandling handling, UnencodableReplacementArray replacement)
+{
+    if (handling == QuestionMarksForUnencodables) {
+        replacement[0] = '?';
+        replacement[1] = 0;
+        return 1;
+    }
+    // sizeof names the array typedef because the parameter itself has decayed to a pointer.
+    if (handling == EntitiesForUnencodables)
+        snprintf(replacement, sizeof(UnencodableReplacementArray), "&#%u;", codePoint);
+    else if (handling == URLEncodedEntitiesForUnencodables)
+        snprintf(replacement, sizeof(UnencodableReplacementArray), "%%26%%23%u%%3B", codePoint);
+    else {
+        ASSERT_NOT_REACHED();
+        replacement[0] = 0;
+    }
+    return static_cast<int>(strlen(replacement));
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextCodec.h b/Source/WebCore/platform/text/TextCodec.h
new file mode 100644
index 0000000..c6af38a
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodec.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodec_h
+#define TextCodec_h
+
+#include <memory>
+#include <wtf/Forward.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+#include "PlatformString.h"
+
+namespace WebCore {
+    class TextEncoding;
+
+    // Specifies what will happen when a character is encountered that is
+    // not encodable in the character set.
+    enum UnencodableHandling {
+        // Substitutes the replacement character "?".
+        QuestionMarksForUnencodables,
+
+        // Encodes the character as an XML entity. For example, U+06DE
+        // would be "&#1758;" (0x6DE = 1758 in decimal).
+        EntitiesForUnencodables,
+
+        // Encodes the character as an entity as above, but with the
+        // non-alphanumeric characters escaped. This is used in URLs.
+        // For example, U+06DE would be "%26%231758%3B".
+        URLEncodedEntitiesForUnencodables,
+    };
+
+    typedef char UnencodableReplacementArray[32]; // buffer type that TextCodec::getUnencodableReplacement fills
+
+    class TextCodec : public Noncopyable { // abstract interface: decode and encode are implemented by subclasses
+    public:
+        virtual ~TextCodec();
+
+        String decode(const char* str, size_t length, bool flush = false) // convenience overload of the virtual decode below
+        {
+            bool ignored; // receives sawError, which this overload discards
+            return decode(str, length, flush, false, ignored); // false: stopOnError is never requested here
+        }
+        
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) = 0;
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling) = 0;
+
+        // Fills a null-terminated string representation of the given
+        // unencodable character into the given replacement buffer.
+        // The length of the string (not including the null) will be returned.
+        static int getUnencodableReplacement(unsigned codePoint, UnencodableHandling, UnencodableReplacementArray);
+    };
+
+    typedef void (*EncodingNameRegistrar)(const char* alias, const char* name); // callback that receives one (alias, name) pair per call
+
+    typedef PassOwnPtr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData); // factory returning a newly created codec
+    typedef void (*TextCodecRegistrar)(const char* name, NewTextCodecFunction, const void* additionalData); // callback that receives a name, its factory and the factory's extra data
+
+} // namespace WebCore
+
+#endif // TextCodec_h
diff --git a/Source/WebCore/platform/text/TextCodecICU.cpp b/Source/WebCore/platform/text/TextCodecICU.cpp
new file mode 100644
index 0000000..6a579f9
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodecICU.cpp
@@ -0,0 +1,490 @@
+/*
+ * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextCodecICU.h"
+
+#include "CharacterNames.h"
+#include "PlatformString.h"
+#include "ThreadGlobalData.h"
+#include <unicode/ucnv.h>
+#include <unicode/ucnv_cb.h>
+#include <wtf/Assertions.h>
+#include <wtf/text/CString.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/StringExtras.h>
+#include <wtf/Threading.h>
+
+using std::min;
+
+namespace WebCore {
+
+const size_t ConversionBufferSize = 16384; // element count of the on-stack chunk buffers in decode() and encode()
+
+ICUConverterWrapper::~ICUConverterWrapper()
+{
+    if (converter) // the wrapper may be empty, in which case there is nothing to close
+        ucnv_close(converter);
+}
+
+static UConverter*& cachedConverterICU()
+{
+    return threadGlobalData().cachedConverterICU().converter; // returned by reference so callers can swap the cached pointer in place
+}
+
+static PassOwnPtr<TextCodec> newTextCodecICU(const TextEncoding& encoding, const void*)
+{
+    return new TextCodecICU(encoding); // factory handed to the codec registrars; the additionalData argument is unused
+}
+
+void TextCodecICU::registerBaseEncodingNames(EncodingNameRegistrar registrar)
+{
+    registrar("UTF-8", "UTF-8"); // the only base name: UTF-8 registered under itself
+}
+
+void TextCodecICU::registerBaseCodecs(TextCodecRegistrar registrar)
+{
+    registrar("UTF-8", newTextCodecICU, 0); // 0: newTextCodecICU needs no additional data
+}
+
+void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
+{
+    // We register Hebrew with logical ordering using a separate name.
+    // Otherwise, this would share the same canonical name as the
+    // visual ordering case, and then TextEncoding could not tell them
+    // apart; ICU treats these names as synonyms.
+    registrar("ISO-8859-8-I", "ISO-8859-8-I");
+
+    int32_t numEncodings = ucnv_countAvailable();
+    for (int32_t i = 0; i < numEncodings; ++i) {
+        const char* name = ucnv_getAvailableName(i);
+        UErrorCode error = U_ZERO_ERROR;
+        // Try MIME before trying IANA to pick up commonly used names like
+        // 'EUC-JP' instead of horrendously long names like
+        // 'Extended_UNIX_Code_Packed_Format_for_Japanese'.
+        const char* standardName = ucnv_getStandardName(name, "MIME", &error);
+        if (!U_SUCCESS(error) || !standardName) {
+            error = U_ZERO_ERROR;
+            // Try IANA to pick up 'windows-12xx' and other names
+            // which are not preferred MIME names but are widely used.
+            standardName = ucnv_getStandardName(name, "IANA", &error);
+            if (!U_SUCCESS(error) || !standardName)
+                continue; // neither a MIME nor an IANA name: this converter is not registered
+        }
+
+        // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
+        // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
+        //    for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
+        if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312-80") == 0)
+            standardName = "GBK";
+        // Similarly, EUC-KR encodings all map to an extended version.
+        else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, "EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0)
+            standardName = "windows-949";
+        // And so on.
+        else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is returned in different case by ICU 3.2 and 3.6.
+            standardName = "windows-1254";
+        else if (strcmp(standardName, "TIS-620") == 0)
+            standardName = "windows-874";
+
+        registrar(standardName, standardName); // the (possibly remapped) name is registered under itself
+
+        uint16_t numAliases = ucnv_countAliases(name, &error);
+        ASSERT(U_SUCCESS(error));
+        if (U_SUCCESS(error))
+            for (uint16_t j = 0; j < numAliases; ++j) {
+                error = U_ZERO_ERROR;
+                const char* alias = ucnv_getAlias(name, j, &error);
+                ASSERT(U_SUCCESS(error));
+                if (U_SUCCESS(error) && alias != standardName) // NOTE(review): compares pointers, not string contents - confirm this is intended
+                    registrar(alias, standardName);
+            }
+    }
+
+    // Additional aliases.
+    // These are present in modern versions of ICU, but not in ICU 3.2 (shipped with Mac OS X 10.4).
+    registrar("macroman", "macintosh");
+    registrar("maccyrillic", "x-mac-cyrillic");
+
+    // Additional aliases that historically were present in the encoding
+    // table in WebKit on Macintosh that don't seem to be present in ICU.
+    // Perhaps we can prove these are not used on the web and remove them.
+    // Or perhaps we can get them added to ICU.
+    registrar("x-mac-roman", "macintosh");
+    registrar("x-mac-ukrainian", "x-mac-cyrillic");
+    registrar("cn-big5", "Big5");
+    registrar("x-x-big5", "Big5");
+    registrar("cn-gb", "GBK");
+    registrar("csgb231280", "GBK");
+    registrar("x-euc-cn", "GBK");
+    registrar("x-gbk", "GBK");
+    registrar("csISO88598I", "ISO-8859-8-I");
+    registrar("koi", "KOI8-R");
+    registrar("logical", "ISO-8859-8-I");
+    registrar("unicode11utf8", "UTF-8");
+    registrar("unicode20utf8", "UTF-8");
+    registrar("x-unicode20utf8", "UTF-8");
+    registrar("visual", "ISO-8859-8");
+    registrar("winarabic", "windows-1256");
+    registrar("winbaltic", "windows-1257");
+    registrar("wincyrillic", "windows-1251");
+    registrar("iso-8859-11", "windows-874");
+    registrar("iso8859-11", "windows-874");
+    registrar("dos-874", "windows-874");
+    registrar("wingreek", "windows-1253");
+    registrar("winhebrew", "windows-1255");
+    registrar("winlatin2", "windows-1250");
+    registrar("winturkish", "windows-1254");
+    registrar("winvietnamese", "windows-1258");
+    registrar("x-cp1250", "windows-1250");
+    registrar("x-cp1251", "windows-1251");
+    registrar("x-euc", "EUC-JP");
+    registrar("x-windows-949", "windows-949");
+    registrar("x-uhc", "windows-949");
+    registrar("utf8", "UTF-8");
+    registrar("shift-jis", "Shift_JIS");
+
+    // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names.
+    // They are not present in ICU 3.2.
+    registrar("dos-720", "cp864");
+    registrar("jis7", "ISO-2022-JP");
+
+    // Alternative spelling of ISO encoding names.
+    registrar("ISO8859-1", "ISO-8859-1");
+    registrar("ISO8859-2", "ISO-8859-2");
+    registrar("ISO8859-3", "ISO-8859-3");
+    registrar("ISO8859-4", "ISO-8859-4");
+    registrar("ISO8859-5", "ISO-8859-5");
+    registrar("ISO8859-6", "ISO-8859-6");
+    registrar("ISO8859-7", "ISO-8859-7");
+    registrar("ISO8859-8", "ISO-8859-8");
+    registrar("ISO8859-8-I", "ISO-8859-8-I");
+    registrar("ISO8859-9", "ISO-8859-9");
+    registrar("ISO8859-10", "ISO-8859-10");
+    registrar("ISO8859-13", "ISO-8859-13");
+    registrar("ISO8859-14", "ISO-8859-14");
+    registrar("ISO8859-15", "ISO-8859-15");
+    // Not registering ISO8859-16, because Firefox (as of version 3.6.6) doesn't know this particular alias,
+    // and because older versions of ICU don't support ISO-8859-16 encoding at all.
+}
+
+void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar)
+{
+    // See the comment about logical Hebrew at the top of registerExtendedEncodingNames.
+    registrar("ISO-8859-8-I", newTextCodecICU, 0);
+
+    const int32_t converterCount = ucnv_countAvailable();
+    for (int32_t index = 0; index < converterCount; ++index) {
+        const char* converterName = ucnv_getAvailableName(index);
+        UErrorCode status = U_ZERO_ERROR;
+        const char* canonicalName = ucnv_getStandardName(converterName, "MIME", &status);
+        if (U_FAILURE(status) || !canonicalName) {
+            // No MIME name: fall back to the IANA one; a converter with neither is skipped below.
+            status = U_ZERO_ERROR;
+            canonicalName = ucnv_getStandardName(converterName, "IANA", &status);
+        }
+        if (U_SUCCESS(status) && canonicalName)
+            registrar(canonicalName, newTextCodecICU, 0);
+    }
+}
+
+TextCodecICU::TextCodecICU(const TextEncoding& encoding)
+    : m_encoding(encoding)
+    , m_numBufferedBytes(0)
+    , m_converterICU(0) // no converter yet; decode() and encode() call createICUConverter() on first use
+    , m_needsGBKFallbacks(false) // recomputed by createICUConverter()
+{
+}
+
+TextCodecICU::~TextCodecICU()
+{
+    releaseICUConverter(); // parks our converter in the cache slot instead of closing it
+}
+
+void TextCodecICU::releaseICUConverter() const
+{
+    if (!m_converterICU)
+        return;
+    UConverter*& cacheSlot = cachedConverterICU(); // our converter replaces whatever is parked in this slot
+    if (cacheSlot)
+        ucnv_close(cacheSlot);
+    cacheSlot = m_converterICU;
+    m_converterICU = 0;
+}
+
+void TextCodecICU::createICUConverter() const
+{
+    ASSERT(!m_converterICU);
+
+    // encode() consults this flag to pick the GBK-specific fallback callbacks.
+    const char* encodingName = m_encoding.name();
+    m_needsGBKFallbacks = !strcmp(encodingName, "GBK");
+
+    UConverter*& cacheSlot = cachedConverterICU();
+    if (cacheSlot) { // take over the cached converter when it was opened for this very encoding
+        UErrorCode nameStatus = U_ZERO_ERROR;
+        const char* cachedName = ucnv_getName(cacheSlot, &nameStatus);
+        if (U_SUCCESS(nameStatus) && m_encoding == cachedName) {
+            m_converterICU = cacheSlot;
+            cacheSlot = 0;
+            return;
+        }
+    }
+
+    // Otherwise open a fresh converter; callers check m_converterICU afterwards.
+    UErrorCode openStatus = U_ZERO_ERROR;
+    m_converterICU = ucnv_open(m_encoding.name(), &openStatus);
+#if !LOG_DISABLED
+    if (openStatus == U_AMBIGUOUS_ALIAS_WARNING)
+        LOG_ERROR("ICU ambiguous alias warning for encoding: %s", m_encoding.name());
+#endif
+    if (m_converterICU)
+        ucnv_setFallback(m_converterICU, TRUE);
+}
+
+int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err)
+{
+    UChar* targetStart = target; // remembered so the amount written can be computed afterwards
+    err = U_ZERO_ERROR; // any error left over from a previous call is cleared first
+    ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err); // moves target and the caller's source forward
+    return target - targetStart; // number of UChars written into the buffer
+}
+
+class ErrorCallbackSetter { // scoped helper: installs a stop-on-illegal to-Unicode callback and restores the old one on destruction
+public:
+    ErrorCallbackSetter(UConverter* converter, bool stopOnError)
+        : m_converter(converter)
+        , m_shouldStopOnEncodingErrors(stopOnError)
+    {
+        if (m_shouldStopOnEncodingErrors) { // otherwise the converter's callback is left untouched
+            UErrorCode err = U_ZERO_ERROR;
+            ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,
+                           UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,
+                           &m_savedContext, &err); // the previous callback and context are saved for the destructor
+            ASSERT(err == U_ZERO_ERROR);
+        }
+    }
+    ~ErrorCallbackSetter()
+    {
+        if (m_shouldStopOnEncodingErrors) {
+            UErrorCode err = U_ZERO_ERROR;
+            const void* oldContext;
+            UConverterToUCallback oldAction;
+            ucnv_setToUCallBack(m_converter, m_savedAction,
+                   m_savedContext, &oldAction,
+                   &oldContext, &err); // puts back what the constructor replaced
+            ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE); // what we displace must be what the constructor installed
+            ASSERT(!strcmp(static_cast<const char*>(oldContext), UCNV_SUB_STOP_ON_ILLEGAL));
+            ASSERT(err == U_ZERO_ERROR);
+        }
+    }
+private:
+    UConverter* m_converter;
+    bool m_shouldStopOnEncodingErrors;
+    const void* m_savedContext; // only written and read when m_shouldStopOnEncodingErrors is true
+    UConverterToUCallback m_savedAction; // only written and read when m_shouldStopOnEncodingErrors is true
+};
+
+String TextCodecICU::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
+{
+    // Obtain the converter for m_encoding on first use; without one there is nothing to decode with.
+    if (!m_converterICU)
+        createICUConverter();
+    ASSERT(m_converterICU);
+    if (!m_converterICU) {
+        LOG_ERROR("error creating ICU encoder even though encoding was in table");
+        return String();
+    }
+
+    // For the rest of this call the converter stops on illegal input if stopOnError is set.
+    ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError);
+
+    Vector<UChar> decoded;
+    UChar chunk[ConversionBufferSize];
+    UChar* const chunkLimit = chunk + ConversionBufferSize;
+    const char* cursor = bytes;
+    const char* const inputLimit = bytes + length;
+    int32_t* const offsets = 0;
+    UErrorCode status = U_ZERO_ERROR;
+
+    do {
+        int chunkLength = decodeToBuffer(chunk, chunkLimit, cursor, inputLimit, offsets, flush, status);
+        decoded.append(chunk, chunkLength);
+    } while (status == U_BUFFER_OVERFLOW_ERROR); // an overflow only means the chunk filled up and more output is pending
+
+    if (U_FAILURE(status)) {
+        // flush the converter so it can be reused, and not be bothered by this error.
+        do {
+            decodeToBuffer(chunk, chunkLimit, cursor, inputLimit, offsets, true, status);
+        } while (cursor < inputLimit);
+        sawError = true;
+    }
+
+    String resultString = String::adopt(decoded);
+
+    // <http://bugs.webkit.org/show_bug.cgi?id=17014>
+    // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
+    const char* encodingName = m_encoding.name();
+    if (!strcmp(encodingName, "GBK") || !strcasecmp(encodingName, "gb18030"))
+        resultString.replace(0xE5E5, ideographicSpace);
+
+    return resultString;
+}
+
+// We need to apply these fallbacks ourselves as they are not currently supported by ICU and
+// they were provided by the old TEC encoding path
+// Needed to fix <rdar://problem/4708689>
+static UChar getGbkEscape(UChar32 codePoint)
+{
+    // Each row pairs a code point with the character that is written in its place.
+    static const struct { UChar32 from; UChar to; } fallbacks[] = {
+        { 0x01F9, 0xE7C8 },
+        { 0x1E3F, 0xE7C7 },
+        { 0x22EF, 0x2026 },
+        { 0x301C, 0xFF5E },
+    };
+    for (size_t i = 0; i < sizeof(fallbacks) / sizeof(fallbacks[0]); ++i) {
+        if (fallbacks[i].from == codePoint)
+            return fallbacks[i].to;
+    }
+    return 0; // no special fallback for this code point
+}
+
+// Invalid character handler when writing escaped entities for unrepresentable
+// characters. See the declaration of TextCodec::encode for more.
+static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
+                                     UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+{
+    if (reason != UCNV_UNASSIGNED) { // everything but an unassigned character goes to ICU's stock escape callback
+        UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+        return;
+    }
+    *err = U_ZERO_ERROR; // the unassigned character is handled here, so the pending error is cleared
+    UnencodableReplacementArray escaped;
+    int escapedLength = TextCodec::getUnencodableReplacement(codePoint, URLEncodedEntitiesForUnencodables, escaped);
+    ucnv_cbFromUWriteBytes(fromUArgs, escaped, escapedLength, 0, err);
+}
+
+// Substitutes special GBK characters, escaping all other unassigned entities.
+static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
+                              UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+{
+    UChar fallback = reason == UCNV_UNASSIGNED ? getGbkEscape(codePoint) : 0;
+    if (!fallback) { // no GBK-specific substitute: defer to ICU's stock escape callback
+        UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+        return;
+    }
+    const UChar* cursor = &fallback; // ucnv_cbFromUWriteUChars takes the source pointer by address
+    *err = U_ZERO_ERROR;
+    ucnv_cbFromUWriteUChars(fromUArgs, &cursor, cursor + 1, 0, err);
+}
+
+// Combines both urlEscapedEntityCallback and GBK character substitution.
+static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
+                                       UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+{
+    // Three outcomes: ICU's stock escape for anything but an unassigned character,
+    // the GBK substitute when one exists, and the URL-escaped entity otherwise.
+    if (reason != UCNV_UNASSIGNED)
+        UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+    else if (UChar fallback = getGbkEscape(codePoint)) {
+        // Feed the substitute character back through the converter.
+        const UChar* cursor = &fallback;
+        *err = U_ZERO_ERROR;
+        ucnv_cbFromUWriteUChars(fromUArgs, &cursor, cursor + 1, 0, err);
+    } else
+        urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+}
+
+static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
+                                  UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+{
+    UChar fallback = reason == UCNV_UNASSIGNED ? getGbkEscape(codePoint) : 0;
+    if (!fallback) { // no GBK-specific substitute: defer to ICU's stock substitution callback
+        UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+        return;
+    }
+    const UChar* cursor = &fallback; // ucnv_cbFromUWriteUChars takes the source pointer by address
+    *err = U_ZERO_ERROR;
+    ucnv_cbFromUWriteUChars(fromUArgs, &cursor, cursor + 1, 0, err);
+}
+
+CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    if (!length)
+        return ""; // empty input: an empty string, as opposed to the CString() returned on the failure paths below
+
+    if (!m_converterICU)
+        createICUConverter();
+    if (!m_converterICU)
+        return CString();
+
+    // FIXME: We should see if there is "force ASCII range" mode in ICU;
+    // until then, we change the backslash into a yen sign.
+    // Encoding will change the yen sign back into a backslash.
+    String copy(characters, length);
+    copy = m_encoding.displayString(copy.impl());
+
+    const UChar* source = copy.characters();
+    const UChar* sourceLimit = source + copy.length();
+
+    UErrorCode err = U_ZERO_ERROR;
+
+    switch (handling) { // install the from-Unicode callback matching the requested handling; GBK gets its own variants
+        case QuestionMarksForUnencodables:
+            ucnv_setSubstChars(m_converterICU, "?", 1, &err);
+            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
+            break;
+        case EntitiesForUnencodables:
+            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
+            break;
+        case URLEncodedEntitiesForUnencodables:
+            ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
+            break;
+    }
+
+    ASSERT(U_SUCCESS(err));
+    if (U_FAILURE(err))
+        return CString();
+
+    Vector<char> result;
+    size_t size = 0; // number of encoded bytes accumulated in result so far
+    do {
+        char buffer[ConversionBufferSize];
+        char* target = buffer;
+        char* targetLimit = target + ConversionBufferSize;
+        err = U_ZERO_ERROR;
+        ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err); // flush is always true here
+        size_t count = target - buffer; // bytes produced by this pass
+        result.grow(size + count);
+        memcpy(result.data() + size, buffer, count);
+        size += count;
+    } while (err == U_BUFFER_OVERFLOW_ERROR); // repeat while the chunk buffer filled up before all input was consumed
+
+    return CString(result.data(), size);
+}
+
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextCodecICU.h b/Source/WebCore/platform/text/TextCodecICU.h
new file mode 100644
index 0000000..bf517f7
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodecICU.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodecICU_h
+#define TextCodecICU_h
+
+#include "TextCodec.h"
+#include "TextEncoding.h"
+
+#include <unicode/utypes.h>
+
+typedef struct UConverter UConverter;
+
+namespace WebCore {
+
+    // TextCodec subclass whose conversions go through an ICU converter
+    // (UConverter, held in m_converterICU).
+    class TextCodecICU : public TextCodec {
+    public:
+        // Registration entry points, split into a "base" and an "extended" set
+        // of encoding names and codec factories.
+        static void registerBaseEncodingNames(EncodingNameRegistrar);
+        static void registerBaseCodecs(TextCodecRegistrar);
+
+        static void registerExtendedEncodingNames(EncodingNameRegistrar);
+        static void registerExtendedCodecs(TextCodecRegistrar);
+
+        TextCodecICU(const TextEncoding&);
+        virtual ~TextCodecICU();
+
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+    private:
+        // Declared const; presumably they create/release m_converterICU, which
+        // is mutable so that const members can modify it - confirm in TextCodecICU.cpp.
+        void createICUConverter() const;
+        void releaseICUConverter() const;
+        // Accessors for the flag that makes encode() pick the gbk* variants of
+        // the ICU "from Unicode" callbacks.
+        bool needsGBKFallbacks() const { return m_needsGBKFallbacks; }
+        void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; }
+        
+        int decodeToBuffer(UChar* buffer, UChar* bufferLimit, const char*& source,
+            const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err);
+
+        TextEncoding m_encoding;
+        unsigned m_numBufferedBytes;
+        unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+        mutable UConverter* m_converterICU;
+        mutable bool m_needsGBKFallbacks;
+    };
+
+    // Holder for a single UConverter pointer, which starts out null.
+    // The destructor is defined out of line; presumably it disposes of the
+    // converter - confirm in TextCodecICU.cpp.
+    struct ICUConverterWrapper {
+        ICUConverterWrapper()
+            : converter(0)
+        {
+        }
+        ~ICUConverterWrapper();
+
+        UConverter* converter;
+    };
+
+} // namespace WebCore
+
+#endif // TextCodecICU_h
diff --git a/Source/WebCore/platform/text/TextCodecLatin1.cpp b/Source/WebCore/platform/text/TextCodecLatin1.cpp
new file mode 100644
index 0000000..2a217c5
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodecLatin1.cpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextCodecLatin1.h"
+
+#include "PlatformString.h"
+#include <stdio.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/StringBuffer.h>
+#include <wtf/PassOwnPtr.h>
+
+namespace WebCore {
+
+// Byte-to-UChar decoding table, indexed by the byte value.
+// Entries 00-7F and A0-FF map each byte to the code point of the same value.
+// Entries 80-9F hold the Windows Latin-1 (windows-1252) assignments; 81, 8D,
+// 8F, 90 and 9D have no special assignment here and map to themselves.
+// The encoder also scans the 80-9F range of this table in reverse direction.
+static const UChar table[256] = {
+    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00-07
+    0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08-0F
+    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, // 10-17
+    0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, // 18-1F
+    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, // 20-27
+    0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, // 28-2F
+    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 30-37
+    0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, // 38-3F
+    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, // 40-47
+    0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, // 48-4F
+    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // 50-57
+    0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, // 58-5F
+    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, // 60-67
+    0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, // 68-6F
+    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, // 70-77
+    0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, // 78-7F
+    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
+    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
+    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
+    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F
+    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, // A0-A7
+    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, // A8-AF
+    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, // B0-B7
+    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, // B8-BF
+    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, // C0-C7
+    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, // C8-CF
+    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, // D0-D7
+    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, // D8-DF
+    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, // E0-E7
+    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, // E8-EF
+    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, // F0-F7
+    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF  // F8-FF
+};
+
+// Registers the canonical names windows-1252, ISO-8859-1 and US-ASCII together
+// with their aliases. Rows are handed to the registrar in the order listed.
+void TextCodecLatin1::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    // Each row is { alias, canonical name }.
+    static const char* const aliasTable[][2] = {
+        // Canonical names map to themselves.
+        { "windows-1252", "windows-1252" },
+        { "ISO-8859-1", "ISO-8859-1" },
+        { "US-ASCII", "US-ASCII" },
+
+        // Aliases of windows-1252.
+        { "WinLatin1", "windows-1252" },
+        { "ibm-1252", "windows-1252" },
+        { "ibm-1252_P100-2000", "windows-1252" },
+
+        // Aliases of ISO-8859-1.
+        { "CP819", "ISO-8859-1" },
+        { "IBM819", "ISO-8859-1" },
+        { "csISOLatin1", "ISO-8859-1" },
+        { "iso-ir-100", "ISO-8859-1" },
+        { "iso_8859-1:1987", "ISO-8859-1" },
+        { "l1", "ISO-8859-1" },
+        { "latin1", "ISO-8859-1" },
+
+        // Aliases of US-ASCII.
+        { "ANSI_X3.4-1968", "US-ASCII" },
+        { "ANSI_X3.4-1986", "US-ASCII" },
+        { "ASCII", "US-ASCII" },
+        { "IBM367", "US-ASCII" },
+        { "ISO646-US", "US-ASCII" },
+        { "ISO_646.irv:1991", "US-ASCII" },
+        { "cp367", "US-ASCII" },
+        { "csASCII", "US-ASCII" },
+        { "ibm-367_P100-1995", "US-ASCII" },
+        { "iso-ir-6", "US-ASCII" },
+        { "iso-ir-6-us", "US-ASCII" },
+        { "us", "US-ASCII" },
+        { "x-ansi", "US-ASCII" }
+    };
+
+    const size_t rowCount = sizeof(aliasTable) / sizeof(aliasTable[0]);
+    for (size_t row = 0; row < rowCount; ++row)
+        registrar(aliasTable[row][0], aliasTable[row][1]);
+}
+
+// Codec factory handed to the registrar in registerCodecs(). Both arguments
+// are ignored; every call returns a freshly allocated TextCodecLatin1.
+static PassOwnPtr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*)
+{
+    return new TextCodecLatin1;
+}
+
+// Registers the same codec factory under three encoding names.
+void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar)
+{
+    // ASCII and Latin-1 both decode as Windows Latin-1 although they retain unique identities,
+    // so all three names share one factory. Registration order: windows-1252 first.
+    static const char* const codecNames[] = { "windows-1252", "ISO-8859-1", "US-ASCII" };
+
+    const size_t nameCount = sizeof(codecNames) / sizeof(codecNames[0]);
+    for (size_t index = 0; index < nameCount; ++index)
+        registrar(codecNames[index], newStreamingTextDecoderWindowsLatin1, 0);
+}
+
+// Word-sized mask with the high bit of every byte set, selected by word size
+// in bytes (4 or 8). ANDing a machine word with it is non-zero exactly when at
+// least one byte of the word is >= 0x80. Only sizes 4 and 8 are specialized;
+// any other size fails to compile because the primary template is undefined.
+template<size_t size> struct NonASCIIMask;
+template<> struct NonASCIIMask<4> {
+    static unsigned value() { return 0x80808080U; }
+};
+template<> struct NonASCIIMask<8> {
+    static unsigned long long value() { return 0x8080808080808080ULL; }
+};
+
+// Widens a fixed number of bytes (4 or 8, chosen by the template argument)
+// into the same number of UChars, one byte per UChar. Only sizes 4 and 8 are
+// specialized; the primary template is intentionally left undefined.
+template<size_t size> struct UCharByteFiller;
+template<> struct UCharByteFiller<4> {
+    static void copy(UChar* dest, const unsigned char* src)
+    {
+        for (unsigned offset = 0; offset < 4; ++offset)
+            dest[offset] = src[offset];
+    }
+};
+template<> struct UCharByteFiller<8> {
+    static void copy(UChar* dest, const unsigned char* src)
+    {
+        for (unsigned offset = 0; offset < 8; ++offset)
+            dest[offset] = src[offset];
+    }
+};
+
+// Decodes |length| bytes into a String of exactly |length| UChars, one output
+// character per input byte. Bytes below 0x80 are copied as-is; all other bytes
+// are translated through |table|. The flush, stopOnError and sawError
+// arguments are unnamed and unused.
+String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
+{
+    UChar* characters;
+    String result = String::createUninitialized(length, characters);
+
+    const unsigned char* src = reinterpret_cast<const unsigned char*>(bytes);
+    const unsigned char* end = reinterpret_cast<const unsigned char*>(bytes + length);
+    // |end| rounded down to a multiple of the machine word size: the word-at-a-time
+    // loop below never reads a word that extends past this address.
+    const unsigned char* alignedEnd = reinterpret_cast<const unsigned char*>(reinterpret_cast<ptrdiff_t>(end) & ~(sizeof(uintptr_t) - 1));
+    UChar* dest = characters;
+
+    while (src < end) {
+        if (*src < 0x80) {
+            // Fast path for values < 0x80 (most Latin-1 text will be ASCII)
+            // Wait until we're at a properly aligned address, then read full CPU words.
+            if (!(reinterpret_cast<ptrdiff_t>(src) & (sizeof(uintptr_t) - 1))) {
+                while (src < alignedEnd) {
+                    uintptr_t chunk = *reinterpret_cast_ptr<const uintptr_t*>(src);
+
+                    // Some byte in this word is >= 0x80: leave the word loop and handle the
+                    // current byte through the table (which maps bytes < 0x80 to themselves).
+                    if (chunk & NonASCIIMask<sizeof(uintptr_t)>::value())
+                        goto useLookupTable;
+
+                    UCharByteFiller<sizeof(uintptr_t)>::copy(dest, src);
+
+                    src += sizeof(uintptr_t);
+                    dest += sizeof(uintptr_t);
+                }
+
+                // The word loop may have consumed the whole input; do not read *src then.
+                if (src == end)
+                    break;
+            }
+            *dest = *src;
+        } else {
+useLookupTable:
+            *dest = table[*src];
+        }
+
+        ++src;
+        ++dest;
+    }
+
+    return result;
+}
+
+// Slow path of TextCodecLatin1::encode(), used when the input is not pure ASCII.
+// Walks the UTF-16 input one code point at a time. Code points 00-7F and A0-FF
+// are written as the byte of the same value; anything else is looked up in the
+// 80-9F range of |table|, and if no byte maps to it, the replacement produced
+// by TextCodec::getUnencodableReplacement() for |handling| is written instead.
+// Returns the encoded bytes as a CString.
+static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    // Start with one byte per input code unit; enlarged below when a
+    // replacement sequence needs more room.
+    Vector<char> result(length);
+    char* bytes = result.data();
+
+    size_t resultLength = 0;
+    for (size_t i = 0; i < length; ) {
+        UChar32 c;
+        U16_NEXT(characters, i, length, c);
+        unsigned char b = c;
+        // Do an efficient check to detect characters other than 00-7F and A0-FF.
+        if (b != c || (c & 0xE0) == 0x80) {
+            // Look for a way to encode this with Windows Latin-1.
+            for (b = 0x80; b < 0xA0; ++b)
+                if (table[b] == c)
+                    goto gotByte;
+            // No way to encode this character with Windows Latin-1.
+            UnencodableReplacementArray replacement;
+            int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement);
+            // Room needed from here on: what is already written, the replacement,
+            // and at most one byte for each remaining input code unit.
+            // A surrogate pair consumes two code units, so with a short replacement
+            // this can be smaller than the current size. Only grow when more room
+            // is really required; Vector::grow() is not meant to shrink the vector.
+            size_t requiredSize = resultLength + replacementLength + length - i;
+            if (requiredSize > result.size()) {
+                result.grow(requiredSize);
+                bytes = result.data();
+            }
+            memcpy(bytes + resultLength, replacement, replacementLength);
+            resultLength += replacementLength;
+            continue;
+        }
+    gotByte:
+        bytes[resultLength++] = b;
+    }
+
+    return CString(bytes, resultLength);
+}
+
+// Encodes |length| UTF-16 code units. Pure-ASCII input is handled by a single
+// narrowing pass; anything else is delegated to encodeComplexWindowsLatin1().
+CString TextCodecLatin1::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    {
+        // Scoped so that the speculative result is released before the slow path runs.
+        char* out;
+        CString asciiResult = CString::newUninitialized(length, out);
+
+        // Narrow every code unit to a byte and OR all units together; the
+        // accumulated bits tell afterwards whether any unit was outside 00-7F.
+        UChar seenBits = 0;
+        const UChar* in = characters;
+        for (size_t remaining = length; remaining; --remaining) {
+            const UChar unit = *in++;
+            *out++ = unit;
+            seenBits |= unit;
+        }
+
+        const bool allASCII = !(seenBits & 0xFF80);
+        if (allASCII)
+            return asciiResult;
+    }
+
+    // Not all ASCII: redo the conversion with the function that handles the general case.
+    return encodeComplexWindowsLatin1(characters, length, handling);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextCodecLatin1.h b/Source/WebCore/platform/text/TextCodecLatin1.h
new file mode 100644
index 0000000..f035d01
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodecLatin1.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodecLatin1_h
+#define TextCodecLatin1_h
+
+#include "TextCodec.h"
+
+namespace WebCore {
+
+    // TextCodec registered for windows-1252, ISO-8859-1 and US-ASCII.
+    // The class has no data members of its own.
+    class TextCodecLatin1 : public TextCodec {
+    public:
+        static void registerEncodingNames(EncodingNameRegistrar);
+        static void registerCodecs(TextCodecRegistrar);
+
+        // decode() yields one UChar per input byte and ignores flush, stopOnError and sawError.
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+    };
+
+} // namespace WebCore
+
+#endif // TextCodecLatin1_h
diff --git a/Source/WebCore/platform/text/TextCodecUTF16.cpp b/Source/WebCore/platform/text/TextCodecUTF16.cpp
new file mode 100644
index 0000000..e88e83b
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodecUTF16.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextCodecUTF16.h"
+
+#include "PlatformString.h"
+#include <wtf/text/CString.h>
+#include <wtf/text/StringBuffer.h>
+#include <wtf/PassOwnPtr.h>
+
+using namespace std;
+
+namespace WebCore {
+
+// Registers UTF-16LE and UTF-16BE plus their aliases. Names that do not state
+// a byte order (e.g. "UTF-16") are registered as aliases of UTF-16LE.
+void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    // Each row is { alias, canonical name }; rows are registered in order.
+    static const char* const aliasTable[][2] = {
+        { "UTF-16LE", "UTF-16LE" },
+        { "UTF-16BE", "UTF-16BE" },
+
+        { "ISO-10646-UCS-2", "UTF-16LE" },
+        { "UCS-2", "UTF-16LE" },
+        { "UTF-16", "UTF-16LE" },
+        { "Unicode", "UTF-16LE" },
+        { "csUnicode", "UTF-16LE" },
+        { "unicodeFEFF", "UTF-16LE" },
+
+        { "unicodeFFFE", "UTF-16BE" }
+    };
+
+    const size_t rowCount = sizeof(aliasTable) / sizeof(aliasTable[0]);
+    for (size_t row = 0; row < rowCount; ++row)
+        registrar(aliasTable[row][0], aliasTable[row][1]);
+}
+
+// Codec factory registered for "UTF-16LE". Both arguments are ignored; returns
+// a new TextCodecUTF16 constructed with littleEndian = true.
+static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*)
+{
+    return new TextCodecUTF16(true);
+}
+
+// Codec factory registered for "UTF-16BE". Both arguments are ignored; returns
+// a new TextCodecUTF16 constructed with littleEndian = false.
+static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*)
+{
+    return new TextCodecUTF16(false);
+}
+
+// Registers one factory per byte order; the third registrar argument is 0 for both.
+void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar)
+{
+    registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0);
+    registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0);
+}
+
+// Decodes a chunk of UTF-16 bytes in the codec's byte order into a String.
+// Input may arrive split at any byte boundary: an odd trailing byte is kept in
+// m_bufferedByte and combined with the first byte of the next call. Empty
+// input returns a null String. The flush, stopOnError and sawError arguments
+// are unnamed and unused.
+String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool, bool&)
+{
+    if (!length)
+        return String();
+
+    const unsigned char* in = reinterpret_cast<const unsigned char*>(bytes);
+    const size_t totalBytes = length + m_haveBufferedByte;
+    size_t pendingUnits = totalBytes / 2;
+
+    StringBuffer buffer(pendingUnits);
+    UChar* out = buffer.characters();
+
+    // Positions of the low-order and high-order byte inside each two-byte unit.
+    const size_t lowIndex = m_littleEndian ? 0 : 1;
+    const size_t highIndex = 1 - lowIndex;
+
+    if (m_haveBufferedByte) {
+        // The first unit is made of the byte carried over from the previous
+        // call followed by the first byte of this chunk.
+        const unsigned char carried[2] = { m_bufferedByte, in[0] };
+        *out++ = carried[lowIndex] | (carried[highIndex] << 8);
+        m_haveBufferedByte = false;
+        ++in;
+        --pendingUnits;
+    }
+
+    for (; pendingUnits; --pendingUnits, in += 2)
+        *out++ = in[lowIndex] | (in[highIndex] << 8);
+
+    // An odd total leaves one byte over; remember it for the next call.
+    if (totalBytes & 1) {
+        ASSERT(!m_haveBufferedByte);
+        m_haveBufferedByte = true;
+        m_bufferedByte = in[0];
+    }
+
+    buffer.shrink(out - buffer.characters());
+
+    return String::adopt(buffer);
+}
+
+// Serializes |length| UTF-16 code units as two bytes each, in the codec's byte
+// order. Every code unit is representable, so the UnencodableHandling argument
+// is unnamed and unused.
+CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling)
+{
+    // The output is twice as long as the input. |length| describes an existing
+    // buffer of two-byte characters that cannot span the whole address space,
+    // so length * 2 cannot overflow size_t; an assertion is enough and no
+    // runtime check is needed.
+    ASSERT(length <= numeric_limits<size_t>::max() / 2);
+
+    char* out;
+    CString encoded = CString::newUninitialized(length * 2, out);
+
+    // FIXME: CString is not a reasonable data structure for encoded UTF-16, which will have
+    // null characters inside it. Perhaps the result of encode should not be a CString.
+
+    // Positions of the low-order and high-order byte inside each two-byte unit.
+    const size_t lowIndex = m_littleEndian ? 0 : 1;
+    const size_t highIndex = 1 - lowIndex;
+
+    for (size_t i = 0; i < length; ++i, out += 2) {
+        const UChar unit = characters[i];
+        out[lowIndex] = unit;
+        out[highIndex] = unit >> 8;
+    }
+
+    return encoded;
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextCodecUTF16.h b/Source/WebCore/platform/text/TextCodecUTF16.h
new file mode 100644
index 0000000..8ce9476
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodecUTF16.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodecUTF16_h
+#define TextCodecUTF16_h
+
+#include "TextCodec.h"
+
+namespace WebCore {
+
+    // TextCodec for UTF-16 with a byte order fixed at construction time.
+    class TextCodecUTF16 : public TextCodec {
+    public:
+        static void registerEncodingNames(EncodingNameRegistrar);
+        static void registerCodecs(TextCodecRegistrar);
+
+        // littleEndian selects the byte order: true for UTF-16LE, false for UTF-16BE.
+        // Every member is initialized, including m_bufferedByte, so the object
+        // never holds an indeterminate value before the first byte is buffered.
+        TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false), m_bufferedByte(0) { }
+
+        // decode() accepts input split at any byte boundary; flush, stopOnError
+        // and sawError are ignored by the implementation.
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+    private:
+        bool m_littleEndian;
+        // True when the previous decode() call ended in the middle of a two-byte
+        // unit; m_bufferedByte then holds that unit's first byte.
+        bool m_haveBufferedByte;
+        unsigned char m_bufferedByte;
+    };
+
+} // namespace WebCore
+
+#endif // TextCodecUTF16_h
diff --git a/Source/WebCore/platform/text/TextCodecUserDefined.cpp b/Source/WebCore/platform/text/TextCodecUserDefined.cpp
new file mode 100644
index 0000000..70d8673
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodecUserDefined.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2007, 2008 Apple, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextCodecUserDefined.h"
+
+#include "PlatformString.h"
+#include <stdio.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/StringBuffer.h>
+#include <wtf/PassOwnPtr.h>
+
+namespace WebCore {
+
+// Registers the single name "x-user-defined", mapped to itself; no aliases.
+void TextCodecUserDefined::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    registrar("x-user-defined", "x-user-defined");
+}
+
+// Codec factory registered for "x-user-defined". Both arguments are ignored;
+// every call returns a freshly allocated TextCodecUserDefined.
+static PassOwnPtr<TextCodec> newStreamingTextDecoderUserDefined(const TextEncoding&, const void*)
+{
+    return new TextCodecUserDefined;
+}
+
+// Registers the factory for "x-user-defined"; the third registrar argument is 0.
+void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar)
+{
+    registrar("x-user-defined", newStreamingTextDecoderUserDefined, 0);
+}
+
+// Decodes |length| bytes into a String of |length| UChars. Bytes 00-7F keep
+// their value; bytes 80-FF become F780-F7FF. The flush, stopOnError and
+// sawError arguments are unnamed and unused.
+String TextCodecUserDefined::decode(const char* bytes, size_t length, bool, bool, bool&)
+{
+    UChar* out;
+    String decoded = String::createUninitialized(length, out);
+
+    size_t index = 0;
+    while (index < length) {
+        // Going through signed char sign-extends bytes >= 0x80, so that the
+        // mask leaves F780-F7FF for them and the plain value for ASCII bytes.
+        const signed char signedByte = bytes[index];
+        out[index] = signedByte & 0xF7FF;
+        ++index;
+    }
+
+    return decoded;
+}
+
+// Slow path of TextCodecUserDefined::encode(), used when the input is not pure
+// ASCII. Walks the UTF-16 input one code point at a time. A code point is
+// encodable when truncating it to a signed byte and widening it again with the
+// F7FF mask gives back the same code point (00-7F and F780-F7FF); every other
+// code point is replaced by what TextCodec::getUnencodableReplacement()
+// produces for |handling|. Returns the encoded bytes as a CString.
+static CString encodeComplexUserDefined(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    // Start with one byte per input code unit; enlarged below when a
+    // replacement sequence needs more room.
+    Vector<char> result(length);
+    char* bytes = result.data();
+
+    size_t resultLength = 0;
+    for (size_t i = 0; i < length; ) {
+        UChar32 c;
+        U16_NEXT(characters, i, length, c);
+        signed char signedByte = c;
+        if ((signedByte & 0xF7FF) == c)
+            bytes[resultLength++] = signedByte;
+        else {
+            // No way to encode this character with x-user-defined.
+            UnencodableReplacementArray replacement;
+            int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement);
+            // Room needed from here on: what is already written, the replacement,
+            // and at most one byte for each remaining input code unit.
+            // A surrogate pair consumes two code units, so with a short replacement
+            // this can be smaller than the current size. Only grow when more room
+            // is really required; Vector::grow() is not meant to shrink the vector.
+            size_t requiredSize = resultLength + replacementLength + length - i;
+            if (requiredSize > result.size()) {
+                result.grow(requiredSize);
+                bytes = result.data();
+            }
+            memcpy(bytes + resultLength, replacement, replacementLength);
+            resultLength += replacementLength;
+        }
+    }
+
+    return CString(bytes, resultLength);
+}
+
+// Encodes |length| UTF-16 code units. Pure-ASCII input is handled by a single
+// narrowing pass; anything else is delegated to encodeComplexUserDefined().
+CString TextCodecUserDefined::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    char* out;
+    CString asciiResult = CString::newUninitialized(length, out);
+
+    // Narrow every code unit to a byte and OR all units together; the
+    // accumulated bits tell afterwards whether any unit was outside 00-7F.
+    UChar seenBits = 0;
+    const UChar* in = characters;
+    for (size_t remaining = length; remaining; --remaining) {
+        const UChar unit = *in++;
+        *out++ = unit;
+        seenBits |= unit;
+    }
+
+    const bool allASCII = !(seenBits & 0xFF80);
+    if (allASCII)
+        return asciiResult;
+
+    // Not all ASCII: redo the conversion with the function that handles the general case.
+    return encodeComplexUserDefined(characters, length, handling);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextCodecUserDefined.h b/Source/WebCore/platform/text/TextCodecUserDefined.h
new file mode 100644
index 0000000..d1b3160
--- /dev/null
+++ b/Source/WebCore/platform/text/TextCodecUserDefined.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2007 Apple, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodecUserDefined_h
+#define TextCodecUserDefined_h
+
+#include "TextCodec.h"
+
+namespace WebCore {
+
+    // TextCodec registered for the "x-user-defined" encoding.
+    // The class has no data members of its own.
+    class TextCodecUserDefined : public TextCodec {
+    public:
+        static void registerEncodingNames(EncodingNameRegistrar);
+        static void registerCodecs(TextCodecRegistrar);
+
+        // decode() yields one UChar per input byte and ignores flush, stopOnError and sawError.
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+    };
+
+} // namespace WebCore
+
+#endif // TextCodecUserDefined_h
diff --git a/Source/WebCore/platform/text/TextDirection.h b/Source/WebCore/platform/text/TextDirection.h
new file mode 100644
index 0000000..5be416e
--- /dev/null
+++ b/Source/WebCore/platform/text/TextDirection.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2003, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextDirection_h
+#define TextDirection_h
+
+namespace WebCore {
+
+    // Direction of text; the enumerator names presumably abbreviate
+    // right-to-left and left-to-right. RTL is declared first, so RTL == 0 and LTR == 1.
+    enum TextDirection { RTL, LTR };
+
+}
+
+#endif
diff --git a/Source/WebCore/platform/text/TextEncoding.cpp b/Source/WebCore/platform/text/TextEncoding.cpp
new file mode 100644
index 0000000..33313a0
--- /dev/null
+++ b/Source/WebCore/platform/text/TextEncoding.cpp
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextEncoding.h"
+
+#include "PlatformString.h"
+#include "TextCodec.h"
+#include "TextEncodingRegistry.h"
+#if USE(ICU_UNICODE)
+#include <unicode/unorm.h>
+#elif USE(QT4_UNICODE)
+#include <QString>
+#elif USE(GLIB_UNICODE)
+#include <glib.h>
+#include "GOwnPtr.h"
+#endif
+#include <wtf/text/CString.h>
+#include <wtf/OwnPtr.h>
+#include <wtf/StdLibExtras.h>
+
+namespace WebCore {
+
+static const TextEncoding& UTF7Encoding()
+{
+    static TextEncoding utf7Encoding("UTF-7"); // file-local singleton, constructed on first call
+    return utf7Encoding;
+}
+
+TextEncoding::TextEncoding(const char* name)
+    : m_name(atomicCanonicalTextEncodingName(name)) // stored as a pointer; operator== compares these pointers
+    , m_backslashAsCurrencySymbol(backslashAsCurrencySymbol()) // reads m_name, which is declared (hence initialized) first
+{
+}
+
+TextEncoding::TextEncoding(const String& name)
+    : m_name(atomicCanonicalTextEncodingName(name.characters(), name.length())) // stored as a pointer; operator== compares these pointers
+    , m_backslashAsCurrencySymbol(backslashAsCurrencySymbol()) // reads m_name, which is declared (hence initialized) first
+{
+}
+
+String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const
+{
+    // An encoding without a canonical name has no codec; return a default-constructed String.
+    if (!isValid())
+        return String();
+    return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError);
+}
+
+// Encodes |length| UTF-16 code units into this encoding; the ICU, Qt4 and GLib branches first normalize the input to NFC.
+CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const
+{
+    if (!m_name) // invalid encoding: nothing to encode with
+        return CString();
+    if (!length)
+        return ""; // empty input: a CString built from "" rather than the default-constructed one above
+
+#if USE(ICU_UNICODE)
+    // FIXME: What's the right place to do normalization?
+    // It's a little strange to do it inside the encode function.
+    // Perhaps normalization should be an explicit step done before calling encode.
+
+    const UChar* source = characters;
+    size_t sourceLength = length;
+
+    Vector<UChar> normalizedCharacters;
+
+    UErrorCode err = U_ZERO_ERROR;
+    if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) {
+        // First try using the length of the original string, since normalization to NFC rarely increases length.
+        normalizedCharacters.grow(sourceLength);
+        int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);
+        if (err == U_BUFFER_OVERFLOW_ERROR) { // buffer too small: retry with the length unorm_normalize() reported
+            err = U_ZERO_ERROR;
+            normalizedCharacters.resize(normalizedLength);
+            normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
+        }
+        ASSERT(U_SUCCESS(err));
+
+        source = normalizedCharacters.data();
+        sourceLength = normalizedLength;
+    }
+    return newTextCodec(*this)->encode(source, sourceLength, handling);
+#elif USE(QT4_UNICODE)
+    QString str(reinterpret_cast<const QChar*>(characters), length);
+    str = str.normalized(QString::NormalizationForm_C);
+    return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), handling);
+#elif USE(GLIB_UNICODE)
+    GOwnPtr<char> UTF8Source;
+    UTF8Source.set(g_utf16_to_utf8(characters, length, 0, 0, 0));
+    if (!UTF8Source) {
+        // If conversion to UTF-8 failed, try with the string without normalization
+        return newTextCodec(*this)->encode(characters, length, handling);
+    }
+
+    GOwnPtr<char> UTF8Normalized;
+    UTF8Normalized.set(g_utf8_normalize(UTF8Source.get(), -1, G_NORMALIZE_NFC)); // -1: the input is NUL-terminated
+
+    long UTF16Length;
+    GOwnPtr<UChar> UTF16Normalized;
+    UTF16Normalized.set(g_utf8_to_utf16(UTF8Normalized.get(), -1, 0, &UTF16Length, 0)); // UTF16Length receives the converted length
+
+    return newTextCodec(*this)->encode(UTF16Normalized.get(), UTF16Length, handling);
+#elif OS(WINCE)
+    // normalization will be done by Windows CE API
+    OwnPtr<TextCodec> textCodec = newTextCodec(*this);
+    return textCodec.get() ? textCodec->encode(characters, length, handling) : CString(); // a null codec yields a default CString
+#elif USE(BREWMP_UNICODE)
+    // FIXME: not sure if Brew MP normalizes the input string automatically
+    OwnPtr<TextCodec> textCodec = newTextCodec(*this);
+    return textCodec.get() ? textCodec->encode(characters, length, handling) : CString(); // a null codec yields a default CString
+#endif
+}
+
+const char* TextEncoding::domName() const
+{
+    // We treat EUC-KR as windows-949 (its superset), but need to expose
+    // the name 'EUC-KR' because the name 'windows-949' is not recognized by
+    // most Korean web servers even though they do use the encoding
+    // 'windows-949' with the name 'EUC-KR'.
+    // FIXME: This is not thread-safe. At the moment, this function is
+    // only accessed in a single thread, but eventually has to be made
+    // thread-safe along with usesVisualOrdering().
+    if (!noExtendedTextEncodingNameUsed()) {
+        // Names are compared by pointer, as operator==(TextEncoding, TextEncoding) does.
+        static const char* const windows949Name = atomicCanonicalTextEncodingName("windows-949");
+        if (m_name == windows949Name)
+            return "EUC-KR";
+    }
+    return m_name;
+}
+
+bool TextEncoding::usesVisualOrdering() const
+{
+    if (!noExtendedTextEncodingNameUsed()) {
+        static const char* const iso88598Name = atomicCanonicalTextEncodingName("ISO-8859-8");
+        return m_name == iso88598Name; // pointer comparison against the canonical name
+    }
+    return false;
+}
+
+bool TextEncoding::isJapanese() const
+{
+    return isJapaneseEncoding(m_name); // helper defined outside this file (see TextEncodingRegistry)
+}
+
+UChar TextEncoding::backslashAsCurrencySymbol() const
+{
+    return shouldShowBackslashAsCurrencySymbolIn(m_name) ? 0x00A5 : '\\'; // 0x00A5 is U+00A5 YEN SIGN
+}
+
+// Reports whether this encoding is UTF-16 or, in the case described below, UTF-32.
+bool TextEncoding::isNonByteBasedEncoding() const
+{
+    const bool baseNamesOnly = noExtendedTextEncodingNameUsed();
+    // UTF-16, in either byte order, always qualifies.
+    if (*this == UTF16LittleEndianEncoding() || *this == UTF16BigEndianEncoding())
+        return true;
+    // UTF-32 is consulted only when noExtendedTextEncodingNameUsed() is false.
+    if (baseNamesOnly)
+        return false;
+    return *this == UTF32BigEndianEncoding() || *this == UTF32LittleEndianEncoding();
+}
+
+bool TextEncoding::isUTF7Encoding() const
+{
+    // The && short-circuits: while noExtendedTextEncodingNameUsed() is
+    // true the result is false and the UTF-7 singleton is never
+    // constructed or compared.
+    return !noExtendedTextEncodingNameUsed() && *this == UTF7Encoding();
+}
+
+const TextEncoding& TextEncoding::closestByteBasedEquivalent() const
+{
+    // Non-byte-based encodings (see isNonByteBasedEncoding()) are replaced
+    // by UTF-8; any other encoding is returned unchanged.
+    return isNonByteBasedEncoding() ? UTF8Encoding() : *this;
+}
+
+// HTML5 specifies that UTF-8 be used in form submission when a form is
+// a part of a document in UTF-16, probably because UTF-16 is not a
+// byte-based encoding and can contain 0x00. By extension, the same
+// should be done for UTF-32. In case of UTF-7, it is a byte-based encoding,
+// but it's fraught with problems and we'd rather steer clear of it.
+const TextEncoding& TextEncoding::encodingForFormSubmission() const
+{
+    const bool substituteUTF8 = isNonByteBasedEncoding() || isUTF7Encoding();
+    // Every other encoding is used as is.
+    return substituteUTF8 ? UTF8Encoding() : *this;
+}
+
+const TextEncoding& ASCIIEncoding()
+{
+    static TextEncoding asciiEncoding("ASCII"); // constructed on first call, then shared
+    return asciiEncoding;
+}
+
+const TextEncoding& Latin1Encoding()
+{
+    static TextEncoding latin1Encoding("latin1"); // constructed on first call, then shared
+    return latin1Encoding;
+}
+
+const TextEncoding& UTF16BigEndianEncoding()
+{
+    static TextEncoding utf16BigEndianEncoding("UTF-16BE"); // constructed on first call, then shared
+    return utf16BigEndianEncoding;
+}
+
+const TextEncoding& UTF16LittleEndianEncoding()
+{
+    static TextEncoding utf16LittleEndianEncoding("UTF-16LE"); // constructed on first call, then shared
+    return utf16LittleEndianEncoding;
+}
+
+const TextEncoding& UTF32BigEndianEncoding()
+{
+    static TextEncoding utf32BigEndianEncoding("UTF-32BE"); // constructed on first call, then shared
+    return utf32BigEndianEncoding;
+}
+
+const TextEncoding& UTF32LittleEndianEncoding()
+{
+    static TextEncoding utf32LittleEndianEncoding("UTF-32LE"); // constructed on first call, then shared
+    return utf32LittleEndianEncoding;
+}
+
+const TextEncoding& UTF8Encoding()
+{
+    static TextEncoding globalUTF8Encoding("UTF-8");
+    ASSERT(globalUTF8Encoding.isValid()); // checks that the name "UTF-8" resolved to a canonical name
+    return globalUTF8Encoding;
+}
+
+const TextEncoding& WindowsLatin1Encoding()
+{
+    static TextEncoding windowsLatin1Encoding("WinLatin-1"); // constructed on first call, then shared
+    return windowsLatin1Encoding;
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextEncoding.h b/Source/WebCore/platform/text/TextEncoding.h
new file mode 100644
index 0000000..675625b
--- /dev/null
+++ b/Source/WebCore/platform/text/TextEncoding.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextEncoding_h
+#define TextEncoding_h
+
+#include "TextCodec.h"
+#include <wtf/Forward.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+    // A character encoding identified by its atomic canonical name; a null name marks an invalid encoding.
+    class TextEncoding {
+    public:
+        TextEncoding() : m_name(0), m_backslashAsCurrencySymbol('\\') { } // invalid encoding; '\\' keeps displayString()/displayBuffer() no-ops instead of reading an uninitialized member
+        TextEncoding(const char* name);
+        TextEncoding(const String& name);
+
+        bool isValid() const { return m_name; } // true when a canonical name was found
+        const char* name() const { return m_name; }
+        const char* domName() const; // name exposed via DOM
+        bool usesVisualOrdering() const;
+        bool isJapanese() const;
+        // The display functions replace every backslash with m_backslashAsCurrencySymbol when that differs from '\\'.
+        PassRefPtr<StringImpl> displayString(PassRefPtr<StringImpl> str) const
+        {
+            if (m_backslashAsCurrencySymbol == '\\' || !str)
+                return str;
+            return str->replace('\\', m_backslashAsCurrencySymbol);
+        }
+        void displayBuffer(UChar* characters, unsigned len) const
+        {
+            if (m_backslashAsCurrencySymbol == '\\')
+                return;
+            for (unsigned i = 0; i < len; ++i) {
+                if (characters[i] == '\\')
+                    characters[i] = m_backslashAsCurrencySymbol;
+            }
+        }
+
+        const TextEncoding& closestByteBasedEquivalent() const;
+        const TextEncoding& encodingForFormSubmission() const;
+
+        String decode(const char* str, size_t length) const
+        {
+            bool ignored; // out-parameter whose value is discarded
+            return decode(str, length, false, ignored);
+        }
+        String decode(const char*, size_t length, bool stopOnError, bool& sawError) const;
+        CString encode(const UChar*, size_t length, UnencodableHandling) const;
+        UChar backslashAsCurrencySymbol() const; // computes the value cached in m_backslashAsCurrencySymbol
+
+    private:
+        bool isNonByteBasedEncoding() const;
+        bool isUTF7Encoding() const;
+
+        const char* m_name; // must stay declared before m_backslashAsCurrencySymbol: the constructors rely on this initialization order
+        UChar m_backslashAsCurrencySymbol;
+    };
+
+    inline bool operator==(const TextEncoding& a, const TextEncoding& b) { return a.name() == b.name(); } // compares the name pointers, not the characters
+    inline bool operator!=(const TextEncoding& a, const TextEncoding& b) { return !(a == b); }
+
+    const TextEncoding& ASCIIEncoding(); // TextEncoding("ASCII")
+    const TextEncoding& Latin1Encoding(); // TextEncoding("latin1")
+    const TextEncoding& UTF16BigEndianEncoding(); // TextEncoding("UTF-16BE")
+    const TextEncoding& UTF16LittleEndianEncoding(); // TextEncoding("UTF-16LE")
+    const TextEncoding& UTF32BigEndianEncoding(); // TextEncoding("UTF-32BE")
+    const TextEncoding& UTF32LittleEndianEncoding(); // TextEncoding("UTF-32LE")
+    const TextEncoding& UTF8Encoding(); // TextEncoding("UTF-8"); asserted to be valid
+    const TextEncoding& WindowsLatin1Encoding(); // TextEncoding("WinLatin-1")
+
+} // namespace WebCore
+
+#endif // TextEncoding_h
diff --git a/Source/WebCore/platform/text/TextEncodingDetector.h b/Source/WebCore/platform/text/TextEncodingDetector.h
new file mode 100644
index 0000000..9f16ab0
--- /dev/null
+++ b/Source/WebCore/platform/text/TextEncodingDetector.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2009 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextEncodingDetector_h
+#define TextEncodingDetector_h
+
+namespace WebCore {
+
+    class TextEncoding;
+
+    // Detects the most likely character encoding of the |len| bytes at |data|.
+    // |detectedEncoding| is first reset to an invalid TextEncoding; true is
+    // returned when detection succeeds. How the optional hintEncodingName is used is up
+    // to the implementation; currently, the only caller sets it to the parent frame encoding.
+    bool detectTextEncoding(const char* data, size_t len,
+                            const char* hintEncodingName,
+                            TextEncoding* detectedEncoding);
+
+} // namespace WebCore
+
+#endif
diff --git a/Source/WebCore/platform/text/TextEncodingDetectorICU.cpp b/Source/WebCore/platform/text/TextEncodingDetectorICU.cpp
new file mode 100644
index 0000000..c0d11de
--- /dev/null
+++ b/Source/WebCore/platform/text/TextEncodingDetectorICU.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2008, 2009 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextEncodingDetector.h"
+
+#include "TextEncoding.h"
+#include <wtf/UnusedParam.h>
+
+#ifndef BUILDING_ON_TIGER
+#include "unicode/ucnv.h"
+#include "unicode/ucsdet.h"
+#endif
+
+namespace WebCore {
+
+// ICU-based detector: resets |*detectedEncoding|, then stores ICU's top match, or |hintEncodingName| when it equals a match with confidence >= 10.
+bool detectTextEncoding(const char* data, size_t len,
+                        const char* hintEncodingName,
+                        TextEncoding* detectedEncoding)
+{
+    *detectedEncoding = TextEncoding();
+#ifdef BUILDING_ON_TIGER
+    // Tiger came with ICU 3.2 and does not have the encoding detector.
+    UNUSED_PARAM(data);
+    UNUSED_PARAM(len);
+    UNUSED_PARAM(hintEncodingName);
+    return false;
+#else
+    int matchesCount = 0;
+    UErrorCode status = U_ZERO_ERROR;
+    UCharsetDetector* detector = ucsdet_open(&status);
+    if (U_FAILURE(status))
+        return false;
+    ucsdet_enableInputFilter(detector, true);
+    ucsdet_setText(detector, data, static_cast<int32_t>(len), &status);
+    if (U_FAILURE(status)) {
+        ucsdet_close(detector); // the detector is already open here and must not leak
+        return false;
+    }
+
+    // FIXME: A few things we can do other than improving
+    // the ICU detector itself.
+    // 1. Use ucsdet_detectAll and pick the most likely one given
+    // "the context" (parent-encoding, referrer encoding, etc).
+    // 2. 'Emulate' Firefox/IE's non-Universal detectors (e.g.
+    // Chinese, Japanese, Russian, Korean and Hebrew) by picking the
+    // encoding with a highest confidence among the detector-specific
+    // limited set of candidate encodings.
+    // Below is a partial implementation of item 1 above.
+    const UCharsetMatch** matches = ucsdet_detectAll(detector, &matchesCount, &status);
+    if (U_FAILURE(status)) {
+        ucsdet_close(detector);
+        return false;
+    }
+
+    const char* encoding = 0;
+    if (hintEncodingName) {
+        TextEncoding hintEncoding(hintEncodingName);
+        // 10 is the minimum confidence value consistent with the codepoint
+        // allocation in a given encoding. The size of a chunk passed to
+        // us varies even for the same html file (apparently depending on
+        // the network load). When we're given a rather short chunk, we
+        // don't have a sufficiently reliable signal other than the fact that
+        // the chunk is consistent with a set of encodings. So, instead of
+        // setting an arbitrary threshold, we have to scan all the encodings
+        // consistent with the data.
+        const int32_t kThreshold = 10;
+        for (int i = 0; i < matchesCount; ++i) {
+            int32_t confidence = ucsdet_getConfidence(matches[i], &status);
+            if (U_FAILURE(status)) {
+                status = U_ZERO_ERROR;
+                continue;
+            }
+            if (confidence < kThreshold)
+                break;
+            const char* matchEncoding = ucsdet_getName(matches[i], &status);
+            if (U_FAILURE(status)) {
+                status = U_ZERO_ERROR;
+                continue;
+            }
+            if (TextEncoding(matchEncoding) == hintEncoding) {
+                encoding = hintEncodingName;
+                break;
+            }
+        }
+    }
+    // If no match is found so far, just pick the top match.
+    // This can happen, say, when a parent frame in EUC-JP refers to
+    // a child frame in Shift_JIS and both frames do NOT specify the encoding
+    // making us resort to auto-detection (when it IS turned on).
+    if (!encoding && matchesCount > 0)
+        encoding = ucsdet_getName(matches[0], &status);
+    const bool succeeded = U_SUCCESS(status);
+    if (succeeded)
+        *detectedEncoding = TextEncoding(encoding); // encoding is still 0 here if ICU returned no matches
+    ucsdet_close(detector);
+    return succeeded;
+#endif
+}
+
+}
diff --git a/Source/WebCore/platform/text/TextEncodingDetectorNone.cpp b/Source/WebCore/platform/text/TextEncodingDetectorNone.cpp
new file mode 100644
index 0000000..3b62bc5
--- /dev/null
+++ b/Source/WebCore/platform/text/TextEncodingDetectorNone.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2009 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextEncodingDetector.h"
+
+#include "TextEncoding.h"
+
+namespace WebCore {
+
+bool detectTextEncoding(const char*, size_t, const char*, TextEncoding* detectedEncoding)
+{
+    *detectedEncoding = TextEncoding(); // reset the out-parameter to an invalid encoding
+    return false; // this implementation never detects anything
+}
+
+}
diff --git a/Source/WebCore/platform/text/TextEncodingRegistry.cpp b/Source/WebCore/platform/text/TextEncodingRegistry.cpp
new file mode 100644
index 0000000..c0c0255
--- /dev/null
+++ b/Source/WebCore/platform/text/TextEncodingRegistry.cpp
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextEncodingRegistry.h"
+
+#include "PlatformString.h"
+#include "TextCodecLatin1.h"
+#include "TextCodecUserDefined.h"
+#include "TextCodecUTF16.h"
+#include "TextEncoding.h"
+#include <wtf/ASCIICType.h>
+#include <wtf/Assertions.h>
+#include <wtf/HashFunctions.h>
+#include <wtf/HashMap.h>
+#include <wtf/HashSet.h>
+#include <wtf/StdLibExtras.h>
+#include <wtf/StringExtras.h>
+#include <wtf/Threading.h>
+
+#if USE(ICU_UNICODE)
+#include "TextCodecICU.h"
+#endif
+#if PLATFORM(MAC)
+#include "TextCodecMac.h"
+#endif
+#if PLATFORM(QT)
+#include "qt/TextCodecQt.h"
+#endif
+#if USE(GLIB_UNICODE)
+#include "gtk/TextCodecGtk.h"
+#endif
+#if USE(BREWMP_UNICODE)
+#include "brew/TextCodecBrew.h"
+#endif
+#if OS(WINCE) && !PLATFORM(QT)
+#include "TextCodecWinCE.h"
+#endif
+
+using namespace WTF;
+
+namespace WebCore {
+
+const size_t maxEncodingNameLength = 63; // upper bound asserted for every alias in addToTextEncodingNameMap()
+
+// Traits for hashing and comparing all-ASCII encoding names without regard to ASCII case.
+struct TextEncodingNameHash {
+
+    // Walks both strings in lock step; they are equal only if they fold to
+    // the same characters and terminate together.
+    static bool equal(const char* s1, const char* s2)
+    {
+        for (;; ++s1, ++s2) {
+            const char left = *s1;
+            const char right = *s2;
+            if (toASCIILower(left) != toASCIILower(right))
+                return false;
+            if (!left || !right)
+                return !left && !right;
+        }
+    }
+
+    // This algorithm is the one-at-a-time hash from:
+    // http://burtleburtle.net/bob/hash/hashfaq.html
+    // http://burtleburtle.net/bob/hash/doobs.html
+    static unsigned hash(const char* s)
+    {
+        unsigned h = WTF::stringHashingStartValue;
+        // Mix in each case-folded character up to the terminating NUL.
+        for (char c = *s; c; c = *++s) {
+            h += toASCIILower(c);
+            h += (h << 10);
+            h ^= (h >> 6);
+        }
+        // Final mixing steps, applied once after the last character.
+        h += (h << 3);
+        h ^= (h >> 11);
+        h += (h << 15);
+        return h;
+    }
+
+    static const bool safeToCompareToEmptyOrDeleted = false;
+};
+
+struct TextCodecFactory {
+    NewTextCodecFunction function; // creator callback registered for an encoding
+    const void* additionalData; // opaque value registered together with |function|
+    TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { } // both members default to null
+};
+
+typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap; // alias (hashed ignoring ASCII case) -> atomic name
+typedef HashMap<const char*, TextCodecFactory> TextCodecMap; // atomic name -> factory for its codec
+
+static Mutex& encodingRegistryMutex()
+{
+    // We don't have to use AtomicallyInitializedStatic here because
+    // this function is called on the main thread for any page before
+    // it is used in worker threads.
+    DEFINE_STATIC_LOCAL(Mutex, mutex, ());
+    return mutex; // NOTE(review): presumably guards the registry statics declared below -- confirm at the call sites
+}
+
+static TextEncodingNameMap* textEncodingNameMap; // alias -> atomic name; allocated in buildBaseTextCodecMaps()
+static TextCodecMap* textCodecMap; // atomic name -> codec factory; allocated in buildBaseTextCodecMaps()
+static bool didExtendTextCodecMaps; // starts out false (static storage); NOTE(review): presumably set once the extended maps are built -- confirm
+static HashSet<const char*>* japaneseEncodings; // allocated and filled by buildQuirksSets()
+static HashSet<const char*>* nonBackslashEncodings; // allocated by buildQuirksSets()
+
+static const char* const textEncodingNameBlacklist[] = { // encodings removed again by pruneBlacklistedCodecs()
+    "UTF-7"
+};
+
+#if ERROR_DISABLED
+
+static inline void checkExistingName(const char*, const char*) { }
+
+#else
+
+// Logs an error when |alias| is already mapped to an atomic name other than |atomicName|.
+static void checkExistingName(const char* alias, const char* atomicName)
+{
+    const char* oldAtomicName = textEncodingNameMap->get(alias);
+    if (!oldAtomicName || oldAtomicName == atomicName)
+        return;
+    // Keep the warning silent about one case where we know this will happen.
+    const bool isKnownConflict = !strcmp(alias, "ISO-8859-8-I")
+        && !strcmp(oldAtomicName, "ISO-8859-8-I")
+        && !strcasecmp(atomicName, "iso-8859-8");
+    if (isKnownConflict)
+        return;
+    LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", alias, oldAtomicName, atomicName);
+}
+
+#endif
+
+// Returns true for aliases that must not be added to the name map.
+static bool isUndesiredAlias(const char* alias)
+{
+    // Reject aliases with version numbers that are supported by some back-ends (such as "ISO_2022,locale=ja,version=0" in ICU).
+    const bool hasComma = strchr(alias, ',') != 0;
+    if (hasComma)
+        return true;
+    // 8859_1 is known to (at least) ICU, but other browsers don't support this name - and having it caused a compatibility
+    // problem, see bug 43554.
+    if (!strcmp(alias, "8859_1"))
+        return true;
+    return false;
+}
+
+// Maps |alias| to the atomic name of |name|; a |name| not yet in the map becomes its own atomic name.
+static void addToTextEncodingNameMap(const char* alias, const char* name)
+{
+    ASSERT(strlen(alias) <= maxEncodingNameLength);
+    if (isUndesiredAlias(alias))
+        return;
+    const char* atomicName = textEncodingNameMap->get(name);
+    ASSERT(strcmp(alias, name) == 0 || atomicName);
+    const char* target = atomicName ? atomicName : name;
+    checkExistingName(alias, target);
+    textEncodingNameMap->add(alias, target);
+}
+
+static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
+{
+    const char* atomicName = textEncodingNameMap->get(name); // look up the atomic name stored for |name|
+    ASSERT(atomicName); // the name must have been registered beforehand
+    textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
+}
+
+// Drops every alias of each blacklisted encoding, together with the encoding's codec factory.
+static void pruneBlacklistedCodecs()
+{
+    const size_t blacklistSize = WTF_ARRAY_LENGTH(textEncodingNameBlacklist);
+    for (size_t entry = 0; entry < blacklistSize; ++entry) {
+        const char* atomicName = textEncodingNameMap->get(textEncodingNameBlacklist[entry]);
+        if (!atomicName)
+            continue;
+
+        // Gather the aliases first; they are removed only after the iteration is over.
+        Vector<const char*> aliases;
+        const TextEncodingNameMap::const_iterator mapEnd = textEncodingNameMap->end();
+        for (TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); it != mapEnd; ++it) {
+            if (it->second == atomicName)
+                aliases.append(it->first);
+        }
+        for (size_t j = 0; j < aliases.size(); ++j)
+            textEncodingNameMap->remove(aliases[j]);
+
+        textCodecMap->remove(atomicName);
+    }
+}
+
+// Creates both registry maps and registers the base encodings of every compiled-in codec.
+static void buildBaseTextCodecMaps()
+{
+    ASSERT(isMainThread());
+    ASSERT(!textCodecMap); // the maps must not exist yet
+    ASSERT(!textEncodingNameMap);
+    textCodecMap = new TextCodecMap;
+    textEncodingNameMap = new TextEncodingNameMap;
+    // For each codec, names go first: addToTextCodecMap() asserts the name is already in textEncodingNameMap.
+    TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecLatin1::registerCodecs(addToTextCodecMap);
+
+    TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecUTF16::registerCodecs(addToTextCodecMap);
+
+    TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecUserDefined::registerCodecs(addToTextCodecMap);
+
+#if USE(ICU_UNICODE)
+    TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap);
+    TextCodecICU::registerBaseCodecs(addToTextCodecMap);
+#endif
+
+#if USE(GLIB_UNICODE)
+    TextCodecGtk::registerBaseEncodingNames(addToTextEncodingNameMap);
+    TextCodecGtk::registerBaseCodecs(addToTextCodecMap);
+#endif
+
+#if USE(BREWMP_UNICODE)
+    TextCodecBrew::registerBaseEncodingNames(addToTextEncodingNameMap);
+    TextCodecBrew::registerBaseCodecs(addToTextCodecMap);
+#endif
+
+#if OS(WINCE) && !PLATFORM(QT)
+    TextCodecWinCE::registerBaseEncodingNames(addToTextEncodingNameMap);
+    TextCodecWinCE::registerBaseCodecs(addToTextCodecMap);
+#endif
+}
+
+// Adds the atomic name that textEncodingNameMap holds for |name| to |set|.
+// Names the map does not know are silently ignored.
+static void addEncodingName(HashSet<const char*>* set, const char* name)
+{
+    // atomicCanonicalTextEncodingName() cannot be used for the lookup: this helper
+    // runs as part of that very function.
+    const char* canonicalName = textEncodingNameMap->get(name);
+    if (!canonicalName)
+        return;
+    set->add(canonicalName);
+}
+
+// Builds the two global sets queried by isJapaneseEncoding() and
+// shouldShowBackslashAsCurrencySymbolIn(). Asserts that neither set exists yet.
+// Only names currently present in textEncodingNameMap end up in the sets.
+static void buildQuirksSets()
+{
+    // FIXME: Having isJapaneseEncoding() and shouldShowBackslashAsCurrencySymbolIn()
+    // and initializing the sets for them in TextEncodingRegistry.cpp look strange.
+
+    static const char* const japaneseNames[] = {
+        "EUC-JP",
+        "ISO-2022-JP",
+        "ISO-2022-JP-1",
+        "ISO-2022-JP-2",
+        "ISO-2022-JP-3",
+        "JIS_C6226-1978",
+        "JIS_X0201",
+        "JIS_X0208-1983",
+        "JIS_X0208-1990",
+        "JIS_X0212-1990",
+        "Shift_JIS",
+        "Shift_JIS_X0213-2000",
+        "cp932",
+        "x-mac-japanese"
+    };
+
+    // These text encodings treat backslash as a currency symbol for IE compatibility.
+    // See http://blogs.msdn.com/michkap/archive/2005/09/17/469941.aspx for more information.
+    // Shift_JIS_X0213-2000 is not the same encoding as Shift_JIS on Mac, so both are listed.
+    static const char* const backslashAsCurrencyNames[] = {
+        "x-mac-japanese",
+        "ISO-2022-JP",
+        "EUC-JP",
+        "Shift_JIS",
+        "Shift_JIS_X0213-2000"
+    };
+
+    ASSERT(!japaneseEncodings);
+    ASSERT(!nonBackslashEncodings);
+
+    japaneseEncodings = new HashSet<const char*>();
+    for (size_t i = 0; i < WTF_ARRAY_LENGTH(japaneseNames); ++i)
+        addEncodingName(japaneseEncodings, japaneseNames[i]);
+
+    nonBackslashEncodings = new HashSet<const char*>();
+    for (size_t i = 0; i < WTF_ARRAY_LENGTH(backslashAsCurrencyNames); ++i)
+        addEncodingName(nonBackslashEncodings, backslashAsCurrencyNames[i]);
+}
+
+// Returns true if |canonicalEncodingName| is in the Japanese-encodings set.
+// The set holds atomic names and is keyed by pointer, so the argument must be the
+// atomic name itself. Returns false for a null name or if the set has not been built.
+bool isJapaneseEncoding(const char* canonicalEncodingName)
+{
+    if (!canonicalEncodingName || !japaneseEncodings)
+        return false;
+    return japaneseEncodings->contains(canonicalEncodingName);
+}
+
+// Returns true if |canonicalEncodingName| is in the set of encodings that treat
+// backslash as a currency symbol. Returns false for a null name or if the set has
+// not been built.
+bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName)
+{
+    if (!canonicalEncodingName || !nonBackslashEncodings)
+        return false;
+    return nonBackslashEncodings->contains(canonicalEncodingName);
+}
+
+// Adds the extended encodings of every compiled-in codec backend to the registry,
+// then prunes blacklisted codecs and builds the quirk sets. Invoked from
+// atomicCanonicalTextEncodingName(), with the registry mutex held, the first time a
+// name cannot be found among the encodings registered so far.
+static void extendTextCodecMaps()
+{
+#if USE(ICU_UNICODE)
+    TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap);
+    TextCodecICU::registerExtendedCodecs(addToTextCodecMap);
+#endif
+
+#if USE(QT4_UNICODE)
+    TextCodecQt::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecQt::registerCodecs(addToTextCodecMap);
+#endif
+
+#if PLATFORM(MAC)
+    TextCodecMac::registerEncodingNames(addToTextEncodingNameMap);
+    TextCodecMac::registerCodecs(addToTextCodecMap);
+#endif
+
+#if USE(GLIB_UNICODE)
+    TextCodecGtk::registerExtendedEncodingNames(addToTextEncodingNameMap);
+    TextCodecGtk::registerExtendedCodecs(addToTextCodecMap);
+#endif
+
+#if OS(WINCE) && !PLATFORM(QT)
+    TextCodecWinCE::registerExtendedEncodingNames(addToTextEncodingNameMap);
+    TextCodecWinCE::registerExtendedCodecs(addToTextCodecMap);
+#endif
+
+    // Both steps run after all registrations: they look names up in the finished map.
+    pruneBlacklistedCodecs();
+    buildQuirksSets();
+}
+
+// Creates a codec for |encoding| by invoking the factory stored in textCodecMap
+// under encoding.name(). The lookup is done with the registry mutex held. Asserts
+// that the map exists and that a factory function is registered for the name.
+PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
+{
+    MutexLocker registryLock(encodingRegistryMutex());
+
+    ASSERT(textCodecMap);
+    TextCodecFactory codecFactory = textCodecMap->get(encoding.name());
+    ASSERT(codecFactory.function);
+    return codecFactory.function(encoding, codecFactory.additionalData);
+}
+
+// Returns the atomic name that textEncodingNameMap holds for |name|, or 0 if |name|
+// is null, empty, or still unknown after the extended encodings have been registered.
+const char* atomicCanonicalTextEncodingName(const char* name)
+{
+    if (!name || !name[0])
+        return 0;
+    // The base maps are created lazily, before the mutex is taken;
+    // buildBaseTextCodecMaps() asserts that it runs on the main thread.
+    if (!textEncodingNameMap)
+        buildBaseTextCodecMaps();
+
+    MutexLocker lock(encodingRegistryMutex());
+
+    if (const char* atomicName = textEncodingNameMap->get(name))
+        return atomicName;
+    // A miss after the maps were extended is final.
+    if (didExtendTextCodecMaps)
+        return 0;
+    // First miss: register the extended encodings (once, under the lock) and retry.
+    extendTextCodecMaps();
+    didExtendTextCodecMaps = true;
+    return textEncodingNameMap->get(name);
+}
+
+// Looks up the atomic name for an encoding name given as |length| UTF-16 code units.
+// Returns 0 if the name is longer than maxEncodingNameLength, contains a non-ASCII
+// code unit, or is not a known encoding name.
+const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length)
+{
+    if (length > maxEncodingNameLength)
+        return 0;
+
+    char buffer[maxEncodingNameLength + 1];
+    for (size_t i = 0; i < length; ++i) {
+        UChar c = characters[i];
+        // Narrowing a non-ASCII code unit to char would silently drop its high bits
+        // (U+0155 would turn into 'U'), letting an unrelated string match a real
+        // encoding name. Reject such input instead of truncating it.
+        if (c > 0x7F)
+            return 0;
+        buffer[i] = static_cast<char>(c);
+    }
+    buffer[length] = 0;
+    return atomicCanonicalTextEncodingName(buffer);
+}
+
+// Returns true as long as the extended encodings have not been registered, i.e. no
+// lookup has missed the base maps yet. The flag is read without taking the registry mutex.
+bool noExtendedTextEncodingNameUsed()
+{
+    // If the calling thread did not use extended encoding names, it is fine for it to use a stale false value.
+    return !didExtendTextCodecMaps;
+}
+
+#ifndef NDEBUG
+// Debug helper: prints every (name => atomic name) pair of textEncodingNameMap to
+// stderr, preceded by the entry count. The map must already have been built.
+void dumpTextEncodingNameMap()
+{
+    // Take the registry lock before touching the map at all, including size(), so the
+    // reported count and the dumped entries are consistent with each other.
+    MutexLocker lock(encodingRegistryMutex());
+
+    unsigned size = textEncodingNameMap->size();
+    fprintf(stderr, "Dumping %u entries in WebCore::textEncodingNameMap...\n", size);
+
+    TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
+    TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
+    for (; it != end; ++it)
+        fprintf(stderr, "'%s' => '%s'\n", it->first, it->second);
+}
+#endif
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextEncodingRegistry.h b/Source/WebCore/platform/text/TextEncodingRegistry.h
new file mode 100644
index 0000000..16844c6
--- /dev/null
+++ b/Source/WebCore/platform/text/TextEncodingRegistry.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextEncodingRegistry_h
+#define TextEncodingRegistry_h
+
+#include <memory>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+    class TextCodec;
+    class TextEncoding;
+
+    // Creates a new codec for the given encoding.
+    // Use TextResourceDecoder::decode to decode resources, since it handles BOMs.
+    // Use TextEncoding::encode to encode, since it takes care of normalization.
+    PassOwnPtr<TextCodec> newTextCodec(const TextEncoding&);
+
+    // Only TextEncoding should use the following functions directly.
+
+    // Return the atomic canonical name for an encoding name or alias, or 0 if the
+    // name is null, empty or unknown. The second overload takes UTF-16 code units.
+    const char* atomicCanonicalTextEncodingName(const char* alias);
+    const char* atomicCanonicalTextEncodingName(const UChar* aliasCharacters, size_t aliasLength);
+    // True while the registry has not been extended beyond the base encodings.
+    bool noExtendedTextEncodingNameUsed();
+    // Both predicates expect the atomic canonical name and return false for null.
+    bool isJapaneseEncoding(const char* canonicalEncodingName);
+    bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName);
+
+#ifndef NDEBUG
+    // Debug builds only: prints the name map to stderr.
+    void dumpTextEncodingNameMap();
+#endif
+}
+
+#endif // TextEncodingRegistry_h
diff --git a/Source/WebCore/platform/text/TextStream.cpp b/Source/WebCore/platform/text/TextStream.cpp
new file mode 100644
index 0000000..1094fa4
--- /dev/null
+++ b/Source/WebCore/platform/text/TextStream.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2004, 2008, 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextStream.h"
+
+#include "PlatformString.h"
+#include <wtf/StringExtras.h>
+
+using namespace std;
+
+namespace WebCore {
+
+// Size of the scratch buffer the numeric and pointer operator<< overloads format into,
+// including the trailing null character. Ample for integers and pointers; a very large
+// float/double printed with "%.2f" can need more, in which case snprintf truncates it.
+static const size_t printBufferSize = 100;
+
+// Appends "1" for true and "0" for false.
+TextStream& TextStream::operator<<(bool b)
+{
+    const char* digit = b ? "1" : "0";
+    return *this << digit;
+}
+
+// Appends the decimal representation ("%d") of a signed int.
+TextStream& TextStream::operator<<(int i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, printBufferSize - 1, "%d", i);
+    return *this << formatted;
+}
+
+// Appends the decimal representation ("%u") of an unsigned int.
+TextStream& TextStream::operator<<(unsigned i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, printBufferSize - 1, "%u", i);
+    return *this << formatted;
+}
+
+// Appends the decimal representation ("%ld") of a signed long.
+TextStream& TextStream::operator<<(long i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, printBufferSize - 1, "%ld", i);
+    return *this << formatted;
+}
+
+// Appends the decimal representation ("%lu") of an unsigned long.
+TextStream& TextStream::operator<<(unsigned long i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, printBufferSize - 1, "%lu", i);
+    return *this << formatted;
+}
+
+// Appends a float formatted with two digits after the decimal point ("%.2f").
+TextStream& TextStream::operator<<(float f)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, printBufferSize - 1, "%.2f", f);
+    return *this << formatted;
+}
+
+// Appends a double formatted with two digits after the decimal point ("%.2f").
+TextStream& TextStream::operator<<(double d)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, printBufferSize - 1, "%.2f", d);
+    return *this << formatted;
+}
+
+// Appends a null-terminated C string, widening each char to a UChar.
+// Crashes deliberately if the combined length would overflow size_t.
+TextStream& TextStream::operator<<(const char* string)
+{
+    const size_t appendedLength = strlen(string);
+    const size_t oldLength = m_text.size();
+    const size_t remainingCapacity = numeric_limits<size_t>::max() - oldLength;
+    if (appendedLength > remainingCapacity)
+        CRASH();
+
+    m_text.grow(oldLength + appendedLength);
+    size_t destination = oldLength;
+    for (size_t source = 0; source < appendedLength; ++source, ++destination)
+        m_text[destination] = string[source];
+    return *this;
+}
+
+// Appends a pointer value in the platform's "%p" format.
+TextStream& TextStream::operator<<(const void* p)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, printBufferSize - 1, "%p", p);
+    return *this << formatted;
+}
+
+// Appends the contents of a String to the accumulated text.
+TextStream& TextStream::operator<<(const String& string)
+{
+    append(m_text, string);
+    return *this;
+}
+
+// Returns the accumulated text as a String; the internal buffer is handed over
+// to the String via String::adopt().
+String TextStream::release()
+{
+    return String::adopt(m_text);
+}
+
+#if OS(WINDOWS) && CPU(X86_64)
+// Appends the decimal representation ("%I64i") of a signed 64-bit integer.
+TextStream& TextStream::operator<<(__int64 i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, printBufferSize - 1, "%I64i", i);
+    return *this << formatted;
+}
+// Appends the decimal representation ("%I64u") of an unsigned 64-bit integer.
+TextStream& TextStream::operator<<(unsigned __int64 i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, printBufferSize - 1, "%I64u", i);
+    return *this << formatted;
+}
+#endif
+
+}
diff --git a/Source/WebCore/platform/text/TextStream.h b/Source/WebCore/platform/text/TextStream.h
new file mode 100644
index 0000000..e7e4cc0
--- /dev/null
+++ b/Source/WebCore/platform/text/TextStream.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2004, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextStream_h
+#define TextStream_h
+
+#include <wtf/Forward.h>
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// Minimal output stream that accumulates text in a Vector<UChar>.
+// Values are appended with operator<< and the result is obtained with release().
+class TextStream {
+public:
+    // Appends "1" or "0".
+    TextStream& operator<<(bool);
+    // Integer overloads append the decimal representation.
+    TextStream& operator<<(int);
+    TextStream& operator<<(unsigned);
+    TextStream& operator<<(long);
+    TextStream& operator<<(unsigned long);
+    // Floating point overloads append the value with two decimals ("%.2f").
+    TextStream& operator<<(float);
+    TextStream& operator<<(double);
+    // Appends a null-terminated C string.
+    TextStream& operator<<(const char*);
+    // Appends the pointer value ("%p").
+    TextStream& operator<<(const void*);
+    TextStream& operator<<(const String&);
+#if OS(WINDOWS) && CPU(X86_64)
+    TextStream& operator<<(unsigned __int64);
+    TextStream& operator<<(__int64);
+#endif
+
+    // Returns the accumulated text as a String.
+    String release();
+
+private:
+    // Text accumulated so far.
+    Vector<UChar> m_text;
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/platform/text/UnicodeRange.cpp b/Source/WebCore/platform/text/UnicodeRange.cpp
new file mode 100644
index 0000000..0373441
--- /dev/null
+++ b/Source/WebCore/platform/text/UnicodeRange.cpp
@@ -0,0 +1,462 @@
+/*
+ * Copyright (C) 2007 Apple Computer, Inc.
+ *
+ * Portions are Copyright (C) 1998 Netscape Communications Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Alternatively, the contents of this file may be used under the terms
+ * of either the Mozilla Public License Version 1.1, found at
+ * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
+ * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
+ * (the "GPL"), in which case the provisions of the MPL or the GPL are
+ * applicable instead of those above.  If you wish to allow use of your
+ * version of this file only under the terms of one of those two
+ * licenses (the MPL or the GPL) and not to allow others to use your
+ * version of this file under the LGPL, indicate your decision by
+ * deletingthe provisions above and replace them with the notice and
+ * other provisions required by the MPL or the GPL, as the case may be.
+ * If you do not delete the provisions above, a recipient may use your
+ * version of this file under any of the LGPL, the MPL or the GPL.
+ */
+
+#include "config.h"
+#include "UnicodeRange.h"
+
+namespace WebCore {
+
+// Maps a specific unicode range value to its language group name.
+// This table depends on the unicode range definitions: each item's index must
+// correspond to a unicode range value,
+// e.g. x-cyrillic = gUnicodeRangeToLangGroupTable[cRangeCyrillic].
+// langGroupFromUnicodeRange() only indexes it with values below cRangeSpecificItemNum.
+static const char* gUnicodeRangeToLangGroupTable[] = 
+{
+  "x-cyrillic",
+  "el",
+  "tr",
+  "he",
+  "ar",
+  "x-baltic",
+  "th",
+  "ko",
+  "ja",
+  "zh-CN",
+  "zh-TW",
+  "x-devanagari",
+  "x-tamil",
+  "x-armn",
+  "x-beng",
+  "x-cans",
+  "x-ethi",
+  "x-geor",
+  "x-gujr",
+  "x-guru",
+  "x-khmr",
+  "x-mlym"
+};
+
+/**********************************************************************
+ * Unicode subranges as defined in unicode 3.0
+ * x-western, x-central-euro, tr, x-baltic  -> latin 
+ *  0000 - 036f 
+ *  1e00 - 1eff
+ *  2000 - 206f  (general punctuation)
+ *  20a0 - 20cf  (currency symbols)
+ *  2100 - 214f  (letterlike symbols)
+ *  2150 - 218f  (Number Forms)
+ * el         -> greek
+ *  0370 - 03ff
+ *  1f00 - 1fff
+ * x-cyrillic -> cyrillic
+ *  0400 - 04ff
+ * he         -> hebrew
+ *  0590 - 05ff
+ * ar         -> arabic
+ *  0600 - 06ff
+ *  fb50 - fdff (arabic presentation forms)
+ *  fe70 - feff (arabic presentation forms b)
+ * th - thai
+ *  0e00 - 0e7f
+ * ko        -> korean
+ *  ac00 - d7af  (hangul Syllables)
+ *  1100 - 11ff    (jamo)
+ *  3130 - 318f (hangul compatibility jamo)
+ * ja
+ *  3040 - 309f (hiragana)
+ *  30a0 - 30ff (katakana)
+ * zh-CN
+ * zh-TW
+ *
+ * CJK
+ *  3100 - 312f (bopomofo)
+ *  31a0 - 31bf (bopomofo extended)
+ *  3000 - 303f (CJK Symbols and Punctuation) 
+ *  2e80 - 2eff (CJK radicals supplement)
+ *  2f00 - 2fdf (Kangxi Radicals)
+ *  2ff0 - 2fff (Ideographic Description Characters)
+ *  3190 - 319f (kanbun)
+ *  3200 - 32ff (Enclosed CJK letters and Months)
+ *  3300 - 33ff (CJK compatibility)
+ *  3400 - 4dbf (CJK Unified Ideographs Extension A)
+ *  4e00 - 9faf (CJK Unified Ideographs)
+ *  f900 - fa5f (CJK Compatibility Ideographs)
+ *  fe30 - fe4f (CJK compatibility Forms)
+ *  ff00 - ffef (halfwidth and fullwidth forms)
+ *
+ * Armenian
+ *  0530 - 058f 
+ * Syriac
+ *  0700 - 074f
+ * Thaana
+ *  0780 - 07bf
+ * Devanagari
+ *  0900 - 097f
+ * Bengali
+ *  0980 - 09ff
+ * Gurmukhi
+ *  0a00 - 0a7f
+ * Gujarati
+ *  0a80 - 0aff
+ * Oriya
+ *  0b00 - 0b7f
+ * Tamil
+ *  0b80 - 0bff
+ * Telugu
+ *  0c00 - 0c7f
+ * Kannada
+ *  0c80 - 0cff
+ * Malayalam
+ *  0d00 - 0d7f
+ * Sinhala
+ *  0d80 - 0def
+ * Lao
+ *  0e80 - 0eff
+ * Tibetan
+ *  0f00 - 0fbf
+ * Myanmar
+ *  1000 - 109f
+ * Georgian
+ *  10a0 - 10ff
+ * Ethiopic
+ *  1200 - 137f
+ * Cherokee
+ *  13a0 - 13ff
+ * Canadian Aboriginal Syllabics
+ *  1400 - 167f
+ * Ogham
+ *  1680 - 169f
+ * Runic 
+ *  16a0 - 16ff
+ * Khmer
+ *  1780 - 17ff
+ * Mongolian
+ *  1800 - 18af
+ * Misc - superscripts and subscripts
+ *  2070 - 209f
+ * Misc - Combining Diacritical Marks for Symbols
+ *  20d0 - 20ff
+ * Misc - Arrows
+ *  2190 - 21ff
+ * Misc - Mathematical Operators
+ *  2200 - 22ff
+ * Misc - Miscellaneous Technical
+ *  2300 - 23ff
+ * Misc - Control picture
+ *  2400 - 243f
+ * Misc - Optical character recognition
+ *  2440 - 2450
+ * Misc - Enclosed Alphanumerics
+ *  2460 - 24ff
+ * Misc - Box Drawing 
+ *  2500 - 257f
+ * Misc - Block Elements
+ *  2580 - 259f
+ * Misc - Geometric Shapes
+ *  25a0 - 25ff
+ * Misc - Miscellaneous Symbols
+ *  2600 - 267f
+ * Misc - Dingbats
+ *  2700 - 27bf
+ * Misc - Braille Patterns
+ *  2800 - 28ff
+ * Yi Syllables
+ *  a000 - a48f
+ * Yi radicals
+ *  a490 - a4cf
+ * Alphabetic Presentation Forms
+ *  fb00 - fb4f
+ * Misc - Combining half Marks
+ *  fe20 - fe2f
+ * Misc - small form variants
+ *  fe50 - fe6f
+ * Misc - Specials
+ *  fff0 - ffff
+ *********************************************************************/
+
+// Lookup tables used by findCharUnicodeRange() for BMP code points.
+// Sub-table 0 is indexed by the most significant hex digit of the code point. An entry
+// that is >= cRangeTableBase is not a final range: (entry - cRangeTableBase) selects
+// another sub-table, which is indexed by the next hex digit. An entry equal to
+// cRangeTertiaryTable defers to gUnicodeTertiaryRangeTable instead.
+static const unsigned cNumSubTables = 9;
+static const unsigned cSubTableSize = 16;
+
+static const unsigned char gUnicodeSubrangeTable[cNumSubTables][cSubTableSize] = 
+{ 
+  { // table for X---
+    cRangeTableBase+1,  //u0xxx
+    cRangeTableBase+2,  //u1xxx
+    cRangeTableBase+3,  //u2xxx
+    cRangeSetCJK,       //u3xxx
+    cRangeSetCJK,       //u4xxx
+    cRangeSetCJK,       //u5xxx
+    cRangeSetCJK,       //u6xxx
+    cRangeSetCJK,       //u7xxx
+    cRangeSetCJK,       //u8xxx
+    cRangeSetCJK,       //u9xxx
+    cRangeTableBase+4,  //uaxxx
+    cRangeKorean,       //ubxxx
+    cRangeKorean,       //ucxxx
+    cRangeTableBase+5,  //udxxx
+    cRangePrivate,      //uexxx
+    cRangeTableBase+6   //ufxxx
+  },
+  { //table for 0X--
+    cRangeSetLatin,          //u00xx
+    cRangeSetLatin,          //u01xx
+    cRangeSetLatin,          //u02xx
+    cRangeGreek,             //u03xx     XXX 0300-036f is in fact cRangeCombiningDiacriticalMarks
+    cRangeCyrillic,          //u04xx
+    cRangeTableBase+7,       //u05xx, includes Cyrillic supplement, Hebrew, and Armenian
+    cRangeArabic,            //u06xx
+    cRangeTertiaryTable,     //u07xx
+    cRangeUnassigned,        //u08xx
+    cRangeTertiaryTable,     //u09xx
+    cRangeTertiaryTable,     //u0axx
+    cRangeTertiaryTable,     //u0bxx
+    cRangeTertiaryTable,     //u0cxx
+    cRangeTertiaryTable,     //u0dxx
+    cRangeTertiaryTable,     //u0exx
+    cRangeTibetan,           //u0fxx
+  },
+  { //table for 1x--
+    cRangeTertiaryTable,     //u10xx
+    cRangeKorean,            //u11xx
+    cRangeEthiopic,          //u12xx
+    cRangeTertiaryTable,     //u13xx
+    cRangeCanadian,          //u14xx
+    cRangeCanadian,          //u15xx
+    cRangeTertiaryTable,     //u16xx
+    cRangeKhmer,             //u17xx
+    cRangeMongolian,         //u18xx
+    cRangeUnassigned,        //u19xx
+    cRangeUnassigned,        //u1axx
+    cRangeUnassigned,        //u1bxx
+    cRangeUnassigned,        //u1cxx
+    cRangeUnassigned,        //u1dxx
+    cRangeSetLatin,          //u1exx
+    cRangeGreek,             //u1fxx
+  },
+  { //table for 2x--
+    cRangeSetLatin,          //u20xx
+    cRangeSetLatin,          //u21xx
+    cRangeMathOperators,     //u22xx
+    cRangeMiscTechnical,     //u23xx
+    cRangeControlOpticalEnclose, //u24xx
+    cRangeBoxBlockGeometrics, //u25xx
+    cRangeMiscSymbols,       //u26xx
+    cRangeDingbats,          //u27xx
+    cRangeBraillePattern,    //u28xx
+    cRangeUnassigned,        //u29xx
+    cRangeUnassigned,        //u2axx
+    cRangeUnassigned,        //u2bxx
+    cRangeUnassigned,        //u2cxx
+    cRangeUnassigned,        //u2dxx
+    cRangeSetCJK,            //u2exx
+    cRangeSetCJK,            //u2fxx
+  },
+  {  //table for ax--
+    cRangeYi,                //ua0xx
+    cRangeYi,                //ua1xx
+    cRangeYi,                //ua2xx
+    cRangeYi,                //ua3xx
+    cRangeYi,                //ua4xx
+    cRangeUnassigned,        //ua5xx
+    cRangeUnassigned,        //ua6xx
+    cRangeUnassigned,        //ua7xx
+    cRangeUnassigned,        //ua8xx
+    cRangeUnassigned,        //ua9xx
+    cRangeUnassigned,        //uaaxx
+    cRangeUnassigned,        //uabxx
+    cRangeKorean,            //uacxx
+    cRangeKorean,            //uadxx
+    cRangeKorean,            //uaexx
+    cRangeKorean,            //uafxx
+  },
+  {  //table for dx--
+    cRangeKorean,            //ud0xx
+    cRangeKorean,            //ud1xx
+    cRangeKorean,            //ud2xx
+    cRangeKorean,            //ud3xx
+    cRangeKorean,            //ud4xx
+    cRangeKorean,            //ud5xx
+    cRangeKorean,            //ud6xx
+    cRangeKorean,            //ud7xx
+    cRangeSurrogate,         //ud8xx
+    cRangeSurrogate,         //ud9xx
+    cRangeSurrogate,         //udaxx
+    cRangeSurrogate,         //udbxx
+    cRangeSurrogate,         //udcxx
+    cRangeSurrogate,         //uddxx
+    cRangeSurrogate,         //udexx
+    cRangeSurrogate,         //udfxx
+  },
+  { // table for fx--
+    cRangePrivate,           //uf0xx 
+    cRangePrivate,           //uf1xx 
+    cRangePrivate,           //uf2xx 
+    cRangePrivate,           //uf3xx 
+    cRangePrivate,           //uf4xx 
+    cRangePrivate,           //uf5xx 
+    cRangePrivate,           //uf6xx 
+    cRangePrivate,           //uf7xx 
+    cRangePrivate,           //uf8xx 
+    cRangeSetCJK,            //uf9xx 
+    cRangeSetCJK,            //ufaxx 
+    cRangeArabic,            //ufbxx, includes alphabetic presentation forms
+    cRangeArabic,            //ufcxx
+    cRangeArabic,            //ufdxx
+    cRangeArabic,            //ufexx, includes Combining half marks, 
+                             //                CJK compatibility forms, 
+                             //                small form variants
+    cRangeTableBase+8,       //uffxx, halfwidth and fullwidth forms, includes Specials
+  },
+  { //table for 0x0500 - 0x05ff (indexed by the third hex digit)
+    cRangeCyrillic,          //u050x
+    cRangeCyrillic,          //u051x
+    cRangeCyrillic,          //u052x
+    cRangeArmenian,          //u053x
+    cRangeArmenian,          //u054x
+    cRangeArmenian,          //u055x
+    cRangeArmenian,          //u056x
+    cRangeArmenian,          //u057x
+    cRangeArmenian,          //u058x
+    cRangeHebrew,            //u059x
+    cRangeHebrew,            //u05ax
+    cRangeHebrew,            //u05bx
+    cRangeHebrew,            //u05cx
+    cRangeHebrew,            //u05dx
+    cRangeHebrew,            //u05ex
+    cRangeHebrew,            //u05fx
+  },
+  { //table for 0xff00 - 0xffff (indexed by the third hex digit)
+    cRangeSetCJK,            //uff0x, fullwidth latin
+    cRangeSetCJK,            //uff1x, fullwidth latin
+    cRangeSetCJK,            //uff2x, fullwidth latin
+    cRangeSetCJK,            //uff3x, fullwidth latin
+    cRangeSetCJK,            //uff4x, fullwidth latin
+    cRangeSetCJK,            //uff5x, fullwidth latin
+    cRangeSetCJK,            //uff6x, halfwidth katakana
+    cRangeSetCJK,            //uff7x, halfwidth katakana
+    cRangeSetCJK,            //uff8x, halfwidth katakana
+    cRangeSetCJK,            //uff9x, halfwidth katakana
+    cRangeSetCJK,            //uffax, halfwidth hangul jamo
+    cRangeSetCJK,            //uffbx, halfwidth hangul jamo
+    cRangeSetCJK,            //uffcx, halfwidth hangul jamo
+    cRangeSetCJK,            //uffdx, halfwidth hangul jamo
+    cRangeSetCJK,            //uffex, fullwidth symbols
+    cRangeSpecials,          //ufffx, Specials
+  },
+};
+
+// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80) 
+// code points so that the number of entries in the tertiary range
+// table for that range is obtained by dividing (0x1700 - 0x0700) by 128.
+// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal 
+// syllabaries take multiple chunks and Ogham and Runic share a single chunk.
+// Number of 128-code-point chunks between U+0700 and U+16FF (32).
+static const unsigned cTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);
+
+// Third-level table, indexed by (code point - 0x0700) >> 7. findCharUnicodeRange()
+// consults it only for entries marked cRangeTertiaryTable in the second-level tables;
+// the "place holder" rows are resolved in the second-level tables and never read.
+static const unsigned char gUnicodeTertiaryRangeTable[cTertiaryTableSize] =
+{ //table for 0x0700 - 0x16ff
+    cRangeSyriac,            //u070x
+    cRangeThaana,            //u078x
+    cRangeUnassigned,        //u080x  place holder(resolved in the 2ndary tab.)
+    cRangeUnassigned,        //u088x  place holder(resolved in the 2ndary tab.)
+    cRangeDevanagari,        //u090x
+    cRangeBengali,           //u098x
+    cRangeGurmukhi,          //u0a0x
+    cRangeGujarati,          //u0a8x
+    cRangeOriya,             //u0b0x
+    cRangeTamil,             //u0b8x
+    cRangeTelugu,            //u0c0x
+    cRangeKannada,           //u0c8x
+    cRangeMalayalam,         //u0d0x
+    cRangeSinhala,           //u0d8x
+    cRangeThai,              //u0e0x  
+    cRangeLao,               //u0e8x
+    cRangeTibetan,           //u0f0x  place holder(resolved in the 2ndary tab.)
+    cRangeTibetan,           //u0f8x  place holder(resolved in the 2ndary tab.)
+    cRangeMyanmar,           //u100x
+    cRangeGeorgian,          //u108x
+    cRangeKorean,            //u110x  place holder(resolved in the 2ndary tab.)
+    cRangeKorean,            //u118x  place holder(resolved in the 2ndary tab.)
+    cRangeEthiopic,          //u120x  place holder(resolved in the 2ndary tab.)
+    cRangeEthiopic,          //u128x  place holder(resolved in the 2ndary tab.)
+    cRangeEthiopic,          //u130x  
+    cRangeCherokee,          //u138x
+    cRangeCanadian,          //u140x  place holder(resolved in the 2ndary tab.)
+    cRangeCanadian,          //u148x  place holder(resolved in the 2ndary tab.)
+    cRangeCanadian,          //u150x  place holder(resolved in the 2ndary tab.)
+    cRangeCanadian,          //u158x  place holder(resolved in the 2ndary tab.)
+    cRangeCanadian,          //u160x  
+    cRangeOghamRunic,        //u168x  this contains two scripts, Ogham & Runic
+};
+
+// A two level index is almost enough for locating a range, with the 
+// exception of u03xx and u05xx. Since we don't really care about range for
+// combining diacritical marks in our font application, they are 
+// not discriminated further.  Future adoption of this method for other use 
+// should be aware of this limitation. The implementation can be extended if 
+// there is such a need.
+// For Indic, Southeast Asian scripts and some other scripts between
+// U+0700 and U+16FF, it's extended to the third level.
+// Returns the unicode range value (one of the cRange* constants) for |ch|.
+// Code points at or above U+FFFF, and invalid negative values, yield 0.
+unsigned int findCharUnicodeRange(UChar32 ch)
+{
+    // Comparing as unsigned also sends negative values (possible where UChar32 is a
+    // signed type) to this early return, instead of letting them index the tables
+    // below out of bounds.
+    if (static_cast<unsigned>(ch) >= 0xFFFF)
+        return 0;
+
+    // First level: indexed by the most significant hex digit.
+    unsigned int range = gUnicodeSubrangeTable[0][ch >> 12];
+
+    // Values below cRangeTableBase are final ranges.
+    if (range < cRangeTableBase)
+        return range;
+
+    // Otherwise the value names a second-level table, indexed by the next hex digit.
+    range = gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x0f00) >> 8];
+    if (range < cRangeTableBase)
+        return range;
+
+    // A table reference again: a third-level sub-table indexed by the third hex digit.
+    if (range < cRangeTertiaryTable)
+        return gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x00f0) >> 4];
+
+    // Yet another table to look at: U+0700 - U+16FF, in blocks of 128 code points.
+    return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
+}
+
+// Returns the language group name for a specific unicode range value, or 0 when
+// |unicodeRange| is not below cRangeSpecificItemNum.
+const char* langGroupFromUnicodeRange(unsigned char unicodeRange)
+{
+    if (unicodeRange >= cRangeSpecificItemNum)
+        return 0;
+    return gUnicodeRangeToLangGroupTable[unicodeRange];
+}
+
+}
diff --git a/Source/WebCore/platform/text/UnicodeRange.h b/Source/WebCore/platform/text/UnicodeRange.h
new file mode 100644
index 0000000..2278a0e
--- /dev/null
+++ b/Source/WebCore/platform/text/UnicodeRange.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2007 Apple Computer, Inc.
+ *
+ * Portions are Copyright (C) 1998 Netscape Communications Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Alternatively, the contents of this file may be used under the terms
+ * of either the Mozilla Public License Version 1.1, found at
+ * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
+ * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
+ * (the "GPL"), in which case the provisions of the MPL or the GPL are
+ * applicable instead of those above.  If you wish to allow use of your
+ * version of this file only under the terms of one of those two
+ * licenses (the MPL or the GPL) and not to allow others to use your
+ * version of this file under the LGPL, indicate your decision by
+ * deletingthe provisions above and replace them with the notice and
+ * other provisions required by the MPL or the GPL, as the case may be.
+ * If you do not delete the provisions above, a recipient may use your
+ * version of this file under any of the LGPL, the MPL or the GPL.
+ */
+
+#ifndef UnicodeRange_H
+#define UnicodeRange_H
+
+#if PLATFORM(HAIKU)
+#include "stdint.h"
+#endif
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// The following constants define Unicode subranges. Values below
+// cRangeSpecificItemNum must be contiguous so that they can be used directly
+// as an index when mapping a range to a lang group.
+// All ranges we care about should fit within 32 bits.
+
+// Frequently used range definitions
+const unsigned char   cRangeCyrillic =    0;
+const unsigned char   cRangeGreek    =    1;
+const unsigned char   cRangeTurkish  =    2;
+const unsigned char   cRangeHebrew   =    3;
+const unsigned char   cRangeArabic   =    4;
+const unsigned char   cRangeBaltic   =    5;
+const unsigned char   cRangeThai     =    6;
+const unsigned char   cRangeKorean   =    7;
+const unsigned char   cRangeJapanese =    8;
+const unsigned char   cRangeSChinese =    9;
+const unsigned char   cRangeTChinese =   10;
+const unsigned char   cRangeDevanagari = 11;
+const unsigned char   cRangeTamil    =   12;
+const unsigned char   cRangeArmenian =   13;
+const unsigned char   cRangeBengali  =   14;
+const unsigned char   cRangeCanadian =   15;
+const unsigned char   cRangeEthiopic =   16;
+const unsigned char   cRangeGeorgian =   17;
+const unsigned char   cRangeGujarati =   18;
+const unsigned char   cRangeGurmukhi =   19;
+const unsigned char   cRangeKhmer    =   20;
+const unsigned char   cRangeMalayalam =  21;
+// Count of the frequently used ranges above (they are numbered 0-21).
+const unsigned char   cRangeSpecificItemNum = 22;
+
+// Values 22-29 are left free so the range / range set lists can grow into them.
+
+const unsigned char   cRangeSetStart  =  30;    // range set definitions start here
+const unsigned char   cRangeSetLatin  =  30;
+const unsigned char   cRangeSetCJK    =  31;
+const unsigned char   cRangeSetEnd    =  31;   // range set definitions end here
+
+// Less frequently used range definitions
+const unsigned char   cRangeSurrogate            = 32;
+const unsigned char   cRangePrivate              = 33;
+const unsigned char   cRangeMisc                 = 34;
+const unsigned char   cRangeUnassigned           = 35;
+const unsigned char   cRangeSyriac               = 36;
+const unsigned char   cRangeThaana               = 37;
+const unsigned char   cRangeOriya                = 38;
+const unsigned char   cRangeTelugu               = 39;
+const unsigned char   cRangeKannada              = 40;
+const unsigned char   cRangeSinhala              = 41;
+const unsigned char   cRangeLao                  = 42;
+const unsigned char   cRangeTibetan              = 43;
+const unsigned char   cRangeMyanmar              = 44;
+const unsigned char   cRangeCherokee             = 45;
+const unsigned char   cRangeOghamRunic           = 46;
+const unsigned char   cRangeMongolian            = 47;
+const unsigned char   cRangeMathOperators        = 48;
+const unsigned char   cRangeMiscTechnical        = 49;
+const unsigned char   cRangeControlOpticalEnclose = 50;
+const unsigned char   cRangeBoxBlockGeometrics   = 51;
+const unsigned char   cRangeMiscSymbols          = 52;
+const unsigned char   cRangeDingbats             = 53;
+const unsigned char   cRangeBraillePattern       = 54;
+const unsigned char   cRangeYi                   = 55;
+const unsigned char   cRangeCombiningDiacriticalMarks = 56;
+const unsigned char   cRangeSpecials             = 57;
+
+const unsigned char   cRangeTableBase   = 128;    // values of 128 and above are reserved for internal use only
+const unsigned char   cRangeTertiaryTable  = 145; // leaves room for 16 subtable
+                                            // indices (cRangeTableBase + 1 ..
+                                            // cRangeTableBase + 16)
+
+// findCharUnicodeRange() maps a code point to one of the range values above;
+// langGroupFromUnicodeRange() returns 0 for values >= cRangeSpecificItemNum.
+unsigned int findCharUnicodeRange(UChar32 ch);
+const char* langGroupFromUnicodeRange(unsigned char unicodeRange);
+
+}
+
+#endif // UnicodeRange_H
diff --git a/Source/WebCore/platform/text/android/HyphenationAndroid.cpp b/Source/WebCore/platform/text/android/HyphenationAndroid.cpp
new file mode 100644
index 0000000..d1bd839
--- /dev/null
+++ b/Source/WebCore/platform/text/android/HyphenationAndroid.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2010, The Android Open Source Project
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "Hyphenation.h"
+
+// For external hyphenation library.
+#include "hyphen.h"
+#include <utils/AssetManager.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/WTFString.h>
+
+extern android::AssetManager* globalAssetManager();
+
+using namespace WTF;
+
+namespace WebCore {
+
+// Opens the bundled hyphenation asset, hands its contents (re-encoded as
+// UTF-8) to the hyphenation library and returns the resulting dictionary.
+// Returns 0 when the asset cannot be opened.
+static HyphenDict* loadHyphenationDictionary()
+{
+    // Only support English for now.
+    android::Asset* asset = globalAssetManager()->open("webkit/hyph_en_US.dic", android::Asset::ACCESS_BUFFER);
+    if (!asset)
+        return 0; // Asset webkit/hyph_en_US.dic not found!
+
+    const char* rawBytes = static_cast<const char*>(asset->getBuffer(false));
+    const String rawText(rawBytes, asset->getLength());
+    const CString dictContents = rawText.utf8();
+
+    HyphenDict* dictionary = hnj_hyphen_load_from_buffer(dictContents.data(), dictContents.length());
+
+    // The asset is only needed while the dictionary is being parsed.
+    delete asset;
+    return dictionary;
+}
+
+// Reports whether hyphenation is available for a locale. The locale is
+// currently ignored and every request is answered with true.
+bool canHyphenate(const AtomicString& /* localeIdentifier */)
+{
+    // FIXME: Check that the locale identifier matches the available dictionary.
+    return true;
+}
+
+// Returns the offset of the last hyphenation opportunity in |characters| that
+// lies strictly before |beforeIndex|, or 0 when there is none.
+//
+// Only a run of ASCII letters, optionally preceded by ASCII spaces, is
+// considered: any other character makes the function return 0, as does a text
+// shorter than 5 or longer than 100 code units. The locale is ignored.
+size_t lastHyphenLocation(const UChar* characters, size_t length, size_t beforeIndex, const AtomicString& /* localeIdentifier */)
+{
+    static const size_t minWordLen = 5;
+    static const size_t maxWordLen = 100;
+    if (!beforeIndex || length < minWordLen || length > maxWordLen)
+        return 0;
+
+    // Loaded on first use; a failed load is remembered and not retried.
+    static HyphenDict* dict = loadHyphenationDictionary();
+    if (!dict)
+        return 0;
+
+    char word[maxWordLen];
+    size_t wordLength = 0;
+    for (size_t i = 0; i < length; ++i) {
+        const UChar ch = characters[i];
+        // Only English for now.
+        // To really make it language aware, we need something like language
+        // detection or rely on the langAttr in the html element.  Though
+        // seems right now the langAttr is not used or quite implemented in
+        // webkit.
+        if (!isASCIIAlpha(ch)) {
+            // Bypass leading spaces.
+            if (isASCIISpace(ch) && !wordLength)
+                continue;
+            return 0;
+        }
+        word[wordLength++] = ch;
+    }
+    if (wordLength < minWordLen)
+        return 0;
+
+    // Everything that was skipped is a leading space, so this cannot be negative.
+    const size_t leadingSpacesCount = length - wordLength;
+
+    // A break after word[i] is reported as i + 1 + leadingSpacesCount and has
+    // to stay below beforeIndex, so the highest candidate index is
+    // beforeIndex - 2 - leadingSpacesCount. Bail out when that would wrap
+    // around instead of scanning from a huge unsigned index.
+    if (beforeIndex < leadingSpacesCount + 2)
+        return 0;
+    size_t lastCandidate = beforeIndex - 2 - leadingSpacesCount;
+    // Never look past the entries that belong to the word itself.
+    if (lastCandidate >= wordLength)
+        lastCandidate = wordLength - 1;
+
+    static const int extraBuffer = 5;
+    char hyphens[maxWordLen + extraBuffer];
+    if (!hnj_hyphen_hyphenate(dict, word, wordLength, hyphens)) {
+        // Index 0 is deliberately never tested, matching the original scan.
+        for (size_t i = lastCandidate; i > 0; --i) {
+            if (hyphens[i] & 1)
+                return i + 1 + leadingSpacesCount;
+        }
+    }
+
+    return 0;
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp b/Source/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp
new file mode 100644
index 0000000..9732e92
--- /dev/null
+++ b/Source/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2007, The Android Open Source Project
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+// Locale identifier used for text search; currently always the empty string.
+const char* currentSearchLocaleID()
+{
+    // FIXME: Should use system locale.
+    return "";
+}
+
+// Locale identifier used for text breaking; currently hard-coded.
+const char* currentTextBreakLocaleID()
+{
+    // FIXME: Should use system locale.
+    return "en_us";
+}
+
+}
diff --git a/Source/WebCore/platform/text/brew/TextBoundariesBrew.cpp b/Source/WebCore/platform/text/brew/TextBoundariesBrew.cpp
new file mode 100644
index 0000000..506bdcf
--- /dev/null
+++ b/Source/WebCore/platform/text/brew/TextBoundariesBrew.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2006 Zack Rusin <zack@kde.org>
+ * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextBoundaries.h"
+
+#include "NotImplemented.h"
+#include "PlatformString.h"
+
+using namespace WTF::Unicode;
+
+namespace WebCore {
+
+// Placeholder: word navigation is not implemented on this port. Reports the
+// missing implementation and always returns 0.
+int findNextWordFromIndex(const UChar* buffer, int len, int position, bool forward)
+{
+    notImplemented();
+    return 0;
+}
+
+// Finds the run of letters around |position| and stores its bounds in
+// |*start| (index of the first letter) and |*end| (index just past the last
+// letter). Both are set to 0 when |position| is greater than |len|.
+void findWordBoundary(const UChar* buffer, int len, int position, int* start, int* end)
+{
+    if (position > len) {
+        *start = 0;
+        *end = 0;
+        return;
+    }
+
+    // NOTE(review): characters are read through a String rather than the raw
+    // buffer; presumably String::operator[] tolerates the index |len| that the
+    // forward scan below can reach - confirm.
+    String str(buffer, len);
+
+    // Walk left from the character in front of |position| while letters continue.
+    int currentPosition = position - 1;
+    while (currentPosition >= 0 && isLetter(str[currentPosition]))
+        --currentPosition;
+
+    // currentPosition is now either -1 (the scan ran off the front) or the
+    // index of the first non-letter to the left of the word.
+    int startPos = (currentPosition < 0) ? 0 : currentPosition + 1;
+
+    // Walk right from |position| until the first non-letter.
+    currentPosition = position;
+    while (isLetter(str[currentPosition]))
+        ++currentPosition;
+
+    *start = startPos;
+    *end = currentPosition;
+}
+
+}
diff --git a/Source/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp b/Source/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp
new file mode 100644
index 0000000..7f46e4f
--- /dev/null
+++ b/Source/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp
@@ -0,0 +1,312 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIterator.h"
+
+#include "PlatformString.h"
+#include <wtf/StdLibExtras.h>
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF::Unicode;
+
+namespace WebCore {
+
+// Hack, not entirely correct.
+// Non-spacing marks are never stops. Surrogates are stops only when the code
+// unit is outside [0xd800, 0xdc00). Everything else is a stop.
+static inline bool isCharStop(UChar c)
+{
+    const CharCategory charCategory = category(c);
+    if (charCategory == Mark_NonSpacing)
+        return false;
+    if (charCategory != Other_Surrogate)
+        return true;
+    return c < 0xd800 || c >= 0xdc00;
+}
+
+// True for every character whose category is not Separator_Line.
+static inline bool isLineStop(UChar c)
+{
+    const bool isLineSeparator = category(c) == Separator_Line;
+    return !isLineSeparator;
+}
+
+// Punctuation characters are treated as sentence stops.
+static inline bool isSentenceStop(UChar c)
+{
+    const bool punctuation = isPunct(c);
+    return punctuation;
+}
+
+// Shared state and interface of the simple break iterators in this file.
+// Subclasses implement first(), next() and previous(); following() and
+// preceding() reposition the iterator and then delegate to them.
+class TextBreakIterator {
+public:
+    // Points the iterator at |str| (|len| code units) and rewinds it to 0.
+    void reset(const UChar* str, int len)
+    {
+        currentPos = 0;
+        length = len;
+        string = str;
+    }
+
+    virtual int first() = 0;
+    virtual int next() = 0;
+    virtual int previous() = 0;
+
+    // Jumps to |position| and returns whatever next() yields from there.
+    int following(int position)
+    {
+        currentPos = position;
+        return next();
+    }
+
+    // Jumps to |position| and returns whatever previous() yields from there.
+    int preceding(int position)
+    {
+        currentPos = position;
+        return previous();
+    }
+
+    int currentPos; // Offset into |string|; some subclasses store -1 once exhausted.
+    const UChar* string; // Text being iterated.
+    int length; // Number of code units in |string|.
+};
+
+// Concrete iterators. Each one only overrides the three stepping functions;
+// the definitions follow below.
+
+// Steps over runs delimited by isSpace().
+struct WordBreakIterator : TextBreakIterator {
+    virtual int first();
+    virtual int next();
+    virtual int previous();
+};
+
+// Steps between positions accepted by isCharStop().
+struct CharBreakIterator : TextBreakIterator {
+    virtual int first();
+    virtual int next();
+    virtual int previous();
+};
+
+// Steps over runs delimited by isLineStop().
+struct LineBreakIterator : TextBreakIterator {
+    virtual int first();
+    virtual int next();
+    virtual int previous();
+};
+
+// Steps over runs delimited by isSentenceStop().
+struct SentenceBreakIterator : TextBreakIterator {
+    virtual int first();
+    virtual int next();
+    virtual int previous();
+};
+
+// Rewinds to the start of the text and returns that position (0).
+int WordBreakIterator::first()
+{
+    return currentPos = 0;
+}
+
+// Advances past the current run up to and including the spaces that follow
+// it, stopping on the first non-space after a space or at the end of the
+// text. Returns the new position, or -1 when already at the end.
+int WordBreakIterator::next()
+{
+    if (currentPos == length)
+        return currentPos = -1;
+
+    for (bool sawSpace = false; currentPos < length; ++currentPos) {
+        const bool atSpace = isSpace(string[currentPos]);
+        if (sawSpace && !atSpace)
+            break;
+        sawSpace = sawSpace || atSpace;
+    }
+    return currentPos;
+}
+
+// Moves backwards until a non-space is found after at least one space has
+// been passed, or until position 0 is reached. Returns the new position, or
+// -1 when already at position 0.
+int WordBreakIterator::previous()
+{
+    if (!currentPos) {
+        currentPos = -1;
+        return currentPos;
+    }
+    bool haveSpace = false;
+    while (currentPos > 0) {
+        // The scan may start at or beyond the end of the text (for example
+        // via preceding(length)). There is no code unit there, so use U+0000
+        // instead of reading past the buffer.
+        const UChar ch = currentPos < length ? string[currentPos] : 0;
+        const bool atSpace = isSpace(ch);
+        if (haveSpace && !atSpace)
+            break;
+        if (atSpace)
+            haveSpace = true;
+        --currentPos;
+    }
+    return currentPos;
+}
+
+// Rewinds to the start of the text and returns that position (0).
+int CharBreakIterator::first()
+{
+    return currentPos = 0;
+}
+
+// Steps forward by at least one code unit and keeps going until a position
+// accepted by isCharStop() or the end of the text. Returns -1 (without
+// moving) when already at or past the end.
+int CharBreakIterator::next()
+{
+    if (currentPos >= length)
+        return -1;
+
+    for (++currentPos; currentPos < length; ++currentPos) {
+        if (isCharStop(string[currentPos]))
+            break;
+    }
+    return currentPos;
+}
+
+// Steps backwards by at least one code unit (after clamping the position to
+// the end of the text) and keeps going until a position accepted by
+// isCharStop() or position 0. Returns -1 (without moving) when at or before 0.
+int CharBreakIterator::previous()
+{
+    if (currentPos <= 0)
+        return -1;
+
+    const int clamped = currentPos > length ? length : currentPos;
+    for (currentPos = clamped - 1; currentPos > 0; --currentPos) {
+        if (isCharStop(string[currentPos]))
+            break;
+    }
+    return currentPos;
+}
+
+// Rewinds to the start of the text and returns that position (0).
+int LineBreakIterator::first()
+{
+    return currentPos = 0;
+}
+
+// Advances until a character rejected by isLineStop() is found after at
+// least one accepted character, or until the end of the text. Returns the
+// new position, or -1 when already at the end.
+int LineBreakIterator::next()
+{
+    if (currentPos == length)
+        return currentPos = -1;
+
+    for (bool sawStop = false; currentPos < length; ++currentPos) {
+        const bool atStop = isLineStop(string[currentPos]);
+        if (sawStop && !atStop)
+            break;
+        sawStop = sawStop || atStop;
+    }
+    return currentPos;
+}
+
+// Moves backwards until a character rejected by isLineStop() is found after
+// at least one accepted character, or until position 0 is reached. Returns
+// the new position, or -1 when already at position 0.
+int LineBreakIterator::previous()
+{
+    if (!currentPos) {
+        currentPos = -1;
+        return currentPos;
+    }
+    bool haveSpace = false;
+    while (currentPos > 0) {
+        // The scan may start at or beyond the end of the text (for example
+        // via preceding(length)). There is no code unit there, so use U+0000
+        // instead of reading past the buffer.
+        const UChar ch = currentPos < length ? string[currentPos] : 0;
+        const bool atStop = isLineStop(ch);
+        if (haveSpace && !atStop)
+            break;
+        if (atStop)
+            haveSpace = true;
+        --currentPos;
+    }
+    return currentPos;
+}
+
+// Rewinds to the start of the text and returns that position (0).
+int SentenceBreakIterator::first()
+{
+    return currentPos = 0;
+}
+
+// Advances until a character rejected by isSentenceStop() is found after at
+// least one accepted character, or until the end of the text. Returns the
+// new position, or -1 when already at the end.
+int SentenceBreakIterator::next()
+{
+    if (currentPos == length)
+        return currentPos = -1;
+
+    for (bool sawStop = false; currentPos < length; ++currentPos) {
+        const bool atStop = isSentenceStop(string[currentPos]);
+        if (sawStop && !atStop)
+            break;
+        sawStop = sawStop || atStop;
+    }
+    return currentPos;
+}
+
+// Moves backwards until a character rejected by isSentenceStop() is found
+// after at least one accepted character, or until position 0 is reached.
+// Returns the new position, or -1 when already at position 0.
+int SentenceBreakIterator::previous()
+{
+    if (!currentPos) {
+        currentPos = -1;
+        return currentPos;
+    }
+    bool haveSpace = false;
+    while (currentPos > 0) {
+        // The scan may start at or beyond the end of the text (for example
+        // via preceding(length)). There is no code unit there, so use U+0000
+        // instead of reading past the buffer.
+        const UChar ch = currentPos < length ? string[currentPos] : 0;
+        const bool atStop = isSentenceStop(ch);
+        if (haveSpace && !atStop)
+            break;
+        if (atStop)
+            haveSpace = true;
+        --currentPos;
+    }
+    return currentPos;
+}
+
+// Re-targets the function's single WordBreakIterator instance at the given
+// text and returns it; every call hands back the same object.
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ());
+    TextBreakIterator* shared = &iterator;
+    shared->reset(string, length);
+    return shared;
+}
+
+// Re-targets the function's single CharBreakIterator instance at the given
+// text and returns it; every call hands back the same object.
+TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ());
+    TextBreakIterator* shared = &iterator;
+    shared->reset(string, length);
+    return shared;
+}
+
+// Re-targets the function's single LineBreakIterator instance at the given
+// text and returns it; every call hands back the same object.
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(LineBreakIterator, iterator, ());
+    TextBreakIterator* shared = &iterator;
+    shared->reset(string, length);
+    return shared;
+}
+
+// Re-targets the function's single SentenceBreakIterator instance at the
+// given text and returns it; every call hands back the same object.
+TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ());
+    TextBreakIterator* shared = &iterator;
+    shared->reset(string, length);
+    return shared;
+}
+
+// Forwards to TextBreakIterator::first().
+int textBreakFirst(TextBreakIterator* breakIterator)
+{
+    const int position = breakIterator->first();
+    return position;
+}
+
+// Forwards to TextBreakIterator::next().
+int textBreakNext(TextBreakIterator* breakIterator)
+{
+    const int position = breakIterator->next();
+    return position;
+}
+
+// Forwards to TextBreakIterator::preceding(), which repositions the iterator
+// at |position| before stepping backwards.
+int textBreakPreceding(TextBreakIterator* breakIterator, int position)
+{
+    const int result = breakIterator->preceding(position);
+    return result;
+}
+
+// Forwards to TextBreakIterator::following(), which repositions the iterator
+// at |position| before stepping forwards.
+int textBreakFollowing(TextBreakIterator* breakIterator, int position)
+{
+    const int result = breakIterator->following(position);
+    return result;
+}
+
+// Returns the iterator's stored position without moving it.
+int textBreakCurrent(TextBreakIterator* breakIterator)
+{
+    const int position = breakIterator->currentPos;
+    return position;
+}
+
+// Both arguments are ignored: every position is reported as a break.
+bool isTextBreak(TextBreakIterator*, int)
+{
+    return true;
+}
+
+// Cursor movement reuses the character break iterator on this port.
+TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+{
+    TextBreakIterator* iterator = characterBreakIterator(string, length);
+    return iterator;
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/brew/TextCodecBrew.cpp b/Source/WebCore/platform/text/brew/TextCodecBrew.cpp
new file mode 100644
index 0000000..1f32298
--- /dev/null
+++ b/Source/WebCore/platform/text/brew/TextCodecBrew.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2010 Company 100, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextCodecBrew.h"
+
+#include "AEEAppGen.h"
+#include "AEEICharsetConv.h"
+#include "NotImplemented.h"
+#include "PlatformString.h"
+#include <wtf/Assertions.h>
+#include <wtf/text/CString.h>
+
+namespace WebCore {
+
+// Encoding name passed to ICharsetConv for the UChar side of a conversion. FIXME: Not sure if there are Brew MP devices which use big endian.
+const char* WebCore::TextCodecBrew::m_internalEncodingName = "UTF-16LE";
+
+// Factory handed to the codec registrar: creates a TextCodecBrew for
+// |encoding|. The second argument is unused.
+static PassOwnPtr<TextCodec> newTextCodecBrew(const TextEncoding& encoding, const void*)
+{
+    return new TextCodecBrew(encoding);
+}
+
+// Registers the only base encoding name of this port, "UTF-8", under itself.
+void TextCodecBrew::registerBaseEncodingNames(EncodingNameRegistrar registrar)
+{
+    registrar("UTF-8", "UTF-8");
+}
+
+// Registers newTextCodecBrew as the factory for "UTF-8", with no extra data.
+void TextCodecBrew::registerBaseCodecs(TextCodecRegistrar registrar)
+{
+    registrar("UTF-8", newTextCodecBrew, 0);
+}
+
+// Placeholder: no extended encoding names are registered on this port.
+void TextCodecBrew::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
+{
+    // FIXME: Not sure how to enumerate all available encodings.
+    notImplemented();
+}
+
+// Placeholder: no extended codecs are registered on this port.
+void TextCodecBrew::registerExtendedCodecs(TextCodecRegistrar registrar)
+{
+    notImplemented();
+}
+
+// Creates the ICharsetConv instance that converts from |encoding| to the
+// internal encoding. The handler is looked up with a "<source>><target>"
+// string built from the two encoding names.
+//
+// The initializers are listed in the order the members are declared in the
+// class, which is the order in which they are actually initialized.
+TextCodecBrew::TextCodecBrew(const TextEncoding& encoding)
+    : m_encoding(encoding)
+    , m_numBufferedBytes(0)
+    , m_charsetConverter(0)
+{
+    String format = String::format("%s>%s", encoding.name(), m_internalEncodingName);
+
+    IShell* shell = reinterpret_cast<AEEApplet*>(GETAPPINSTANCE())->m_pIShell;
+    AEECLSID classID = ISHELL_GetHandler(shell, AEEIID_ICharsetConv, format.latin1().data());
+    ISHELL_CreateInstance(shell, classID, reinterpret_cast<void**>(&m_charsetConverter));
+
+    // Creation failures are only caught in debug builds; m_charsetConverter
+    // stays 0 in that case.
+    ASSERT(m_charsetConverter);
+}
+
+// Releases the converter created by the constructor, if there is one.
+TextCodecBrew::~TextCodecBrew()
+{
+    if (!m_charsetConverter)
+        return;
+    ICharsetConv_Release(m_charsetConverter);
+}
+
+// Decodes |length| bytes from |bytes| into a String using ICharsetConv.
+//
+// Bytes left over from an incomplete item in a previous call are prepended
+// to the input. When the input ends in the middle of an item, the tail is
+// kept for the next call unless |flush| is set, in which case |sawError| is
+// set and a null String is returned. A bad item sets |sawError|; with
+// |stopOnError| a '?' is appended and decoding stops, otherwise one byte is
+// skipped and decoding continues.
+String TextCodecBrew::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
+{
+    int code = ICharsetConv_Initialize(m_charsetConverter, m_encoding.name(), m_internalEncodingName, 0);
+    ASSERT(code == AEE_SUCCESS);
+
+    Vector<UChar> result;
+    Vector<unsigned char> prefixedBytes(length);
+
+    int srcSize;
+    unsigned char* srcBegin;
+
+    if (m_numBufferedBytes) {
+        srcSize = length + m_numBufferedBytes;
+        prefixedBytes.grow(srcSize);
+        memcpy(prefixedBytes.data(), m_bufferedBytes, m_numBufferedBytes);
+        memcpy(prefixedBytes.data() + m_numBufferedBytes, bytes, length);
+
+        srcBegin = prefixedBytes.data();
+
+        // all buffered bytes are consumed now
+        m_numBufferedBytes = 0;
+    } else {
+        srcSize = length;
+        srcBegin = const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(bytes));
+    }
+
+    unsigned char* src = srcBegin;
+    unsigned char* srcEnd = srcBegin + srcSize;
+
+    Vector<UChar> dstBuffer(srcSize);
+
+    while (src < srcEnd) {
+        // Keep the byte count in step with |src|: |src| is advanced by hand
+        // below when a bad item is skipped, and a stale count would let the
+        // converter read past |srcEnd|.
+        srcSize = static_cast<int>(srcEnd - src);
+
+        int numCharsConverted;
+        unsigned char* dstBegin = reinterpret_cast<unsigned char*>(dstBuffer.data());
+        unsigned char* dst = dstBegin;
+        int dstSize = dstBuffer.size() * sizeof(UChar);
+
+        code = ICharsetConv_CharsetConvert(m_charsetConverter, &src, &srcSize, &dst, &dstSize, &numCharsConverted);
+        ASSERT(code != AEE_ENOSUCH);
+
+        // Keep whatever this call produced before acting on its status, so
+        // that output written ahead of a full buffer, a bad item or an
+        // incomplete tail is not dropped.
+        // NOTE(review): assumes the converter advances |src| and |dst| past
+        // what it handled even when it reports an error - confirm against the
+        // ICharsetConv reference.
+        int numChars = (dst - dstBegin) / sizeof(UChar);
+        if (numChars > 0)
+            result.append(dstBuffer.data(), numChars);
+
+        if (code == AEE_EBUFFERTOOSMALL) {
+            // Increase the buffer and try it again.
+            dstBuffer.grow(dstBuffer.size() * 2);
+            continue;
+        }
+
+        if (code == AEE_EBADITEM) {
+            sawError = true;
+            if (stopOnError) {
+                result.append(L'?');
+                break;
+            }
+
+            src++;
+        }
+
+        if (code == AEE_EINCOMPLETEITEM) {
+            if (flush) {
+                LOG_ERROR("Partial bytes at end of input while flush requested.");
+                sawError = true;
+                return String();
+            }
+
+            const size_t remaining = srcEnd - src;
+            if (remaining > sizeof(m_bufferedBytes)) {
+                // The tail does not fit into the carry-over buffer; report an
+                // error rather than writing past the end of it.
+                sawError = true;
+                break;
+            }
+
+            m_numBufferedBytes = remaining;
+            memcpy(m_bufferedBytes, src, m_numBufferedBytes);
+            break;
+        }
+    }
+
+    return String::adopt(result);
+}
+
+// Encodes |length| UChars from |characters| into the codec's encoding using
+// ICharsetConv, with '?' passed to the converter as replacement character.
+// Returns an empty string for empty input and a null CString when the
+// converter reports AEE_ENOSUCH. |handling| is not used (see the FIXME).
+CString TextCodecBrew::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    if (!length)
+        return "";
+
+    unsigned int replacementCharacter = '?';
+
+    // FIXME: Impossible to handle EntitiesForUnencodables or URLEncodedEntitiesForUnencodables with ICharsetConv.
+    int code = ICharsetConv_Initialize(m_charsetConverter, m_internalEncodingName, m_encoding.name(), replacementCharacter);
+    ASSERT(code == AEE_SUCCESS);
+
+    Vector<char> result;
+
+    int srcSize = length * sizeof(UChar);
+    unsigned char* srcBegin = const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(characters));
+    unsigned char* src = srcBegin;
+    unsigned char* srcEnd = srcBegin + srcSize;
+
+    Vector<unsigned char> dstBuffer(length * sizeof(UChar));
+
+    while (src < srcEnd) {
+        // Keep the byte count in step with |src|: |src| is advanced by hand
+        // below when a bad item is skipped, and a stale count would let the
+        // converter read past |srcEnd|.
+        srcSize = static_cast<int>(srcEnd - src);
+
+        int numCharsConverted;
+        unsigned char* dstBegin = dstBuffer.data();
+        unsigned char* dst = dstBegin;
+        int dstSize = dstBuffer.size();
+
+        code = ICharsetConv_CharsetConvert(m_charsetConverter, &src, &srcSize, &dst, &dstSize, &numCharsConverted);
+        ASSERT(code != AEE_EINCOMPLETEITEM);
+
+        if (code == AEE_ENOSUCH) {
+            LOG_ERROR("Conversion error, Code=%d", code);
+            return CString();
+        }
+
+        // Keep whatever this call produced before acting on its status, so
+        // that output written ahead of a full buffer is not dropped when the
+        // conversion is retried with a larger one.
+        // NOTE(review): assumes the converter advances |src| and |dst| past
+        // what it handled even when it reports an error - confirm against the
+        // ICharsetConv reference.
+        int numBytes = dst - dstBegin;
+        if (numBytes > 0)
+            result.append(dstBuffer.data(), numBytes);
+
+        if (code == AEE_EBUFFERTOOSMALL) {
+            // Increase the buffer and try it again.
+            dstBuffer.grow(dstBuffer.size() * 2);
+            continue;
+        }
+
+        if (code == AEE_EBADITEM)
+            src += sizeof(UChar); // Skip the invalid character
+    }
+
+    return CString(result.data(), result.size());
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/brew/TextCodecBrew.h b/Source/WebCore/platform/text/brew/TextCodecBrew.h
new file mode 100644
index 0000000..97e2c87
--- /dev/null
+++ b/Source/WebCore/platform/text/brew/TextCodecBrew.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2010 Company 100, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodecBrew_h
+#define TextCodecBrew_h
+
+#include "TextCodec.h"
+#include "TextEncoding.h"
+
+typedef struct ICharsetConv ICharsetConv;
+
+namespace WebCore {
+// TextCodec implementation that performs its conversions through ICharsetConv.
+class TextCodecBrew : public TextCodec {
+public:
+    static void registerBaseEncodingNames(EncodingNameRegistrar);
+    static void registerBaseCodecs(TextCodecRegistrar);
+    // The extended registration functions are not implemented on this port.
+    static void registerExtendedEncodingNames(EncodingNameRegistrar);
+    static void registerExtendedCodecs(TextCodecRegistrar);
+    // The constructor creates the converter; the destructor releases it.
+    TextCodecBrew(const TextEncoding&);
+    virtual ~TextCodecBrew();
+    // decode() carries an incomplete trailing item over to the next call unless |flush| is set.
+    virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+    virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+    // Members are initialized in the order they are declared below.
+private:
+    TextEncoding m_encoding;
+    size_t m_numBufferedBytes;
+    unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+    ICharsetConv* m_charsetConverter;
+    // Encoding name used for the UChar side of every conversion.
+    static const char* m_internalEncodingName;
+};
+
+} // namespace WebCore
+
+#endif // TextCodecBrew_h
diff --git a/Source/WebCore/platform/text/cf/HyphenationCF.cpp b/Source/WebCore/platform/text/cf/HyphenationCF.cpp
new file mode 100644
index 0000000..3adacad
--- /dev/null
+++ b/Source/WebCore/platform/text/cf/HyphenationCF.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "Hyphenation.h"
+
+#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD)
+
+#include "AtomicStringKeyedMRUCache.h"
+#include "TextBreakIteratorInternalICU.h"
+#include <wtf/ListHashSet.h>
+#include <wtf/RetainPtr.h>
+
+namespace WebCore {
+
+#if !PLATFORM(WIN) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
+
+template<>
+RetainPtr<CFLocaleRef> AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >::createValueForNullKey()
+{
+    // The null key maps to the current locale; the value is null when hyphenation is unavailable for it.
+    RetainPtr<CFLocaleRef> currentLocale(AdoptCF, CFLocaleCopyCurrent());
+    return CFStringIsHyphenationAvailableForLocale(currentLocale.get()) ? currentLocale : 0;
+}
+
+template<>
+RetainPtr<CFLocaleRef> AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >::createValueForKey(const AtomicString& localeIdentifier)
+{
+    // Build a CFLocale from the identifier; the value is null when hyphenation is unavailable for it.
+    RetainPtr<CFStringRef> identifierString(AdoptCF, localeIdentifier.createCFString());
+    RetainPtr<CFLocaleRef> namedLocale(AdoptCF, CFLocaleCreate(kCFAllocatorDefault, identifierString.get()));
+    return CFStringIsHyphenationAvailableForLocale(namedLocale.get()) ? namedLocale : 0;
+}
+
+static AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >& cfLocaleCache() // Accessor for the single locale-identifier -> CFLocale cache used in this file.
+{
+    DEFINE_STATIC_LOCAL(AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >, cache, ()); // Constructed with no arguments.
+    return cache;
+}
+
+bool canHyphenate(const AtomicString& localeIdentifier)
+{
+    return cfLocaleCache().get(localeIdentifier); // The cached value is null when hyphenation is unavailable, so this converts to false.
+}
+
+size_t lastHyphenLocation(const UChar* characters, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier)
+{
+    // Wrap the caller's buffer without copying; kCFAllocatorNull as deallocator leaves the characters owned by the caller.
+    RetainPtr<CFStringRef> text(AdoptCF, CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, reinterpret_cast<const UniChar*>(characters), length, kCFAllocatorNull));
+    RetainPtr<CFLocaleRef> locale = cfLocaleCache().get(localeIdentifier);
+    ASSERT(locale);
+    // Search the whole string for a hyphenation point before beforeIndex; kCFNotFound is reported as 0.
+    CFIndex location = CFStringGetHyphenationLocationBeforeIndex(text.get(), beforeIndex, CFRangeMake(0, length), 0, locale.get(), 0);
+    return location == kCFNotFound ? 0 : location;
+}
+
+#else
+
+bool canHyphenate(const AtomicString&)
+{
+    return false; // Fallback branch of the platform check above: hyphenation is never reported as available.
+}
+
+size_t lastHyphenLocation(const UChar*, size_t, size_t, const AtomicString&)
+{
+    ASSERT_NOT_REACHED(); // canHyphenate() always returns false in this branch, so callers are presumably never expected to ask.
+    return 0;
+}
+
+#endif // PLATFORM(WIN) && (!defined(MAC_OS_X_VERSION_10_7) || MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7)
+
+} // namespace WebCore
+
+#endif // !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD)
diff --git a/Source/WebCore/platform/text/cf/StringCF.cpp b/Source/WebCore/platform/text/cf/StringCF.cpp
new file mode 100644
index 0000000..dcaf8fb
--- /dev/null
+++ b/Source/WebCore/platform/text/cf/StringCF.cpp
@@ -0,0 +1,55 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+
+#if PLATFORM(CF)
+
+#include <CoreFoundation/CoreFoundation.h>
+
+namespace WTF {
+
+String::String(CFStringRef str)
+{
+    if (!str)
+        return; // Nothing to copy: m_impl is left as default-constructed.
+
+    CFIndex length = CFStringGetLength(str);
+    if (!length) {
+        m_impl = StringImpl::empty();
+        return;
+    }
+    Vector<UChar, 1024> characters(length); // Scratch buffer that receives the CFString's code units.
+    CFStringGetCharacters(str, CFRangeMake(0, length), reinterpret_cast<UniChar*>(characters.data()));
+    m_impl = StringImpl::create(characters.data(), length);
+}
+
+CFStringRef String::createCFString() const
+{
+    // A null String is returned as a retained empty CFString, so the caller gets an owned reference on either path.
+    if (m_impl)
+        return m_impl->createCFString();
+    return static_cast<CFStringRef>(CFRetain(CFSTR("")));
+}
+
+}
+
+#endif // PLATFORM(CF)
diff --git a/Source/WebCore/platform/text/cf/StringImplCF.cpp b/Source/WebCore/platform/text/cf/StringImplCF.cpp
new file mode 100644
index 0000000..0157918
--- /dev/null
+++ b/Source/WebCore/platform/text/cf/StringImplCF.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2006, 2009 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include <wtf/text/StringImpl.h>
+
+#if PLATFORM(CF)
+
+#include <CoreFoundation/CoreFoundation.h>
+#include <wtf/MainThread.h>
+#include <wtf/PassRefPtr.h>
+#include <wtf/Threading.h>
+
+#if PLATFORM(MAC) && !defined(BUILDING_ON_TIGER)
+#include <objc/objc-auto.h>
+#endif
+
+namespace WTF {
+
+namespace StringWrapperCFAllocator {
+
+    static StringImpl* currentString; // Hand-off slot: set by StringImpl::createCFString() right before it creates a CFString, consumed by allocate().
+
+    static const void* retain(const void* info) // CFAllocatorContext retain callback: returns info unchanged.
+    {
+        return info;
+    }
+
+    static void release(const void*) // CFAllocatorContext release callback: asserts that it is never called.
+    {
+        ASSERT_NOT_REACHED();
+    }
+
+    static CFStringRef copyDescription(const void*)
+    {
+        return CFSTR("WTF::String-based allocator");
+    }
+
+    static void* allocate(CFIndex size, CFOptionFlags, void*) // Returns size bytes preceded by a hidden StringImpl* header slot.
+    {
+        StringImpl* underlyingString = 0;
+        if (isMainThread()) { // currentString is only read and cleared on the main thread.
+            underlyingString = currentString;
+            if (underlyingString) {
+                currentString = 0;
+                underlyingString->ref(); // Balanced by call to deref in deallocate below.
+            }
+        }
+        StringImpl** header = static_cast<StringImpl**>(fastMalloc(sizeof(StringImpl*) + size));
+        *header = underlyingString; // Null when no StringImpl was pending for this allocation.
+        return header + 1; // The caller only sees the memory after the header.
+    }
+
+    static void* reallocate(void* pointer, CFIndex newSize, CFOptionFlags, void*)
+    {
+        size_t newAllocationSize = sizeof(StringImpl*) + newSize;
+        StringImpl** header = static_cast<StringImpl**>(pointer) - 1; // Step back to the hidden header written by allocate().
+        ASSERT(!*header); // Blocks whose header holds a StringImpl are not expected to be resized.
+        header = static_cast<StringImpl**>(fastRealloc(header, newAllocationSize));
+        return header + 1;
+    }
+
+    static void deallocateOnMainThread(void* headerPointer) // Receives the header pointer (not the CF-visible pointer) from deallocate().
+    {
+        StringImpl** header = static_cast<StringImpl**>(headerPointer);
+        StringImpl* underlyingString = *header;
+        ASSERT(underlyingString);
+        underlyingString->deref(); // Balanced by call to ref in allocate above.
+        fastFree(header);
+    }
+
+    static void deallocate(void* pointer, void*)
+    {
+        StringImpl** header = static_cast<StringImpl**>(pointer) - 1;
+        StringImpl* underlyingString = *header;
+        if (!underlyingString)
+            fastFree(header); // Plain block with no StringImpl attached: free it on whatever thread we are on.
+        else {
+            if (!isMainThread())
+                callOnMainThread(deallocateOnMainThread, header); // Defer the deref and the free to the main thread.
+            else {
+                underlyingString->deref(); // Balanced by call to ref in allocate above.
+                fastFree(header);
+            }
+        }
+    }
+
+    static CFIndex preferredSize(CFIndex size, CFOptionFlags, void*)
+    {
+        // FIXME: If FastMalloc provided a "good size" callback, we'd want to use it here.
+        // Note that this optimization would help performance for strings created with the
+        // allocator that are mutable, and those typically are only created by callers who
+        // make a new string using the old string's allocator, such as some of the call
+        // sites in CFURL.
+        return size;
+    }
+
+    static CFAllocatorRef create() // Builds the CFAllocator from the callbacks above; returns 0 when Objective-C garbage collection is enabled.
+    {
+#if PLATFORM(MAC) && !defined(BUILDING_ON_TIGER)
+        // Since garbage collection isn't compatible with custom allocators, don't use this at all when garbage collection is active.
+        if (objc_collectingEnabled())
+            return 0;
+#endif
+        CFAllocatorContext context = { 0, 0, retain, release, copyDescription, allocate, reallocate, deallocate, preferredSize };
+        return CFAllocatorCreate(0, &context);
+    }
+
+    static CFAllocatorRef allocator()
+    {
+        static CFAllocatorRef allocator = create(); // Created once on first use; may be 0 (see create()).
+        return allocator;
+    }
+
+}
+
+CFStringRef StringImpl::createCFString()
+{
+    CFAllocatorRef allocator = (m_length && isMainThread()) ? StringWrapperCFAllocator::allocator() : 0; // Wrapper allocator only for non-empty strings on the main thread.
+    if (!allocator)
+        return CFStringCreateWithCharacters(0, reinterpret_cast<const UniChar*>(m_data), m_length); // Fallback path: an ordinary CFString, not tied to this StringImpl.
+
+    // Put pointer to the StringImpl in a global so the allocator can store it with the CFString.
+    ASSERT(!StringWrapperCFAllocator::currentString);
+    StringWrapperCFAllocator::currentString = this;
+
+    CFStringRef string = CFStringCreateWithCharactersNoCopy(allocator, reinterpret_cast<const UniChar*>(m_data), m_length, kCFAllocatorNull); // The CFString points at m_data; allocate() refs this StringImpl.
+
+    // The allocator cleared the global when it read it, but also clear it here just in case.
+    ASSERT(!StringWrapperCFAllocator::currentString);
+    StringWrapperCFAllocator::currentString = 0;
+
+    return string;
+}
+
+// On StringImpl creation we could check if the allocator is the StringWrapperCFAllocator.
+// If it is, then we could find the original StringImpl and just return that. But to
+// do that we'd have to compute the offset from CFStringRef to the allocated block;
+// the CFStringRef is *not* at the start of an allocated block. Testing shows 1000x
+// more calls to createCFString than calls to the create functions with the appropriate
+// allocator, so it's probably not urgent to optimize that case.
+
+}
+
+#endif // PLATFORM(CF)
diff --git a/Source/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp b/Source/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp
new file mode 100644
index 0000000..e390a65
--- /dev/null
+++ b/Source/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2008, 2009 Google Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+#include "Language.h"
+#include "PlatformString.h"
+#include <wtf/StdLibExtras.h>
+#include <wtf/text/CString.h>
+
+namespace WebCore {
+
+static const char* UILanguage()
+{
+    // Chrome's UI language can be different from the OS UI language on Windows.
+    // We want to return Chrome's UI language here.
+    DEFINE_STATIC_LOCAL(CString, locale, (defaultLanguage().latin1())); // Latin-1 copy of defaultLanguage(), kept in a static.
+    return locale.data(); // Points into the static CString above.
+}
+
+const char* currentSearchLocaleID()
+{
+    return UILanguage(); // Search uses the same locale string as text breaking in this port.
+}
+
+const char* currentTextBreakLocaleID()
+{
+    return UILanguage(); // Text breaking uses the same locale string as search in this port.
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp b/Source/WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp
new file mode 100644
index 0000000..0056869
--- /dev/null
+++ b/Source/WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2007 Alp Toker <alp@atoker.com>
+ * Copyright (C) 2009-2010 ProFUSION embedded systems
+ * Copyright (C) 2009-2010 Samsung Electronics
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+const char* currentSearchLocaleID()
+{
+    // FIXME: Should use system locale.
+    return ""; // Placeholder: an empty locale ID, regardless of the system settings.
+}
+
+const char* currentTextBreakLocaleID()
+{
+    return "en_us"; // FIXME: Should use system locale.
+}
+
+}
diff --git a/Source/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp b/Source/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp
new file mode 100644
index 0000000..990e331
--- /dev/null
+++ b/Source/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp
@@ -0,0 +1,365 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
+ * Copyright (C) 2010 Igalia S.L.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+
+#include "TextBreakIterator.h"
+
+#include "GOwnPtr.h"
+#include <pango/pango.h>
+using namespace std;
+
+#define UTF8_IS_SURROGATE(character) ((character) >= 0x10000 && (character) <= 0x10FFFF) // True for code points above the BMP, which take two UTF-16 code units; the argument is evaluated twice.
+
+namespace WebCore {
+
+class CharacterIterator { // Keeps a UTF-8 copy of a UTF-16 string and tracks one position as both a character index and a UTF-16 index.
+public:
+    bool setText(const UChar* string, int length);
+    const gchar* getText() { return m_utf8.get(); }
+    int getLength() { return m_length; }
+    glong getSize() { return m_size; }
+    void setIndex(int index);
+    int getIndex() { return m_index; }
+    void setUTF16Index(int index);
+    int getUTF16Index() { return m_utf16Index; }
+    int getUTF16Length() { return m_utf16Length; }
+    int first();
+    int last();
+    int next();
+    int previous();
+private:
+    int characterSize(int index); // Number of UTF-16 code units (0, 1 or 2) for the character at the given character index.
+
+    GOwnPtr<char> m_utf8; // UTF-8 conversion of the text passed to setText().
+    int m_length; // Length of m_utf8 in characters (from g_utf8_strlen).
+    long m_size; // Length of m_utf8 in bytes.
+    int m_index; // Current position as a character index.
+    int m_utf16Index; // Current position as an index into the original UTF-16 string.
+    int m_utf16Length; // Length of the original string in UTF-16 code units.
+};
+
+int CharacterIterator::characterSize(int index)
+{
+    if (index < 0 || index == m_length)
+        return 0; // Nothing to measure before the start or at the end position.
+    if (m_utf16Length == m_length)
+        return 1; // Same count in both encodings: no character takes two UTF-16 code units.
+
+    // Otherwise decode the code point at this character offset and test it.
+    const gunichar codePoint = g_utf8_get_char(g_utf8_offset_to_pointer(m_utf8.get(), index));
+    return UTF8_IS_SURROGATE(codePoint) ? 2 : 1;
+}
+
+bool CharacterIterator::setText(const UChar* string, int length)
+{
+    long byteCount = 0;
+    m_utf8.set(g_utf16_to_utf8(string, length, 0, &byteCount, 0));
+    if (!byteCount)
+        return false; // No bytes were reported by the conversion; the remaining members are left untouched.
+
+    // Cache both measures of the text and rewind to the start.
+    m_size = byteCount;
+    m_length = g_utf8_strlen(m_utf8.get(), byteCount);
+    m_utf16Length = length;
+    m_index = 0;
+    m_utf16Index = 0;
+    return true;
+}
+
+void CharacterIterator::setIndex(int index) // Moves to the given character index, clamped to [0, m_length], and keeps m_utf16Index in step.
+{
+    if (index == m_index)
+        return;
+    if (index <= 0)
+        m_index = m_utf16Index = 0;
+    else if (index >= m_length) {
+        m_index = m_length;
+        m_utf16Index = m_utf16Length;
+    } else if (m_length == m_utf16Length)
+        m_index = m_utf16Index = index; // No character takes two UTF-16 code units, so both indices coincide.
+    else {
+        m_index = index;
+        int utf16Index = 0;
+        // Walk the UTF-8 text once rather than re-scanning from the start for every character.
+        for (const gchar* cursor = m_utf8.get(); index > 0; --index, cursor = g_utf8_next_char(cursor)) {
+            const gunichar character = g_utf8_get_char(cursor);
+            utf16Index += UTF8_IS_SURROGATE(character) ? 2 : 1;
+        }
+        m_utf16Index = utf16Index;
+    }
+}
+
+void CharacterIterator::setUTF16Index(int index) // Moves to the given UTF-16 index, clamped to [0, m_utf16Length], and keeps m_index in step.
+{
+    if (index == m_utf16Index)
+        return;
+    if (index <= 0)
+        m_utf16Index = m_index = 0;
+    else if (index >= m_utf16Length) {
+        m_utf16Index = m_utf16Length;
+        m_index = m_length;
+    } else if (m_length == m_utf16Length)
+        m_utf16Index = m_index = index; // No character takes two UTF-16 code units, so both indices coincide.
+    else {
+        m_utf16Index = index;
+        int utf16Index = 0;
+        int utf8Index = 0; // Single pass over the UTF-8 text; the m_length bound keeps the walk inside the buffer.
+        for (const gchar* cursor = m_utf8.get(); utf16Index < index && utf8Index < m_length; ++utf8Index, cursor = g_utf8_next_char(cursor)) {
+            const gunichar character = g_utf8_get_char(cursor);
+            utf16Index += UTF8_IS_SURROGATE(character) ? 2 : 1;
+        }
+        m_index = utf8Index;
+    }
+}
+
+int CharacterIterator::first() // Rewinds both indices to 0 and returns the character index (0).
+{
+    m_index = m_utf16Index = 0;
+    return m_index;
+}
+
+int CharacterIterator::last() // Moves both indices to the end position (one past the last character) and returns the character index.
+{
+    m_index = m_length;
+    m_utf16Index = m_utf16Length;
+    return m_index;
+}
+
+int CharacterIterator::next()
+{
+    // Stepping past the end position parks both indices at TextBreakDone.
+    if (m_index >= m_length) {
+        m_utf16Index = TextBreakDone;
+        m_index = TextBreakDone;
+        return m_index;
+    }
+
+    // Advance the UTF-16 index by the width of the character being left, clamped to the text length.
+    m_utf16Index = min(m_utf16Index + characterSize(m_index), m_utf16Length);
+    m_index = m_index + 1;
+    return m_index;
+}
+
+int CharacterIterator::previous()
+{
+    // Stepping back from the first position parks both indices at TextBreakDone.
+    if (m_index <= 0) {
+        m_utf16Index = TextBreakDone;
+        m_index = TextBreakDone;
+        return m_index;
+    }
+
+    // Retreat the UTF-16 index by the width of the character being entered, never going below zero.
+    m_utf16Index = max(m_utf16Index - characterSize(m_index - 1), 0);
+    m_index = m_index - 1;
+    return m_index;
+}
+
+enum UBreakIteratorType { // Selects which PangoLogAttr flag textBreakNext()/textBreakPrevious() treat as a break.
+    UBRK_CHARACTER, // is_cursor_position
+    UBRK_WORD, // is_word_start or is_word_end
+    UBRK_LINE, // is_line_break
+    UBRK_SENTENCE // is_sentence_boundary
+};
+
+class TextBreakIterator { // State for one Pango-backed break iterator; filled in by setUpIterator().
+public:
+    UBreakIteratorType m_type; // Kind of break this iterator reports.
+    PangoLogAttr* m_logAttrs; // g_new0-allocated array with one entry per character plus one for the end position.
+    CharacterIterator m_charIterator; // Text and current position.
+};
+
+static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator, // Lazily creates the caller's shared iterator and points it at new text.
+    UBreakIteratorType type, const UChar* string, int length)
+{
+    if (!string)
+        return 0;
+
+    if (!createdIterator) {
+        iterator = new TextBreakIterator();
+        createdIterator = true;
+    }
+    if (!iterator) // NOTE(review): cannot trigger after a successful new above; kept as a defensive check.
+        return 0;
+
+    if (!iterator->m_charIterator.setText(string, length)) // Fails when the UTF-8 conversion reports no bytes; the previous log attributes are left in place.
+        return 0;
+
+    int charLength = iterator->m_charIterator.getLength();
+
+    iterator->m_type = type;
+    if (createdIterator) // NOTE(review): always true at this point (set above on first use).
+        g_free(iterator->m_logAttrs); // Drop the attributes computed for the previous text.
+    iterator->m_logAttrs = g_new0(PangoLogAttr, charLength + 1); // One extra entry for the position after the last character.
+    pango_get_log_attrs(iterator->m_charIterator.getText(), iterator->m_charIterator.getSize(),
+                        -1, 0, iterator->m_logAttrs, charLength + 1);
+
+    return iterator;
+}
+
+TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+{
+    static bool created = false;
+    static TextBreakIterator* sharedIterator; // One instance serves every call; each call re-targets it at the new text.
+    return setUpIterator(created, sharedIterator, UBRK_CHARACTER, string, length);
+}
+
+TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+{
+    // FIXME: This needs closer inspection to achieve behaviour identical to the ICU version.
+    return characterBreakIterator(string, length); // Shares (and re-targets) the character break iterator.
+}
+
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    static bool created = false;
+    static TextBreakIterator* sharedIterator; // One instance serves every call; each call re-targets it at the new text.
+    return setUpIterator(created, sharedIterator, UBRK_WORD, string, length);
+}
+
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+    static bool created = false;
+    static TextBreakIterator* sharedIterator; // One instance serves every call; each call re-targets it at the new text.
+    return setUpIterator(created, sharedIterator, UBRK_LINE, string, length);
+}
+
+TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+{
+    static bool created = false;
+    static TextBreakIterator* sharedIterator; // One instance serves every call; each call re-targets it at the new text.
+    return setUpIterator(created, sharedIterator, UBRK_SENTENCE, string, length);
+}
+
+int textBreakFirst(TextBreakIterator* iterator) // Rewinds the iterator to the start and returns the resulting UTF-16 index.
+{
+    iterator->m_charIterator.first();
+    return iterator->m_charIterator.getUTF16Index();
+}
+
+int textBreakLast(TextBreakIterator* iterator)
+{
+    // textBreakLast is not meant to find just any break according to iterator->m_type
+    // but really the one near the last character.
+    // (Compare the ICU documentation for ubrk_first and ubrk_last.)
+    // From ICU docs for ubrk_last:
+    // "Determine the index immediately beyond the last character in the text being scanned."
+
+    // So we should advance or traverse back based on iterator->m_logAttrs cursor positions.
+    // If last character position in the original string is a whitespace,
+    // traverse to the left until the first non-white character position is found
+    // and return the position of the first white-space char after this one.
+    // Otherwise return m_length, as "the first character beyond the last" is outside our string.
+    
+    bool whiteSpaceAtTheEnd = true;
+    int nextWhiteSpacePos = iterator->m_charIterator.getLength();
+
+    int pos = iterator->m_charIterator.last();
+    while (pos >= 0 && whiteSpaceAtTheEnd) {
+        if (iterator->m_logAttrs[pos].is_cursor_position) {
+            if (whiteSpaceAtTheEnd = iterator->m_logAttrs[pos].is_white) // Intentional assignment: remember whether this position is white space, then test it.
+                nextWhiteSpacePos = pos;
+        }
+        pos = iterator->m_charIterator.previous(); // Yields TextBreakDone once the start has been passed.
+    }
+    iterator->m_charIterator.setIndex(nextWhiteSpacePos);
+    return iterator->m_charIterator.getUTF16Index();
+}
+
+int textBreakNext(TextBreakIterator* iterator) // Advances to the next position flagged as a break for m_type; returns its UTF-16 index.
+{
+    while (iterator->m_charIterator.next() != TextBreakDone) {
+        int index = iterator->m_charIterator.getIndex();
+
+        // FIXME: UBRK_WORD case: Single multibyte characters (i.e. white space around them), such as the euro symbol €, 
+        // are not marked as word_start & word_end as opposed to the way ICU does it.
+        // This leads to - for example - different word selection behaviour when right clicking.
+
+        if ((iterator->m_type == UBRK_LINE && iterator->m_logAttrs[index].is_line_break)
+            || (iterator->m_type == UBRK_WORD && (iterator->m_logAttrs[index].is_word_start || iterator->m_logAttrs[index].is_word_end))
+            || (iterator->m_type == UBRK_CHARACTER && iterator->m_logAttrs[index].is_cursor_position)
+            || (iterator->m_type == UBRK_SENTENCE && iterator->m_logAttrs[index].is_sentence_boundary)) {
+            break;
+        }
+    }
+    return iterator->m_charIterator.getUTF16Index(); // TextBreakDone when the loop ran off the end without finding a break.
+}
+
+int textBreakPrevious(TextBreakIterator* iterator) // Steps back to the previous position flagged as a break for m_type; returns its UTF-16 index.
+{
+    while (iterator->m_charIterator.previous() != TextBreakDone) {
+        int index = iterator->m_charIterator.getIndex();
+
+        if ((iterator->m_type == UBRK_LINE && iterator->m_logAttrs[index].is_line_break)
+            || (iterator->m_type == UBRK_WORD && (iterator->m_logAttrs[index].is_word_start || iterator->m_logAttrs[index].is_word_end))
+            || (iterator->m_type == UBRK_CHARACTER && iterator->m_logAttrs[index].is_cursor_position)
+            || (iterator->m_type == UBRK_SENTENCE && iterator->m_logAttrs[index].is_sentence_boundary)) {
+            break;
+        }
+    }
+    return iterator->m_charIterator.getUTF16Index(); // TextBreakDone when the loop ran off the start without finding a break.
+}
+
+int textBreakPreceding(TextBreakIterator* iterator, int offset) // Break before the given UTF-16 offset.
+{
+    if (offset > iterator->m_charIterator.getUTF16Length())
+        return TextBreakDone; // Offset lies beyond the text.
+    if (offset < 0)
+        return 0; // Negative offsets are answered with 0 without moving the iterator.
+    iterator->m_charIterator.setUTF16Index(offset);
+    return textBreakPrevious(iterator);
+}
+
+int textBreakFollowing(TextBreakIterator* iterator, int offset) // Break after the given UTF-16 offset.
+{
+    if (offset > iterator->m_charIterator.getUTF16Length())
+        return TextBreakDone; // Offset lies beyond the text.
+    if (offset < 0)
+        return 0; // Negative offsets are answered with 0 without moving the iterator.
+    iterator->m_charIterator.setUTF16Index(offset);
+    return textBreakNext(iterator);
+}
+
+int textBreakCurrent(TextBreakIterator* iterator)
+{
+    return iterator->m_charIterator.getUTF16Index(); // Current position as a UTF-16 index; the iterator is not moved.
+}
+
+bool isTextBreak(TextBreakIterator* iterator, int offset) // Reports whether the given UTF-16 offset is a break position; moves the iterator.
+{
+    if (!offset)
+        return true; // The start of the text always counts as a break.
+    if (offset > iterator->m_charIterator.getUTF16Length())
+        return false;
+
+    iterator->m_charIterator.setUTF16Index(offset);
+
+    int index = iterator->m_charIterator.getIndex();
+    iterator->m_charIterator.previous(); // Step back one character, then see whether the next break lands on the same index.
+    textBreakNext(iterator);
+    return iterator->m_charIterator.getIndex() == index;
+}
+
+}
diff --git a/Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp b/Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp
new file mode 100644
index 0000000..35e5a05
--- /dev/null
+++ b/Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2007 Alp Toker <alp@atoker.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+const char* currentSearchLocaleID()
+{
+    // FIXME: Should use system locale.
+    return ""; // Placeholder: an empty locale ID, regardless of the system settings.
+}
+
+const char* currentTextBreakLocaleID()
+{
+    // FIXME: Should use system locale.
+    return "en_us"; // Placeholder: hard-coded locale ID, regardless of the system settings.
+}
+
+}
diff --git a/Source/WebCore/platform/text/gtk/TextCodecGtk.cpp b/Source/WebCore/platform/text/gtk/TextCodecGtk.cpp
new file mode 100644
index 0000000..c5bd7e8
--- /dev/null
+++ b/Source/WebCore/platform/text/gtk/TextCodecGtk.cpp
@@ -0,0 +1,578 @@
+/*
+ * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextCodecGtk.h"
+
+#include <gio/gio.h>
+#include "GOwnPtr.h"
+#include "Logging.h"
+#include "PlatformString.h"
+#include <wtf/Assertions.h>
+#include <wtf/HashMap.h>
+#include <wtf/text/CString.h>
+
+using std::min;
+
+namespace WebCore {
+
+// TextCodec's appendOmittingBOM() is gone (http://trac.webkit.org/changeset/33380).
+// That's why we need to avoid generating extra BOMs in the conversion result.
+// This is achieved by specifying the UTF-16 codec's endianness explicitly when initializing GLib.
+
+// Name of the UTF-16 encoding used on the WebCore side of every conversion in
+// this file. The byte order is spelled out to match G_BYTE_ORDER.
+#if (G_BYTE_ORDER == G_BIG_ENDIAN)
+static const gchar* internalEncodingName = "UTF-16BE";
+#else
+static const gchar* internalEncodingName = "UTF-16LE";
+#endif
+
+
+// Size, in bytes, of the stack buffers that decode() and encode() hand to
+// g_converter_convert() as the output area.
+const size_t ConversionBufferSize = 16384;
+    
+
+// Factory handed to the TextCodecRegistrar by registerCodecIfAvailable().
+// Creates a TextCodecGtk for the given encoding; the second argument is unused.
+static PassOwnPtr<TextCodec> newTextCodecGtk(const TextEncoding& encoding, const void*)
+{
+    return new TextCodecGtk(encoding);
+}
+
+// Returns true only when g_iconv_open() succeeds in both directions between
+// encodingName and the internal UTF-16 encoding. Each probe converter is
+// closed again right after it has been opened.
+static bool isEncodingAvailable(const gchar* encodingName)
+{
+    const GIConv openFailed = reinterpret_cast<GIConv>(-1);
+
+    // Decoding direction: encodingName -> internal encoding.
+    GIConv decodeProbe = g_iconv_open(internalEncodingName, encodingName);
+    if (decodeProbe == openFailed)
+        return false;
+    g_iconv_close(decodeProbe);
+
+    // Encoding direction: internal encoding -> encodingName.
+    GIConv encodeProbe = g_iconv_open(encodingName, internalEncodingName);
+    if (encodeProbe == openFailed)
+        return false;
+    g_iconv_close(encodeProbe);
+
+    return true;
+}
+
+// Registers canonicalName under its own name when the encoding is available.
+// Returns whether the registration happened, so callers can skip the aliases
+// of an encoding that is missing.
+static bool registerEncodingNameIfAvailable(EncodingNameRegistrar registrar, const char* canonicalName)
+{
+    if (!isEncodingAvailable(canonicalName))
+        return false;
+
+    registrar(canonicalName, canonicalName);
+    return true;
+}
+
+// Maps aliasName to canonicalName, but only when a converter can be opened
+// under the alias spelling itself.
+static void registerEncodingAliasIfAvailable(EncodingNameRegistrar registrar, const char* canonicalName, const char* aliasName)
+{
+    if (!isEncodingAvailable(aliasName))
+        return;
+
+    registrar(aliasName, canonicalName);
+}
+
+// Registers newTextCodecGtk as the factory for codecName when the encoding is
+// available; no additional data pointer is passed to the registrar.
+static void registerCodecIfAvailable(TextCodecRegistrar registrar, const char* codecName)
+{
+    if (!isEncodingAvailable(codecName))
+        return;
+
+    registrar(codecName, newTextCodecGtk, 0);
+}
+
+// Registers the base set of encoding names: UTF-8, the UTF-32 variants, and
+// ISO-8859-1 together with its aliases. Aliases are attempted only when the
+// canonical ISO-8859-1 name was registered.
+void TextCodecGtk::registerBaseEncodingNames(EncodingNameRegistrar registrar)
+{
+    // Unicode
+    static const char* const unicodeNames[] = { "UTF-8", "UTF-32", "UTF-32BE", "UTF-32LE" };
+    for (size_t i = 0; i < sizeof(unicodeNames) / sizeof(unicodeNames[0]); ++i)
+        registerEncodingNameIfAvailable(registrar, unicodeNames[i]);
+
+    // Western
+    static const char* const latin1Name = "ISO-8859-1";
+    if (!registerEncodingNameIfAvailable(registrar, latin1Name))
+        return;
+
+    static const char* const latin1Aliases[] = {
+        "CP819",
+        "IBM819",
+        "ISO-IR-100",
+        "ISO8859-1",
+        "ISO_8859-1",
+        "ISO_8859-1:1987",
+        "L1",
+        "LATIN1",
+        "CSISOLATIN1"
+    };
+    for (size_t i = 0; i < sizeof(latin1Aliases) / sizeof(latin1Aliases[0]); ++i)
+        registerEncodingAliasIfAvailable(registrar, latin1Name, latin1Aliases[i]);
+}
+
+// Registers a codec factory for each base encoding that is available.
+void TextCodecGtk::registerBaseCodecs(TextCodecRegistrar registrar)
+{
+    static const char* const baseCodecNames[] = {
+        // Unicode
+        "UTF-8",
+        "UTF-32",
+        "UTF-32BE",
+        "UTF-32LE",
+        // Western
+        "ISO-8859-1"
+    };
+
+    for (size_t i = 0; i < sizeof(baseCodecNames) / sizeof(baseCodecNames[0]); ++i)
+        registerCodecIfAvailable(registrar, baseCodecNames[i]);
+}
+
+// Registers the extended (non-base) encoding names, grouped by script/region.
+// For every group the canonical name is registered first; its aliases are only
+// attempted when the canonical name turned out to be available. Each alias is
+// additionally checked for availability on its own before being registered.
+void TextCodecGtk::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
+{
+    // Western
+    if (registerEncodingNameIfAvailable(registrar, "MACROMAN")) {
+        registerEncodingAliasIfAvailable(registrar, "MACROMAN", "MAC");
+        registerEncodingAliasIfAvailable(registrar, "MACROMAN", "MACINTOSH");
+        registerEncodingAliasIfAvailable(registrar, "MACROMAN", "CSMACINTOSH");
+    }
+
+    // Japanese
+    if (registerEncodingNameIfAvailable(registrar, "Shift_JIS")) {
+        registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "MS_KANJI");
+        registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "SHIFT-JIS");
+        registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "SJIS");
+        registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "CSSHIFTJIS");
+    }
+    if (registerEncodingNameIfAvailable(registrar, "EUC-JP")) {
+        registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EUC_JP");
+        registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EUCJP");
+        registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE");
+        registerEncodingAliasIfAvailable(registrar, "EUC-JP", "CSEUCPKDFMTJAPANESE");
+    }
+    registerEncodingNameIfAvailable(registrar, "ISO-2022-JP");
+
+    // Traditional Chinese
+    if (registerEncodingNameIfAvailable(registrar, "BIG5")) {
+        registerEncodingAliasIfAvailable(registrar, "BIG5", "BIG-5");
+        registerEncodingAliasIfAvailable(registrar, "BIG5", "BIG-FIVE");
+        registerEncodingAliasIfAvailable(registrar, "BIG5", "BIGFIVE");
+        registerEncodingAliasIfAvailable(registrar, "BIG5", "CN-BIG5");
+        registerEncodingAliasIfAvailable(registrar, "BIG5", "CSBIG5");
+    }
+    if (registerEncodingNameIfAvailable(registrar, "BIG5-HKSCS")) {
+        registerEncodingAliasIfAvailable(registrar, "BIG5-HKSCS", "BIG5-HKSCS:2004");
+        registerEncodingAliasIfAvailable(registrar, "BIG5-HKSCS", "BIG5HKSCS");
+    }
+    registerEncodingNameIfAvailable(registrar, "CP950");
+
+    // Korean
+    if (registerEncodingNameIfAvailable(registrar, "ISO-2022-KR"))
+        registerEncodingAliasIfAvailable(registrar, "ISO-2022-KR", "CSISO2022KR");
+    if (registerEncodingNameIfAvailable(registrar, "CP949"))
+        registerEncodingAliasIfAvailable(registrar, "CP949", "UHC");
+    if (registerEncodingNameIfAvailable(registrar, "EUC-KR"))
+        registerEncodingAliasIfAvailable(registrar, "EUC-KR", "CSEUCKR");
+
+    // Arabic
+    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-6")) {
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ARABIC");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ASMO-708");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ECMA-114");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO-IR-127");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO8859-6");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO_8859-6");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO_8859-6:1987");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "CSISOLATINARABIC");
+    }
+    // rearranged, windows-1256 now declared the canonical name and put to lowercase to fix /fast/encoding/ahram-org-eg.html test case
+    if (registerEncodingNameIfAvailable(registrar, "windows-1256")) {
+        registerEncodingAliasIfAvailable(registrar, "windows-1256", "CP1256");
+        registerEncodingAliasIfAvailable(registrar, "windows-1256", "MS-ARAB");
+    }
+
+    // Hebrew
+    // The canonical name is not repeated as an alias of itself: the call above
+    // the alias list already maps "ISO-8859-8" to "ISO-8859-8".
+    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-8")) {
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "HEBREW");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO-IR-138");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO8859-8");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO_8859-8");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO_8859-8:1988");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "CSISOLATINHEBREW");
+    }
+    // rearranged, moved windows-1255 as canonical and lowercased, fixing /fast/encoding/meta-charset.html
+    if (registerEncodingNameIfAvailable(registrar, "windows-1255")) {
+        registerEncodingAliasIfAvailable(registrar, "windows-1255", "CP1255");
+        registerEncodingAliasIfAvailable(registrar, "windows-1255", "MS-HEBR");
+    }
+
+    // Greek
+    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-7")) {
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ECMA-118");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ELOT_928");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "GREEK");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "GREEK8");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO-IR-126");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO8859-7");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7:1987");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7:2003");
+        // Full "cs" alias, matching CSISOLATINARABIC / CSISOLATINHEBREW /
+        // CSISOLATINCYRILLIC in the other ISO-8859 groups.
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "CSISOLATINGREEK");
+    }
+    if (registerEncodingNameIfAvailable(registrar, "CP869")) {
+        registerEncodingAliasIfAvailable(registrar, "CP869", "869");
+        registerEncodingAliasIfAvailable(registrar, "CP869", "CP-GR");
+        registerEncodingAliasIfAvailable(registrar, "CP869", "IBM869");
+        registerEncodingAliasIfAvailable(registrar, "CP869", "CSIBM869");
+    }
+    registerEncodingNameIfAvailable(registrar, "WINDOWS-1253");
+
+    // Cyrillic
+    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-5")) {
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "CYRILLIC");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO-IR-144");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO8859-5");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO_8859-5");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO_8859-5:1988");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "CSISOLATINCYRILLIC");
+    }
+    if (registerEncodingNameIfAvailable(registrar, "KOI8-R"))
+        registerEncodingAliasIfAvailable(registrar, "KOI8-R", "CSKOI8R");
+    if (registerEncodingNameIfAvailable(registrar, "CP866")) {
+        registerEncodingAliasIfAvailable(registrar, "CP866", "866");
+        registerEncodingAliasIfAvailable(registrar, "CP866", "IBM866");
+        registerEncodingAliasIfAvailable(registrar, "CP866", "CSIBM866");
+    }
+    registerEncodingNameIfAvailable(registrar, "KOI8-U");
+    // CP1251 added to pass /fast/encoding/charset-cp1251.html
+    if (registerEncodingNameIfAvailable(registrar, "windows-1251"))
+        registerEncodingAliasIfAvailable(registrar, "windows-1251", "CP1251");
+    if (registerEncodingNameIfAvailable(registrar, "mac-cyrillic")) {
+        registerEncodingAliasIfAvailable(registrar, "mac-cyrillic", "MACCYRILLIC");
+        registerEncodingAliasIfAvailable(registrar, "mac-cyrillic", "x-mac-cyrillic");
+    }
+
+    // Thai
+    if (registerEncodingNameIfAvailable(registrar, "CP874"))
+        registerEncodingAliasIfAvailable(registrar, "CP874", "WINDOWS-874");
+    registerEncodingNameIfAvailable(registrar, "TIS-620");
+
+    // Simplified Chinese
+    registerEncodingNameIfAvailable(registrar, "GBK");
+    if (registerEncodingNameIfAvailable(registrar, "HZ"))
+        registerEncodingAliasIfAvailable(registrar, "HZ", "HZ-GB-2312");
+    registerEncodingNameIfAvailable(registrar, "GB18030");
+    if (registerEncodingNameIfAvailable(registrar, "EUC-CN")) {
+        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "EUCCN");
+        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "GB2312");
+        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "CN-GB");
+        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "CSGB2312");
+        registerEncodingAliasIfAvailable(registrar, "EUC-CN", "EUC_CN");
+    }
+    if (registerEncodingNameIfAvailable(registrar, "GB_2312-80")) {
+        registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "CHINESE");
+        registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "csISO58GB231280");
+        registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "GB2312.1980-0");
+        registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "ISO-IR-58");
+    }
+
+    // Central European
+    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-2")) {
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO-IR-101");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO8859-2");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO_8859-2");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO_8859-2:1987");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "L2");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "LATIN2");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "CSISOLATIN2");
+    }
+    if (registerEncodingNameIfAvailable(registrar, "CP1250")) {
+        registerEncodingAliasIfAvailable(registrar, "CP1250", "MS-EE");
+        registerEncodingAliasIfAvailable(registrar, "CP1250", "WINDOWS-1250");
+    }
+    registerEncodingNameIfAvailable(registrar, "MAC-CENTRALEUROPE");
+
+    // Vietnamese
+    if (registerEncodingNameIfAvailable(registrar, "CP1258"))
+        registerEncodingAliasIfAvailable(registrar, "CP1258", "WINDOWS-1258");
+
+    // Turkish
+    if (registerEncodingNameIfAvailable(registrar, "CP1254")) {
+        registerEncodingAliasIfAvailable(registrar, "CP1254", "MS-TURK");
+        registerEncodingAliasIfAvailable(registrar, "CP1254", "WINDOWS-1254");
+    }
+    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-9")) {
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO-IR-148");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO8859-9");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO_8859-9");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO_8859-9:1989");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "L5");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "LATIN5");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "CSISOLATIN5");
+    }
+
+    // Baltic
+    if (registerEncodingNameIfAvailable(registrar, "CP1257")) {
+        registerEncodingAliasIfAvailable(registrar, "CP1257", "WINBALTRIM");
+        registerEncodingAliasIfAvailable(registrar, "CP1257", "WINDOWS-1257");
+    }
+    if (registerEncodingNameIfAvailable(registrar, "ISO-8859-4")) {
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO-IR-110");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO8859-4");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO_8859-4");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO_8859-4:1988");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "L4");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "LATIN4");
+        registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "CSISOLATIN4");
+    }
+}
+
+// Registers a codec factory for each extended encoding that is available.
+// The names below are the canonical names used by
+// registerExtendedEncodingNames(), in the same order.
+void TextCodecGtk::registerExtendedCodecs(TextCodecRegistrar registrar)
+{
+    static const char* const extendedCodecNames[] = {
+        // Western
+        "MACROMAN",
+
+        // Japanese
+        "Shift_JIS",
+        "EUC-JP",
+        "ISO-2022-JP",
+
+        // Traditional Chinese
+        "BIG5",
+        "BIG5-HKSCS",
+        "CP950",
+
+        // Korean
+        "ISO-2022-KR",
+        "CP949",
+        "EUC-KR",
+
+        // Arabic
+        "ISO-8859-6",
+        // rearranged, windows-1256 now declared the canonical name and put to lowercase to fix /fast/encoding/ahram-org-eg.html test case
+        "windows-1256",
+
+        // Hebrew
+        "ISO-8859-8",
+        // rearranged, moved windows-1255 as canonical and lowercased, fixing /fast/encoding/meta-charset.html
+        "windows-1255",
+
+        // Greek
+        "ISO-8859-7",
+        "CP869",
+        "WINDOWS-1253",
+
+        // Cyrillic
+        "ISO-8859-5",
+        "KOI8-R",
+        "CP866",
+        "KOI8-U",
+        // CP1251 added to pass /fast/encoding/charset-cp1251.html
+        "windows-1251",
+        "mac-cyrillic",
+
+        // Thai
+        "CP874",
+        "TIS-620",
+
+        // Simplified Chinese
+        "GBK",
+        "HZ",
+        "GB18030",
+        "EUC-CN",
+        "GB_2312-80",
+
+        // Central European
+        "ISO-8859-2",
+        "CP1250",
+        "MAC-CENTRALEUROPE",
+
+        // Vietnamese
+        "CP1258",
+
+        // Turkish
+        "CP1254",
+        "ISO-8859-9",
+
+        // Baltic
+        "CP1257",
+        "ISO-8859-4"
+    };
+
+    for (size_t i = 0; i < sizeof(extendedCodecNames) / sizeof(extendedCodecNames[0]); ++i)
+        registerCodecIfAvailable(registrar, extendedCodecNames[i]);
+}
+
+// Stores the encoding and starts with no carried-over input bytes. The
+// converters are not created here; decode() and encode() create them on
+// first use.
+TextCodecGtk::TextCodecGtk(const TextEncoding& encoding)
+    : m_encoding(encoding)
+    , m_numBufferedBytes(0)
+{
+}
+
+// Nothing is released explicitly here; the body is intentionally empty.
+TextCodecGtk::~TextCodecGtk()
+{
+}
+
+// Creates the converter used by decode(): from this codec's encoding to the
+// internal UTF-16 encoding. Must only be called while no decoder exists.
+void TextCodecGtk::createIConvDecoder() const
+{
+    ASSERT(!m_iconvDecoder);
+
+    // No GError is requested (last argument is 0); decode() checks
+    // m_iconvDecoder again after this call to detect failure.
+    GCharsetConverter* decoder = g_charset_converter_new(internalEncodingName, m_encoding.name(), 0);
+    m_iconvDecoder = adoptGRef(decoder);
+}
+
+// Creates the converter used by encode(): from the internal UTF-16 encoding
+// to this codec's encoding. Must only be called while no encoder exists.
+void TextCodecGtk::createIConvEncoder() const
+{
+    ASSERT(!m_iconvEncoder);
+
+    // No GError is requested (last argument is 0); encode() checks
+    // m_iconvEncoder again after this call to detect failure.
+    GCharsetConverter* encoder = g_charset_converter_new(m_encoding.name(), internalEncodingName, 0);
+    m_iconvEncoder = adoptGRef(encoder);
+}
+
+// Decodes |length| bytes of |bytes| from this codec's encoding into a String.
+//
+// The decoder is created on first use. Bytes carried over from a previous call
+// (an incomplete trailing character) are prepended to the new input. An empty
+// input is passed to the converter as G_CONVERTER_INPUT_AT_END, and |flush|
+// adds G_CONVERTER_FLUSH.
+//
+// Invalid bytes are skipped one at a time; |sawError| is set for them only
+// when |stopOnError| is true, which also ends the conversion loop. Any other
+// converter error sets |sawError|, drops the carried-over state and returns a
+// null String. A null String is also returned when no decoder can be created.
+String TextCodecGtk::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
+{
+    // Get a converter for the passed-in encoding.
+    if (!m_iconvDecoder)
+        createIConvDecoder();
+    if (!m_iconvDecoder) {
+        LOG_ERROR("Error creating IConv decoder even though encoding was in table.");
+        return String();
+    }
+
+    Vector<UChar> result;
+
+    gsize bytesRead = 0;
+    gsize bytesWritten = 0;
+    const gchar* input = bytes;
+    gsize inputLength = length;
+    gchar buffer[ConversionBufferSize];
+    int flags = !length ? G_CONVERTER_INPUT_AT_END : G_CONVERTER_NO_FLAGS;
+    if (flush)
+        flags |= G_CONVERTER_FLUSH;
+
+    bool bufferWasFull = false;
+    char* prefixedBytes = 0;
+
+    if (m_numBufferedBytes) {
+        inputLength = length + m_numBufferedBytes;
+        prefixedBytes = static_cast<char*>(fastMalloc(inputLength));
+        memcpy(prefixedBytes, m_bufferedBytes, m_numBufferedBytes);
+        memcpy(prefixedBytes + m_numBufferedBytes, bytes, length);
+
+        input = prefixedBytes;
+
+        // all buffered bytes are consumed now
+        m_numBufferedBytes = 0;
+    }
+
+    do {
+        // Only the attempt made in this iteration may request a retry. A flag
+        // left over from an earlier iteration would keep the loop running
+        // forever once the input is exhausted.
+        bufferWasFull = false;
+
+        GOwnPtr<GError> error;
+        GConverterResult res = g_converter_convert(G_CONVERTER(m_iconvDecoder.get()),
+                                                   input, inputLength,
+                                                   buffer, sizeof(buffer),
+                                                   static_cast<GConverterFlags>(flags),
+                                                   &bytesRead, &bytesWritten,
+                                                   &error.outPtr());
+        input += bytesRead;
+        inputLength -= bytesRead;
+
+        if (res == G_CONVERTER_ERROR) {
+            if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT)) {
+                if (inputLength > sizeof(m_bufferedBytes)) {
+                    // The remainder does not fit the carry-over buffer, so
+                    // copying it would write past the end of m_bufferedBytes.
+                    // Treat it like any other unrecoverable converter error.
+                    sawError = true;
+                    LOG_ERROR("GIConv partial input of %lu bytes does not fit the carry-over buffer.", static_cast<unsigned long>(inputLength));
+                    m_numBufferedBytes = 0; // Reset state for subsequent calls to decode.
+                    fastFree(prefixedBytes);
+                    return String();
+                }
+                // There is not enough input to fully determine what the conversion should produce,
+                // save it to a buffer to prepend it to the next input.
+                memcpy(m_bufferedBytes, input, inputLength);
+                m_numBufferedBytes = inputLength;
+                inputLength = 0;
+            } else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_NO_SPACE))
+                bufferWasFull = true;
+            else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
+                if (stopOnError)
+                    sawError = true;
+                if (inputLength) {
+                    // Ignore invalid character.
+                    input += 1;
+                    inputLength -= 1;
+                }
+            } else {
+                sawError = true;
+                LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
+                m_numBufferedBytes = 0; // Reset state for subsequent calls to decode.
+                fastFree(prefixedBytes);
+                return String();
+            }
+        }
+
+        // The converter wrote bytes of the internal UTF-16 encoding; append them as UChars.
+        result.append(reinterpret_cast<UChar*>(buffer), bytesWritten / sizeof(UChar));
+    } while ((inputLength || bufferWasFull) && !sawError);
+
+    fastFree(prefixedBytes);
+
+    return String::adopt(result);
+}
+
+// Encodes |length| UTF-16 code units from |characters| into this codec's
+// encoding.
+//
+// An empty input yields an empty (non-null) CString. The encoder is created on
+// first use; if that fails a null CString is returned. Characters the target
+// encoding cannot represent are replaced with the string produced by
+// TextCodec::getUnencodableReplacement() for |handling|. Any other converter
+// error is logged and yields a null CString.
+CString TextCodecGtk::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    if (!length)
+        return "";
+
+    if (!m_iconvEncoder)
+        createIConvEncoder();
+    if (!m_iconvEncoder) {
+        LOG_ERROR("Error creating IConv encoder even though encoding was in table.");
+        return CString();
+    }
+
+    gsize bytesRead = 0;
+    gsize bytesWritten = 0;
+    const gchar* input = reinterpret_cast<const char*>(characters);
+    gsize inputLength = length * sizeof(UChar);
+    gchar buffer[ConversionBufferSize];
+    Vector<char> result;
+    GOwnPtr<GError> error;
+
+    size_t size = 0;
+    do {
+        g_converter_convert(G_CONVERTER(m_iconvEncoder.get()),
+                            input, inputLength,
+                            buffer, sizeof(buffer),
+                            G_CONVERTER_INPUT_AT_END,
+                            &bytesRead, &bytesWritten,
+                            &error.outPtr());
+        input += bytesRead;
+        inputLength -= bytesRead;
+        if (bytesWritten > 0) {
+            result.grow(size + bytesWritten);
+            memcpy(result.data() + size, buffer, bytesWritten);
+            size += bytesWritten;
+        }
+
+        if (error && g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
+            const UChar* unencodable = reinterpret_cast<const UChar*>(input);
+            unsigned codePoint = unencodable[0];
+            size_t codeUnits = 1;
+
+            // A character outside the BMP occupies two code units. Combine a
+            // lead surrogate with the trail surrogate that follows it so that
+            // one replacement is produced for the real code point, rather than
+            // one replacement per surrogate value.
+            if (codePoint >= 0xD800 && codePoint <= 0xDBFF && inputLength >= 2 * sizeof(UChar)) {
+                unsigned trail = unencodable[1];
+                if (trail >= 0xDC00 && trail <= 0xDFFF) {
+                    codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (trail - 0xDC00);
+                    codeUnits = 2;
+                }
+            }
+
+            UnencodableReplacementArray replacement;
+            int replacementLength = TextCodec::getUnencodableReplacement(codePoint, handling, replacement);
+
+            // Consume the invalid character.
+            input += codeUnits * sizeof(UChar);
+            inputLength -= codeUnits * sizeof(UChar);
+
+            // Append replacement string to result buffer.
+            result.grow(size + replacementLength);
+            memcpy(result.data() + size, replacement, replacementLength);
+            size += replacementLength;
+
+            // The error has been handled; clear it so the loop continues with
+            // the remaining input.
+            error.clear();
+        }
+    } while (inputLength && !error.get());
+
+    if (error) {
+        LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
+        return CString();
+    }
+
+    return CString(result.data(), size);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/gtk/TextCodecGtk.h b/Source/WebCore/platform/text/gtk/TextCodecGtk.h
new file mode 100644
index 0000000..bb3a445
--- /dev/null
+++ b/Source/WebCore/platform/text/gtk/TextCodecGtk.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
+ * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodecGTK_h
+#define TextCodecGTK_h
+
+#include "GRefPtr.h"
+#include <glib.h>
+#include "TextCodec.h"
+#include "TextEncoding.h"
+
+namespace WebCore {
+
+    // TextCodec implementation for the GTK port, built on GLib's
+    // GCharsetConverter. One instance handles a single TextEncoding.
+    class TextCodecGtk : public TextCodec {
+    public:
+        // Registration entry points for the base set of encodings.
+        static void registerBaseEncodingNames(EncodingNameRegistrar);
+        static void registerBaseCodecs(TextCodecRegistrar);
+
+        // Registration entry points for the extended set of encodings.
+        static void registerExtendedEncodingNames(EncodingNameRegistrar);
+        static void registerExtendedCodecs(TextCodecRegistrar);
+
+        TextCodecGtk(const TextEncoding&);
+        virtual ~TextCodecGtk();
+
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+    private:
+        // Create the converters; const because they only touch mutable members.
+        void createIConvDecoder() const;
+        void createIConvEncoder() const;
+
+        TextEncoding m_encoding;
+        // Number of valid bytes currently held in m_bufferedBytes.
+        size_t m_numBufferedBytes;
+        // Incomplete trailing input that decode() saves for its next call.
+        unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+        // Converters, null until first needed.
+        mutable GRefPtr<GCharsetConverter> m_iconvDecoder;
+        mutable GRefPtr<GCharsetConverter> m_iconvEncoder;
+    };
+
+} // namespace WebCore
+
+#endif // TextCodecGTK_h
diff --git a/Source/WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp b/Source/WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp
new file mode 100644
index 0000000..8bb8c70
--- /dev/null
+++ b/Source/WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+#include "NotImplemented.h"
+
+
+namespace WebCore {
+
+// Haiku placeholder: reports itself through notImplemented() and always
+// returns the empty string.
+const char* currentSearchLocaleID()
+{
+    notImplemented();
+    return "";
+}
+
+// Haiku placeholder: reports itself through notImplemented() and always
+// returns the hard-coded "en_us".
+const char* currentTextBreakLocaleID()
+{
+    notImplemented();
+    return "en_us";
+}
+
+} // namespace WebCore
+
diff --git a/Source/WebCore/platform/text/mac/CharsetData.h b/Source/WebCore/platform/text/mac/CharsetData.h
new file mode 100644
index 0000000..458cecb
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/CharsetData.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2003, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+namespace WebCore {
+
+    // Makes kTextEncodingISOLatinThai an alias for the CoreFoundation constant.
+    #define kTextEncodingISOLatinThai kCFStringEncodingISOLatinThai
+
+    // One row of the charset table: a charset name and the encoding it maps to.
+    struct CharsetEntry {
+        const char* name;
+        // Global-namespace TextEncoding, not WebCore::TextEncoding.
+        // NOTE(review): presumably the system text-encoding identifier type — confirm.
+        ::TextEncoding encoding;
+    };
+
+    // Table of charset entries, defined in another translation unit. Its
+    // length is not visible from this declaration.
+    extern const CharsetEntry CharsetTable[];
+
+}
diff --git a/Source/WebCore/platform/text/mac/HyphenationMac.mm b/Source/WebCore/platform/text/mac/HyphenationMac.mm
new file mode 100644
index 0000000..d5c9283
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/HyphenationMac.mm
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#import "config.h"
+#import "Hyphenation.h"
+
+#if defined(BUILDING_ON_TIGER) || defined(BUILDING_ON_LEOPARD) || defined(BUILDING_ON_SNOW_LEOPARD)
+
+#import "AtomicStringKeyedMRUCache.h"
+#import "TextBreakIteratorInternalICU.h"
+#import "WebCoreSystemInterface.h"
+#import <wtf/RetainPtr.h>
+
+namespace WebCore {
+
+template<>
+bool AtomicStringKeyedMRUCache<bool>::createValueForNullKey()
+{
+    return strcmp(currentSearchLocaleID(), "en") == 0; // Null key: the value is whether the current search locale ID is exactly "en".
+}
+
+template<>
+bool AtomicStringKeyedMRUCache<bool>::createValueForKey(const AtomicString& localeIdentifier)
+{
+    // The value is true when the language-code component of the locale identifier compares equal to "en".
+    RetainPtr<CFStringRef> identifier(AdoptCF, localeIdentifier.createCFString());
+    RetainPtr<CFDictionaryRef> localeParts(AdoptCF, CFLocaleCreateComponentsFromLocaleIdentifier(kCFAllocatorDefault, identifier.get()));
+    CFStringRef languageCode = static_cast<CFStringRef>(CFDictionaryGetValue(localeParts.get(), kCFLocaleLanguageCode));
+    return languageCode && CFEqual(languageCode, CFSTR("en"));
+}
+
+bool canHyphenate(const AtomicString& localeIdentifier)
+{
+    DEFINE_STATIC_LOCAL(AtomicStringKeyedMRUCache<bool>, isEnglishCache, ()); // Caches, per locale identifier, the result of the "is English" test implemented by the createValueFor* specializations in this file.
+    return isEnglishCache.get(localeIdentifier); // Hyphenation is reported as available only when the cached answer is true.
+}
+
+size_t lastHyphenLocation(const UChar* characters, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier)
+{
+    ASSERT_UNUSED(localeIdentifier, canHyphenate(localeIdentifier));
+    // Wrap the caller's buffer without copying it; kCFAllocatorNull as the contents deallocator means CF does not free it.
+    RetainPtr<CFStringRef> text(AdoptCF, CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, characters, length, kCFAllocatorNull));
+    return wkGetHyphenationLocationBeforeIndex(text.get(), beforeIndex);
+}
+
+} // namespace WebCore
+
+#endif // defined(BUILDING_ON_TIGER) || defined(BUILDING_ON_LEOPARD) || defined(BUILDING_ON_SNOW_LEOPARD)
diff --git a/Source/WebCore/platform/text/mac/ShapeArabic.c b/Source/WebCore/platform/text/mac/ShapeArabic.c
new file mode 100644
index 0000000..dd61ce5
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/ShapeArabic.c
@@ -0,0 +1,556 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2000-2004, International Business Machines
+*   Corporation and others. All Rights Reserved.
+*   Copyright (C) 2007 Apple Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a copy of this
+*   software and associated documentation files (the "Software"), to deal in the Software
+*   without restriction, including without limitation the rights to use, copy, modify,
+*   merge, publish, distribute, and/or sell copies of the Software, and to permit persons
+*   to whom the Software is furnished to do so, provided that the above copyright notice(s)
+*   and this permission notice appear in all copies of the Software and that both the above
+*   copyright notice(s) and this permission notice appear in supporting documentation.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+*   INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+*   PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER
+*   OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
+*   CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+*   PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+*   OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*
+*   Except as contained in this notice, the name of a copyright holder shall not be used in
+*   advertising or otherwise to promote the sale, use or other dealings in this Software
+*   without prior written authorization of the copyright holder.
+*
+******************************************************************************
+*
+*   Arabic letter shaping implemented by Ayman Roshdy
+*/
+
+#include "config.h"
+
+#if USE(ATSUI)
+
+#include "ShapeArabic.h"
+
+#include <stdbool.h>
+#include <string.h>
+#include <unicode/utypes.h>
+#include <unicode/uchar.h>
+#include <unicode/ustring.h>
+#include <unicode/ushape.h>
+#include <wtf/Assertions.h>
+
+/*
+ * ### TODO in general for letter shaping:
+ * - the letter shaping code is UTF-16-unaware; needs update
+ *   + especially invertBuffer()?!
+ * - needs to handle the "Arabic Tail" that is used in some legacy codepages
+ *   as a glyph fragment of wide-glyph letters
+ *   + IBM Unicode conversion tables map it to U+200B (ZWSP)
+ *   + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
+ */
+
+/* definitions for Arabic letter shaping ------------------------------------ */
+
+#define IRRELEVANT 4   /* link flag: character is skipped when shapeUnicode() looks for the next linking neighbour */
+#define LAMTYPE    16  /* link flag set for Lam (0x0644) in araLink */
+#define ALEFTYPE   32  /* link flag tested together with LAMTYPE to detect a Lam followed by an Alef */
+#define LINKR      1   /* link flag, bit 0; NOTE(review): presumably "links to the right" - confirm */
+#define LINKL      2   /* link flag, bit 1; NOTE(review): presumably "links to the left" - confirm */
+/* Offsets from 0xFE70, indexed by (ch - 0x064B), used by shapeUnicode() for the characters 0x064B-0x0652. */
+static const UChar IrrelevantPos[] = {
+    0x0, 0x2, 0x4, 0x6,
+    0x8, 0xA, 0xC, 0xE,
+};
+
+static const UChar araLink[178]= /* link data for 0x0622-0x06D3, indexed by (ch - 0x0622) in getLink() */
+{ /* low byte: link flags (1 LINKR, 2 LINKL, 4 IRRELEVANT, 16 LAMTYPE, 32 ALEFTYPE); high byte (256 * n): presentation-form offset added in shapeUnicode() */
+  1           + 32 + 256 * 0x11,/*0x0622*/
+  1           + 32 + 256 * 0x13,/*0x0623*/
+  1                + 256 * 0x15,/*0x0624*/
+  1           + 32 + 256 * 0x17,/*0x0625*/
+  1 + 2            + 256 * 0x19,/*0x0626*/
+  1           + 32 + 256 * 0x1D,/*0x0627*/
+  1 + 2            + 256 * 0x1F,/*0x0628*/
+  1                + 256 * 0x23,/*0x0629*/
+  1 + 2            + 256 * 0x25,/*0x062A*/
+  1 + 2            + 256 * 0x29,/*0x062B*/
+  1 + 2            + 256 * 0x2D,/*0x062C*/
+  1 + 2            + 256 * 0x31,/*0x062D*/
+  1 + 2            + 256 * 0x35,/*0x062E*/
+  1                + 256 * 0x39,/*0x062F*/
+  1                + 256 * 0x3B,/*0x0630*/
+  1                + 256 * 0x3D,/*0x0631*/
+  1                + 256 * 0x3F,/*0x0632*/
+  1 + 2            + 256 * 0x41,/*0x0633*/
+  1 + 2            + 256 * 0x45,/*0x0634*/
+  1 + 2            + 256 * 0x49,/*0x0635*/
+  1 + 2            + 256 * 0x4D,/*0x0636*/
+  1 + 2            + 256 * 0x51,/*0x0637*/
+  1 + 2            + 256 * 0x55,/*0x0638*/
+  1 + 2            + 256 * 0x59,/*0x0639*/
+  1 + 2            + 256 * 0x5D,/*0x063A*/
+  0, 0, 0, 0, 0,                /*0x063B-0x063F*/
+  1 + 2,                        /*0x0640*/
+  1 + 2            + 256 * 0x61,/*0x0641*/
+  1 + 2            + 256 * 0x65,/*0x0642*/
+  1 + 2            + 256 * 0x69,/*0x0643*/
+  1 + 2       + 16 + 256 * 0x6D,/*0x0644*/
+  1 + 2            + 256 * 0x71,/*0x0645*/
+  1 + 2            + 256 * 0x75,/*0x0646*/
+  1 + 2            + 256 * 0x79,/*0x0647*/
+  1                + 256 * 0x7D,/*0x0648*/
+  1                + 256 * 0x7F,/*0x0649*/
+  1 + 2            + 256 * 0x81,/*0x064A*/
+  4, 4, 4, 4,                   /*0x064B-0x064E*/
+  4, 4, 4, 4,                   /*0x064F-0x0652*/
+  4, 4, 4, 0, 0,                /*0x0653-0x0657*/
+  0, 0, 0, 0,                   /*0x0658-0x065B*/
+  1                + 256 * 0x85,/*0x065C*/
+  1                + 256 * 0x87,/*0x065D*/
+  1                + 256 * 0x89,/*0x065E*/
+  1                + 256 * 0x8B,/*0x065F*/
+  0, 0, 0, 0, 0,                /*0x0660-0x0664*/
+  0, 0, 0, 0, 0,                /*0x0665-0x0669*/
+  0, 0, 0, 0, 0, 0,             /*0x066A-0x066F*/
+  4,                            /*0x0670*/
+  0,                            /*0x0671*/
+  1           + 32,             /*0x0672*/
+  1           + 32,             /*0x0673*/
+  0,                            /*0x0674*/
+  1           + 32,             /*0x0675*/
+  1, 1,                         /*0x0676-0x0677*/
+  1+2,                          /*0x0678*/
+  1+2              + 256 * 0x16,/*0x0679*/
+  1+2              + 256 * 0x0E,/*0x067A*/
+  1+2              + 256 * 0x02,/*0x067B*/
+  1+2, 1+2,                     /*0x067C-0x067D*/
+  1+2              + 256 * 0x06,/*0x067E*/
+  1+2              + 256 * 0x12,/*0x067F*/
+  1+2              + 256 * 0x0A,/*0x0680*/
+  1+2, 1+2,                     /*0x0681-0x0682*/
+  1+2              + 256 * 0x26,/*0x0683*/
+  1+2              + 256 * 0x22,/*0x0684*/
+  1+2,                          /*0x0685*/
+  1+2              + 256 * 0x2A,/*0x0686*/
+  1+2              + 256 * 0x2E,/*0x0687*/
+  1                + 256 * 0x38,/*0x0688*/
+  1, 1, 1,                      /*0x0689-0x068B*/
+  1                + 256 * 0x34,/*0x068C*/
+  1                + 256 * 0x32,/*0x068D*/
+  1                + 256 * 0x36,/*0x068E*/
+  1, 1,                         /*0x068F-0x0690*/
+  1                + 256 * 0x3C,/*0x0691*/
+  1, 1, 1, 1, 1, 1,             /*0x0692-0x0697*/
+  1                + 256 * 0x3A,/*0x0698*/
+  1,                            /*0x0699*/
+  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x069F*/
+  1+2, 1+2, 1+2, 1+2,           /*0x06A0-0x06A3*/
+  1+2              + 256 * 0x2E,/*0x06A4*/
+  1+2,                          /*0x06A5*/
+  1+2              + 256 * 0x1E,/*0x06A6*/
+  1+2, 1+2,                     /*0x06A7-0x06A8*/
+  1+2              + 256 * 0x3E,/*0x06A9*/
+  1+2, 1+2, 1+2,                /*0x06AA-0x06AC*/
+  1+2              + 256 * 0x83,/*0x06AD*/
+  1+2,                          /*0x06AE*/
+  1+2              + 256 * 0x42,/*0x06AF*/
+  1+2,                          /*0x06B0*/
+  1+2              + 256 * 0x4A,/*0x06B1*/
+  1+2,                          /*0x06B2*/
+  1+2              + 256 * 0x46,/*0x06B3*/
+  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B4-0x06B9*/
+  1+2,                          /*0x06BA*/          // FIXME: Seems to have a final form
+  1+2              + 256 * 0x50,/*0x06BB*/
+  1+2, 1+2,                     /*0x06BC-0x06BD*/
+  1+2              + 256 * 0x5A,/*0x06BE*/
+  1+2,                          /*0x06BF*/
+  1,                            /*0x06C0*/
+  1+2              + 256 * 0x56,/*0x06C1*/
+  1+2,                          /*0x06C2*/
+  1, 1,                         /*0x06C3-0x06C4*/
+  1                + 256 * 0x90,/*0x06C5*/
+  1                + 256 * 0x89,/*0x06C6*/
+  1                + 256 * 0x87,/*0x06C7*/
+  1                + 256 * 0x8B,/*0x06C8*/
+  1                + 256 * 0x92,/*0x06C9*/
+  1,                            /*0x06CA*/
+  1                + 256 * 0x8E,/*0x06CB*/
+  1+2              + 256 * 0xAC,/*0x06CC*/
+  1,                            /*0x06CD*/
+  1+2,                          /*0x06CE*/
+  1,                            /*0x06CF*/
+  1+2              + 256 * 0x94,/*0x06D0*/
+  1+2,                          /*0x06D1*/
+  1                + 256 * 0x5E,/*0x06D2*/
+  1                + 256 * 0x60 /*0x06D3*/
+};
+
+static const UChar presLink[141]= /* link flags for 0xFE70-0xFEFC, indexed by (ch - 0xFE70) in getLink() */
+{
+  1 + 2,                        /*0xFE70*/
+  1 + 2,                        /*0xFE71*/
+  1 + 2, 0, 1+ 2, 0, 1+ 2,      /*0xFE72-0xFE76*/
+  1 + 2,                        /*0xFE77*/
+  1+ 2, 1 + 2, 1+2, 1 + 2,      /*0xFE78-0xFE7B*/
+  1+ 2, 1 + 2, 1+2, 1 + 2,      /*0xFE7C-0xFE7F*/
+  0, 0 + 32, 1 + 32, 0 + 32,    /*0xFE80-0xFE83*/
+  1 + 32, 0, 1,  0 + 32,        /*0xFE84-0xFE87*/
+  1 + 32, 0, 2,  1 + 2,         /*0xFE88-0xFE8B*/
+  1, 0 + 32, 1 + 32, 0,         /*0xFE8C-0xFE8F*/
+  2, 1 + 2, 1, 0,               /*0xFE90-0xFE93*/
+  1, 0, 2, 1 + 2,               /*0xFE94-0xFE97*/
+  1, 0, 2, 1 + 2,               /*0xFE98-0xFE9B*/
+  1, 0, 2, 1 + 2,               /*0xFE9C-0xFE9F*/
+  1, 0, 2, 1 + 2,               /*0xFEA0-0xFEA3*/
+  1, 0, 2, 1 + 2,               /*0xFEA4-0xFEA7*/
+  1, 0, 1, 0,                   /*0xFEA8-0xFEAB*/
+  1, 0, 1, 0,                   /*0xFEAC-0xFEAF*/
+  1, 0, 2, 1+2,                 /*0xFEB0-0xFEB3*/
+  1, 0, 2, 1+2,                 /*0xFEB4-0xFEB7*/
+  1, 0, 2, 1+2,                 /*0xFEB8-0xFEBB*/
+  1, 0, 2, 1+2,                 /*0xFEBC-0xFEBF*/
+  1, 0, 2, 1+2,                 /*0xFEC0-0xFEC3*/
+  1, 0, 2, 1+2,                 /*0xFEC4-0xFEC7*/
+  1, 0, 2, 1+2,                 /*0xFEC8-0xFECB*/
+  1, 0, 2, 1+2,                 /*0xFECC-0xFECF*/
+  1, 0, 2, 1+2,                 /*0xFED0-0xFED3*/
+  1, 0, 2, 1+2,                 /*0xFED4-0xFED7*/
+  1, 0, 2, 1+2,                 /*0xFED8-0xFEDB*/
+  1, 0 + 16, 2 + 16, 1 + 2 +16, /*0xFEDC-0xFEDF*/
+  1 + 16, 0, 2, 1+2,            /*0xFEE0-0xFEE3*/
+  1, 0, 2, 1+2,                 /*0xFEE4-0xFEE7*/
+  1, 0, 2, 1+2,                 /*0xFEE8-0xFEEB*/
+  1, 0, 1, 0,                   /*0xFEEC-0xFEEF*/
+  1, 0, 2, 1+2,                 /*0xFEF0-0xFEF3*/
+  1, 0, 1, 0,                   /*0xFEF4-0xFEF7*/
+  1, 0, 1, 0,                   /*0xFEF8-0xFEFB*/
+  1                             /*0xFEFC*/
+};
+
+static const UChar convertFEto06[] = /* maps 0xFE70-0xFEFC, indexed by (ch - 0xFE70), back to the 06xx range; used at the start of shapeUnicode() */
+{
+/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
+/*FE7*/   0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
+/*FE8*/   0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
+/*FE9*/   0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
+/*FEA*/   0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
+/*FEB*/   0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
+/*FEC*/   0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
+/*FED*/   0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
+/*FEE*/   0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
+/*FEF*/   0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F /* 0xFEF5-0xFEFC map to the intermediate 0x065C-0x065F values also produced by changeLamAlef() */
+};
+
+static const UChar shapeTable[4][4][4]= /* shape offset (0-3), looked up in shapeUnicode() as [nextLink & 3][lastLink & 3][currLink & 3] */
+{
+  { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
+  { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
+  { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
+  { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
+};
+
+/*
+ *Name     : changeLamAlef
+ *Function : Maps one of the four Alef letters (0x0622, 0x0623, 0x0625,
+ *           0x0627) to its intermediate LamAlef code point in the 0x06xx
+ *           range (0x065C-0x065F); the shaping function later turns that
+ *           intermediate value into an 0xFExx LamAlef.
+ *Returns  : the intermediate LamAlef code point, or 0 for any other input.
+ */
+static UChar
+changeLamAlef(UChar ch) {
+    /* Alef with Madda above */
+    if (ch == 0x0622)
+        return 0x065C;
+
+    /* Alef with Hamza above */
+    if (ch == 0x0623)
+        return 0x065D;
+
+    /* Alef with Hamza below */
+    if (ch == 0x0625)
+        return 0x065E;
+
+    /* plain Alef */
+    if (ch == 0x0627)
+        return 0x065F;
+
+    /* not one of the handled Alef letters */
+    return 0;
+}
+
+/*
+ *Name     : specialChar
+ *Function : Classifies the characters that need special handling in the
+ *           shapeUnicode function.
+ *Returns  : 1 for 0x0622-0x0625, 0x0627, 0x0629, 0x062F-0x0632 and
+ *           0x0648-0x0649;
+ *           2 for 0x064B-0x0652;
+ *           3 for 0x0653-0x0655, 0x0670 and 0xFE70-0xFE7F;
+ *           0 for every other character.
+ */
+static int32_t
+specialChar(UChar ch) {
+    if ((ch >= 0x0622 && ch <= 0x0625) || ch == 0x0627 || ch == 0x0629 ||
+        (ch >= 0x062F && ch <= 0x0632) || (ch >= 0x0648 && ch <= 0x0649))
+        return 1;
+    if (ch >= 0x064B && ch <= 0x0652)
+        return 2;
+    if ((ch >= 0x0653 && ch <= 0x0655) || ch == 0x0670 || (ch >= 0xFE70 && ch <= 0xFE7F))
+        return 3;
+    /* everything else gets no special treatment */
+    return 0;
+}
+
+/*
+ *Name     : getLink
+ *Function : Looks up the link data of a character, which describes how it
+ *           joins its neighbours (Arabic characters have four forms:
+ *           Isolated, Initial, Middle and Final Form).
+ *Returns  : the araLink entry for 0x0622-0x06D3, 3 for 0x200D,
+ *           4 for 0x206D-0x206F, the presLink entry for 0xFE70-0xFEFC,
+ *           and 0 for anything else.
+ */
+static UChar
+getLink(UChar ch) {
+    if (ch >= 0x0622 && ch <= 0x06D3)
+        return araLink[ch - 0x0622];
+    if (ch == 0x200D)
+        return 3;
+    if (ch >= 0x206D && ch <= 0x206F)
+        return 4;
+    if (ch >= 0xFE70 && ch <= 0xFEFC)
+        return presLink[ch - 0xFE70];
+    return 0;
+}
+
+/*
+ *Name     : isTashkeelChar
+ *Function : Tests whether a character lies in the Tashkeel range
+ *           0x064B-0x0652 (inclusive).
+ *Returns  : 1 for a Tashkeel character, 0 otherwise.
+ */
+static int32_t
+isTashkeelChar(UChar ch) {
+    const int32_t inTashkeelRange = (ch >= 0x064B && ch <= 0x0652);
+
+    return inTashkeelRange;
+}
+
+/*
+ *Name     : shapeUnicode
+ *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
+ *           arabic Unicode buffer in FExx Range, in place; returns sourceLength
+ */
+static int32_t
+shapeUnicode(UChar *dest, int32_t sourceLength,
+             int32_t destSize,
+             int tashkeelFlag) {
+    /* dest is read and rewritten in place; destSize is only overwritten before the return; tashkeelFlag == 1 lets tashkeel take shape 1 */
+    int32_t          i, iend;
+    int32_t          prevPos, lastPos,Nx, Nw;   /* prevPos is written but never read in this function */
+    unsigned int     Shape;
+    int32_t          flag;
+    int32_t          lamalef_found = 0;         /* set when a Lam-Alef pair is seen; never read afterwards */
+    UChar            prevLink = 0, lastLink = 0, currLink, nextLink = 0;
+    UChar            wLamalef;
+
+    /*
+     * Converts the input buffer from FExx Range into 06xx Range
+     * to make sure that all characters are in the 06xx range
+     * even the lamalef is converted to the special region in
+     * the 06xx range
+     */
+    for (i = 0; i < sourceLength; i++) {
+        UChar inputChar = dest[i];
+        if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
+            dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
+        }
+    }
+
+    /* start at the beginning of the buffer and walk forward until the index reaches iend */
+    i = 0;
+    iend = sourceLength;
+
+    /*
+     * This function resolves the link between the characters .
+     * Arabic characters have four forms :
+     * Isolated Form, Initial Form, Middle Form and Final Form
+     */
+    currLink = getLink(dest[i]); /* reads dest[0]; shapeArabic() returns early for an empty source, so sourceLength >= 1 here */
+
+    prevPos = i;
+    lastPos = i;
+    Nx = sourceLength + 2, Nw = 0; /* Nx >= sourceLength means "next relevant character not looked up yet" */
+
+    while (i != iend) {
+        /* If high byte of currLink > 0 then more than one shape */
+        if ((currLink & 0xFF00) > 0 || isTashkeelChar(dest[i])) {
+            Nw = i + 1;
+            while (Nx >= sourceLength) {         /* we need to know about next char */
+                if(Nw == iend) {
+                    nextLink = 0;
+                    Nx = -1;
+                } else {
+                    nextLink = getLink(dest[Nw]);
+                    if((nextLink & IRRELEVANT) == 0) {
+                        Nx = Nw;
+                    } else {
+                        Nw = Nw + 1;
+                    }
+                }
+            }
+
+            if ( ((currLink & ALEFTYPE) > 0)  &&  ((lastLink & LAMTYPE) > 0) ) {
+                lamalef_found = 1;
+                wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
+                if ( wLamalef != 0) {
+                    dest[i] = ' ';               /* The default case is to drop the Alef and replace */
+                    dest[lastPos] =wLamalef;     /* it by a space.                                   */
+                    i=lastPos;
+                }
+                lastLink = prevLink;
+                currLink = getLink(wLamalef);
+            }
+            /*
+             * get the proper shape according to link ability of neighbors
+             * and of character; depends on the order of the shapes
+             * (isolated, initial, middle, final) in the compatibility area
+             */
+             flag  = specialChar(dest[i]);
+
+             Shape = shapeTable[nextLink & (LINKR + LINKL)]
+                               [lastLink & (LINKR + LINKL)]
+                               [currLink & (LINKR + LINKL)];
+
+             if (flag == 1) {
+                 Shape = (Shape == 1 || Shape == 3) ? 1 : 0;
+             }
+             else
+             if(flag == 2) {
+                 if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
+                      dest[i] != 0x064C && dest[i] != 0x064D ) {
+                     Shape = 1;
+                     if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE )
+                         Shape = 0;
+                 }
+                 else {
+                     Shape = 0;
+                 }
+             }
+
+             if(flag == 2) {
+                 dest[i] =  0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape;
+             }
+             else  /* characters below 0x0670 are based at 0xFE70, the rest at 0xFB50; the high byte of currLink is the offset */
+                 dest[i] = (UChar)((dest[i] < 0x0670 ? 0xFE70 : 0xFB50) + (currLink >> 8) + Shape);
+        }
+
+        /* move one notch forward */
+        if ((currLink & IRRELEVANT) == 0) {
+              prevLink = lastLink;
+              lastLink = currLink;
+              prevPos = lastPos;
+              lastPos = i;
+        }
+
+        i++;
+        if (i == Nx) {
+            currLink = nextLink;
+            Nx = sourceLength + 2;
+        }
+        else if(i != iend) {
+            currLink = getLink(dest[i]);
+        }
+    }
+
+    destSize = sourceLength;
+
+    return destSize;
+}
+
+int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode) {
+    int32_t destLength; /* assigned by the shaping calls below but never read; the function returns sourceLength */
+    /* Copies source into dest and shapes it in place. Returns 0 on error or empty input, the needed size on overflow, else sourceLength. */
+    /* usual error checking */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
+    if( source==NULL || sourceLength<-1 ||
+        (dest==NULL && destCapacity!=0) || destCapacity<0 ||
+        options>=U_SHAPE_DIGIT_TYPE_RESERVED ||
+        (options&U_SHAPE_DIGITS_MASK)>=U_SHAPE_DIGITS_RESERVED
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* determine the source length */
+    if(sourceLength==-1) { /* -1 means the length is measured with u_strlen() */
+        sourceLength=u_strlen(source);
+    }
+    if(sourceLength==0) {
+        return 0;
+    }
+
+    /* check that source and destination do not overlap */
+    if( dest!=NULL &&
+        ((source<=dest && dest<source+sourceLength) ||
+         (dest<=source && source<dest+destCapacity))
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
+        int32_t outputSize = sourceLength;
+
+        /* calculate destination size */
+        /* TODO: do we ever need to do this pure preflighting? */
+        ASSERT((options&U_SHAPE_LENGTH_MASK) != U_SHAPE_LENGTH_GROW_SHRINK);
+
+        if(outputSize>destCapacity) { /* also taken for dest==NULL, since destCapacity is then 0 and sourceLength is at least 1 */
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            return outputSize;
+        }
+
+        /* Start of Arabic letter shaping part */
+        memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR);
+
+        ASSERT((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL);
+
+        switch(options&U_SHAPE_LETTERS_MASK) {
+        case U_SHAPE_LETTERS_SHAPE :
+            /* Call the shaping function with tashkeel flag == 1 */
+            destLength = shapeUnicode(dest,sourceLength,destCapacity,1);
+            break;
+        case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
+            /* Call the shaping function with tashkeel flag == 0 */
+            destLength = shapeUnicode(dest,sourceLength,destCapacity,0);
+            break;
+        case U_SHAPE_LETTERS_UNSHAPE :
+            ASSERT_NOT_REACHED(); /* unshaping is not implemented in this file; dest keeps the plain copy of source */
+            break;
+        default :
+            /* will never occur because of validity checks above */
+            destLength = 0;
+            break;
+        }
+
+        /* End of Arabic letter shaping part */
+    } else
+        ASSERT_NOT_REACHED(); /* callers are expected to request letter shaping; nothing is written to dest on this path */
+
+    ASSERT((options & U_SHAPE_DIGITS_MASK) == U_SHAPE_DIGITS_NOOP); /* digit shaping is not implemented in this file */
+
+    return sourceLength;
+}
+
+#endif // USE(ATSUI)
diff --git a/Source/WebCore/platform/text/mac/ShapeArabic.h b/Source/WebCore/platform/text/mac/ShapeArabic.h
new file mode 100644
index 0000000..8aa577d
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/ShapeArabic.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2007 Apple Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef ShapeArabic_h
+#define ShapeArabic_h
+
+#if USE(ATSUI)
+
+#include <unicode/ushape.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Arabic letter shaping entry point; declared with C linkage when this header is included from C++. */
+int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // USE(ATSUI)
+#endif // ShapeArabic_h
diff --git a/Source/WebCore/platform/text/mac/StringImplMac.mm b/Source/WebCore/platform/text/mac/StringImplMac.mm
new file mode 100644
index 0000000..6f5e953
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/StringImplMac.mm
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2006, 2009 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include <wtf/text/StringImpl.h>
+
+#include "FoundationExtras.h"
+
+namespace WTF {
+// Conversion to NSString: creates a CFString with createCFString() and returns it through HardAutorelease().
+StringImpl::operator NSString *()
+{
+    return HardAutorelease(createCFString()); // NOTE(review): HardAutorelease presumably hands the new object to the autorelease pool - confirm in FoundationExtras.h
+}
+
+}
diff --git a/Source/WebCore/platform/text/mac/StringMac.mm b/Source/WebCore/platform/text/mac/StringMac.mm
new file mode 100644
index 0000000..7e98b2b
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/StringMac.mm
@@ -0,0 +1,42 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+#include <CoreFoundation/CFString.h>
+
+namespace WTF {
+
+String::String(NSString* str)
+{
+    if (!str)
+        return; // nil input: m_impl is never assigned
+    CFStringRef cfString = reinterpret_cast<CFStringRef>(str);
+    const CFIndex length = CFStringGetLength(cfString);
+    if (!length) {
+        m_impl = StringImpl::empty();
+        return;
+    }
+    Vector<UChar, 1024> characters(length);
+    CFStringGetCharacters(cfString, CFRangeMake(0, length), characters.data());
+    m_impl = StringImpl::create(characters.data(), length);
+}
+
+}
diff --git a/Source/WebCore/platform/text/mac/TextBoundaries.mm b/Source/WebCore/platform/text/mac/TextBoundaries.mm
new file mode 100644
index 0000000..bd7ddf8
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/TextBoundaries.mm
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#import "config.h"
+#import "TextBoundaries.h"
+
+using namespace WTF::Unicode;
+
+namespace WebCore {
+
+void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
+{
+    const int clampedIndex = (position >= len) ? len - 1 : position; // positions at or past the end use the last character; a len of 0 would yield -1
+    NSString* text = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars) length:len freeWhenDone:NO];
+    NSAttributedString* attributedText = [[NSAttributedString alloc] initWithString:text];
+    NSRange wordRange = [attributedText doubleClickAtIndex:clampedIndex];
+    [attributedText release];
+    [text release];
+    *start = wordRange.location;
+    *end = wordRange.location + wordRange.length;
+}
+
+int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
+{
+    NSString* text = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars) length:len freeWhenDone:NO];
+    NSAttributedString* attributedText = [[NSAttributedString alloc] initWithString:text];
+    // The word search is delegated to NSAttributedString; its result is stored as an int.
+    const int nextWordIndex = [attributedText nextWordFromIndex:position forward:forward];
+    [attributedText release];
+    [text release];
+    return nextWordIndex;
+}
+
+}
diff --git a/Source/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm b/Source/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
new file mode 100644
index 0000000..6af5616
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2007, 2009 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+#include <wtf/RetainPtr.h>
+
+namespace WebCore {
+
+static const int maxLocaleStringLength = 32;
+
+// Reads the "AppleTextBreakLocale" preference (any application, current user, any host).
+// Returns it when it exists and is a CFString; otherwise returns a null RetainPtr.
+static inline RetainPtr<CFStringRef> textBreakLocalePreference()
+{
+    RetainPtr<CFPropertyListRef> preference(AdoptCF, CFPreferencesCopyValue(CFSTR("AppleTextBreakLocale"),
+        kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost));
+    if (!preference)
+        return 0;
+    // The preference is an arbitrary property list; only a string is usable as a locale.
+    if (CFGetTypeID(preference.get()) != CFStringGetTypeID())
+        return 0;
+    return static_cast<CFStringRef>(preference.get());
+}
+
+// Returns the first entry of the "AppleLanguages" user default, or a null RetainPtr
+// when the array is missing, empty, or its first entry is not an NSString.
+static RetainPtr<CFStringRef> topLanguagePreference()
+{
+    NSArray *languages = [[NSUserDefaults standardUserDefaults] arrayForKey:@"AppleLanguages"];
+    if (!languages || [languages count] < 1)
+        return 0;
+    NSString *topLanguage = [languages objectAtIndex:0];
+    if ([topLanguage isKindOfClass:[NSString class]])
+        return reinterpret_cast<CFStringRef>(topLanguage);
+    return 0;
+}
+
+// Maps |locale| to its canonical language identifier via
+// CFLocaleCreateCanonicalLanguageIdentifierFromString. Returns null for a null |locale|,
+// and falls back to |locale| itself when no canonical form is produced.
+static RetainPtr<CFStringRef> canonicalLanguageIdentifier(CFStringRef locale)
+{
+    if (!locale)
+        return 0;
+    RetainPtr<CFStringRef> canonical(AdoptCF,
+        CFLocaleCreateCanonicalLanguageIdentifierFromString(kCFAllocatorDefault, locale));
+    if (canonical)
+        return canonical;
+    return locale;
+}
+
+// Writes |locale| into |localeStringBuffer| as a NUL-terminated ASCII string of at most
+// maxLocaleStringLength bytes (terminator included). The buffer is left as the empty
+// string when |locale| is null or cannot be converted.
+static void getLocale(CFStringRef locale, char localeStringBuffer[maxLocaleStringLength])
+{
+    // Empty string means "root locale", and that is what we use if we can't get a preference.
+    localeStringBuffer[0] = 0;
+    if (!locale)
+        return;
+    // CFStringGetCString reports failure (string too long for the buffer, or not representable
+    // in ASCII) through its return value; fall back to the root locale in that case rather than
+    // handing out whatever was left in the buffer.
+    if (!CFStringGetCString(locale, localeStringBuffer, maxLocaleStringLength, kCFStringEncodingASCII))
+        localeStringBuffer[0] = 0;
+}
+
+// Fills |localeStringBuffer| with the canonical form of the top language preference.
+static void getSearchLocale(char localeStringBuffer[maxLocaleStringLength])
+{
+    RetainPtr<CFStringRef> topLanguage = topLanguagePreference();
+    RetainPtr<CFStringRef> canonicalLanguage = canonicalLanguageIdentifier(topLanguage.get());
+    getLocale(canonicalLanguage.get(), localeStringBuffer);
+}
+
+// Returns the search locale ID. The value is computed on the first call and kept in a
+// function-local static buffer, which is what every call returns.
+const char* currentSearchLocaleID()
+{
+    static char cachedLocaleID[maxLocaleStringLength];
+    static bool isCached = false;
+    if (isCached)
+        return cachedLocaleID;
+
+    getSearchLocale(cachedLocaleID);
+    isCached = true;
+    return cachedLocaleID;
+}
+
+// Fills |localeStringBuffer| with the canonical text break locale: the text break
+// locale preference when there is one, otherwise the top language preference.
+static void getTextBreakLocale(char localeStringBuffer[maxLocaleStringLength])
+{
+    RetainPtr<CFStringRef> preferred = textBreakLocalePreference();
+    if (!preferred)
+        preferred = topLanguagePreference();
+    RetainPtr<CFStringRef> canonical = canonicalLanguageIdentifier(preferred.get());
+    getLocale(canonical.get(), localeStringBuffer);
+}
+
+// Returns the text break locale ID. The value is computed on the first call and kept in
+// a function-local static buffer, which is what every call returns.
+const char* currentTextBreakLocaleID()
+{
+    static char cachedLocaleID[maxLocaleStringLength];
+    static bool isCached = false;
+    if (isCached)
+        return cachedLocaleID;
+
+    getTextBreakLocale(cachedLocaleID);
+    isCached = true;
+    return cachedLocaleID;
+}
+
+}
diff --git a/Source/WebCore/platform/text/mac/TextCodecMac.cpp b/Source/WebCore/platform/text/mac/TextCodecMac.cpp
new file mode 100644
index 0000000..b743f3d
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/TextCodecMac.cpp
@@ -0,0 +1,329 @@
+/*
+ * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "TextCodecMac.h"
+
+#include "CharacterNames.h"
+#include "CharsetData.h"
+#include "PlatformString.h"
+#include "ThreadGlobalData.h"
+#include <wtf/Assertions.h>
+#include <wtf/text/CString.h>
+#include <wtf/PassOwnPtr.h>
+#include <wtf/RetainPtr.h>
+#include <wtf/Threading.h>
+
+using namespace std;
+
+namespace WebCore {
+
+// We need to keep this because ICU doesn't support some of the encodings that we need:
+// <http://bugs.webkit.org/show_bug.cgi?id=4195>.
+
+const size_t ConversionBufferSize = 16384;
+
+// Shorthand for the TECConverterWrapper held by threadGlobalData(); used below to stash
+// and reuse a single converter between TextCodecMac instances.
+static TECConverterWrapper& cachedConverterTEC()
+{
+    return threadGlobalData().cachedConverterTEC();
+}
+
+// Registers every name in CharsetTable with |registrar|. The table is walked until an
+// entry with a null name; within each run of consecutive entries sharing an encoding,
+// the first entry's name is passed as the second argument for all names in that run.
+void TextCodecMac::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    TECTextEncodingID currentEncoding = invalidEncoding;
+    const char* canonicalName = 0;
+
+    size_t index = 0;
+    while (CharsetTable[index].name) {
+        const char* name = CharsetTable[index].name;
+        // A change of encoding starts a new run; its first name becomes the canonical one.
+        if (CharsetTable[index].encoding != currentEncoding) {
+            currentEncoding = CharsetTable[index].encoding;
+            canonicalName = name;
+        }
+        registrar(name, canonicalName);
+        ++index;
+    }
+}
+
+// Factory passed to the codec registrar. |additionalData| points at the
+// TECTextEncodingID to decode/encode with; the TextEncoding argument is unused.
+static PassOwnPtr<TextCodec> newTextCodecMac(const TextEncoding&, const void* additionalData)
+{
+    const TECTextEncodingID* encoding = static_cast<const TECTextEncodingID*>(additionalData);
+    return new TextCodecMac(*encoding);
+}
+
+// Registers the newTextCodecMac factory once per run of consecutive CharsetTable entries
+// that share an encoding, under the name of the first entry in the run. A pointer to
+// that entry's encoding is handed to the registrar as the factory's additional data.
+void TextCodecMac::registerCodecs(TextCodecRegistrar registrar)
+{
+    TECTextEncodingID previousEncoding = invalidEncoding;
+
+    for (size_t index = 0; CharsetTable[index].name; ++index) {
+        // Same encoding as the previous entry: this is just another alias, skip it.
+        if (CharsetTable[index].encoding == previousEncoding)
+            continue;
+        registrar(CharsetTable[index].name, newTextCodecMac, &CharsetTable[index].encoding);
+        previousEncoding = CharsetTable[index].encoding;
+    }
+}
+
+// Creates a codec for |encoding|. No TEC converter is created here; decode() creates one
+// on first use.
+TextCodecMac::TextCodecMac(TECTextEncodingID encoding)
+    : m_encoding(encoding)
+    // encode() reads this member unconditionally, so it must not be left uninitialized.
+    // A plain backslash makes encode()'s backslash replacement an identity operation.
+    // NOTE(review): confirm whether some encodings should use a yen sign (U+00A5) here instead.
+    , m_backslashAsCurrencySymbol('\\')
+    , m_numBufferedBytes(0)
+    , m_converterTEC(0)
+{
+}
+
+// Hands this codec's converter (if it has one) over to the cached converter slot
+// instead of disposing of it directly; see releaseTECConverter().
+TextCodecMac::~TextCodecMac()
+{
+    releaseTECConverter();
+}
+
+// Moves this codec's converter into the cached converter slot so a later codec for the
+// same encoding can reuse it. Any converter already in the slot is disposed first.
+// Does nothing when this codec has no converter.
+void TextCodecMac::releaseTECConverter() const
+{
+    if (!m_converterTEC)
+        return;
+
+    TECConverterWrapper& cache = cachedConverterTEC();
+    if (cache.converter)
+        TECDisposeConverter(cache.converter);
+    cache.converter = m_converterTEC;
+    cache.encoding = m_encoding;
+    m_converterTEC = 0;
+}
+
+// Obtains a converter from m_encoding to 16-bit Unicode and stores it in m_converterTEC.
+// The cached converter is taken over (and its context cleared) when it was created for
+// the same encoding; otherwise a new converter is created. The cached encoding is reset
+// to invalidEncoding in either case. Returns noErr on success, or the status from
+// TECCreateConverter when creation fails.
+OSStatus TextCodecMac::createTECConverter() const
+{
+    TECConverterWrapper& cache = cachedConverterTEC();
+
+    const bool cacheMatchesEncoding = cache.encoding == m_encoding;
+    cache.encoding = invalidEncoding;
+
+    if (!cacheMatchesEncoding || !cache.converter) {
+        OSStatus status = TECCreateConverter(&m_converterTEC, m_encoding,
+            CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat));
+        if (status)
+            return status;
+
+        TECSetBasicOptions(m_converterTEC, kUnicodeForceASCIIRangeMask);
+        return noErr;
+    }
+
+    // Take ownership of the cached converter and reset any state left by its last user.
+    m_converterTEC = cache.converter;
+    cache.converter = 0;
+    TECClearConverterContextInfo(m_converterTEC);
+    return noErr;
+}
+
+// Runs one TECConvertText pass.
+//
+// inputBuffer / inputBufferLength: source bytes available for this pass.
+// inputLength (out): number of bytes of inputBuffer consumed by this pass.
+// outputBuffer / outputBufferLength: destination for the converted text, size in bytes.
+// outputLength (out): number of bytes written to outputBuffer.
+//
+// When bytes of a partial character are pending in m_bufferedBytes, this pass only tries
+// to complete that character: it appends as many input bytes as fit to m_bufferedBytes and
+// converts that small buffer instead of inputBuffer.
+//
+// Returns the converter status, with two adjustments made below: kTECPartialCharErr coming
+// from the small buffer is turned into noErr (or kTECUnmappableElementErr if the buffer is
+// full and still unconvertible), and kTECBufferBelowMinimumSizeErr with output written is
+// reported as kTECOutputBufferFullStatus.
+OSStatus TextCodecMac::decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength,
+    void *outputBuffer, int outputBufferLength, int& outputLength)
+{
+    OSStatus status;
+    unsigned long bytesRead = 0;
+    unsigned long bytesWritten = 0;
+
+    if (m_numBufferedBytes != 0) {
+        // Finish converting a partial character that's in our buffer.
+        
+        // First, fill the partial character buffer with as many bytes as are available.
+        ASSERT(m_numBufferedBytes < sizeof(m_bufferedBytes));
+        const int spaceInBuffer = sizeof(m_bufferedBytes) - m_numBufferedBytes;
+        const int bytesToPutInBuffer = min(spaceInBuffer, inputBufferLength);
+        ASSERT(bytesToPutInBuffer != 0);
+        memcpy(m_bufferedBytes + m_numBufferedBytes, inputBuffer, bytesToPutInBuffer);
+
+        // Now, do a conversion on the buffer.
+        status = TECConvertText(m_converterTEC, m_bufferedBytes, m_numBufferedBytes + bytesToPutInBuffer, &bytesRead,
+            reinterpret_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
+        ASSERT(bytesRead <= m_numBufferedBytes + bytesToPutInBuffer);
+
+        if (status == kTECPartialCharErr && bytesRead == 0) {
+            // Handle the case where the partial character was not converted.
+            if (bytesToPutInBuffer >= spaceInBuffer) {
+                // The small buffer is completely full and still not convertible: give up on it.
+                LOG_ERROR("TECConvertText gave a kTECPartialCharErr but read none of the %zu bytes in the buffer", sizeof(m_bufferedBytes));
+                m_numBufferedBytes = 0;
+                status = kTECUnmappableElementErr; // should never happen, but use this error code
+            } else {
+                // Tell the caller we read all the source bytes and keep them in the buffer.
+                m_numBufferedBytes += bytesToPutInBuffer;
+                bytesRead = bytesToPutInBuffer;
+                status = noErr;
+            }
+        } else {
+            // We are done with the partial character buffer.
+            // Also, we have read some of the bytes from the main buffer.
+            if (bytesRead > m_numBufferedBytes) {
+                // bytesRead counted bytes of the small buffer; report only the part that
+                // came from inputBuffer in this call.
+                bytesRead -= m_numBufferedBytes;
+            } else {
+                LOG_ERROR("TECConvertText accepted some bytes it previously rejected with kTECPartialCharErr");
+                bytesRead = 0;
+            }
+            m_numBufferedBytes = 0;
+            if (status == kTECPartialCharErr) {
+                // While there may be a partial character problem in the small buffer,
+                // we have to try again and not get confused and think there is a partial
+                // character problem in the large buffer.
+                status = noErr;
+            }
+        }
+    } else {
+        // Nothing pending: convert directly from the caller's buffer.
+        status = TECConvertText(m_converterTEC, inputBuffer, inputBufferLength, &bytesRead,
+            static_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
+        ASSERT(static_cast<int>(bytesRead) <= inputBufferLength);
+    }
+
+    // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus.
+    if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0)
+        status = kTECOutputBufferFullStatus;
+
+    inputLength = bytesRead;
+    outputLength = bytesWritten;
+    return status;
+}
+
+// Decodes |length| bytes at |bytes| from m_encoding into a String.
+//
+// flush: when true, TECFlushText is called after the input is consumed and its output
+//     is appended to the result.
+// stopOnError: when true, malformed or undefined input sets |sawError| and ends decoding;
+//     when false, one byte is skipped and decoding continues.
+// sawError (in/out): decoding does not proceed while it is true; it is set to true on
+//     the errors described above and on any unexpected converter status.
+//
+// Returns a null String when a converter cannot be created or an unexpected status is
+// returned. A trailing partial character is kept in m_bufferedBytes so that the next
+// call can complete it.
+String TextCodecMac::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
+{
+    // Get a converter for the passed-in encoding.
+    if (!m_converterTEC && createTECConverter() != noErr)
+        return String();
+
+    Vector<UChar> result;
+
+    const unsigned char* sourcePointer = reinterpret_cast<const unsigned char*>(bytes);
+    int sourceLength = length;
+    bool bufferWasFull = false;
+    UniChar buffer[ConversionBufferSize];
+
+    // Keep converting while there is input left, or while the previous pass filled the
+    // output buffer and the converter may therefore still have output pending.
+    while ((sourceLength || bufferWasFull) && !sawError) {
+        int bytesRead = 0;
+        int bytesWritten = 0;
+        OSStatus status = decode(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten);
+        ASSERT(bytesRead <= sourceLength);
+        sourcePointer += bytesRead;
+        sourceLength -= bytesRead;
+
+        switch (status) {
+            case noErr:
+            case kTECOutputBufferFullStatus:
+                break;
+            case kTextMalformedInputErr:
+            case kTextUndefinedElementErr:
+                // FIXME: Put FFFD character into the output string in this case?
+                TECClearConverterContextInfo(m_converterTEC);
+                if (stopOnError) {
+                    sawError = true;
+                    break;
+                }
+                // Skip the offending byte and carry on.
+                if (sourceLength) {
+                    sourcePointer += 1;
+                    sourceLength -= 1;
+                }
+                break;
+            case kTECPartialCharErr: {
+                // Put the partial character into the buffer.
+                ASSERT(m_numBufferedBytes == 0);
+                // Capacity of the partial-character buffer itself, not the size of its byte counter.
+                const int bufferSize = sizeof(m_bufferedBytes);
+                if (sourceLength < bufferSize) {
+                    memcpy(m_bufferedBytes, sourcePointer, sourceLength);
+                    m_numBufferedBytes = sourceLength;
+                } else {
+                    LOG_ERROR("TECConvertText gave a kTECPartialCharErr, but left %u bytes in the buffer", sourceLength);
+                }
+                sourceLength = 0;
+                break;
+            }
+            default:
+                sawError = true;
+                return String();
+        }
+
+        ASSERT(!(bytesWritten % sizeof(UChar)));
+        result.append(buffer, bytesWritten / sizeof(UChar));
+
+        bufferWasFull = status == kTECOutputBufferFullStatus;
+    }
+
+    if (flush) {
+        unsigned long bytesWritten = 0;
+        TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten);
+        ASSERT(!(bytesWritten % sizeof(UChar)));
+        result.append(buffer, bytesWritten / sizeof(UChar));
+    }
+
+    String resultString = String::adopt(result);
+
+    // <rdar://problem/3225472>
+    // Simplified Chinese pages use the code A3A0 to mean "full-width space".
+    // But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice.
+    // To work around, just change all occurrences of U+E5E5 to U+3000 (ideographic space).
+    if (m_encoding == kCFStringEncodingGB_18030_2000)
+        resultString.replace(0xE5E5, ideographicSpace);
+
+    return resultString;
+}
+
+// Encodes |length| UTF-16 code units at |characters| into m_encoding using CFStringGetBytes.
+// Backslashes are first replaced with m_backslashAsCurrencySymbol. Each character that
+// CFStringGetBytes stops on is replaced by the output of getUnencodableReplacement() for
+// |handling|; a high/low surrogate pair is combined into one code point before that call.
+// Returns the encoded bytes as a CString.
+CString TextCodecMac::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    // FIXME: We should really use TEC here instead of CFString for consistency with the other direction.
+
+    // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
+    // Encoding will change the yen sign back into a backslash.
+    String source(characters, length);
+    source.replace('\\', m_backslashAsCurrencySymbol);
+    RetainPtr<CFStringRef> cfSource(AdoptCF, source.createCFString());
+
+    const UInt8 lossByte = handling == QuestionMarksForUnencodables ? '?' : 0;
+    Vector<char> encoded;
+    size_t encodedSize = 0;
+    CFIndex rangeStart = 0;
+    CFIndex remaining = CFStringGetLength(cfSource.get());
+    while (remaining > 0) {
+        const CFRange range = CFRangeMake(rangeStart, remaining);
+
+        // First pass: no destination buffer, only ask how many bytes the range needs.
+        CFIndex byteCount;
+        CFStringGetBytes(cfSource.get(), range, m_encoding, lossByte, false, NULL, 0x7FFFFFFF, &byteCount);
+
+        // Second pass: make room and convert for real.
+        encoded.grow(encodedSize + byteCount);
+        unsigned char* destination = reinterpret_cast<unsigned char*>(encoded.data() + encodedSize);
+        CFIndex consumed = CFStringGetBytes(cfSource.get(), range, m_encoding, lossByte, false, destination, byteCount, &byteCount);
+        encodedSize += byteCount;
+
+        if (consumed != remaining) {
+            // Conversion stopped early; the character it stopped on gets a replacement.
+            unsigned unencodable = CFStringGetCharacterAtIndex(cfSource.get(), rangeStart + consumed);
+            ++consumed;
+            const bool isHighSurrogate = (unencodable & 0xFC00) == 0xD800;
+            if (isHighSurrogate && consumed != remaining) {
+                UniChar low = CFStringGetCharacterAtIndex(cfSource.get(), rangeStart + consumed);
+                if ((low & 0xFC00) == 0xDC00) { // is low surrogate
+                    // Combine the surrogate pair into a single code point.
+                    unencodable = (unencodable << 10) + low + (0x10000 - (0xD800 << 10) - 0xDC00);
+                    ++consumed;
+                }
+            }
+            UnencodableReplacementArray replacement;
+            int replacementLength = getUnencodableReplacement(unencodable, handling, replacement);
+            encoded.grow(encodedSize + replacementLength);
+            memcpy(encoded.data() + encodedSize, replacement, replacementLength);
+            encodedSize += replacementLength;
+        }
+
+        rangeStart += consumed;
+        remaining -= consumed;
+    }
+    return CString(encoded.data(), encodedSize);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/mac/TextCodecMac.h b/Source/WebCore/platform/text/mac/TextCodecMac.h
new file mode 100644
index 0000000..3e7a237
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/TextCodecMac.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc.  All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodecMac_h
+#define TextCodecMac_h
+
+#include "TextCodec.h"
+#include <CoreServices/CoreServices.h>
+
+namespace WebCore {
+
+    // Alias for the global-namespace ::TextEncoding type; the explicit :: keeps it distinct
+    // from any TextEncoding name visible inside namespace WebCore.
+    typedef ::TextEncoding TECTextEncodingID;
+    // Sentinel used for "no encoding" (e.g. an empty cached-converter slot).
+    const TECTextEncodingID invalidEncoding = kCFStringEncodingInvalidId;
+
+    // TextCodec implementation for the encoding identified by a TECTextEncodingID.
+    class TextCodecMac : public TextCodec {
+    public:
+        // Registration entry points; each takes the registrar callback to invoke.
+        static void registerEncodingNames(EncodingNameRegistrar);
+        static void registerCodecs(TextCodecRegistrar);
+
+        explicit TextCodecMac(TECTextEncodingID);
+        virtual ~TextCodecMac();
+
+        // Bytes in this codec's encoding -> String.
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+        // UTF-16 code units -> bytes in this codec's encoding.
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+    private:
+        // Single conversion pass; inputLength/outputLength receive the byte counts
+        // consumed from inputBuffer and written to outputBuffer.
+        OSStatus decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength,
+            void* outputBuffer, int outputBufferLength, int& outputLength);
+
+        // const because they only touch the mutable m_converterTEC.
+        OSStatus createTECConverter() const;
+        void releaseTECConverter() const;
+
+        TECTextEncodingID m_encoding;
+        // Character that backslashes are replaced with before encoding.
+        UChar m_backslashAsCurrencySymbol;
+        // Number of valid bytes currently held in m_bufferedBytes.
+        unsigned m_numBufferedBytes;
+        unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+        // Converter handle; 0 when this codec does not currently hold one.
+        mutable TECObjectRef m_converterTEC;
+    };
+
+    // Pairs a TEC converter with the encoding it belongs to. Starts out empty
+    // (no converter, invalidEncoding) and disposes of its converter, if any, on destruction.
+    struct TECConverterWrapper {
+        TECConverterWrapper()
+            : converter(0)
+            , encoding(invalidEncoding)
+        {
+        }
+
+        ~TECConverterWrapper()
+        {
+            if (!converter)
+                return;
+            TECDisposeConverter(converter);
+        }
+
+        TECObjectRef converter;
+        TECTextEncodingID encoding;
+    };
+
+} // namespace WebCore
+
+#endif // TextCodecMac_h
diff --git a/Source/WebCore/platform/text/mac/character-sets.txt b/Source/WebCore/platform/text/mac/character-sets.txt
new file mode 100644
index 0000000..475e78e
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/character-sets.txt
@@ -0,0 +1,1868 @@
+
+===================================================================
+CHARACTER SETS
+
+(last updated 28 January 2005)
+
+These are the official names for character sets that may be used in
+the Internet and may be referred to in Internet documentation.  These
+names are expressed in ANSI_X3.4-1968 which is commonly called
+US-ASCII or simply ASCII.  The character set most commonly used in the
+Internet and used especially in protocol standards is US-ASCII; this
+is strongly encouraged.  The use of the name US-ASCII is also
+encouraged.
+
+The character set names may be up to 40 characters taken from the
+printable characters of US-ASCII.  However, no distinction is made
+between use of upper and lower case letters.
+
+The MIBenum value is a unique value for use in MIBs to identify coded
+character sets.
+
+The value space for MIBenum values has been divided into three
+regions. The first region (3-999) consists of coded character sets
+that have been standardized by some standard setting organization.
+This region is intended for standards that do not have subset
+implementations. The second region (1000-1999) is for the Unicode and
+ISO/IEC 10646 coded character sets together with a specification of a
+(set of) sub-repertoires that may occur.  The third region (>1999) is
+intended for vendor specific coded character sets.
+
+	Assigned MIB enum Numbers
+	-------------------------
+	0-2		Reserved
+	3-999		Set By Standards Organizations
+	1000-1999	Unicode / 10646
+	2000-2999	Vendor
+
+The aliases that start with "cs" have been added for use with the
+IANA-CHARSET-MIB as originally defined in RFC3808, and as currently
+maintained by IANA at http://www.iana.org/assignments/ianacharset-mib.
+Note that the ianacharset-mib needs to be kept in sync with this
+registry.  These aliases that start with "cs" contain the standard 
+numbers along with suggestive names in order to facilitate applications 
+that want to display the names in user interfaces.  The "cs" stands 
+for character set and is provided for applications that need a lower 
+case first letter but want to use mixed case thereafter that cannot 
+contain any special characters, such as underbar ("_") and dash ("-").  
+
+If the character set is from an ISO standard, its cs alias is the ISO
+standard number or name.  If the character set is not from an ISO
+standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO
+Registration Authority), the ISO Registry number is specified as
+ISOnnn followed by letters suggestive of the name or standards number
+of the code set.  When a national or international standard is
+revised, the year of revision is added to the cs alias of the new
+character set entry in the IANA Registry in order to distinguish the
+revised character set from the original character set.
+
+
+Character Set                                               Reference
+-------------                                               ---------
+
+Name: ANSI_X3.4-1968                                   [RFC1345,KXS2]
+MIBenum: 3
+Source: ECMA registry
+Alias: iso-ir-6
+Alias: ANSI_X3.4-1986
+Alias: ISO_646.irv:1991
+Alias: ASCII
+Alias: ISO646-US
+Alias: US-ASCII (preferred MIME name)
+Alias: us
+Alias: IBM367
+Alias: cp367
+Alias: csASCII
+
+Name: ISO-10646-UTF-1
+MIBenum: 27
+Source: Universal Transfer Format (1), this is the multibyte
+        encoding, that subsets ASCII-7. It does not have byte
+        ordering issues.
+Alias: csISO10646UTF1
+
+Name: ISO_646.basic:1983                                [RFC1345,KXS2]
+MIBenum: 28
+Source: ECMA registry
+Alias: ref
+Alias: csISO646basic1983
+
+Name: INVARIANT                                         [RFC1345,KXS2]
+MIBenum: 29
+Alias: csINVARIANT
+
+Name: ISO_646.irv:1983                                  [RFC1345,KXS2]
+MIBenum: 30
+Source: ECMA registry
+Alias: iso-ir-2
+Alias: irv
+Alias: csISO2IntlRefVersion
+
+Name: BS_4730                                           [RFC1345,KXS2]
+MIBenum: 20
+Source: ECMA registry
+Alias: iso-ir-4
+Alias: ISO646-GB
+Alias: gb
+Alias: uk
+Alias: csISO4UnitedKingdom
+
+Name: NATS-SEFI                                         [RFC1345,KXS2]
+MIBenum: 31
+Source: ECMA registry
+Alias: iso-ir-8-1
+Alias: csNATSSEFI
+
+Name: NATS-SEFI-ADD                                     [RFC1345,KXS2]
+MIBenum: 32
+Source: ECMA registry
+Alias: iso-ir-8-2
+Alias: csNATSSEFIADD
+
+Name: NATS-DANO                                         [RFC1345,KXS2]
+MIBenum: 33
+Source: ECMA registry
+Alias: iso-ir-9-1
+Alias: csNATSDANO
+
+Name: NATS-DANO-ADD                                     [RFC1345,KXS2]
+MIBenum: 34
+Source: ECMA registry
+Alias: iso-ir-9-2
+Alias: csNATSDANOADD
+
+Name: SEN_850200_B                                      [RFC1345,KXS2]
+MIBenum: 35
+Source: ECMA registry
+Alias: iso-ir-10
+Alias: FI
+Alias: ISO646-FI
+Alias: ISO646-SE
+Alias: se
+Alias: csISO10Swedish
+
+Name: SEN_850200_C                                      [RFC1345,KXS2]
+MIBenum: 21
+Source: ECMA registry
+Alias: iso-ir-11
+Alias: ISO646-SE2
+Alias: se2
+Alias: csISO11SwedishForNames
+
+Name: KS_C_5601-1987                                    [RFC1345,KXS2]
+MIBenum: 36
+Source: ECMA registry
+Alias: iso-ir-149
+Alias: KS_C_5601-1989
+Alias: KSC_5601
+Alias: korean
+Alias: csKSC56011987
+
+Name: ISO-2022-KR  (preferred MIME name)                [RFC1557,Choi]
+MIBenum: 37
+Source: RFC-1557 (see also KS_C_5601-1987)
+Alias: csISO2022KR
+
+Name: EUC-KR  (preferred MIME name)                     [RFC1557,Choi]
+MIBenum: 38
+Source: RFC-1557 (see also KS_C_5861-1992)
+Alias: csEUCKR
+
+Name: ISO-2022-JP  (preferred MIME name)               [RFC1468,Murai]
+MIBenum: 39
+Source: RFC-1468 (see also RFC-2237)
+Alias: csISO2022JP
+
+Name: ISO-2022-JP-2  (preferred MIME name)              [RFC1554,Ohta]
+MIBenum: 40
+Source: RFC-1554
+Alias: csISO2022JP2
+
+Name: ISO-2022-CN                                            [RFC1922]
+MIBenum: 104
+Source: RFC-1922
+
+Name: ISO-2022-CN-EXT                                        [RFC1922]
+MIBenum: 105
+Source: RFC-1922
+
+Name: JIS_C6220-1969-jp                                 [RFC1345,KXS2]
+MIBenum: 41
+Source: ECMA registry
+Alias: JIS_C6220-1969
+Alias: iso-ir-13
+Alias: katakana
+Alias: x0201-7
+Alias: csISO13JISC6220jp
+
+Name: JIS_C6220-1969-ro                                 [RFC1345,KXS2]
+MIBenum: 42
+Source: ECMA registry
+Alias: iso-ir-14
+Alias: jp
+Alias: ISO646-JP
+Alias: csISO14JISC6220ro
+
+Name: IT                                                [RFC1345,KXS2]
+MIBenum: 22
+Source: ECMA registry
+Alias: iso-ir-15
+Alias: ISO646-IT
+Alias: csISO15Italian
+
+Name: PT                                                [RFC1345,KXS2]
+MIBenum: 43
+Source: ECMA registry
+Alias: iso-ir-16
+Alias: ISO646-PT
+Alias: csISO16Portuguese
+
+Name: ES                                                [RFC1345,KXS2]
+MIBenum: 23
+Source: ECMA registry
+Alias: iso-ir-17
+Alias: ISO646-ES
+Alias: csISO17Spanish
+
+Name: greek7-old                                        [RFC1345,KXS2]
+MIBenum: 44
+Source: ECMA registry
+Alias: iso-ir-18
+Alias: csISO18Greek7Old
+
+Name: latin-greek                                       [RFC1345,KXS2]
+MIBenum: 45
+Source: ECMA registry
+Alias: iso-ir-19
+Alias: csISO19LatinGreek
+
+Name: DIN_66003                                         [RFC1345,KXS2]
+MIBenum: 24
+Source: ECMA registry
+Alias: iso-ir-21
+Alias: de
+Alias: ISO646-DE
+Alias: csISO21German
+
+Name: NF_Z_62-010_(1973)                                [RFC1345,KXS2]
+MIBenum: 46
+Source: ECMA registry
+Alias: iso-ir-25
+Alias: ISO646-FR1
+Alias: csISO25French
+
+Name: Latin-greek-1                                     [RFC1345,KXS2]
+MIBenum: 47
+Source: ECMA registry
+Alias: iso-ir-27
+Alias: csISO27LatinGreek1
+
+Name: ISO_5427                                          [RFC1345,KXS2]
+MIBenum: 48
+Source: ECMA registry
+Alias: iso-ir-37
+Alias: csISO5427Cyrillic
+
+Name: JIS_C6226-1978                                    [RFC1345,KXS2]
+MIBenum: 49
+Source: ECMA registry
+Alias: iso-ir-42
+Alias: csISO42JISC62261978
+
+Name: BS_viewdata                                       [RFC1345,KXS2]
+MIBenum: 50
+Source: ECMA registry
+Alias: iso-ir-47
+Alias: csISO47BSViewdata
+
+Name: INIS                                              [RFC1345,KXS2]
+MIBenum: 51
+Source: ECMA registry
+Alias: iso-ir-49
+Alias: csISO49INIS
+
+Name: INIS-8                                            [RFC1345,KXS2]
+MIBenum: 52
+Source: ECMA registry
+Alias: iso-ir-50
+Alias: csISO50INIS8
+
+Name: INIS-cyrillic                                     [RFC1345,KXS2]
+MIBenum: 53
+Source: ECMA registry
+Alias: iso-ir-51
+Alias: csISO51INISCyrillic
+
+Name: ISO_5427:1981                                     [RFC1345,KXS2]
+MIBenum: 54
+Source: ECMA registry
+Alias: iso-ir-54
+Alias: ISO5427Cyrillic1981
+
+Name: ISO_5428:1980                                     [RFC1345,KXS2]
+MIBenum: 55
+Source: ECMA registry
+Alias: iso-ir-55
+Alias: csISO5428Greek
+
+Name: GB_1988-80                                        [RFC1345,KXS2]
+MIBenum: 56
+Source: ECMA registry
+Alias: iso-ir-57
+Alias: cn
+Alias: ISO646-CN
+Alias: csISO57GB1988
+
+Name: GB_2312-80                                        [RFC1345,KXS2]
+MIBenum: 57
+Source: ECMA registry
+Alias: iso-ir-58
+Alias: chinese
+Alias: csISO58GB231280
+
+Name: NS_4551-1                                         [RFC1345,KXS2]
+MIBenum: 25
+Source: ECMA registry
+Alias: iso-ir-60
+Alias: ISO646-NO
+Alias: no
+Alias: csISO60DanishNorwegian
+Alias: csISO60Norwegian1
+
+Name: NS_4551-2                                          [RFC1345,KXS2]
+MIBenum: 58
+Source: ECMA registry
+Alias: ISO646-NO2
+Alias: iso-ir-61
+Alias: no2
+Alias: csISO61Norwegian2
+
+Name: NF_Z_62-010                                        [RFC1345,KXS2]
+MIBenum: 26
+Source: ECMA registry
+Alias: iso-ir-69
+Alias: ISO646-FR
+Alias: fr
+Alias: csISO69French
+
+Name: videotex-suppl                                     [RFC1345,KXS2]
+MIBenum: 59
+Source: ECMA registry
+Alias: iso-ir-70
+Alias: csISO70VideotexSupp1
+
+Name: PT2                                                [RFC1345,KXS2]
+MIBenum: 60
+Source: ECMA registry
+Alias: iso-ir-84
+Alias: ISO646-PT2
+Alias: csISO84Portuguese2
+
+Name: ES2                                                [RFC1345,KXS2]
+MIBenum: 61
+Source: ECMA registry
+Alias: iso-ir-85
+Alias: ISO646-ES2
+Alias: csISO85Spanish2
+
+Name: MSZ_7795.3                                         [RFC1345,KXS2]
+MIBenum: 62
+Source: ECMA registry
+Alias: iso-ir-86
+Alias: ISO646-HU
+Alias: hu
+Alias: csISO86Hungarian
+
+Name: JIS_C6226-1983                                     [RFC1345,KXS2]
+MIBenum: 63
+Source: ECMA registry
+Alias: iso-ir-87
+Alias: x0208
+Alias: JIS_X0208-1983
+Alias: csISO87JISX0208
+
+Name: greek7                                             [RFC1345,KXS2]
+MIBenum: 64
+Source: ECMA registry
+Alias: iso-ir-88
+Alias: csISO88Greek7
+
+Name: ASMO_449                                           [RFC1345,KXS2]
+MIBenum: 65
+Source: ECMA registry
+Alias: ISO_9036
+Alias: arabic7
+Alias: iso-ir-89
+Alias: csISO89ASMO449
+
+Name: iso-ir-90                                          [RFC1345,KXS2]
+MIBenum: 66
+Source: ECMA registry
+Alias: csISO90
+
+Name: JIS_C6229-1984-a                                   [RFC1345,KXS2]
+MIBenum: 67
+Source: ECMA registry
+Alias: iso-ir-91
+Alias: jp-ocr-a
+Alias: csISO91JISC62291984a
+
+Name: JIS_C6229-1984-b                                   [RFC1345,KXS2]
+MIBenum: 68
+Source: ECMA registry
+Alias: iso-ir-92
+Alias: ISO646-JP-OCR-B
+Alias: jp-ocr-b
+Alias: csISO92JISC62991984b
+
+Name: JIS_C6229-1984-b-add                               [RFC1345,KXS2]
+MIBenum: 69
+Source: ECMA registry
+Alias: iso-ir-93
+Alias: jp-ocr-b-add
+Alias: csISO93JIS62291984badd
+
+Name: JIS_C6229-1984-hand                                [RFC1345,KXS2]
+MIBenum: 70
+Source: ECMA registry
+Alias: iso-ir-94
+Alias: jp-ocr-hand
+Alias: csISO94JIS62291984hand
+
+Name: JIS_C6229-1984-hand-add                            [RFC1345,KXS2]
+MIBenum: 71
+Source: ECMA registry
+Alias: iso-ir-95
+Alias: jp-ocr-hand-add
+Alias: csISO95JIS62291984handadd
+
+Name: JIS_C6229-1984-kana                                [RFC1345,KXS2]
+MIBenum: 72
+Source: ECMA registry
+Alias: iso-ir-96
+Alias: csISO96JISC62291984kana
+
+Name: ISO_2033-1983                                      [RFC1345,KXS2]
+MIBenum: 73
+Source: ECMA registry
+Alias: iso-ir-98
+Alias: e13b
+Alias: csISO2033
+
+Name: ANSI_X3.110-1983                                   [RFC1345,KXS2]
+MIBenum: 74
+Source: ECMA registry
+Alias: iso-ir-99
+Alias: CSA_T500-1983
+Alias: NAPLPS
+Alias: csISO99NAPLPS
+
+Name: ISO_8859-1:1987                                    [RFC1345,KXS2]
+MIBenum: 4
+Source: ECMA registry
+Alias: iso-ir-100
+Alias: ISO_8859-1
+Alias: ISO-8859-1 (preferred MIME name)
+Alias: latin1
+Alias: l1
+Alias: IBM819
+Alias: CP819
+Alias: csISOLatin1
+
+Name: ISO_8859-2:1987                                    [RFC1345,KXS2]
+MIBenum: 5
+Source: ECMA registry
+Alias: iso-ir-101
+Alias: ISO_8859-2
+Alias: ISO-8859-2 (preferred MIME name)
+Alias: latin2
+Alias: l2
+Alias: csISOLatin2
+
+Name: T.61-7bit                                          [RFC1345,KXS2]
+MIBenum: 75
+Source: ECMA registry
+Alias: iso-ir-102
+Alias: csISO102T617bit
+
+Name: T.61-8bit                                          [RFC1345,KXS2]
+MIBenum: 76
+Alias: T.61
+Source: ECMA registry
+Alias: iso-ir-103
+Alias: csISO103T618bit
+
+Name: ISO_8859-3:1988                                    [RFC1345,KXS2]
+MIBenum: 6
+Source: ECMA registry
+Alias: iso-ir-109
+Alias: ISO_8859-3
+Alias: ISO-8859-3 (preferred MIME name)
+Alias: latin3
+Alias: l3
+Alias: csISOLatin3
+
+Name: ISO_8859-4:1988                                    [RFC1345,KXS2]
+MIBenum: 7
+Source: ECMA registry
+Alias: iso-ir-110
+Alias: ISO_8859-4
+Alias: ISO-8859-4 (preferred MIME name)
+Alias: latin4
+Alias: l4
+Alias: csISOLatin4
+
+Name: ECMA-cyrillic                                     
+MIBenum: 77
+Source: ISO registry (formerly ECMA registry)
+         http://www.itscj.ipsj.jp/ISO-IR/111.pdf
+Alias: iso-ir-111
+Alias: KOI8-E
+Alias: csISO111ECMACyrillic
+
+Name: CSA_Z243.4-1985-1                                  [RFC1345,KXS2]
+MIBenum: 78
+Source: ECMA registry
+Alias: iso-ir-121
+Alias: ISO646-CA
+Alias: csa7-1
+Alias: ca
+Alias: csISO121Canadian1
+
+Name: CSA_Z243.4-1985-2                                  [RFC1345,KXS2]
+MIBenum: 79
+Source: ECMA registry
+Alias: iso-ir-122
+Alias: ISO646-CA2
+Alias: csa7-2
+Alias: csISO122Canadian2
+
+Name: CSA_Z243.4-1985-gr                                 [RFC1345,KXS2]
+MIBenum: 80
+Source: ECMA registry
+Alias: iso-ir-123
+Alias: csISO123CSAZ24341985gr
+
+Name: ISO_8859-6:1987                                    [RFC1345,KXS2]
+MIBenum: 9
+Source: ECMA registry
+Alias: iso-ir-127
+Alias: ISO_8859-6
+Alias: ISO-8859-6 (preferred MIME name)
+Alias: ECMA-114
+Alias: ASMO-708
+Alias: arabic
+Alias: csISOLatinArabic
+
+Name: ISO_8859-6-E                                       [RFC1556,IANA]
+MIBenum: 81
+Source: RFC1556
+Alias: csISO88596E
+Alias: ISO-8859-6-E (preferred MIME name)
+
+Name: ISO_8859-6-I                                       [RFC1556,IANA]
+MIBenum: 82
+Source: RFC1556
+Alias: csISO88596I
+Alias: ISO-8859-6-I (preferred MIME name)
+
+Name: ISO_8859-7:1987                            [RFC1947,RFC1345,KXS2]
+MIBenum: 10
+Source: ECMA registry
+Alias: iso-ir-126
+Alias: ISO_8859-7
+Alias: ISO-8859-7 (preferred MIME name)
+Alias: ELOT_928
+Alias: ECMA-118
+Alias: greek
+Alias: greek8
+Alias: csISOLatinGreek
+
+Name: T.101-G2                                            [RFC1345,KXS2]
+MIBenum: 83
+Source: ECMA registry
+Alias: iso-ir-128
+Alias: csISO128T101G2
+
+Name: ISO_8859-8:1988                                     [RFC1345,KXS2]
+MIBenum: 11
+Source: ECMA registry
+Alias: iso-ir-138
+Alias: ISO_8859-8
+Alias: ISO-8859-8 (preferred MIME name)
+Alias: hebrew
+Alias: csISOLatinHebrew
+
+Name: ISO_8859-8-E                                  [RFC1556,Nussbacher]
+MIBenum: 84
+Source: RFC1556
+Alias: csISO88598E
+Alias: ISO-8859-8-E (preferred MIME name)
+
+Name: ISO_8859-8-I                                  [RFC1556,Nussbacher]
+MIBenum: 85
+Source: RFC1556
+Alias: csISO88598I
+Alias: ISO-8859-8-I (preferred MIME name)
+
+Name: CSN_369103                                          [RFC1345,KXS2]
+MIBenum: 86
+Source: ECMA registry
+Alias: iso-ir-139
+Alias: csISO139CSN369103
+
+Name: JUS_I.B1.002                                        [RFC1345,KXS2]
+MIBenum: 87
+Source: ECMA registry
+Alias: iso-ir-141
+Alias: ISO646-YU
+Alias: js
+Alias: yu
+Alias: csISO141JUSIB1002
+
+Name: ISO_6937-2-add                                      [RFC1345,KXS2]
+MIBenum: 14
+Source: ECMA registry and ISO 6937-2:1983
+Alias: iso-ir-142
+Alias: csISOTextComm
+
+Name: IEC_P27-1                                           [RFC1345,KXS2]
+MIBenum: 88
+Source: ECMA registry
+Alias: iso-ir-143
+Alias: csISO143IECP271
+
+Name: ISO_8859-5:1988                                     [RFC1345,KXS2]
+MIBenum: 8
+Source: ECMA registry
+Alias: iso-ir-144
+Alias: ISO_8859-5
+Alias: ISO-8859-5 (preferred MIME name)
+Alias: cyrillic
+Alias: csISOLatinCyrillic
+
+Name: JUS_I.B1.003-serb                                   [RFC1345,KXS2]
+MIBenum: 89
+Source: ECMA registry
+Alias: iso-ir-146
+Alias: serbian
+Alias: csISO146Serbian
+
+Name: JUS_I.B1.003-mac                                    [RFC1345,KXS2]
+MIBenum: 90
+Source: ECMA registry
+Alias: macedonian
+Alias: iso-ir-147
+Alias: csISO147Macedonian
+
+Name: ISO_8859-9:1989                                     [RFC1345,KXS2]
+MIBenum: 12
+Source: ECMA registry
+Alias: iso-ir-148
+Alias: ISO_8859-9
+Alias: ISO-8859-9 (preferred MIME name)
+Alias: latin5
+Alias: l5
+Alias: csISOLatin5
+
+Name: greek-ccitt                                         [RFC1345,KXS2]
+MIBenum: 91
+Source: ECMA registry
+Alias: iso-ir-150
+Alias: csISO150
+Alias: csISO150GreekCCITT
+
+Name: NC_NC00-10:81                                       [RFC1345,KXS2]
+MIBenum: 92
+Source: ECMA registry
+Alias: cuba
+Alias: iso-ir-151
+Alias: ISO646-CU
+Alias: csISO151Cuba
+
+Name: ISO_6937-2-25                                       [RFC1345,KXS2]
+MIBenum: 93
+Source: ECMA registry
+Alias: iso-ir-152
+Alias: csISO6937Add
+
+Name: GOST_19768-74                                       [RFC1345,KXS2]
+MIBenum: 94
+Source: ECMA registry
+Alias: ST_SEV_358-88
+Alias: iso-ir-153
+Alias: csISO153GOST1976874
+
+Name: ISO_8859-supp                                       [RFC1345,KXS2]
+MIBenum: 95
+Source: ECMA registry
+Alias: iso-ir-154
+Alias: latin1-2-5
+Alias: csISO8859Supp
+
+Name: ISO_10367-box                                       [RFC1345,KXS2]
+MIBenum: 96
+Source: ECMA registry
+Alias: iso-ir-155
+Alias: csISO10367Box
+
+Name: ISO-8859-10 (preferred MIME name)			  [RFC1345,KXS2]
+MIBenum: 13
+Source: ECMA registry
+Alias: iso-ir-157
+Alias: l6
+Alias: ISO_8859-10:1992
+Alias: csISOLatin6
+Alias: latin6
+
+Name: latin-lap                                           [RFC1345,KXS2]
+MIBenum: 97
+Source: ECMA registry
+Alias: lap
+Alias: iso-ir-158
+Alias: csISO158Lap
+
+Name: JIS_X0212-1990                                      [RFC1345,KXS2]
+MIBenum: 98
+Source: ECMA registry
+Alias: x0212
+Alias: iso-ir-159
+Alias: csISO159JISX02121990
+
+Name: DS_2089                                             [RFC1345,KXS2]
+MIBenum: 99
+Source: Danish Standard, DS 2089, February 1974
+Alias: DS2089
+Alias: ISO646-DK
+Alias: dk
+Alias: csISO646Danish
+
+Name: us-dk                                               [RFC1345,KXS2]
+MIBenum: 100
+Alias: csUSDK
+
+Name: dk-us                                               [RFC1345,KXS2]
+MIBenum: 101
+Alias: csDKUS
+
+Name: JIS_X0201                                           [RFC1345,KXS2]
+MIBenum: 15
+Source: JIS X 0201-1976.   One byte only, this is equivalent to 
+        JIS/Roman (similar to ASCII) plus eight-bit half-width 
+        Katakana
+Alias: X0201
+Alias: csHalfWidthKatakana
+
+Name: KSC5636                                             [RFC1345,KXS2]
+MIBenum: 102
+Alias: ISO646-KR
+Alias: csKSC5636
+
+Name: ISO-10646-UCS-2
+MIBenum: 1000
+Source: the 2-octet Basic Multilingual Plane, aka Unicode
+        this needs to specify network byte order: the standard
+        does not specify (it is a 16-bit integer space)
+Alias: csUnicode
+
+Name: ISO-10646-UCS-4
+MIBenum: 1001
+Source: the full code space. (same comment about byte order,
+        these are 31-bit numbers.)
+Alias: csUCS4
+
+Name: DEC-MCS                                             [RFC1345,KXS2]
+MIBenum: 2008
+Source: VAX/VMS User's Manual, 
+        Order Number: AI-Y517A-TE, April 1986.
+Alias: dec
+Alias: csDECMCS
+
+Name: hp-roman8                                  [HP-PCL5,RFC1345,KXS2]
+MIBenum: 2004
+Source: LaserJet IIP Printer User's Manual, 
+        HP part no 33471-90901, Hewlett-Packard, June 1989.
+Alias: roman8
+Alias: r8
+Alias: csHPRoman8
+
+Name: macintosh                                           [RFC1345,KXS2]
+MIBenum: 2027
+Source: The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991
+Alias: mac
+Alias: csMacintosh
+
+Name: IBM037                                              [RFC1345,KXS2]
+MIBenum: 2028
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp037
+Alias: ebcdic-cp-us
+Alias: ebcdic-cp-ca
+Alias: ebcdic-cp-wt
+Alias: ebcdic-cp-nl
+Alias: csIBM037
+
+Name: IBM038                                              [RFC1345,KXS2]
+MIBenum: 2029
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-INT
+Alias: cp038
+Alias: csIBM038
+
+Name: IBM273                                              [RFC1345,KXS2]
+MIBenum: 2030
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP273
+Alias: csIBM273
+
+Name: IBM274                                              [RFC1345,KXS2]
+MIBenum: 2031
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-BE
+Alias: CP274
+Alias: csIBM274
+
+Name: IBM275                                              [RFC1345,KXS2]
+MIBenum: 2032
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-BR
+Alias: cp275
+Alias: csIBM275
+
+Name: IBM277                                              [RFC1345,KXS2]
+MIBenum: 2033
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-CP-DK
+Alias: EBCDIC-CP-NO
+Alias: csIBM277
+
+Name: IBM278                                              [RFC1345,KXS2]
+MIBenum: 2034
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP278
+Alias: ebcdic-cp-fi
+Alias: ebcdic-cp-se
+Alias: csIBM278
+
+Name: IBM280                                              [RFC1345,KXS2]
+MIBenum: 2035
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP280
+Alias: ebcdic-cp-it
+Alias: csIBM280
+
+Name: IBM281                                              [RFC1345,KXS2]
+MIBenum: 2036
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-JP-E
+Alias: cp281
+Alias: csIBM281
+
+Name: IBM284                                              [RFC1345,KXS2]
+MIBenum: 2037
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP284
+Alias: ebcdic-cp-es
+Alias: csIBM284
+
+Name: IBM285                                              [RFC1345,KXS2]
+MIBenum: 2038
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP285
+Alias: ebcdic-cp-gb
+Alias: csIBM285
+
+Name: IBM290                                              [RFC1345,KXS2]
+MIBenum: 2039
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: cp290
+Alias: EBCDIC-JP-kana
+Alias: csIBM290
+
+Name: IBM297                                              [RFC1345,KXS2]
+MIBenum: 2040
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp297
+Alias: ebcdic-cp-fr
+Alias: csIBM297
+
+Name: IBM420                                              [RFC1345,KXS2]
+MIBenum: 2041
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990, 
+        IBM NLS RM p 11-11
+Alias: cp420
+Alias: ebcdic-cp-ar1
+Alias: csIBM420
+
+Name: IBM423                                              [RFC1345,KXS2]
+MIBenum: 2042
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp423
+Alias: ebcdic-cp-gr
+Alias: csIBM423
+
+Name: IBM424                                              [RFC1345,KXS2]
+MIBenum: 2043
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp424
+Alias: ebcdic-cp-he
+Alias: csIBM424
+
+Name: IBM437                                              [RFC1345,KXS2]
+MIBenum: 2011
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp437
+Alias: 437
+Alias: csPC8CodePage437
+
+Name: IBM500                                              [RFC1345,KXS2]
+MIBenum: 2044
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP500
+Alias: ebcdic-cp-be
+Alias: ebcdic-cp-ch
+Alias: csIBM500
+
+Name: IBM775                                                   [HP-PCL5]
+MIBenum: 2087
+Source: HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996
+Alias: cp775
+Alias: csPC775Baltic
+
+Name: IBM850                                              [RFC1345,KXS2]
+MIBenum: 2009
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp850
+Alias: 850
+Alias: csPC850Multilingual
+
+Name: IBM851                                              [RFC1345,KXS2]
+MIBenum: 2045
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp851
+Alias: 851
+Alias: csIBM851
+
+Name: IBM852                                              [RFC1345,KXS2]
+MIBenum: 2010
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp852
+Alias: 852
+Alias: csPCp852
+
+Name: IBM855                                              [RFC1345,KXS2]
+MIBenum: 2046
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp855
+Alias: 855
+Alias: csIBM855
+
+Name: IBM857                                              [RFC1345,KXS2]
+MIBenum: 2047
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp857
+Alias: 857
+Alias: csIBM857
+
+Name: IBM860                                              [RFC1345,KXS2]
+MIBenum: 2048
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp860
+Alias: 860
+Alias: csIBM860
+
+Name: IBM861                                              [RFC1345,KXS2]
+MIBenum: 2049
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp861
+Alias: 861
+Alias: cp-is
+Alias: csIBM861
+
+Name: IBM862                                              [RFC1345,KXS2]
+MIBenum: 2013
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp862
+Alias: 862
+Alias: csPC862LatinHebrew
+
+Name: IBM863                                              [RFC1345,KXS2]
+MIBenum: 2050
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp863
+Alias: 863
+Alias: csIBM863
+
+Name: IBM864                                              [RFC1345,KXS2]
+MIBenum: 2051
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp864
+Alias: csIBM864
+
+Name: IBM865                                              [RFC1345,KXS2]
+MIBenum: 2052
+Source: IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987)
+Alias: cp865
+Alias: 865
+Alias: csIBM865
+
+Name: IBM866                                                     [Pond]
+MIBenum: 2086
+Source: IBM NLDG Volume 2 (SE09-8002-03) August 1994
+Alias: cp866
+Alias: 866
+Alias: csIBM866
+
+Name: IBM868                                              [RFC1345,KXS2]
+MIBenum: 2053
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP868
+Alias: cp-ar
+Alias: csIBM868
+
+Name: IBM869                                              [RFC1345,KXS2]
+MIBenum: 2054
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp869
+Alias: 869
+Alias: cp-gr
+Alias: csIBM869
+
+Name: IBM870                                              [RFC1345,KXS2]
+MIBenum: 2055
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP870
+Alias: ebcdic-cp-roece
+Alias: ebcdic-cp-yu
+Alias: csIBM870
+
+Name: IBM871                                              [RFC1345,KXS2]
+MIBenum: 2056
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP871
+Alias: ebcdic-cp-is
+Alias: csIBM871
+
+Name: IBM880                                              [RFC1345,KXS2]
+MIBenum: 2057
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp880
+Alias: EBCDIC-Cyrillic
+Alias: csIBM880
+
+Name: IBM891                                              [RFC1345,KXS2]
+MIBenum: 2058
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp891
+Alias: csIBM891
+
+Name: IBM903                                              [RFC1345,KXS2]
+MIBenum: 2059
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp903
+Alias: csIBM903
+
+Name: IBM904                                              [RFC1345,KXS2]
+MIBenum: 2060
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp904
+Alias: 904
+Alias: csIBBM904
+
+Name: IBM905                                              [RFC1345,KXS2]
+MIBenum: 2061
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: CP905
+Alias: ebcdic-cp-tr
+Alias: csIBM905
+
+Name: IBM918                                              [RFC1345,KXS2]
+MIBenum: 2062
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP918
+Alias: ebcdic-cp-ar2
+Alias: csIBM918
+
+Name: IBM1026                                             [RFC1345,KXS2]
+MIBenum: 2063
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP1026
+Alias: csIBM1026
+
+Name: EBCDIC-AT-DE                                        [RFC1345,KXS2]
+MIBenum: 2064
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csIBMEBCDICATDE
+
+Name: EBCDIC-AT-DE-A                                      [RFC1345,KXS2]
+MIBenum: 2065 
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 
+Alias: csEBCDICATDEA
+
+Name: EBCDIC-CA-FR                                        [RFC1345,KXS2]
+MIBenum: 2066
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICCAFR
+
+Name: EBCDIC-DK-NO                                        [RFC1345,KXS2]
+MIBenum: 2067
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNO
+
+Name: EBCDIC-DK-NO-A                                      [RFC1345,KXS2]
+MIBenum: 2068
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNOA
+
+Name: EBCDIC-FI-SE                                        [RFC1345,KXS2]
+MIBenum: 2069
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISE
+
+Name: EBCDIC-FI-SE-A                                      [RFC1345,KXS2]
+MIBenum: 2070
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISEA
+
+Name: EBCDIC-FR                                           [RFC1345,KXS2]
+MIBenum: 2071
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFR
+
+Name: EBCDIC-IT                                           [RFC1345,KXS2]
+MIBenum: 2072
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICIT
+
+Name: EBCDIC-PT                                           [RFC1345,KXS2]
+MIBenum: 2073
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICPT
+
+Name: EBCDIC-ES                                           [RFC1345,KXS2]
+MIBenum: 2074
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICES
+
+Name: EBCDIC-ES-A                                         [RFC1345,KXS2]
+MIBenum: 2075
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESA
+
+Name: EBCDIC-ES-S                                         [RFC1345,KXS2]
+MIBenum: 2076
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESS
+
+Name: EBCDIC-UK                                           [RFC1345,KXS2]
+MIBenum: 2077
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUK
+
+Name: EBCDIC-US                                           [RFC1345,KXS2]
+MIBenum: 2078
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUS
+
+Name: UNKNOWN-8BIT                                             [RFC1428]
+MIBenum: 2079
+Alias: csUnknown8BiT
+
+Name: MNEMONIC                                            [RFC1345,KXS2]
+MIBenum: 2080
+Source: RFC 1345, also known as "mnemonic+ascii+38"
+Alias: csMnemonic
+
+Name: MNEM                                                [RFC1345,KXS2]
+MIBenum: 2081
+Source: RFC 1345, also known as "mnemonic+ascii+8200"
+Alias: csMnem
+
+Name: VISCII                                                   [RFC1456]
+MIBenum: 2082
+Source: RFC 1456
+Alias: csVISCII
+
+Name: VIQR                                                     [RFC1456]
+MIBenum: 2083
+Source: RFC 1456
+Alias: csVIQR
+
+Name: KOI8-R  (preferred MIME name)                            [RFC1489]
+MIBenum: 2084
+Source: RFC 1489, based on GOST-19768-74, ISO-6937/8, 
+        INIS-Cyrillic, ISO-5427.
+Alias: csKOI8R
+
+Name: KOI8-U                                                   [RFC2319]
+MIBenum: 2088
+Source: RFC 2319
+
+Name: IBM00858
+MIBenum: 2089
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00858)    [Mahdi]
+Alias: CCSID00858
+Alias: CP00858
+Alias: PC-Multilingual-850+euro
+
+Name: IBM00924
+MIBenum: 2090
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00924)    [Mahdi]
+Alias: CCSID00924
+Alias: CP00924
+Alias: ebcdic-Latin9--euro
+
+Name: IBM01140
+MIBenum: 2091
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01140)    [Mahdi]
+Alias: CCSID01140
+Alias: CP01140
+Alias: ebcdic-us-37+euro
+
+Name: IBM01141
+MIBenum: 2092
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01141)    [Mahdi]
+Alias: CCSID01141
+Alias: CP01141
+Alias: ebcdic-de-273+euro
+
+Name: IBM01142
+MIBenum: 2093
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01142)    [Mahdi]
+Alias: CCSID01142
+Alias: CP01142
+Alias: ebcdic-dk-277+euro
+Alias: ebcdic-no-277+euro
+
+Name: IBM01143
+MIBenum: 2094
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01143)    [Mahdi]
+Alias: CCSID01143
+Alias: CP01143
+Alias: ebcdic-fi-278+euro
+Alias: ebcdic-se-278+euro
+
+Name: IBM01144
+MIBenum: 2095
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01144)    [Mahdi]
+Alias: CCSID01144
+Alias: CP01144
+Alias: ebcdic-it-280+euro
+
+Name: IBM01145
+MIBenum: 2096
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01145)    [Mahdi]
+Alias: CCSID01145
+Alias: CP01145
+Alias: ebcdic-es-284+euro
+
+Name: IBM01146
+MIBenum: 2097
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01146)    [Mahdi]
+Alias: CCSID01146
+Alias: CP01146
+Alias: ebcdic-gb-285+euro
+
+Name: IBM01147
+MIBenum: 2098
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01147)    [Mahdi]
+Alias: CCSID01147
+Alias: CP01147
+Alias: ebcdic-fr-297+euro
+
+Name: IBM01148
+MIBenum: 2099
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01148)    [Mahdi]
+Alias: CCSID01148
+Alias: CP01148
+Alias: ebcdic-international-500+euro
+
+Name: IBM01149
+MIBenum: 2100
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01149)    [Mahdi]
+Alias: CCSID01149
+Alias: CP01149
+Alias: ebcdic-is-871+euro
+
+Name: Big5-HKSCS                                                  [Yick]
+MIBenum: 2101
+Source:   See (http://www.iana.org/assignments/charset-reg/Big5-HKSCS) 
+Alias: None
+
+Name: IBM1047                                                [Robrigado]
+MIBenum: 2102
+Source: IBM1047 (EBCDIC Latin 1/Open Systems)
+http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
+Alias: IBM-1047
+
+Name: PTCP154                                                    [Uskov]
+MIBenum: 2103
+Source: See (http://www.iana.org/assignments/charset-reg/PTCP154)
+Alias: csPTCP154
+Alias: PT154
+Alias: CP154
+Alias: Cyrillic-Asian
+
+Name:  Amiga-1251
+MIBenum:  2104
+Source:  See (http://www.amiga.ultranet.ru/Amiga-1251.html)
+Alias:  Ami1251
+Alias:  Amiga1251
+Alias:  Ami-1251
+(Aliases are provided for historical reasons and should not be used)
+                                                              [Malyshev]
+															  
+Name:  KOI7-switched
+MIBenum:  2105
+Source:  See <http://www.iana.org/assignments/charset-reg/KOI7-switched>
+Aliases:  None
+
+Name: UNICODE-1-1                                              [RFC1641]
+MIBenum: 1010
+Source: RFC 1641
+Alias: csUnicode11
+
+Name: SCSU
+MIBenum: 1011
+Source: SCSU See (http://www.iana.org/assignments/charset-reg/SCSU)     [Scherer]
+Alias: None 
+
+Name: UTF-7                                                    [RFC2152]
+MIBenum: 1012
+Source: RFC 2152
+Alias: None
+
+Name: UTF-16BE                                                 [RFC2781]
+MIBenum: 1013
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16LE                                                 [RFC2781]
+MIBenum: 1014
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16                                                   [RFC2781]
+MIBenum: 1015
+Source: RFC 2781
+Alias: None
+
+Name: CESU-8                                                    [Phipps]
+MIBenum: 1016
+Source: <http://www.unicode.org/unicode/reports/tr26>
+Alias: csCESU-8
+
+Name: UTF-32                                                     [Davis] 
+MIBenum: 1017
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32BE                                                   [Davis]
+MIBenum: 1018
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32LE                                                   [Davis]
+MIBenum: 1019
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: BOCU-1                                                   [Scherer]
+MIBenum: 1020
+Source: http://www.unicode.org/notes/tn6/
+Alias: csBOCU-1
+
+Name: UNICODE-1-1-UTF-7                                        [RFC1642]
+MIBenum: 103
+Source: RFC 1642
+Alias: csUnicode11UTF7
+
+Name: UTF-8                                                    [RFC3629]
+MIBenum: 106
+Source: RFC 3629
+Alias: None 
+
+Name: ISO-8859-13
+MIBenum: 109
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-13)[Tumasonis] 
+Alias: None
+
+Name: ISO-8859-14
+MIBenum: 110
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-14) [Simonsen]
+Alias: iso-ir-199
+Alias: ISO_8859-14:1998
+Alias: ISO_8859-14
+Alias: latin8
+Alias: iso-celtic
+Alias: l8
+
+Name: ISO-8859-15
+MIBenum: 111
+Source: ISO 
+        Please see: <http://www.iana.org/assignments/charset-reg/ISO-8859-15>
+Alias: ISO_8859-15
+Alias: Latin-9
+
+Name: ISO-8859-16
+MIBenum: 112
+Source: ISO
+Alias: iso-ir-226
+Alias: ISO_8859-16:2001
+Alias: ISO_8859-16
+Alias: latin10
+Alias: l10 
+
+Name: GBK                                                 
+MIBenum: 113
+Source: Chinese IT Standardization Technical Committee  
+        Please see: <http://www.iana.org/assignments/charset-reg/GBK>
+Alias: CP936
+Alias: MS936
+Alias: windows-936
+
+Name: GB18030
+MIBenum: 114
+Source: Chinese IT Standardization Technical Committee
+        Please see: <http://www.iana.org/assignments/charset-reg/GB18030>
+Alias: None
+
+Name:  OSD_EBCDIC_DF04_15
+MIBenum:  115
+Source:  Fujitsu-Siemens standard mainframe EBCDIC encoding
+         Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15>
+Alias:   None
+
+Name:  OSD_EBCDIC_DF03_IRV
+MIBenum:  116
+Source:  Fujitsu-Siemens standard mainframe EBCDIC encoding
+         Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV>
+Alias:  None
+
+Name:  OSD_EBCDIC_DF04_1
+MIBenum:  117
+Source:  Fujitsu-Siemens standard mainframe EBCDIC encoding
+         Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1>
+Alias:  None   
+
+Name: JIS_Encoding
+MIBenum: 16
+Source: JIS X 0202-1991.  Uses ISO 2022 escape sequences to
+        shift code sets as documented in JIS X 0202-1991.
+Alias: csJISEncoding
+
+Name: Shift_JIS  (preferred MIME name)
+MIBenum: 17
+Source: This charset is an extension of csHalfWidthKatakana by
+        adding graphic characters in JIS X 0208.  The CCS's are
+        JIS X0201:1997 and JIS X0208:1997.  The
+        complete definition is shown in Appendix 1 of JIS
+        X0208:1997.
+        This charset can be used for the top-level media type "text".
+Alias: MS_Kanji 
+Alias: csShiftJIS
+
+Name: Extended_UNIX_Code_Packed_Format_for_Japanese
+MIBenum: 18
+Source: Standardized by OSF, UNIX International, and UNIX Systems
+        Laboratories Pacific.  Uses ISO 2022 rules to select
+               code set 0: US-ASCII (a single 7-bit byte set)
+               code set 1: JIS X0208-1990 (a double 8-bit byte set)
+                           restricted to A0-FF in both bytes
+               code set 2: Half Width Katakana (a single 7-bit byte set)
+                           requiring SS2 as the character prefix
+               code set 3: JIS X0212-1990 (a double 7-bit byte set)
+                           restricted to A0-FF in both bytes
+                           requiring SS3 as the character prefix
+Alias: csEUCPkdFmtJapanese
+Alias: EUC-JP  (preferred MIME name)
+
+Name: Extended_UNIX_Code_Fixed_Width_for_Japanese
+MIBenum: 19
+Source: Used in Japan.  Each character is 2 octets.
+                code set 0: US-ASCII (a single 7-bit byte set)
+                              1st byte = 00
+                              2nd byte = 20-7E
+                code set 1: JIS X0208-1990 (a double 7-bit byte set)
+                            restricted  to A0-FF in both bytes 
+                code set 2: Half Width Katakana (a single 7-bit byte set)
+                              1st byte = 00
+                              2nd byte = A0-FF
+                code set 3: JIS X0212-1990 (a double 7-bit byte set)
+                            restricted to A0-FF in 
+                            the first byte
+                and 21-7E in the second byte
+Alias: csEUCFixWidJapanese
+
+Name: ISO-10646-UCS-Basic
+MIBenum: 1002
+Source: ASCII subset of Unicode.  Basic Latin = collection 1
+        See ISO 10646, Appendix A
+Alias: csUnicodeASCII
+
+Name: ISO-10646-Unicode-Latin1
+MIBenum: 1003
+Source: ISO Latin-1 subset of Unicode. Basic Latin and Latin-1 
+         Supplement  = collections 1 and 2.  See ISO 10646, 
+         Appendix A.  See RFC 1815.
+Alias: csUnicodeLatin1
+Alias: ISO-10646
+
+Name: ISO-10646-J-1
+Source: ISO 10646 Japanese, see RFC 1815.
+
+Name: ISO-Unicode-IBM-1261
+MIBenum: 1005
+Source: IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261
+Alias: csUnicodeIBM1261
+
+Name: ISO-Unicode-IBM-1268
+MIBenum: 1006
+Source: IBM Latin-4 Extended Presentation Set, GCSGID: 1268
+Alias: csUnicodeIBM1268
+
+Name: ISO-Unicode-IBM-1276
+MIBenum: 1007
+Source: IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276
+Alias: csUnicodeIBM1276
+
+Name: ISO-Unicode-IBM-1264
+MIBenum: 1008
+Source: IBM Arabic Presentation Set, GCSGID: 1264
+Alias: csUnicodeIBM1264
+
+Name: ISO-Unicode-IBM-1265
+MIBenum: 1009
+Source: IBM Hebrew Presentation Set, GCSGID: 1265
+Alias: csUnicodeIBM1265
+
+Name: ISO-8859-1-Windows-3.0-Latin-1                           [HP-PCL5] 
+MIBenum: 2000
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.0.  
+        PCL Symbol Set id: 9U
+Alias: csWindows30Latin1
+
+Name: ISO-8859-1-Windows-3.1-Latin-1                           [HP-PCL5] 
+MIBenum: 2001
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.1.  
+        PCL Symbol Set id: 19U
+Alias: csWindows31Latin1
+
+Name: ISO-8859-2-Windows-Latin-2                               [HP-PCL5] 
+MIBenum: 2002
+Source: Extended ISO 8859-2.  Latin-2 for Windows 3.1.
+        PCL Symbol Set id: 9E
+Alias: csWindows31Latin2
+
+Name: ISO-8859-9-Windows-Latin-5                               [HP-PCL5] 
+MIBenum: 2003
+Source: Extended ISO 8859-9.  Latin-5 for Windows 3.1
+        PCL Symbol Set id: 5T
+Alias: csWindows31Latin5
+
+Name: Adobe-Standard-Encoding                                    [Adobe]
+MIBenum: 2005
+Source: PostScript Language Reference Manual
+        PCL Symbol Set id: 10J
+Alias: csAdobeStandardEncoding
+
+Name: Ventura-US                                               [HP-PCL5]
+MIBenum: 2006
+Source: Ventura US.  ASCII plus characters typically used in 
+        publishing, like pilcrow, copyright, registered, trade mark, 
+        section, dagger, and double dagger in the range A0 (hex) 
+        to FF (hex).  
+        PCL Symbol Set id: 14J
+Alias: csVenturaUS  
+
+Name: Ventura-International                                    [HP-PCL5]
+MIBenum: 2007
+Source: Ventura International.  ASCII plus coded characters similar 
+        to Roman8.
+        PCL Symbol Set id: 13J
+Alias: csVenturaInternational
+
+Name: PC8-Danish-Norwegian                                     [HP-PCL5]
+MIBenum: 2012
+Source: PC Danish Norwegian
+        8-bit PC set for Danish Norwegian
+        PCL Symbol Set id: 11U
+Alias: csPC8DanishNorwegian
+
+Name: PC8-Turkish                                              [HP-PCL5]
+MIBenum: 2014
+Source: PC Latin Turkish.  PCL Symbol Set id: 9T
+Alias: csPC8Turkish
+
+Name: IBM-Symbols                                             [IBM-CIDT] 
+MIBenum: 2015
+Source: Presentation Set, CPGID: 259
+Alias: csIBMSymbols
+
+Name: IBM-Thai                                                [IBM-CIDT] 
+MIBenum: 2016
+Source: Presentation Set, CPGID: 838
+Alias: csIBMThai
+
+Name: HP-Legal                                                 [HP-PCL5]
+MIBenum: 2017
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 1U
+Alias: csHPLegal
+
+Name: HP-Pi-font                                               [HP-PCL5]
+MIBenum: 2018
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 15U
+Alias: csHPPiFont
+
+Name: HP-Math8                                                 [HP-PCL5]
+MIBenum: 2019
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 8M
+Alias: csHPMath8
+
+Name: Adobe-Symbol-Encoding                                      [Adobe]
+MIBenum: 2020
+Source: PostScript Language Reference Manual
+        PCL Symbol Set id: 5M
+Alias: csHPPSMath
+
+Name: HP-DeskTop                                               [HP-PCL5]
+MIBenum: 2021
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 7J
+Alias: csHPDesktop
+
+Name: Ventura-Math                                             [HP-PCL5]
+MIBenum: 2022
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 6M
+Alias: csVenturaMath
+
+Name: Microsoft-Publishing                                     [HP-PCL5]
+MIBenum: 2023
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+        HP part number 5961-0510, October 1992
+        PCL Symbol Set id: 6J
+Alias: csMicrosoftPublishing
+
+Name: Windows-31J
+MIBenum: 2024
+Source: Windows Japanese.  A further extension of Shift_JIS
+        to include NEC special characters (Row 13), NEC
+        selection of IBM extensions (Rows 89 to 92), and IBM
+        extensions (Rows 115 to 119).  The CCS's are
+        JIS X0201:1997, JIS X0208:1997, and these extensions.
+        This charset can be used for the top-level media type "text",
+        but it is of limited or specialized use (see RFC2278).
+        PCL Symbol Set id: 19K
+Alias: csWindows31J
+
+Name: GB2312  (preferred MIME name)
+MIBenum: 2025
+Source: Chinese for People's Republic of China (PRC) mixed one byte, 
+        two byte set: 
+          20-7E = one byte ASCII 
+          A1-FE = two byte PRC Kanji 
+        See GB 2312-80 
+        PCL Symbol Set Id: 18C
+Alias: csGB2312
+
+Name: Big5  (preferred MIME name)
+MIBenum: 2026
+Source: Chinese for Taiwan Multi-byte set.
+        PCL Symbol Set Id: 18T
+Alias: csBig5
+
+Name: windows-1250
+MIBenum: 2250
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1250) [Lazhintseva]
+Alias: None
+
+Name: windows-1251
+MIBenum: 2251
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1251) [Lazhintseva]
+Alias: None
+
+Name: windows-1252
+MIBenum: 2252
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1252)       [Wendt]
+Alias: None
+
+Name: windows-1253
+MIBenum: 2253
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1253) [Lazhintseva]
+Alias: None
+
+Name: windows-1254
+MIBenum: 2254
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1254) [Lazhintseva]
+Alias: None
+
+Name: windows-1255
+MIBenum: 2255
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1255) [Lazhintseva]
+Alias: None
+
+Name: windows-1256
+MIBenum: 2256
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1256) [Lazhintseva]
+Alias: None 
+
+Name: windows-1257
+MIBenum: 2257
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1257) [Lazhintseva]
+Alias: None
+
+Name: windows-1258
+MIBenum: 2258
+Source: Microsoft  (http://www.iana.org/assignments/charset-reg/windows-1258) [Lazhintseva]
+Alias: None
+
+Name: TIS-620
+MIBenum: 2259
+Source: Thai Industrial Standards Institute (TISI)	     [Tantsetthi]
+
+Name: HZ-GB-2312
+MIBenum: 2085
+Source: RFC 1842, RFC 1843                              [RFC1842, RFC1843]
+
+
+REFERENCES
+----------
+
+[RFC1345]  Simonsen, K., "Character Mnemonics & Character Sets",
+           RFC 1345, Rationel Almen Planlaegning, Rationel Almen
+           Planlaegning, June 1992.
+
+[RFC1428]  Vaudreuil, G., "Transition of Internet Mail from
+           Just-Send-8 to 8bit-SMTP/MIME", RFC1428, CNRI, February
+           1993.
+
+[RFC1456]  Vietnamese Standardization Working Group, "Conventions for
+           Encoding the Vietnamese Language VISCII: VIetnamese 
+           Standard Code for Information Interchange VIQR: VIetnamese 
+           Quoted-Readable Specification Revision 1.1", RFC 1456, May
+           1993.
+
+[RFC1468]  Murai, J., Crispin, M., and E. van der Poel, "Japanese
+           Character Encoding for Internet Messages", RFC 1468,
+           Keio University, Panda Programming, June 1993.
+
+[RFC1489]  Chernov, A., "Registration of a Cyrillic Character Set",
+           RFC1489, RELCOM Development Team, July 1993. 
+
+[RFC1554]  Ohta, M., and K. Handa, "ISO-2022-JP-2: Multilingual
+           Extension of ISO-2022-JP", RFC1554, Tokyo Institute of
+           Technology, ETL, December 1993. 
+
+[RFC1556]  Nussbacher, H., "Handling of Bi-directional Texts in MIME",
+           RFC1556, Israeli Inter-University, December 1993. 
+
+[RFC1557]  Choi, U., Chon, K., and H. Park, "Korean Character Encoding
+           for Internet Messages", RFC 1557, KAIST, Solvit Chosun Media,
+           December 1993.
+
+[RFC1641]  Goldsmith, D., and M. Davis, "Using Unicode with MIME",
+           RFC1641, Taligent, Inc., July 1994. 
+
+[RFC1642]  Goldsmith, D., and M. Davis, "UTF-7", RFC1642, Taligent,
+           Inc., July 1994.
+
+[RFC1815]  Ohta, M., "Character Sets ISO-10646 and ISO-10646-J-1",
+           RFC 1815, Tokyo Institute of Technology, July 1995.
+
+
+[Adobe]    Adobe Systems Incorporated, PostScript Language Reference
+           Manual, second edition, Addison-Wesley Publishing Company,
+           Inc., 1990.
+
+[ECMA Registry]  ISO-IR: International Register of Escape Sequences
+           http://www.itscj.ipsj.or.jp/ISO-IR/  Note: The current
+           registration authority is IPSJ/ITSCJ, Japan.
+
+[HP-PCL5]  Hewlett-Packard Company, "HP PCL 5 Comparison Guide", 
+           (P/N 5021-0329) pp B-13, 1996.
+
+[IBM-CIDT] IBM Corporation, "ABOUT TYPE: IBM's Technical Reference
+           for Core Interchange Digitized Type", Publication number
+           S544-3708-01
+
+[RFC1842]  Wei, Y., J. Li, and Y. Jiang, "ASCII Printable
+           Characters-Based Chinese Character Encoding for Internet
+           Messages", RFC 1842, Harvard University, Rice University,
+           University of Maryland, August 1995.
+
+[RFC1843]  Lee, F., "HZ - A Data Format for Exchanging Files of
+           Arbitrarily Mixed Chinese and ASCII Characters", RFC 1843,
+           Stanford University, August 1995.
+
+[RFC2152]  Goldsmith, D., M. Davis, "UTF-7: A Mail-Safe Transformation
+	   Format of Unicode", RFC 2152, Apple Computer, Inc.,
+	   Taligent Inc., May 1997.
+
+[RFC2279]  Yergeau, F., "UTF-8, A Transformation Format of ISO 10646",
+           RFC 2279, Alis Technologies, January, 1998.
+
+[RFC2781]  Hoffman, P., Yergeau, F., "UTF-16, an encoding of ISO 10646",
+           RFC 2781, February 2000.
+
+[RFC3629]  Yergeau, F., "UTF-8, a transformation format of ISO 10646",
+           RFC3629, November 2003.
+
+PEOPLE
+------
+
+[KXS2] Keld Simonsen <Keld.Simonsen@dkuug.dk>
+
+[Choi] Woohyong Choi <whchoi@cosmos.kaist.ac.kr>
+
+[Davis] Mark Davis, <mark@unicode.org>, April 2002.
+
+[Lazhintseva] Katya Lazhintseva, <katyal@MICROSOFT.com>, May 1996.
+
+[Mahdi] Tamer Mahdi, <tamer@ca.ibm.com>, August 2000.
+
+[Malyshev] Michael Malyshev, <michael_malyshev@mail.ru>, January 2004
+
+[Murai] Jun Murai <jun@wide.ad.jp>
+
+[Nussbacher] Hank Nussbacher, <hank@vm.tau.ac.il>
+
+[Ohta] Masataka Ohta, <mohta@cc.titech.ac.jp>, July 1995.
+
+[Phipps] Toby Phipps, <tphipps@peoplesoft.com>, March 2002.
+
+[Pond] Rick Pond, <rickpond@vnet.ibm.com>, March 1997.
+
+[Robrigado] Reuel Robrigado, <reuelr@ca.ibm.com>, September 2002.
+
+[Scherer] Markus Scherer, <markus.scherer@jtcsv.com>, August 2000, 
+          September 2002.
+
+[Simonsen] Keld Simonsen, <Keld.Simonsen@rap.dk>, August 2000.
+
+[Tantsetthi] Trin Tantsetthi, <trin@mozart.inet.co.th>, September 1998.
+
+[Tumasonis] Vladas Tumasonis, <vladas.tumasonis@maf.vu.lt>, August 2000.
+
+[Uskov] Alexander Uskov, <auskov@idc.kz>, September 2002.
+
+[Wendt] Chris Wendt, <christw@microsoft.com>, December 1999.
+
+[Yick] Nicky Yick, <cliac@itsd.gcn.gov.hk>, October 2000.
+
+[]
+
+
+
+
+
+
+
diff --git a/Source/WebCore/platform/text/mac/mac-encodings.txt b/Source/WebCore/platform/text/mac/mac-encodings.txt
new file mode 100644
index 0000000..bb45e22
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/mac-encodings.txt
@@ -0,0 +1,45 @@
+# We'd like to eliminate this file.
+# It would be nice to get rid of dependence on the Text Encoding Converter (TEC) entirely.
+# Perhaps we can prove these are not used on the web and remove them.
+# Or perhaps we can get them added to ICU.
+
+# The items on the left are names of TEC TextEncoding values (without the leading kTextEncoding).
+# The items on the right are IANA character set names. Names listed in character-sets.txt are not
+# repeated here; mentioning any one character set from a group in there pulls in all the aliases in
+# that group.
+
+DOSChineseTrad: cp950
+DOSGreek: cp737, ibm737
+EUC_TW: EUC-TW
+ISOLatin10: ISO-8859-16
+ISOLatin6: ISO-8859-10
+ISOLatin8: ISO-8859-14
+ISOLatinThai: ISO-8859-11
+ISO_2022_JP_3: ISO-2022-JP-3
+JIS_C6226_78: JIS_C6226-1978
+JIS_X0208_83: JIS_X0208-1983
+JIS_X0208_90: JIS_X0208-1990
+JIS_X0212_90: JIS_X0212-1990
+KOI8_U: KOI8-U
+MacArabic: x-mac-arabic
+MacChineseSimp: x-mac-chinesesimp, xmacsimpchinese
+MacChineseTrad: x-mac-chinesetrad, xmactradchinese
+MacCroatian: x-mac-croatian
+MacDevanagari: x-mac-devanagari
+MacDingbats: x-mac-dingbats
+MacFarsi: x-mac-farsi
+MacGujarati: x-mac-gujarati
+MacGurmukhi: x-mac-gurmukhi
+MacHebrew: x-mac-hebrew
+MacIcelandic: x-mac-icelandic
+MacJapanese: x-mac-japanese
+MacKorean: x-mac-korean
+MacRomanLatin1: x-mac-roman-latin1
+MacRomanian: x-mac-romanian
+MacSymbol: x-mac-symbol
+MacThai: x-mac-thai
+MacTibetan: x-mac-tibetan
+MacVT100: x-mac-vt100
+NextStepLatin: x-nextstep
+ShiftJIS_X0213_00: Shift_JIS_X0213-2000
+WindowsKoreanJohab: johab
diff --git a/Source/WebCore/platform/text/mac/make-charset-table.pl b/Source/WebCore/platform/text/mac/make-charset-table.pl
new file mode 100755
index 0000000..16fd25a
--- /dev/null
+++ b/Source/WebCore/platform/text/mac/make-charset-table.pl
@@ -0,0 +1,225 @@
+#!/usr/bin/perl -w
+
+# Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1.  Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer. 
+# 2.  Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution. 
+# 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+#     its contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+use strict;
+
+my %aliasesFromCharsetsFile;  # normalized charset name => reference to the sorted name list of its group
+my %namesWritten;             # names already passed to emit_line, used to detect duplicate rows
+
+my $output = "";              # table rows accumulated by emit_line, printed by the program body
+
+my $error = 0;                # set to 1 by error(); the program body exits with status 1 when set
+
+sub error ($)  # Prints one diagnostic line to STDERR and flags the run as failed.
+{
+    print STDERR $_[0], "\n";
+    $error = 1;  # the program body exits with status 1 once this is set
+}
+
+sub emit_line  # Appends one row of the generated table to $output.
+{
+    # $flags is accepted from callers but is not written into the row.
+    my ($entryName, $enumPrefix, $enumName, $flags) = @_;
+
+    error "$entryName shows up twice in output" if $namesWritten{$entryName};
+    $namesWritten{$entryName} = 1;
+    $output .= qq(        { "$entryName", $enumPrefix$enumName },\n);
+}
+
+sub process_platform_encodings
+{
+    # Reads the platform encodings file $filename ("Name[, flags]: iana-name, alias, ..." per line),
+    # validates it, and emits one table row per name/alias, each mapped to $PlatformPrefix . Name.
+    my ($filename, $PlatformPrefix) = @_;
+    my $baseFilename = $filename;
+    $baseFilename =~ s|.*/||;
+    my %seenPlatformNames;
+    my %seenIANANames;
+    open PLATFORM_ENCODINGS, $filename or die "cannot open $filename: $!";
+    
+    while (<PLATFORM_ENCODINGS>) {
+        chomp;
+        s/\#.*$//;  # strip comments
+        s/\s+$//;   # strip trailing whitespace
+        if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
+            my %aliases;
+            
+            my $PlatformNameWithFlags = $PlatformName;
+            if ($flags) {
+                $PlatformNameWithFlags .= ", " . $flags;
+            } else {
+                $flags = "NoEncodingFlags";
+            }
+            error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
+            $seenPlatformNames{$PlatformNameWithFlags} = 1;
+
+            # Build the aliases list.
+            # Also check that no two names are part of the same entry in the charsets file.
+            my @IANANames = split ", ", $IANANames;
+            my $firstName = "";
+            my $canonicalFirstName = "";
+            my $prevName = "";
+            for my $name (@IANANames) {
+                if ($firstName eq "") {
+                    if ($name !~ /^[-A-Za-z0-9_]+$/) {
+                        error "$name, in $baseFilename, has illegal characters in it";
+                        next;
+                    }
+                    $firstName = $name;
+                } else {
+                    if ($name !~ /^[a-z0-9]+$/) {
+                        error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
+                        next;
+                    }
+                    if ($name le $prevName) {
+                        error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
+                    }
+                    $prevName = $name;
+                }
+                
+                my $canonicalName = lc $name;
+                $canonicalName =~ tr/-_//d;
+                $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
+                error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
+                $seenIANANames{$canonicalName} = 1;
+                
+                $aliases{$canonicalName} = 1;
+                next if !$aliasesFromCharsetsFile{$canonicalName};
+                for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) {
+                    $aliases{$alias} = 1;
+                }
+                for my $otherName (@IANANames) {
+                    # %aliasesFromCharsetsFile is keyed by normalized names, so normalize before looking up.
+                    (my $canonicalOtherName = lc $otherName) =~ tr/-_//d;
+                    next if $canonicalName eq $canonicalOtherName;
+                    if ($aliasesFromCharsetsFile{$canonicalOtherName}
+                        && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$canonicalOtherName}
+                        && $canonicalName le $canonicalOtherName) {
+                        error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
+                    }
+                }
+            }
+            
+            # write out
+            emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
+            for my $alias (sort keys %aliases) {
+                emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
+            }
+        } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) {
+            my $PlatformName = $1;
+            
+            error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
+            $seenPlatformNames{$PlatformName} = 1;
+        } elsif (/./) {
+            error "syntax error in $baseFilename, line $.";
+        }
+    }
+    
+    close PLATFORM_ENCODINGS;
+}
+
+sub process_iana_charset  # Registers one character-sets.txt name group in %aliasesFromCharsetsFile.
+{
+    # Arguments: the group's normalized Name followed by its normalized aliases.
+    my ($groupName, @groupAliases) = @_;
+
+    return unless $groupName;  # no group collected yet (the first call passes an undefined name)
+
+    # Every member maps to the same array reference, which lets other code recognise
+    # two names as belonging to one group by comparing the references.
+    my $sortedGroup = [sort $groupName, @groupAliases];
+    $aliasesFromCharsetsFile{$_} = $sortedGroup for @$sortedGroup;
+}
+
+sub process_iana_charsets
+{
+    my ($filename) = @_;
+    # Scans $filename for "Name:" and "Alias:" lines and registers one name group per Name: entry.
+    open CHARSETS, $filename or die;
+    
+    my %seen;  # normalized name => canonical name of the group it was most recently seen under
+    
+    my $canonical_name;  # group currently being collected (undefined before the first Name: line)
+    my @aliases;         # aliases collected so far for that group
+    
+    my %exceptions = ( isoir91 => 1, isoir92 => 1 );  # aliases that may repeat under another Name without an error
+    
+    while (<CHARSETS>) {
+        chomp;
+        if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) {
+            $new_canonical_name = lc $new_canonical_name;
+            $new_canonical_name =~ tr/a-z0-9//cd;  # delete every character other than a-z and 0-9
+            
+            error "saw $new_canonical_name twice in character-sets.txt", if $seen{$new_canonical_name};
+            $seen{$new_canonical_name} = $new_canonical_name;
+            # A new Name: line ends the previous group, so hand that group over before starting the next.
+            process_iana_charset $canonical_name, @aliases;
+
+            $canonical_name = $new_canonical_name;
+            @aliases = ();
+        } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) {
+            $new_alias = lc $new_alias;
+            $new_alias =~ tr/a-z0-9//cd;  # same normalization as for names
+            
+            # do this after normalizing the alias, sometimes character-sets.txt
+            # has weird escape characters, e.g. \b after None
+            next if $new_alias eq "none";
+            # Report an alias already claimed by a different group, unless it is a listed exception.
+            error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name", if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias};
+            push @aliases, $new_alias if !$seen{$new_alias};  # only names not seen before join the group
+            $seen{$new_alias} = $canonical_name;            
+        }
+    }
+    # Register the final group, which no following Name: line has closed.
+    process_iana_charset $canonical_name, @aliases;
+    
+    close CHARSETS;
+}
+
+# Program body
+# Arguments: the character-sets file, the platform encodings file, and the prefix put before each encoding name.
+process_iana_charsets($ARGV[0]);
+process_platform_encodings($ARGV[1], $ARGV[2]);
+# Stop before printing anything if any problem was reported along the way.
+exit 1 if $error;
+# Otherwise wrap the accumulated rows in the CharsetTable definition and print it.
+print <<EOF
+// File generated by make-charset-table.pl. Do not edit!
+
+#include "config.h"
+#include "CharsetData.h"
+
+namespace WebCore {
+
+    const CharsetEntry CharsetTable[] = {
+$output
+        { 0, 0 }
+    };
+
+}
+EOF
diff --git a/Source/WebCore/platform/text/qt/TextBoundariesQt.cpp b/Source/WebCore/platform/text/qt/TextBoundariesQt.cpp
new file mode 100644
index 0000000..a354ca6
--- /dev/null
+++ b/Source/WebCore/platform/text/qt/TextBoundariesQt.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2006 Zack Rusin <zack@kde.org>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "TextBoundaries.h"
+#include "NotImplemented.h"
+
+#include <QString>
+#include <QChar>
+
+#include <QDebug>
+#include <stdio.h>
+
+#include <qtextboundaryfinder.h>
+
+namespace WebCore {
+
+int findNextWordFromIndex(UChar const* buffer, int len, int position, bool forward)
+{
+    // Walks word boundaries from |position| in the requested direction and
+    // returns the first one that touches a letter or digit; yields |len|
+    // (forward) or 0 (backward) once the walk runs out of usable boundaries.
+    const QString text(reinterpret_cast<QChar const*>(buffer), len);
+    QTextBoundaryFinder finder(QTextBoundaryFinder::Word, text);
+    finder.setPosition(position < len ? position : len - 1);
+    if (!forward) {
+        // Going backward, inspect the character that starts at the boundary.
+        for (int offset = finder.toPreviousBoundary(); offset > 0; offset = finder.toPreviousBoundary()) {
+            if (QChar(buffer[offset]).isLetterOrNumber())
+                return offset;
+        }
+        return 0;
+    }
+    // Going forward, inspect the character that ends at the boundary.
+    for (int offset = finder.toNextBoundary(); offset > 0; offset = finder.toNextBoundary()) {
+        if (QChar(buffer[offset - 1]).isLetterOrNumber())
+            return offset;
+    }
+    return len;
+}
+
+void findWordBoundary(UChar const* buffer, int len, int position, int* start, int* end)
+{
+    // Writes the word boundary before |position| to *start, then the next boundary the finder reaches to *end.
+    QTextBoundaryFinder finder(QTextBoundaryFinder::Word, QString(reinterpret_cast<QChar const*>(buffer), len));
+    finder.setPosition(position);
+    *start = position <= 0 ? 0 : finder.toPreviousBoundary(); // moves the finder back when it runs
+    *end = position != len ? finder.toNextBoundary() : len;   // continues from wherever the finder now is
+}
+
+}
+
diff --git a/Source/WebCore/platform/text/qt/TextBreakIteratorQt.cpp b/Source/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
new file mode 100644
index 0000000..b9f5a9e
--- /dev/null
+++ b/Source/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIterator.h"
+
+#include <QtCore/qtextboundaryfinder.h>
+#include <qdebug.h>
+
+// #define DEBUG_TEXT_ITERATORS
+#ifdef DEBUG_TEXT_ITERATORS
+#define DEBUG qDebug
+#else
+#define DEBUG if (1) {} else qDebug
+#endif
+
+namespace WebCore {
+#if USE(QT_ICU_TEXT_BREAKING)
+const char* currentTextBreakLocaleID()
+{
+    static const QByteArray localeID = QLocale::system().name().toLatin1(); // owns the bytes; read once, on first call
+    return localeID.constData(); // stays valid after return; the old code pointed into a destroyed temporary
+}
+#else
+    static unsigned char buffer[1024]; // working buffer passed to every QTextBoundaryFinder built below
+
+    class TextBreakIterator : public QTextBoundaryFinder {
+    public:
+        TextBreakIterator() // no text attached yet
+            : QTextBoundaryFinder()
+            , length(0)
+            , string(0) {}
+        TextBreakIterator(QTextBoundaryFinder::BoundaryType type, const UChar* string, int length)
+            : QTextBoundaryFinder(type, reinterpret_cast<const QChar*>(string), length, buffer, sizeof(buffer))
+            , length(length)
+            , string(string) {}
+
+        int length;          // number of UChars in |string|
+        const UChar* string; // caller's text; this class stores only the pointer
+    };
+
+    TextBreakIterator* setUpIterator(TextBreakIterator& iterator, QTextBoundaryFinder::BoundaryType type, const UChar* string, int length)
+    {
+        // Points |iterator| at |string| for |type| breaking; returns 0 when there is no text.
+        if (!string || length <= 0)
+            return 0;
+        // Reuse the cached iterator only for the same type and text. |length| counts UChars, so compare
+        // length * sizeof(UChar) bytes; |length| bytes covers only a prefix when UChar is wider than a byte.
+        if (iterator.isValid() && type == iterator.type() && length == iterator.length
+            && !memcmp(string, iterator.string, length * sizeof(UChar))) {
+            iterator.toStart();
+            return &iterator;
+        }
+        iterator = TextBreakIterator(type, string, length);
+        return &iterator;
+    }
+
+    TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+    {
+        static TextBreakIterator wordIterator; // single instance, re-targeted by setUpIterator on each call
+        return setUpIterator(wordIterator, QTextBoundaryFinder::Word, string, length);
+    }
+
+    TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+    {
+        static TextBreakIterator graphemeIterator; // single instance, re-targeted by setUpIterator on each call
+        return setUpIterator(graphemeIterator, QTextBoundaryFinder::Grapheme, string, length);
+    }
+
+    TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+    {
+        return characterBreakIterator(string, length); // cursor movement reuses the grapheme iterator
+    }
+
+    TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+    {
+        static TextBreakIterator lineIterator; // single instance, re-targeted by setUpIterator on each call
+        return setUpIterator(lineIterator, QTextBoundaryFinder::Line, string, length);
+    }
+
+    TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+    {
+        // One function-local iterator serves every call; setUpIterator re-targets it as needed.
+        static TextBreakIterator sentenceIterator;
+        return setUpIterator(sentenceIterator, QTextBoundaryFinder::Sentence, string, length);
+    }
+
+    int textBreakFirst(TextBreakIterator* bi)
+    {
+        bi->toStart(); // rewind |bi| before reporting its position
+        DEBUG() << "textBreakFirst" << bi->position();
+        return bi->position();
+    }
+
+    int textBreakNext(TextBreakIterator* bi)
+    {
+        const int boundary = bi->toNextBoundary(); // advances |bi| from its current position
+        DEBUG() << "textBreakNext" << boundary;
+        return boundary;
+    }
+
+    int textBreakPreceding(TextBreakIterator* bi, int pos)
+    {
+        bi->setPosition(pos); // search backward starting from |pos|
+        const int boundary = bi->toPreviousBoundary();
+        DEBUG() << "textBreakPreceding" << pos << boundary;
+        return boundary;
+    }
+
+    int textBreakFollowing(TextBreakIterator* bi, int pos)
+    {
+        bi->setPosition(pos); // search forward starting from |pos|
+        const int boundary = bi->toNextBoundary();
+        DEBUG() << "textBreakFollowing" << pos << boundary;
+        return boundary;
+    }
+
+    int textBreakCurrent(TextBreakIterator* bi)
+    {
+        return bi->position(); // reports the current position without moving |bi|
+    }
+
+    bool isTextBreak(TextBreakIterator*, int)
+    {
+        return true; // NOTE(review): both arguments are ignored, so every position is reported as a break
+    }
+#endif
+
+}
diff --git a/Source/WebCore/platform/text/qt/TextCodecQt.cpp b/Source/WebCore/platform/text/qt/TextCodecQt.cpp
new file mode 100644
index 0000000..1e95d87
--- /dev/null
+++ b/Source/WebCore/platform/text/qt/TextCodecQt.cpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2008 Holger Hans Peter Freyther
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecQt.h"
+#include "PlatformString.h"
+#include <wtf/text/CString.h>
+#include <qset.h>
+
+namespace WebCore {
+
+// Pool of codec names handed out by getAtomicName(). Allocated on first use and
+// never deleted in this file.
+static QSet<QByteArray> *unique_names = 0;
+
+// Adds |name| to the pool (a no-op if an equal entry is already there) and returns
+// the character data of the pooled entry rather than of the caller's byte array.
+static const char *getAtomicName(const QByteArray &name)
+{
+    if (!unique_names)
+        unique_names = new QSet<QByteArray>;
+
+    QSet<QByteArray>& pool = *unique_names;
+    pool.insert(name);
+    return pool.find(name)->constData();
+}
+
+// Registers every codec Qt reports via QTextCodec::availableMibs(): the codec's own
+// name is registered as its canonical name, and each of its aliases is registered as
+// an alternative spelling of that canonical name.
+void TextCodecQt::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    QList<int> mibs = QTextCodec::availableMibs();
+//     qDebug() << ">>>>>>>>> registerEncodingNames";
+
+    for (int i = 0; i < mibs.size(); ++i) {
+        QTextCodec *c = QTextCodec::codecForMib(mibs.at(i));
+        const char *name = getAtomicName(c->name());
+        registrar(name, name);
+//         qDebug() << "    " << name << name;
+        QList<QByteArray> aliases = c->aliases();
+        // Use a distinct index so the alias loop does not shadow the MIB loop's |i|.
+        for (int j = 0; j < aliases.size(); ++j) {
+            const char *a = getAtomicName(aliases.at(j));
+//             qDebug() << "     (a) " << a << name;
+            registrar(a, name);
+        }
+    }
+}
+
+// Factory callback passed to the codec registrar in registerCodecs(): creates a
+// TextCodecQt for |encoding|. The second (user data) argument is unused.
+static PassOwnPtr<TextCodec> newTextCodecQt(const TextEncoding& encoding, const void*)
+{
+    return new TextCodecQt(encoding);
+}
+
+// Registers newTextCodecQt as the factory for the name of every codec Qt reports
+// via QTextCodec::availableMibs(). No extra user data is passed (0).
+void TextCodecQt::registerCodecs(TextCodecRegistrar registrar)
+{
+    const QList<int> availableMibs = QTextCodec::availableMibs();
+    const int mibCount = availableMibs.size();
+
+    for (int index = 0; index < mibCount; ++index) {
+        QTextCodec* codec = QTextCodec::codecForMib(availableMibs.at(index));
+        registrar(getAtomicName(codec->name()), newTextCodecQt, 0);
+    }
+}
+
+// Stores the encoding and looks up the matching Qt codec by the encoding's name.
+TextCodecQt::TextCodecQt(const TextEncoding& encoding)
+    : m_encoding(encoding)
+    , m_codec(QTextCodec::codecForName(m_encoding.name()))
+{
+}
+
+// Nothing to release explicitly: m_codec is not deleted here.
+TextCodecQt::~TextCodecQt()
+{
+}
+
+
+// Decodes |length| bytes starting at |bytes| with m_codec and returns the decoded text
+// (an empty, non-null string when there is no input).
+// Conversion state lives in m_state and is carried over between calls; when |flush| is
+// true the state is reset after decoding. The stopOnError argument is ignored.
+// |sawError| is set to whether m_state.invalidChars is non-zero after the conversion.
+String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*stopOnError*/, bool& sawError)
+{
+    // We chop input buffer to smaller buffers to avoid excessive memory consumption
+    // when the input buffer is big.  This helps reduce peak memory consumption in
+    // mobile devices where system RAM is limited.
+#if OS(SYMBIAN)
+    static const int MaxInputChunkSize = 32 * 1024;
+#else
+    static const int MaxInputChunkSize = 1024 * 1024;
+#endif
+    const char* buf = bytes;
+    const char* end = buf + length;
+    String unicode(""); // a non-null string is expected
+
+    // Feed the codec at most MaxInputChunkSize bytes at a time; m_state carries any
+    // partial sequence from one chunk to the next.
+    while (buf < end) {
+        int size = end - buf;
+        size = qMin(size, MaxInputChunkSize);
+        QString decoded = m_codec->toUnicode(buf, size, &m_state);
+        unicode.append(reinterpret_cast_ptr<const UChar*>(decoded.unicode()), decoded.length());
+        buf += size;
+    }
+
+    // Read the error count before a flush clears it below.
+    sawError = m_state.invalidChars != 0;
+
+    if (flush) {
+        // Start the next decode() call from a clean conversion state.
+        m_state.flags = QTextCodec::DefaultConversion;
+        m_state.remainingChars = 0;
+        m_state.invalidChars = 0;
+    }
+
+    return unicode;
+}
+
+// Encodes |length| UTF-16 code units from |characters| with m_codec and returns the bytes.
+// Zero-length input yields an empty string. Input the codec cannot encode is replaced
+// using getUnencodableReplacement() according to |handling|.
+// A local ConverterState is used, so this does not touch the decoder state in m_state.
+CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    QTextCodec::ConverterState state;
+    state.flags = QTextCodec::ConversionFlags(QTextCodec::ConvertInvalidToNull | QTextCodec::IgnoreHeader);
+
+    if (!length)
+        return "";
+
+    // First attempt: encode the whole buffer in one call.
+    QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, &state);
+
+    // If some of |characters| are unencodable, escape them as specified by |handling|.
+    // We append one valid encoded chunk to a QByteArray at a time. When we encounter an unencodable chunk we
+    // escape it with getUnencodableReplacement, append it, then move to the next chunk.
+    if (state.invalidChars) {
+        state.invalidChars = 0;
+        state.remainingChars = 0;
+        int len = 0;
+        // Discard the one-shot result and rebuild the output chunk by chunk.
+        ba.clear();
+        for (size_t pos = 0; pos < length; ++pos) {
+            // Grow the current chunk by one code unit and try to encode it.
+            QByteArray tba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), ++len, &state);
+            // The codec still has pending input for this chunk: keep extending it.
+            if (state.remainingChars)
+                continue;
+            if (state.invalidChars) {
+                UnencodableReplacementArray replacement;
+                // NOTE(review): only characters[0] is passed here even when the chunk spans
+                // more than one code unit, and every '\0' byte in tba is substituted — confirm
+                // this is intended for multi-unit chunks.
+                getUnencodableReplacement(characters[0], handling, replacement);
+                tba.replace('\0', replacement);
+                state.invalidChars = 0;
+            }
+            ba.append(tba);
+            // Chunk consumed: advance the input pointer and start a new chunk.
+            characters += len;
+            len = 0;
+            state.remainingChars = 0;
+        }
+    }
+
+    return CString(ba.constData(), ba.length());
+}
+
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/qt/TextCodecQt.h b/Source/WebCore/platform/text/qt/TextCodecQt.h
new file mode 100644
index 0000000..f28f0bb
--- /dev/null
+++ b/Source/WebCore/platform/text/qt/TextCodecQt.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecQt_h
+#define TextCodecQt_h
+
+#include "TextCodec.h"
+#include "TextEncoding.h"
+#include <QTextCodec>
+
+namespace WebCore {
+
+    // TextCodec implementation that performs conversions through a QTextCodec.
+    class TextCodecQt : public TextCodec {
+    public:
+        // Registration hooks: report encoding names/aliases and codec factories to the registrars.
+        static void registerEncodingNames(EncodingNameRegistrar);
+        static void registerCodecs(TextCodecRegistrar);
+
+        TextCodecQt(const TextEncoding&);
+        virtual ~TextCodecQt();
+
+        // Bytes -> text and text -> bytes conversions.
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+    private:
+        TextEncoding m_encoding; // Encoding this codec was created for.
+        QTextCodec *m_codec; // Qt codec used for the actual conversion.
+        QTextCodec::ConverterState m_state; // Qt conversion state object.
+    };
+
+} // namespace WebCore
+
+#endif // TextCodecQt_h
diff --git a/Source/WebCore/platform/text/transcoder/FontTranscoder.cpp b/Source/WebCore/platform/text/transcoder/FontTranscoder.cpp
new file mode 100644
index 0000000..68601f9
--- /dev/null
+++ b/Source/WebCore/platform/text/transcoder/FontTranscoder.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2010, Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "FontTranscoder.h"
+
+#include "CharacterNames.h"
+#include "FontDescription.h"
+#include "TextEncoding.h"
+
+namespace WebCore {
+
+// Fills m_converterTypes with the font families whose backslashes are converted.
+// Each family is registered under its ASCII name and under a second name spelled
+// with fullwidth Latin, katakana and kanji code points.
+FontTranscoder::FontTranscoder()
+{
+    // MS PGothic
+    const UChar msPGothicLocalized[] = {0xFF2D, 0xFF33, 0x0020, 0xFF30, 0x30B4, 0x30B7, 0x30C3, 0x30AF};
+    m_converterTypes.add("MS PGothic", BackslashToYenSign);
+    m_converterTypes.add(AtomicString(msPGothicLocalized, WTF_ARRAY_LENGTH(msPGothicLocalized)), BackslashToYenSign);
+
+    // MS PMincho
+    const UChar msPMinchoLocalized[] = {0xFF2D, 0xFF33, 0x0020, 0xFF30, 0x660E, 0x671D};
+    m_converterTypes.add("MS PMincho", BackslashToYenSign);
+    m_converterTypes.add(AtomicString(msPMinchoLocalized, WTF_ARRAY_LENGTH(msPMinchoLocalized)), BackslashToYenSign);
+
+    // MS Gothic
+    const UChar msGothicLocalized[] = {0xFF2D, 0xFF33, 0x0020, 0x30B4, 0x30B7, 0x30C3, 0x30AF};
+    m_converterTypes.add("MS Gothic", BackslashToYenSign);
+    m_converterTypes.add(AtomicString(msGothicLocalized, WTF_ARRAY_LENGTH(msGothicLocalized)), BackslashToYenSign);
+
+    // MS Mincho
+    const UChar msMinchoLocalized[] = {0xFF2D, 0xFF33, 0x0020, 0x660E, 0x671D};
+    m_converterTypes.add("MS Mincho", BackslashToYenSign);
+    m_converterTypes.add(AtomicString(msMinchoLocalized, WTF_ARRAY_LENGTH(msMinchoLocalized)), BackslashToYenSign);
+
+    // Meiryo
+    const UChar meiryoLocalized[] = {0x30E1, 0x30A4, 0x30EA, 0x30AA};
+    m_converterTypes.add("Meiryo", BackslashToYenSign);
+    m_converterTypes.add(AtomicString(meiryoLocalized, WTF_ARRAY_LENGTH(meiryoLocalized)), BackslashToYenSign);
+}
+
+// Picks the conversion for the given font and (optional) encoding.
+// A font family found in m_converterTypes wins; otherwise the encoding decides,
+// but only when the font was not explicitly specified.
+FontTranscoder::ConverterType FontTranscoder::converterType(const FontDescription& fontDescription, const TextEncoding* encoding) const
+{
+    const AtomicString& familyName = fontDescription.family().family().string();
+    if (!familyName.isNull()) {
+        HashMap<AtomicString, ConverterType>::const_iterator entry = m_converterTypes.find(familyName);
+        if (entry != m_converterTypes.end())
+            return entry->second;
+    }
+
+    // IE's default fonts for Japanese encodings change backslashes into yen signs.
+    // We emulate this behavior only when no font is explicitly specified.
+    const bool encodingRemapsBackslash = encoding && encoding->backslashAsCurrencySymbol() != '\\';
+    if (encodingRemapsBackslash && !fontDescription.isSpecifiedFont())
+        return BackslashToYenSign;
+
+    return NoConversion;
+}
+
+// Applies the conversion selected by converterType() to |text| in place.
+// Reaching this function when no conversion applies trips ASSERT_NOT_REACHED;
+// presumably callers check needsTranscoding() first.
+void FontTranscoder::convert(String& text, const FontDescription& fontDescription, const TextEncoding* encoding) const
+{
+    if (converterType(fontDescription, encoding) == BackslashToYenSign) {
+        // FIXME: TextEncoding.h has similar code. We need to factor them out.
+        text.replace('\\', yenSign);
+        return;
+    }
+
+    // NoConversion (or any unknown type) is not expected here.
+    ASSERT_NOT_REACHED();
+}
+
+// True when converterType() selects anything other than NoConversion.
+bool FontTranscoder::needsTranscoding(const FontDescription& fontDescription, const TextEncoding* encoding) const
+{
+    return converterType(fontDescription, encoding) != NoConversion;
+}
+
+// Returns the shared FontTranscoder, created on first call and never deleted.
+FontTranscoder& fontTranscoder()
+{
+    static FontTranscoder* sharedTranscoder = new FontTranscoder;
+    return *sharedTranscoder;
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/transcoder/FontTranscoder.h b/Source/WebCore/platform/text/transcoder/FontTranscoder.h
new file mode 100644
index 0000000..67db977
--- /dev/null
+++ b/Source/WebCore/platform/text/transcoder/FontTranscoder.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2010, Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FontTranscoder_h
+#define FontTranscoder_h
+
+#include <wtf/HashMap.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/text/AtomicStringHash.h>
+
+namespace WebCore {
+
+class FontDescription;
+class TextEncoding;
+
+// Decides, from a font description and an optional text encoding, whether text
+// needs converting before use, and performs that conversion.
+// Instances cannot be created directly: the constructor is private and the class
+// is obtained through the friend function fontTranscoder().
+class FontTranscoder : public Noncopyable {
+public:
+    // Converts |text| in place.
+    void convert(String& text, const FontDescription&, const TextEncoding* = 0) const;
+    // Reports whether convert() would apply a conversion for these arguments.
+    bool needsTranscoding(const FontDescription&, const TextEncoding* = 0) const;
+
+private:
+    FontTranscoder();
+    ~FontTranscoder(); // Not implemented to make sure nobody accidentally calls delete -- WebCore does not delete singletons.
+
+    // Kinds of conversion this class can select.
+    enum ConverterType {
+        NoConversion, BackslashToYenSign,
+    };
+
+    ConverterType converterType(const FontDescription&, const TextEncoding*) const;
+
+    // Font family name -> conversion to apply for that family.
+    HashMap<AtomicString, ConverterType> m_converterTypes;
+
+    friend FontTranscoder& fontTranscoder();
+};
+
+FontTranscoder& fontTranscoder();
+
+} // namespace WebCore
+
+#endif // FontTranscoder_h
diff --git a/Source/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp b/Source/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp
new file mode 100644
index 0000000..e417e17
--- /dev/null
+++ b/Source/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+// Returns the locale ID to use for searching. Currently always the empty string.
+const char* currentSearchLocaleID()
+{
+    // FIXME: Should use system locale.
+    return "";
+}
+
+// Returns the locale ID to use for text breaking. Currently hard-coded.
+const char* currentTextBreakLocaleID()
+{
+    // Using en_US_POSIX for now so that word selection in the address field keeps working as before
+    // (double-clicking in a URL selects a word delimited by periods rather than selecting the entire URL).
+    // However, this is not entirely correct - we should honor the system locale in the normal case.
+    // FIXME: <rdar://problem/6786703> Should use system locale for text breaking
+    return "en_US_POSIX";
+}
+
+}
diff --git a/Source/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp b/Source/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp
new file mode 100644
index 0000000..96488c0
--- /dev/null
+++ b/Source/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp
@@ -0,0 +1,303 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIterator.h"
+
+#include "PlatformString.h"
+#include <wtf/StdLibExtras.h>
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF::Unicode;
+
+namespace WebCore {
+
+// Hack, not entirely correct.
+// Returns true when a character boundary may be placed immediately before |c|.
+// Non-spacing marks and trail (low) surrogates continue the preceding code unit,
+// so no boundary is allowed in front of them.
+static inline bool isCharStop(UChar c)
+{
+    CharCategory charCategory = category(c);
+    if (charCategory == Mark_NonSpacing)
+        return false;
+    // Of the surrogate range 0xd800-0xdfff only the trail half (0xdc00-0xdfff) must stay
+    // glued to the unit before it; a lead surrogate starts a new character.
+    return charCategory != Other_Surrogate || c < 0xdc00 || c > 0xdfff;
+}
+
+// Returns true for every character whose category is NOT Separator_Line.
+// NOTE(review): given the name, the polarity looks inverted relative to how
+// WordBreakIterator uses isSpace() — confirm the intended meaning before relying on it.
+static inline bool isLineStop(UChar c)
+{
+    return category(c) != Separator_Line;
+}
+
+// Treats any punctuation character (per isPunct) as a sentence stop.
+static inline bool isSentenceStop(UChar c)
+{
+    return isPunct(c);
+}
+
+// Base class for the simple break iterators in this file. It stores a borrowed
+// pointer to the text, its length and the current position; subclasses supply
+// next() and previous().
+class TextBreakIterator {
+public:
+    // Points the iterator at a new buffer and rewinds to position 0. The buffer is not copied.
+    void reset(const UChar* str, int len)
+    {
+        string = str;
+        length = len;
+        currentPos = 0;
+    }
+    // Moves to the start of the text and returns 0.
+    int first()
+    {
+        currentPos = 0;
+        return currentPos;
+    }
+    // Moves to the end of the text and returns |length|.
+    int last()
+    {
+        currentPos = length;
+        return currentPos;
+    }
+    // Move to the next/previous boundary and return it; implemented by subclasses.
+    virtual int next() = 0;
+    virtual int previous() = 0;
+    // Sets the position to |position| (no range check) and returns next().
+    int following(int position)
+    {
+        currentPos = position;
+        return next();
+    }
+    // Sets the position to |position| (no range check) and returns previous().
+    int preceding(int position)
+    {
+        currentPos = position;
+        return previous();
+    }
+
+    int currentPos; // Current index into |string|; subclasses set it to -1 in some end cases.
+    const UChar* string; // Text being iterated; not owned.
+    int length; // Number of UChars in |string|.
+};
+
+// Iterator whose boundaries are derived from isSpace().
+struct WordBreakIterator: TextBreakIterator {
+    virtual int next();
+    virtual int previous();
+};
+
+// Iterator whose boundaries are derived from isCharStop().
+struct CharBreakIterator: TextBreakIterator {
+    virtual int next();
+    virtual int previous();
+};
+
+// Iterator whose boundaries are derived from isLineStop().
+struct LineBreakIterator: TextBreakIterator {
+    virtual int next();
+    virtual int previous();
+};
+
+// Iterator whose boundaries are derived from isSentenceStop().
+struct SentenceBreakIterator : TextBreakIterator {
+    virtual int next();
+    virtual int previous();
+};
+
+// Moves forward past the rest of the current run and any spaces after it, stopping
+// at the first non-space that follows a space (or at the end of the text).
+// At the end of the text the position becomes -1 and -1 is returned.
+int WordBreakIterator::next()
+{
+    if (currentPos == length)
+        return currentPos = -1;
+
+    bool seenSpace = false;
+    for (; currentPos < length; ++currentPos) {
+        if (isSpace(string[currentPos]))
+            seenSpace = true;
+        else if (seenSpace)
+            break;
+    }
+    return currentPos;
+}
+
+// Moves backward over the current run and any spaces before it, stopping at the
+// first non-space found after a space has been seen (or at position 0).
+// At position 0 the position becomes -1 and -1 is returned.
+int WordBreakIterator::previous()
+{
+    if (!currentPos) {
+        currentPos = -1;
+        return currentPos;
+    }
+    // string[length] is one past the end of the buffer (reachable via last() or
+    // preceding(length)); start scanning from the last character instead.
+    if (currentPos >= length)
+        currentPos = length - 1;
+    bool haveSpace = false;
+    while (currentPos > 0) {
+        if (haveSpace && !isSpace(string[currentPos]))
+            break;
+        if (isSpace(string[currentPos]))
+            haveSpace = true;
+        --currentPos;
+    }
+    return currentPos;
+}
+
+// Advances at least one code unit, then keeps going until isCharStop() accepts the
+// unit at the new position or the end of the text is reached.
+// Returns -1 (without moving) when already at or past the end.
+int CharBreakIterator::next()
+{
+    if (currentPos >= length)
+        return -1;
+
+    for (++currentPos; currentPos < length; ++currentPos) {
+        if (isCharStop(string[currentPos]))
+            break;
+    }
+    return currentPos;
+}
+
+// Steps back at least one code unit (after clamping the position to |length|), then
+// keeps going until isCharStop() accepts the unit at the new position or position 0
+// is reached. Returns -1 (without moving) when already at or before the start.
+int CharBreakIterator::previous()
+{
+    if (currentPos <= 0)
+        return -1;
+
+    if (currentPos > length)
+        currentPos = length;
+
+    for (--currentPos; currentPos > 0; --currentPos) {
+        if (isCharStop(string[currentPos]))
+            break;
+    }
+    return currentPos;
+}
+
+// Moves forward until a character rejected by isLineStop() is found after at least
+// one accepted character has been seen (or until the end of the text).
+// At the end of the text the position becomes -1 and -1 is returned.
+int LineBreakIterator::next()
+{
+    if (currentPos == length)
+        return currentPos = -1;
+
+    bool seenStop = false;
+    for (; currentPos < length; ++currentPos) {
+        if (isLineStop(string[currentPos]))
+            seenStop = true;
+        else if (seenStop)
+            break;
+    }
+    return currentPos;
+}
+
+// Moves backward until a character rejected by isLineStop() is found after at least
+// one accepted character has been seen (or until position 0).
+// At position 0 the position becomes -1 and -1 is returned.
+int LineBreakIterator::previous()
+{
+    if (!currentPos) {
+        currentPos = -1;
+        return currentPos;
+    }
+    // string[length] is one past the end of the buffer (reachable via last() or
+    // preceding(length)); start scanning from the last character instead.
+    if (currentPos >= length)
+        currentPos = length - 1;
+    bool haveSpace = false;
+    while (currentPos > 0) {
+        if (haveSpace && !isLineStop(string[currentPos]))
+            break;
+        if (isLineStop(string[currentPos]))
+            haveSpace = true;
+        --currentPos;
+    }
+    return currentPos;
+}
+
+// Moves forward past the next run of sentence-stop characters, stopping at the first
+// non-stop character that follows one (or at the end of the text).
+// At the end of the text the position becomes -1 and -1 is returned.
+int SentenceBreakIterator::next()
+{
+    if (currentPos == length)
+        return currentPos = -1;
+
+    bool seenStop = false;
+    for (; currentPos < length; ++currentPos) {
+        if (isSentenceStop(string[currentPos]))
+            seenStop = true;
+        else if (seenStop)
+            break;
+    }
+    return currentPos;
+}
+
+// Moves backward past the previous run of sentence-stop characters, stopping at the
+// first non-stop character found after one has been seen (or at position 0).
+// At position 0 the position becomes -1 and -1 is returned.
+int SentenceBreakIterator::previous()
+{
+    if (!currentPos) {
+        currentPos = -1;
+        return currentPos;
+    }
+    // string[length] is one past the end of the buffer (reachable via last() or
+    // preceding(length)); start scanning from the last character instead.
+    if (currentPos >= length)
+        currentPos = length - 1;
+    bool haveSpace = false;
+    while (currentPos > 0) {
+        if (haveSpace && !isSentenceStop(string[currentPos]))
+            break;
+        if (isSentenceStop(string[currentPos]))
+            haveSpace = true;
+        --currentPos;
+    }
+    return currentPos;
+}
+
+// Resets the shared static WordBreakIterator to the given text and returns it.
+// Every call returns the same object, so a new call re-targets earlier results.
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(WordBreakIterator, sharedWordIterator, ());
+    sharedWordIterator.reset(string, length);
+    return &sharedWordIterator;
+}
+
+// Resets the shared static CharBreakIterator to the given text and returns it.
+// Every call returns the same object, so a new call re-targets earlier results.
+TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(CharBreakIterator, sharedCharIterator, ());
+    sharedCharIterator.reset(string, length);
+    return &sharedCharIterator;
+}
+
+// Resets the shared static LineBreakIterator to the given text and returns it.
+// Every call returns the same object, so a new call re-targets earlier results.
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(LineBreakIterator, sharedLineIterator, ());
+    sharedLineIterator.reset(string, length);
+    return &sharedLineIterator;
+}
+
+// Resets the shared static SentenceBreakIterator to the given text and returns it.
+// Every call returns the same object, so a new call re-targets earlier results.
+TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+{
+    DEFINE_STATIC_LOCAL(SentenceBreakIterator, sharedSentenceIterator, ());
+    sharedSentenceIterator.reset(string, length);
+    return &sharedSentenceIterator;
+}
+
+// Rewinds |breakIterator| to the start of its text; forwards to first().
+int textBreakFirst(TextBreakIterator* breakIterator)
+{
+    return breakIterator->first();
+}
+
+// Moves |breakIterator| to the end of its text; forwards to last().
+int textBreakLast(TextBreakIterator* breakIterator)
+{
+    return breakIterator->last();
+}
+
+// Advances |breakIterator| to its next boundary; forwards to next().
+int textBreakNext(TextBreakIterator* breakIterator)
+{
+    return breakIterator->next();
+}
+
+// Moves |breakIterator| back to its previous boundary; forwards to previous().
+int textBreakPrevious(TextBreakIterator* breakIterator)
+{
+    return breakIterator->previous();
+}
+
+// Finds the boundary before |position|; forwards to preceding().
+int textBreakPreceding(TextBreakIterator* breakIterator, int position)
+{
+    return breakIterator->preceding(position);
+}
+
+// Finds the boundary after |position|; forwards to following().
+int textBreakFollowing(TextBreakIterator* breakIterator, int position)
+{
+    return breakIterator->following(position);
+}
+
+// Returns the iterator's stored position without moving it.
+int textBreakCurrent(TextBreakIterator* breakIterator)
+{
+    return breakIterator->currentPos;
+}
+
+// Stub: both arguments are ignored and every position is reported as a break.
+bool isTextBreak(TextBreakIterator*, int)
+{
+    return true;
+}
+
+// Cursor movement uses the character break iterator, so this returns the same
+// shared object as characterBreakIterator().
+TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
+{
+    return characterBreakIterator(string, length);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/wince/TextCodecWinCE.cpp b/Source/WebCore/platform/text/wince/TextCodecWinCE.cpp
new file mode 100644
index 0000000..3532e74
--- /dev/null
+++ b/Source/WebCore/platform/text/wince/TextCodecWinCE.cpp
@@ -0,0 +1,389 @@
+/*
+ * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
+ * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this library; see the file COPYING.LIB.  If not, write to
+ *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ *  Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "TextCodecWinCE.h"
+
+#include "FontCache.h"
+#include "PlatformString.h"
+#include <mlang.h>
+#include <winbase.h>
+#include <winnls.h>
+#include <wtf/HashMap.h>
+#include <wtf/HashSet.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/StringConcatenate.h>
+#include <wtf/text/StringHash.h>
+#include <wtf/unicode/UTF8.h>
+
+namespace WebCore {
+
+// Describes one charset collected by the LanguageManager constructor.
+struct CharsetInfo {
+    CString m_name; // Charset name recorded for the code page.
+    String m_friendlyName; // Description text taken from MIMECPINFO::wszDescription.
+    UINT m_codePage; // Windows code page identifier.
+    Vector<CString> m_aliases; // Additional names that are registered for this charset.
+};
+
+// Holder whose constructor fills the charset tables in this file.
+// The constructor is private; languageManager() is the only function granted access to it.
+class LanguageManager {
+private:
+    LanguageManager();
+
+    friend LanguageManager& languageManager();
+};
+
+// Usage: a lookup table used to get the charset name registered for a code page ID.
+// Key: code page ID. Value: charset name (use it as the key into knownCharsets()).
+static HashMap<UINT, CString>& codePageCharsets()
+{
+    static HashMap<UINT, CString> charsetNameByCodePage;
+    return charsetNameByCodePage;
+}
+
+// Usage: a lookup table used to get CharsetInfo by charset name.
+// Key: charset name. Value: charset information.
+static HashMap<String, CharsetInfo>& knownCharsets()
+{
+    static HashMap<String, CharsetInfo> charsetInfoByName;
+    return charsetInfoByName;
+}
+
+// Usage: the set of charset names that are supported by the system.
+// This is a hash set, so iteration order is unspecified; look names up in knownCharsets() for details.
+typedef HashSet<String> CharsetSet;
+static CharsetSet& supportedCharsets()
+{
+    static CharsetSet supportedNames;
+    return supportedNames;
+}
+
+// Returns the single LanguageManager instance. It is constructed on the first call, which is
+// what populates the charset tables; callers in this file invoke it purely for that effect.
+static LanguageManager& languageManager()
+{
+    static LanguageManager manager;
+    return manager;
+}
+
+// Walks the code pages reported by IMultiLanguage::EnumCodePages(MIMECONTF_BROWSER) and fills
+// codePageCharsets(), knownCharsets() and supportedCharsets(). Does nothing when the MLang
+// interface is unavailable or the enumeration cannot be started.
+LanguageManager::LanguageManager()
+{
+    IEnumCodePage* enumInterface;
+    IMultiLanguage* mli = FontCache::getMultiLanguageInterface();
+    if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) {
+        MIMECPINFO cpInfo;
+        ULONG ccpInfo;
+        // Fetch one MIMECPINFO record per iteration until the enumerator stops returning entries.
+        while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) {
+            // Ignore code pages for which IsValidCodePage() reports false.
+            if (!IsValidCodePage(cpInfo.uiCodePage))
+                continue;
+
+            HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage);
+
+            CString name(String(cpInfo.wszWebCharset).latin1());
+            // First record seen for this code page: its web charset name becomes the name stored
+            // for the code page, and a fresh CharsetInfo is registered under that name.
+            if (i == codePageCharsets().end()) {
+                CharsetInfo info;
+                info.m_codePage = cpInfo.uiCodePage;
+                knownCharsets().set(name.data(), info);
+                i = codePageCharsets().set(cpInfo.uiCodePage, name).first;
+            }
+            // NOTE(review): |i| looks like it is always valid at this point, because set() above
+            // hands back an iterator to the stored entry — confirm.
+            if (i != codePageCharsets().end()) {
+                HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length()));
+                ASSERT(j != knownCharsets().end());
+                CharsetInfo& info = j->second;
+                info.m_name = i->second.data();
+                info.m_friendlyName = cpInfo.wszDescription;
+                // Every record for the code page contributes its web, header and body charset
+                // names plus a "cp<number>" alias; duplicates are not filtered here.
+                info.m_aliases.append(name);
+                info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1());
+                info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1());
+                String cpName = makeString("cp", String::number(cpInfo.uiCodePage));
+                info.m_aliases.append(cpName.latin1());
+                supportedCharsets().add(i->second.data());
+            }
+        }
+        enumInterface->Release();
+    }
+}
+
+// Maps an encoding name to a Windows code page: "UTF-8" yields CP_UTF8, a name found in
+// knownCharsets() yields its recorded code page, and anything else falls back to CP_ACP.
+static UINT getCodePage(const char* name)
+{
+    if (strcmp(name, "UTF-8") == 0)
+        return CP_UTF8;
+
+    // The table is deliberately bound to a "const" reference so that find() and end() both
+    // yield const_iterator; VS otherwise fails to find a comparison for const_iterator vs iterator.
+    const HashMap<String, CharsetInfo>& charsetTable = knownCharsets();
+    HashMap<String, CharsetInfo>::const_iterator match = charsetTable.find(name);
+    if (match != charsetTable.end())
+        return match->second.m_codePage;
+
+    return CP_ACP;
+}
+
+// Codec factory handed to the TextCodecRegistrar: creates a TextCodecWinCE for the code page
+// that getCodePage() resolves from the encoding's name. The second argument is unused.
+static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*)
+{
+    return new TextCodecWinCE(getCodePage(encoding.name()));
+}
+
+// Creates a codec that converts to and from the given Windows code page.
+TextCodecWinCE::TextCodecWinCE(UINT codePage)
+    : m_codePage(codePage)
+{
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+TextCodecWinCE::~TextCodecWinCE()
+{
+}
+
+// Registers the one base encoding name, "UTF-8", mapped to itself.
+void TextCodecWinCE::registerBaseEncodingNames(EncodingNameRegistrar registrar)
+{
+    registrar("UTF-8", "UTF-8");
+}
+
+// Registers newTextCodecWinCE as the codec factory for "UTF-8" (no additional data).
+void TextCodecWinCE::registerBaseCodecs(TextCodecRegistrar registrar)
+{
+    registrar("UTF-8", newTextCodecWinCE, 0);
+}
+
+// Registers every supported charset under its own name, then registers each of its aliases
+// as mapping to that name.
+void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
+{
+    // Make sure the charset tables have been populated before reading them.
+    languageManager();
+
+    CharsetSet& supported = supportedCharsets();
+    HashMap<String, CharsetInfo>& known = knownCharsets();
+    for (CharsetSet::iterator charset = supported.begin(); charset != supported.end(); ++charset) {
+        HashMap<String, CharsetInfo>::iterator entry = known.find(*charset);
+        if (entry == known.end())
+            continue;
+
+        const CharsetInfo& info = entry->second;
+        const char* canonicalName = info.m_name.data();
+        registrar(canonicalName, canonicalName);
+        for (Vector<CString>::const_iterator alias = info.m_aliases.begin(); alias != info.m_aliases.end(); ++alias)
+            registrar(alias->data(), canonicalName);
+    }
+}
+
+// Registers newTextCodecWinCE as the codec factory for every supported charset name.
+void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar)
+{
+    // Make sure the charset tables have been populated before reading them.
+    languageManager();
+
+    CharsetSet& supported = supportedCharsets();
+    HashMap<String, CharsetInfo>& known = knownCharsets();
+    for (CharsetSet::iterator charset = supported.begin(); charset != supported.end(); ++charset) {
+        HashMap<String, CharsetInfo>::iterator entry = known.find(*charset);
+        if (entry == known.end())
+            continue;
+
+        registrar(entry->second.m_name.data(), newTextCodecWinCE, 0);
+    }
+}
+
+// Returns the dwFlags value to pass to MultiByteToWideChar() for |codePage|:
+// MB_ERR_INVALID_CHARS for UTF-8, 0 for the code pages that do not accept any flag, and
+// MB_PRECOMPOSED | MB_ERR_INVALID_CHARS for everything else.
+static DWORD getCodePageFlags(UINT codePage)
+{
+    if (codePage == CP_UTF8)
+        return MB_ERR_INVALID_CHARS;
+
+    if (codePage == 42) // Symbol
+        return 0;
+
+    // Microsoft says the flag must be 0 for the following code pages
+    if (codePage > 50000) {
+        if ((codePage >= 50220 && codePage <= 50222)
+            || codePage == 50225
+            || codePage == 50227
+            || codePage == 50229
+            || codePage == 52936
+            || codePage == 54936
+            // 57002 through 57011. The upper bound previously read 57001, which made this
+            // range test always false, so these code pages wrongly received non-zero flags.
+            || (codePage >= 57002 && codePage <= 57011)
+            || codePage == 65000 // UTF-7
+            )
+            return 0;
+    }
+
+    return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS;
+}
+
+// Returns a pointer to the first byte in [bytes, bytes + length) that has its high bit set,
+// or bytes + length when every byte is 7-bit ASCII.
+static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length)
+{
+    const char* const end = bytes + length;
+    const char* cursor = bytes;
+    while (cursor < end && !(*cursor & 0x80))
+        ++cursor;
+    return cursor;
+}
+
+// Decodes |length| bytes at |bytes| from |codePage| and appends the UTF-16 output to |result|.
+//
+// |left| receives the number of trailing input bytes that were not consumed.
+// |canBeFirstTime| enables UTF-8 byte-order-mark handling at the start of |bytes|.
+// |sawInvalidChar| is set (never cleared) when the UTF-8 converter reports a malformed
+// sequence. Input that merely ends in the middle of a character is not flagged, so the
+// caller can keep the unconsumed tail and retry once more bytes have arrived.
+static void decode(Vector<UChar, 8192>& result, UINT codePage, const char* bytes, size_t length, size_t* left, bool canBeFirstTime, bool& sawInvalidChar)
+{
+    *left = length;
+    if (!bytes || !length)
+        return;
+
+    DWORD flags = getCodePageFlags(codePage);
+
+    if (codePage == CP_UTF8) {
+        if (canBeFirstTime) {
+            // Handle BOM.
+            if (length > 3) {
+                if (bytes[0] == (char)0xEF && bytes[1] == (char)0xBB && bytes[2] == (char)0xBF) {
+                    // BOM found!
+                    length -= 3;
+                    bytes += 3;
+                    *left = length;
+                }
+            // At most three bytes that form a complete or partial BOM. The first byte has to be
+            // compared as a char like the others: comparing against the int 0xEF never matches
+            // where char is signed.
+            } else if (bytes[0] == (char)0xEF && (length < 2 || bytes[1] == (char)0xBB) && (length < 3 || bytes[2] == (char)0xBF)) {
+                // A complete BOM is consumed; a partial one is left unconsumed for the caller.
+                if (length == 3)
+                    *left = 0;
+                return;
+            }
+        }
+
+        // Process ASCII characters at beginning.
+        const char* firstNonAsciiChar = findFirstNonAsciiCharacter(bytes, length);
+        int numAsciiCharacters = firstNonAsciiChar - bytes;
+        if (numAsciiCharacters) {
+            result.append(bytes, numAsciiCharacters);
+            length -= numAsciiCharacters;
+            if (!length) {
+                *left = 0;
+                return;
+            }
+            bytes = firstNonAsciiChar;
+        }
+
+        // Reserve one UChar per remaining input byte, then convert into that space.
+        int oldSize = result.size();
+        result.resize(oldSize + length);
+        UChar* resultStart = result.data() + oldSize;
+        const char* sourceStart = bytes;
+        const char* const sourceEnd = bytes + length;
+        for (;;) {
+            using namespace WTF::Unicode;
+            ConversionResult convRes = convertUTF8ToUTF16(&sourceStart
+                , sourceEnd
+                , &resultStart
+                , result.data() + result.size()
+                , true);
+
+            // FIXME: is it possible?
+            if (convRes == targetExhausted && sourceStart < sourceEnd) {
+                oldSize = result.size();
+                result.resize(oldSize + 256);
+                resultStart = result.data() + oldSize;
+                continue;
+            }
+
+            // Only a malformed sequence counts as invalid. sourceExhausted means the input
+            // stopped in the middle of a character, which the caller handles by buffering.
+            if (convRes == sourceIllegal)
+                sawInvalidChar = true;
+
+            break;
+        }
+
+        *left = sourceEnd - sourceStart;
+        // Trim the output to what the converter actually wrote.
+        result.resize(resultStart - result.data());
+    } else {
+        // Convert as much of the input as MultiByteToWideChar() accepts: try a window of
+        // |testLength| bytes, append it on success, and shrink the window when it fails.
+        int testLength = length;
+        int untestedLength = length;
+        for (;;) {
+            int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0);
+
+            if (resultLength > 0) {
+                int oldSize = result.size();
+                result.resize(oldSize + resultLength);
+
+                MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength);
+
+                // The window covered everything still untested: done.
+                if (testLength == untestedLength) {
+                    *left = length - testLength;
+                    break;
+                }
+                untestedLength -= testLength;
+                length -= testLength;
+                bytes += testLength;
+            } else {
+                // The window failed; only the bytes before its last one remain candidates.
+                untestedLength = testLength - 1;
+                if (!untestedLength) {
+                    *left = length;
+                    break;
+                }
+            }
+            testLength = (untestedLength + 1) / 2;
+        }
+    }
+}
+
+// Decodes |length| bytes with m_codePage and returns the resulting string.
+// Input that cannot be decoded is replaced by '?' and reported through |sawError|; when
+// |stopOnError| is set, decoding stops at the first such replacement. When |flush| is false,
+// an unconsumed tail is kept in m_decodeBuffer and decoded together with the next call's bytes.
+String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
+{
+    // Bytes left over from an earlier call: decode them followed by the new input.
+    if (!m_decodeBuffer.isEmpty()) {
+        m_decodeBuffer.append(bytes, length);
+        bytes = m_decodeBuffer.data();
+        length = m_decodeBuffer.size();
+    }
+
+    size_t left;
+    Vector<UChar, 8192> result;
+    for (;;) {
+        bool sawInvalidChar = false;
+        WebCore::decode(result, m_codePage, bytes, length, &left, m_decodeBuffer.isEmpty(), sawInvalidChar);
+        if (!left)
+            break;
+
+        // A short unconsumed tail, nothing reported as invalid and more input expected:
+        // stop here and let the code after the loop buffer the tail.
+        if (!sawInvalidChar && !flush && left < 16)
+            break;
+
+        // Otherwise substitute '?' for the byte at which decoding stopped.
+        result.append(L'?');
+        sawError = true;
+        // NOTE(review): this early return leaves m_decodeBuffer untouched — confirm intended.
+        if (stopOnError)
+            return String::adopt(result);
+
+        // NOTE(review): when |flush| is false this last byte is still stored in m_decodeBuffer
+        // below although '?' has already been emitted for it — confirm intended.
+        if (left == 1)
+            break;
+
+        // Skip the offending byte and carry on with the remainder.
+        bytes += length - left + 1;
+        length = left - 1;
+    }
+    if (left && !flush) {
+        if (m_decodeBuffer.isEmpty())
+            m_decodeBuffer.append(bytes + length - left, left);
+        else {
+            // |bytes| points into m_decodeBuffer here, so move the tail to the front in place.
+            memmove(m_decodeBuffer.data(), bytes + length - left, left);
+            m_decodeBuffer.resize(left);
+        }
+    } else
+        m_decodeBuffer.clear();
+
+    return String::adopt(result);
+}
+
+// Encodes |length| UTF-16 code units into m_codePage using WideCharToMultiByte().
+// Returns an empty CString for empty input and "?" when the conversion fails.
+// NOTE(review): WC_COMPOSITECHECK is passed for every code page other than UTF-8; some code
+// pages may require the flags to be 0 — confirm against the WideCharToMultiByte documentation.
+CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling)
+{
+    if (!characters || !length)
+        return CString();
+
+    const DWORD conversionFlags = (m_codePage == CP_UTF8) ? 0 : WC_COMPOSITECHECK;
+
+    // First pass: ask how many bytes the encoded text needs.
+    const int encodedLength = WideCharToMultiByte(m_codePage, conversionFlags, characters, length, 0, 0, 0, 0);
+
+    // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables.
+
+    if (encodedLength <= 0)
+        return "?";
+
+    // Second pass: convert straight into the CString's own storage.
+    char* destination;
+    CString encoded = CString::newUninitialized(encodedLength, destination);
+
+    WideCharToMultiByte(m_codePage, conversionFlags, characters, length, destination, encodedLength, 0, 0);
+
+    return encoded;
+}
+
+// Reports every supported charset (name, friendly name, code page) to |receiver|, stopping
+// as soon as the receiver returns false.
+void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver)
+{
+    // Make sure the charset tables have been populated before reading them.
+    languageManager();
+
+    CharsetSet& supported = supportedCharsets();
+    HashMap<String, CharsetInfo>& known = knownCharsets();
+    for (CharsetSet::iterator charset = supported.begin(); charset != supported.end(); ++charset) {
+        HashMap<String, CharsetInfo>::iterator entry = known.find(*charset);
+        if (entry == known.end())
+            continue;
+
+        CharsetInfo& info = entry->second;
+        bool keepGoing = receiver.receive(info.m_name.data(), info.m_friendlyName.charactersWithNullTermination(), info.m_codePage);
+        if (!keepGoing)
+            break;
+    }
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/wince/TextCodecWinCE.h b/Source/WebCore/platform/text/wince/TextCodecWinCE.h
new file mode 100644
index 0000000..8d332a6
--- /dev/null
+++ b/Source/WebCore/platform/text/wince/TextCodecWinCE.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ * Copyright (C) 2007-2009 Torch Mobile, Inc.
+ * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef TextCodecWinCE_h
+#define TextCodecWinCE_h
+
+#include "PlatformString.h"
+#include "TextCodec.h"
+#include "TextEncoding.h"
+#include <wtf/Vector.h>
+#include <windows.h>
+
+namespace WebCore {
+
+// TextCodec implementation for a single Windows code page.
+class TextCodecWinCE : public TextCodec {
+public:
+    // Registration hooks for the always-available encoding (UTF-8).
+    static void registerBaseEncodingNames(EncodingNameRegistrar);
+    static void registerBaseCodecs(TextCodecRegistrar);
+
+    // Registration hooks for the charsets discovered on the system.
+    static void registerExtendedEncodingNames(EncodingNameRegistrar);
+    static void registerExtendedCodecs(TextCodecRegistrar);
+
+    TextCodecWinCE(UINT codePage);
+    virtual ~TextCodecWinCE();
+
+    virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+    virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+    struct EncodingInfo {
+        String m_encoding;
+        String m_friendlyName;
+    };
+
+    // Callback interface used by enumerateSupportedEncodings().
+    struct EncodingReceiver {
+        // Virtual so that implementations can be destroyed safely through this interface.
+        virtual ~EncodingReceiver() { }
+
+        // Return false to stop enumerating.
+        virtual bool receive(const char* encoding, const wchar_t* friendlyName, unsigned int codePage) = 0;
+    };
+
+    // Calls receiver.receive() once per supported encoding until it returns false.
+    static void enumerateSupportedEncodings(EncodingReceiver& receiver);
+
+private:
+    UINT m_codePage; // Code page this codec converts to and from.
+    Vector<char> m_decodeBuffer; // Undecoded input bytes carried over between decode() calls.
+};
+
+} // namespace WebCore
+
+#endif // TextCodecWinCE_h
author	Steve Block <steveblock@google.com>	2011-05-06 11:45:16 +0100
committer	Steve Block <steveblock@google.com>	2011-05-12 13:44:10 +0100
commit	cad810f21b803229eb11403f9209855525a25d57 (patch)
tree	29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /Source/WebCore/platform/text
parent	121b0cf4517156d0ac5111caf9830c51b69bae8f (diff)
download	external_webkit-cad810f21b803229eb11403f9209855525a25d57.zip external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2