summaryrefslogtreecommitdiffstats
path: root/WebCore/platform/text
diff options
context:
space:
mode:
authorUpstream <upstream-import@none>1970-01-12 13:46:40 +0000
committerUpstream <upstream-import@none>1970-01-12 13:46:40 +0000
commitd8543bb6618c17b12da906afa77d216f58cf4058 (patch)
treec58dc05ed86825bd0ef8d305d58c8205106b540f /WebCore/platform/text
downloadexternal_webkit-d8543bb6618c17b12da906afa77d216f58cf4058.zip
external_webkit-d8543bb6618c17b12da906afa77d216f58cf4058.tar.gz
external_webkit-d8543bb6618c17b12da906afa77d216f58cf4058.tar.bz2
external/webkit r30707
Diffstat (limited to 'WebCore/platform/text')
-rw-r--r--WebCore/platform/text/AtomicString.cpp236
-rw-r--r--WebCore/platform/text/AtomicString.h136
-rw-r--r--WebCore/platform/text/AtomicStringImpl.h36
-rw-r--r--WebCore/platform/text/Base64.cpp184
-rw-r--r--WebCore/platform/text/Base64.h41
-rw-r--r--WebCore/platform/text/BidiContext.cpp38
-rw-r--r--WebCore/platform/text/BidiContext.h69
-rw-r--r--WebCore/platform/text/BidiResolver.h837
-rw-r--r--WebCore/platform/text/CString.cpp102
-rw-r--r--WebCore/platform/text/CString.h74
-rw-r--r--WebCore/platform/text/CharacterNames.h60
-rw-r--r--WebCore/platform/text/PlatformString.h314
-rw-r--r--WebCore/platform/text/RegularExpression.cpp207
-rw-r--r--WebCore/platform/text/RegularExpression.h63
-rw-r--r--WebCore/platform/text/SegmentedString.cpp202
-rw-r--r--WebCore/platform/text/SegmentedString.h176
-rw-r--r--WebCore/platform/text/String.cpp799
-rw-r--r--WebCore/platform/text/StringBuffer.h77
-rw-r--r--WebCore/platform/text/StringHash.h249
-rw-r--r--WebCore/platform/text/StringImpl.cpp1041
-rw-r--r--WebCore/platform/text/StringImpl.h281
-rw-r--r--WebCore/platform/text/TextBoundaries.h38
-rw-r--r--WebCore/platform/text/TextBoundariesICU.cpp76
-rw-r--r--WebCore/platform/text/TextBreakIterator.h48
-rw-r--r--WebCore/platform/text/TextBreakIteratorICU.cpp117
-rw-r--r--WebCore/platform/text/TextBreakIteratorInternalICU.h32
-rw-r--r--WebCore/platform/text/TextCodec.cpp56
-rw-r--r--WebCore/platform/text/TextCodec.h59
-rw-r--r--WebCore/platform/text/TextCodecICU.cpp357
-rw-r--r--WebCore/platform/text/TextCodecICU.h66
-rw-r--r--WebCore/platform/text/TextCodecLatin1.cpp203
-rw-r--r--WebCore/platform/text/TextCodecLatin1.h44
-rw-r--r--WebCore/platform/text/TextCodecUTF16.cpp144
-rw-r--r--WebCore/platform/text/TextCodecUTF16.h51
-rw-r--r--WebCore/platform/text/TextCodecUserDefined.cpp115
-rw-r--r--WebCore/platform/text/TextCodecUserDefined.h44
-rw-r--r--WebCore/platform/text/TextDecoder.cpp106
-rw-r--r--WebCore/platform/text/TextDecoder.h64
-rw-r--r--WebCore/platform/text/TextDirection.h35
-rw-r--r--WebCore/platform/text/TextEncoding.cpp213
-rw-r--r--WebCore/platform/text/TextEncoding.h70
-rw-r--r--WebCore/platform/text/TextEncodingRegistry.cpp243
-rw-r--r--WebCore/platform/text/TextEncodingRegistry.h53
-rw-r--r--WebCore/platform/text/TextStream.cpp99
-rw-r--r--WebCore/platform/text/TextStream.h55
-rw-r--r--WebCore/platform/text/UnicodeRange.cpp462
-rw-r--r--WebCore/platform/text/UnicodeRange.h120
-rw-r--r--WebCore/platform/text/cf/StringCF.cpp55
-rw-r--r--WebCore/platform/text/cf/StringImplCF.cpp37
-rw-r--r--WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp30
-rw-r--r--WebCore/platform/text/mac/CharsetData.h37
-rw-r--r--WebCore/platform/text/mac/ShapeArabic.c550
-rw-r--r--WebCore/platform/text/mac/ShapeArabic.h41
-rw-r--r--WebCore/platform/text/mac/StringImplMac.mm31
-rw-r--r--WebCore/platform/text/mac/StringMac.mm41
-rw-r--r--WebCore/platform/text/mac/TextBoundaries.mm54
-rw-r--r--WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm72
-rw-r--r--WebCore/platform/text/mac/TextCodecMac.cpp321
-rw-r--r--WebCore/platform/text/mac/TextCodecMac.h66
-rw-r--r--WebCore/platform/text/mac/character-sets.txt1868
-rw-r--r--WebCore/platform/text/mac/mac-encodings.txt49
-rwxr-xr-xWebCore/platform/text/mac/make-charset-table.pl225
-rw-r--r--WebCore/platform/text/qt/StringQt.cpp56
-rw-r--r--WebCore/platform/text/qt/TextBoundaries.cpp125
-rw-r--r--WebCore/platform/text/qt/TextBreakIteratorQt.cpp297
-rw-r--r--WebCore/platform/text/qt/TextCodecQt.cpp119
-rw-r--r--WebCore/platform/text/qt/TextCodecQt.h56
-rw-r--r--WebCore/platform/text/symbian/StringImplSymbian.cpp53
-rw-r--r--WebCore/platform/text/symbian/StringSymbian.cpp50
-rw-r--r--WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp31
-rw-r--r--WebCore/platform/text/wx/StringWx.cpp92
71 files changed, 12548 insertions, 0 deletions
diff --git a/WebCore/platform/text/AtomicString.cpp b/WebCore/platform/text/AtomicString.cpp
new file mode 100644
index 0000000..f7b3c91
--- /dev/null
+++ b/WebCore/platform/text/AtomicString.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+
+#ifdef AVOID_STATIC_CONSTRUCTORS
+#define ATOMICSTRING_HIDE_GLOBALS 1
+#endif
+
+#include "AtomicString.h"
+
+#include "StaticConstructors.h"
+#include "StringHash.h"
+#include <kjs/identifier.h>
+#include <wtf/HashSet.h>
+
+using KJS::Identifier;
+using KJS::UString;
+
+namespace WebCore {
+
+static HashSet<StringImpl*>* stringTable;
+
+// Hash translator that lets the atom table be probed with a NUL-terminated 8-bit string instead of a StringImpl*.
+struct CStringTranslator {
+    static unsigned hash(const char* c)
+    {
+        return StringImpl::computeHash(c);
+    }
+    // True when |r| holds exactly the characters of the NUL-terminated string |s| (each char read as unsigned).
+    static bool equal(StringImpl* r, const char* s)
+    {
+        int length = r->length();
+        const UChar* d = r->characters();
+        for (int i = 0; i != length; ++i) {
+            unsigned char c = s[i];
+            if (!c || d[i] != c) // stop at the terminator: a U+0000 inside |r| must not carry the scan past the end of |s|
+                return false;
+        }
+        return s[length] == 0;
+    }
+    // Called for a string that is not in the table yet: stores a new StringImpl for it in the slot.
+    static void translate(StringImpl*& location, const char* const& c, unsigned hash)
+    {
+        location = new StringImpl(c, strlen(c), hash);
+    }
+};
+
+bool operator==(const AtomicString& a, const char* b) // null-aware comparison of an atom with a C string
+{
+    StringImpl* lhs = a.impl();
+    const bool lhsIsNull = !lhs || !lhs->characters(); // no impl, or an impl without a character buffer
+    const bool rhsIsNull = !b;
+    if (lhsIsNull || rhsIsNull)
+        return lhsIsNull && rhsIsNull; // a null side only equals another null side
+    return CStringTranslator::equal(lhs, b);
+}
+
+PassRefPtr<StringImpl> AtomicString::add(const char* c) // interns a NUL-terminated 8-bit string
+{
+    if (!c)
+        return 0; // null in, null out
+    if (*c == '\0')
+        return StringImpl::empty(); // empty input bypasses the table
+    pair<HashSet<StringImpl*>::iterator, bool> inserted = stringTable->add<const char*, CStringTranslator>(c);
+    if (inserted.second)
+        return adoptRef(*inserted.first); // entry was created by the translator during this call
+    return *inserted.first; // entry was already in the table
+}
+
+struct UCharBuffer { // (pointer, length) pair used as the lookup key for UCharBufferTranslator
+ const UChar* s; // first character of the run; this struct neither copies nor frees it
+ unsigned length; // number of UChar elements starting at s
+};
+
+struct UCharBufferTranslator { // lets the atom table be probed with a UCharBuffer instead of a StringImpl*
+ static unsigned hash(const UCharBuffer& buf)
+ {
+ return StringImpl::computeHash(buf.s, buf.length);
+ }
+
+ static bool equal(StringImpl* const& str, const UCharBuffer& buf) // true when str and buf hold the same characters
+ {
+ unsigned strLength = str->length();
+ unsigned bufLength = buf.length;
+ if (strLength != bufLength)
+ return false;
+
+#if PLATFORM(ARM)
+ const UChar* strChars = str->characters(); // ARM build: compare one UChar at a time
+ const UChar* bufChars = buf.s;
+
+ for (unsigned i = 0; i != strLength; ++i) {
+ if (*strChars++ != *bufChars++)
+ return false;
+ }
+ return true;
+#else
+ /* Do it 4-bytes-at-a-time on architectures where it's safe */
+ const uint32_t* strChars = reinterpret_cast<const uint32_t*>(str->characters()); // NOTE(review): relies on 32-bit loads from UChar data being acceptable here — confirm alignment
+ const uint32_t* bufChars = reinterpret_cast<const uint32_t*>(buf.s);
+
+ unsigned halfLength = strLength >> 1; // number of whole two-character words
+ for (unsigned i = 0; i != halfLength; ++i) {
+ if (*strChars++ != *bufChars++)
+ return false;
+ }
+
+ if (strLength & 1 && // odd length: one 16-bit character remains after the word loop
+ *reinterpret_cast<const uint16_t *>(strChars) != *reinterpret_cast<const uint16_t *>(bufChars))
+ return false;
+
+ return true;
+#endif
+ }
+
+ static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) // stores a new StringImpl built from buf in the slot
+ {
+ location = new StringImpl(buf.s, buf.length, hash);
+ }
+};
+
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s, int length) // interns |length| characters starting at |s|
+{
+    if (!s)
+        return 0; // null in, null out
+    if (!length)
+        return StringImpl::empty(); // empty input bypasses the table
+
+    UCharBuffer key = { s, length };
+    pair<HashSet<StringImpl*>::iterator, bool> inserted = stringTable->add<UCharBuffer, UCharBufferTranslator>(key);
+    // A fresh entry was allocated by the translator during this call; an existing entry is returned as found.
+    if (inserted.second)
+        return adoptRef(*inserted.first);
+    return *inserted.first;
+}
+
+PassRefPtr<StringImpl> AtomicString::add(const UChar* s) // interns a zero-terminated UChar string
+{
+    if (!s)
+        return 0; // null in, null out
+
+    // Count the characters in front of the terminating zero.
+    int length = 0;
+    for (const UChar* cursor = s; *cursor != UChar(0); ++cursor)
+        ++length;
+    if (!length)
+        return StringImpl::empty(); // empty input bypasses the table
+
+    UCharBuffer key = { s, length };
+    pair<HashSet<StringImpl*>::iterator, bool> inserted = stringTable->add<UCharBuffer, UCharBufferTranslator>(key);
+    if (inserted.second)
+        return adoptRef(*inserted.first); // entry was created by the translator during this call
+    return *inserted.first; // entry was already in the table
+}
+
+PassRefPtr<StringImpl> AtomicString::add(StringImpl* r) // interns an existing StringImpl, reusing |r| itself when it wins the slot
+{
+    if (!r)
+        return r;
+    if (r->m_inTable)
+        return r; // flagged as the table's own entry on an earlier call
+    if (!r->length())
+        return StringImpl::empty();
+    StringImpl* interned = *stringTable->add(r).first; // |r| itself if newly inserted, otherwise the entry already stored
+    if (interned == r)
+        r->m_inTable = true; // lets later calls take the early return above
+    return interned;
+}
+
+void AtomicString::remove(StringImpl* r) // drops r from the atom table; the table must already exist (see init())
+{
+ stringTable->remove(r);
+}
+
+PassRefPtr<StringImpl> AtomicString::add(const KJS::Identifier& str) // interns the identifier's characters via add(const UChar*, int)
+{
+ return add(reinterpret_cast<const UChar*>(str.data()), str.size()); // NOTE(review): the cast assumes KJS character data is layout-compatible with UChar — confirm
+}
+
+PassRefPtr<StringImpl> AtomicString::add(const KJS::UString& str) // interns the UString's characters via add(const UChar*, int)
+{
+ return add(reinterpret_cast<const UChar*>(str.data()), str.size()); // NOTE(review): the cast assumes KJS character data is layout-compatible with UChar — confirm
+}
+
+AtomicString::operator Identifier() const // converts by way of the wrapped String (m_string)
+{
+ return m_string;
+}
+
+AtomicString::operator UString() const // converts by way of the wrapped String (m_string)
+{
+ return m_string;
+}
+
+DEFINE_GLOBAL(AtomicString, nullAtom) // the five shared atoms; AtomicString::init() constructs each one in place with placement new
+DEFINE_GLOBAL(AtomicString, emptyAtom, "") // NOTE(review): DEFINE_GLOBAL comes from StaticConstructors.h, which is not shown here
+DEFINE_GLOBAL(AtomicString, textAtom, "#text")
+DEFINE_GLOBAL(AtomicString, commentAtom, "#comment")
+DEFINE_GLOBAL(AtomicString, starAtom, "*")
+
+void AtomicString::init() // one-time setup of the atom table and the predefined atoms
+{
+    static bool initialized;
+    if (initialized)
+        return; // repeat calls do nothing
+
+    // Create the table first: the atom constructors below intern their text through it.
+    stringTable = new HashSet<StringImpl*>;
+    // Use placement new to initialize the globals.
+    new ((void*)&nullAtom) AtomicString;
+    new ((void*)&emptyAtom) AtomicString("");
+    new ((void*)&textAtom) AtomicString("#text");
+    new ((void*)&commentAtom) AtomicString("#comment");
+    new ((void*)&starAtom) AtomicString("*");
+    initialized = true;
+}
+
+}
diff --git a/WebCore/platform/text/AtomicString.h b/WebCore/platform/text/AtomicString.h
new file mode 100644
index 0000000..2a2ac97
--- /dev/null
+++ b/WebCore/platform/text/AtomicString.h
@@ -0,0 +1,136 @@
+/*
+ * This file is part of the DOM implementation for KDE.
+ *
+ * Copyright (C) 2004, 2005, 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef AtomicString_h
+#define AtomicString_h
+
+#include "AtomicStringImpl.h"
+#include "PlatformString.h"
+
+namespace WebCore {
+
+class AtomicString { // String wrapper whose impl is obtained through add(), i.e. looked up in (or inserted into) the shared atom table
+public:
+    static void init(); // must run before any other use: it creates the table the constructors below rely on
+
+    AtomicString() { }
+    AtomicString(const char* s) : m_string(add(s)) { }
+    AtomicString(const UChar* s, int length) : m_string(add(s, length)) { }
+    AtomicString(const UChar* s) : m_string(add(s)) { }
+    AtomicString(const KJS::UString& s) : m_string(add(s)) { }
+    AtomicString(const KJS::Identifier& s) : m_string(add(s)) { }
+    AtomicString(StringImpl* imp) : m_string(add(imp)) { }
+    AtomicString(AtomicStringImpl* imp) : m_string(imp) { } // no add() call: presumably imp is already the table's entry — confirm
+    AtomicString(const String& s) : m_string(add(s.impl())) { }
+
+    operator const String&() const { return m_string; }
+    const String& string() const { return m_string; }
+
+    operator KJS::Identifier() const;
+    operator KJS::UString() const;
+
+    AtomicStringImpl* impl() const { return static_cast<AtomicStringImpl *>(m_string.impl()); }
+
+    const UChar* characters() const { return m_string.characters(); }
+    unsigned length() const { return m_string.length(); }
+
+    UChar operator[](unsigned int i) const { return m_string[i]; }
+
+    bool contains(UChar c) const { return m_string.contains(c); }
+    bool contains(const AtomicString& s, bool caseSensitive = true) const
+        { return m_string.contains(s.string(), caseSensitive); }
+
+    int find(UChar c, int start = 0) const { return m_string.find(c, start); }
+    int find(const AtomicString& s, int start = 0, bool caseSensitive = true) const
+        { return m_string.find(s.string(), start, caseSensitive); }
+
+    bool startsWith(const AtomicString& s, bool caseSensitive = true) const
+        { return m_string.startsWith(s.string(), caseSensitive); }
+    bool endsWith(const AtomicString& s, bool caseSensitive = true) const
+        { return m_string.endsWith(s.string(), caseSensitive); }
+
+    int toInt(bool* ok = 0) const { return m_string.toInt(ok); }
+    double toDouble(bool* ok = 0) const { return m_string.toDouble(ok); }
+    float toFloat(bool* ok = 0) const { return m_string.toFloat(ok); }
+    bool percentage(int& p) const { return m_string.percentage(p); }
+    Length* toLengthArray(int& len) const { return m_string.toLengthArray(len); }
+    Length* toCoordsArray(int& len) const { return m_string.toCoordsArray(len); }
+
+    bool isNull() const { return m_string.isNull(); }
+    bool isEmpty() const { return m_string.isEmpty(); }
+
+    static void remove(StringImpl*); // takes the given impl out of the atom table
+
+#ifdef __OBJC__
+    AtomicString(NSString* s) : m_string(add(String(s).impl())) { }
+    operator NSString*() const { return m_string; }
+#endif
+#if PLATFORM(SYMBIAN)
+    AtomicString(const TDesC& s) : m_string(add(String(s).impl())) { }
+    operator TPtrC() const { return m_string; }
+#endif
+#if PLATFORM(QT)
+    AtomicString(const QString& s) : m_string(add(String(s).impl())) { }
+    operator QString() const { return m_string; }
+#endif
+
+private:
+    String m_string; // the wrapped string; every read-only member above forwards to it
+
+    static PassRefPtr<StringImpl> add(const char*); // the add() overloads return the table entry for the given text
+    static PassRefPtr<StringImpl> add(const UChar*, int length);
+    static PassRefPtr<StringImpl> add(const UChar*);
+    static PassRefPtr<StringImpl> add(StringImpl*);
+    static PassRefPtr<StringImpl> add(const KJS::UString&);
+    static PassRefPtr<StringImpl> add(const KJS::Identifier&);
+};
+
+inline bool operator==(const AtomicString& a, const AtomicString& b) { return a.impl() == b.impl(); } // atom vs atom: pointer comparison of the impls
+bool operator==(const AtomicString& a, const char* b); // defined in AtomicString.cpp; a null atom equals only a null pointer
+inline bool operator==(const AtomicString& a, const String& b) { return equal(a.impl(), b.impl()); } // atom vs String: goes through equal() on the impls
+inline bool operator==(const char* a, const AtomicString& b) { return b == a; }
+inline bool operator==(const String& a, const AtomicString& b) { return equal(a.impl(), b.impl()); }
+
+inline bool operator!=(const AtomicString& a, const AtomicString& b) { return a.impl() != b.impl(); } // each != mirrors the == with the same operand types
+inline bool operator!=(const AtomicString& a, const char *b) { return !(a == b); }
+inline bool operator!=(const AtomicString& a, const String& b) { return !equal(a.impl(), b.impl()); }
+inline bool operator!=(const char* a, const AtomicString& b) { return !(b == a); }
+inline bool operator!=(const String& a, const AtomicString& b) { return !equal(a.impl(), b.impl()); }
+
+inline bool equalIgnoringCase(const AtomicString& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); } // all overloads forward to equalIgnoringCase on the impl / C string
+inline bool equalIgnoringCase(const AtomicString& a, const char* b) { return equalIgnoringCase(a.impl(), b); }
+inline bool equalIgnoringCase(const AtomicString& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); }
+inline bool equalIgnoringCase(const char* a, const AtomicString& b) { return equalIgnoringCase(a, b.impl()); }
+inline bool equalIgnoringCase(const String& a, const AtomicString& b) { return equalIgnoringCase(a.impl(), b.impl()); }
+
+// Define external global variables for the commonly used atomic strings.
+#ifndef ATOMICSTRING_HIDE_GLOBALS // AtomicString.cpp defines this under AVOID_STATIC_CONSTRUCTORS so these declarations are hidden from that file
+ extern const AtomicString nullAtom;
+ extern const AtomicString emptyAtom; // ""
+ extern const AtomicString textAtom; // "#text"
+ extern const AtomicString commentAtom; // "#comment"
+ extern const AtomicString starAtom; // "*"
+#endif
+
+} // namespace WebCore
+
+#endif // AtomicString_h
diff --git a/WebCore/platform/text/AtomicStringImpl.h b/WebCore/platform/text/AtomicStringImpl.h
new file mode 100644
index 0000000..d905afc
--- /dev/null
+++ b/WebCore/platform/text/AtomicStringImpl.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of the DOM implementation for KDE.
+ *
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef AtomicStringImpl_h
+#define AtomicStringImpl_h
+
+#include "StringImpl.h"
+
+namespace WebCore {
+
+class AtomicStringImpl : public StringImpl // adds no members; AtomicString::impl() static_casts its StringImpl to this type
+{
+};
+
+}
+
+#endif
diff --git a/WebCore/platform/text/Base64.cpp b/WebCore/platform/text/Base64.cpp
new file mode 100644
index 0000000..920fa89
--- /dev/null
+++ b/WebCore/platform/text/Base64.cpp
@@ -0,0 +1,184 @@
+/*
+ Copyright (C) 2000-2001 Dawit Alemayehu <adawit@kde.org>
+ Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
+ Copyright (C) 2007, 2008 Apple Inc. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License (LGPL)
+ version 2 as published by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+ This code is based on the java implementation in HTTPClient
+ package by Ronald Tschalär Copyright (C) 1996-1999.
+*/
+
+#include "config.h"
+#include "Base64.h"
+
+#include <limits.h>
+
+#include <wtf/Platform.h>
+#include <wtf/StringExtras.h>
+
+namespace WebCore {
+
+static const char base64EncMap[64] = { // 6-bit value -> ASCII code of its Base64 character
+ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, // 'A'..
+ 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, // ..'Z', then 'a'..
+ 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E,
+ 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
+ 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33, // ..'z', then '0'..
+ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F // ..'9', '+', '/'
+};
+
+static const char base64DecMap[128] = { // 7-bit ASCII code -> 6-bit value; characters outside the alphabet map to 0, so input must be validated before lookup
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3F, // '+' -> 62, '/' -> 63
+ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, // '0'.. -> 52..
+ 0x3C, 0x3D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // ..'9' -> 61; '=' maps to 0
+ 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // 'A'.. -> 0..
+ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
+ 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
+ 0x17, 0x18, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, // ..'Z' -> 25
+ 0x00, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, // 'a'.. -> 26..
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,
+ 0x31, 0x32, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00 // ..'z' -> 51
+};
+
+void base64Encode(const Vector<char>& in, Vector<char>& out, bool insertLFs) // writes the Base64 form of in to out; out stays empty for empty or oversized input
+{
+ out.clear();
+ if (in.isEmpty())
+ return;
+
+ // If the input string is pathologically large, just return nothing.
+ // Note: Keep this in sync with the "out_len" computation below.
+ // Rather than being perfectly precise, this is a bit conservative.
+ const unsigned maxInputBufferSize = UINT_MAX / 77 * 76 / 4 * 3 - 2;
+ if (in.size() > maxInputBufferSize)
+ return;
+
+ unsigned sidx = 0; // read position in data
+ unsigned didx = 0; // write position in out
+ const char* data = in.data();
+ const unsigned len = in.size();
+
+ unsigned out_len = ((len + 2) / 3) * 4; // four output characters per started group of three input bytes
+
+ // Deal with the 76 character per line limit specified in RFC 2045.
+ insertLFs = (insertLFs && out_len > 76); // output that fits on one line never gets a line feed
+ if (insertLFs)
+ out_len += ((out_len - 1) / 76); // one '\n' between consecutive 76-character lines
+
+ int count = 0; // encoded characters written so far, excluding line feeds; only maintained when insertLFs is set
+ out.grow(out_len);
+
+ // 3-byte to 4-byte conversion + 0-63 to ascii printable conversion
+ if (len > 1) { // guard: len - 2 below is unsigned and would wrap for len == 1
+ while (sidx < len - 2) {
+ if (insertLFs) {
+ if (count && (count % 76) == 0)
+ out[didx++] = '\n';
+ count += 4;
+ }
+ out[didx++] = base64EncMap[(data[sidx] >> 2) & 077]; // every index is masked to 6 bits, so sign extension of char cannot leak in
+ out[didx++] = base64EncMap[(data[sidx + 1] >> 4) & 017 | (data[sidx] << 4) & 077];
+ out[didx++] = base64EncMap[(data[sidx + 2] >> 6) & 003 | (data[sidx + 1] << 2) & 077];
+ out[didx++] = base64EncMap[data[sidx + 2] & 077];
+ sidx += 3;
+ }
+ }
+
+ if (sidx < len) { // one or two input bytes are left over
+ if (insertLFs && (count > 0) && (count % 76) == 0)
+ out[didx++] = '\n';
+
+ out[didx++] = base64EncMap[(data[sidx] >> 2) & 077];
+ if (sidx < len - 1) { // two bytes left: three characters now, one '=' added below
+ out[didx++] = base64EncMap[(data[sidx + 1] >> 4) & 017 | (data[sidx] << 4) & 077];
+ out[didx++] = base64EncMap[(data[sidx + 1] << 2) & 077];
+ } else // one byte left: two characters now, two '=' added below
+ out[didx++] = base64EncMap[(data[sidx] << 4) & 077];
+ }
+
+ // Add padding
+ while (didx < out.size()) {
+ out[didx] = '=';
+ didx++;
+ }
+}
+
+bool base64Decode(const Vector<char>& in, Vector<char>& out) // Vector convenience wrapper around the pointer/length overload
+{
+ out.clear(); // NOTE(review): passing the same vector as in and out would discard the input here
+
+ // If the input string is pathologically large, just return nothing.
+ if (in.size() > UINT_MAX) // the overload below takes the length as unsigned
+ return false;
+
+ return base64Decode(in.data(), in.size(), out);
+}
+
+bool base64Decode(const char* data, unsigned len, Vector<char>& out) // decodes len characters at data into out; returns false, with out emptied, on invalid input
+{
+    out.clear();
+    if (len == 0)
+        return true;
+    // Trailing '=' padding carries no payload; strip it before sizing the output.
+    while (len && data[len-1] == '=')
+        --len;
+    // Pass 1: stage each character's 6-bit value in out. NOTE(review): a '=' before the end is accepted and decodes as 0 — confirm intent.
+    out.grow(len);
+    for (unsigned idx = 0; idx < len; idx++) {
+        unsigned char ch = data[idx];
+        bool inAlphabet = (ch > 47 && ch < 58) || (ch > 64 && ch < 91) || (ch > 96 && ch < 123) || ch == '+' || ch == '/' || ch == '=';
+        if (!inAlphabet) {
+            out.clear(); // never hand half-translated bytes back on failure
+            return false;
+        }
+        out[idx] = base64DecMap[ch];
+    }
+    // 4-byte to 3-byte conversion
+    unsigned outLen = len - ((len + 3) / 4);
+    if (!outLen || ((outLen + 2) / 3) * 4 < len) {
+        out.clear(); // impossible length, e.g. a single leftover character
+        return false;
+    }
+    // Pass 2 packs the staged values in place; the write index never overtakes the read index.
+    unsigned sidx = 0;
+    unsigned didx = 0;
+    if (outLen > 1) {
+        while (didx < outLen - 2) {
+            out[didx] = (((out[sidx] << 2) & 255) | ((out[sidx + 1] >> 4) & 003));
+            out[didx + 1] = (((out[sidx + 1] << 4) & 255) | ((out[sidx + 2] >> 2) & 017));
+            out[didx + 2] = (((out[sidx + 2] << 6) & 255) | (out[sidx + 3] & 077));
+            sidx += 4;
+            didx += 3;
+        }
+    }
+    // Up to two output bytes remain from a final partial group.
+    if (didx < outLen)
+        out[didx] = (((out[sidx] << 2) & 255) | ((out[sidx + 1] >> 4) & 003));
+    if (++didx < outLen)
+        out[didx] = (((out[sidx + 1] << 4) & 255) | ((out[sidx + 2] >> 2) & 017));
+    if (outLen < out.size())
+        out.shrink(outLen);
+    return true;
+}
+
+}
diff --git a/WebCore/platform/text/Base64.h b/WebCore/platform/text/Base64.h
new file mode 100644
index 0000000..0b176e6
--- /dev/null
+++ b/WebCore/platform/text/Base64.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef Base64_h
+#define Base64_h
+
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+void base64Encode(const Vector<char>&, Vector<char>&, bool insertLFs = false); // (input, output, wrap output at 76 characters per line)
+
+// this decoder is not general purpose - it returns an error if it encounters a linefeed, as needed for window.atob
+bool base64Decode(const Vector<char>&, Vector<char>&); // (input, output); returns false on invalid input
+bool base64Decode(const char*, unsigned, Vector<char>&); // (input characters, input length, output)
+
+}
+
+#endif // Base64_h
diff --git a/WebCore/platform/text/BidiContext.cpp b/WebCore/platform/text/BidiContext.cpp
new file mode 100644
index 0000000..ef3c225
--- /dev/null
+++ b/WebCore/platform/text/BidiContext.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2003, 2004, 2006, 2007 Apple Inc. All right reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "BidiContext.h"
+
+namespace WebCore {
+
+bool operator==(const BidiContext& c1, const BidiContext& c2) // deep comparison: level, override and direction must match all the way up both parent chains
+{
+    const BidiContext* lhs = &c1;
+    const BidiContext* rhs = &c2;
+    for (; lhs != rhs; lhs = lhs->parent(), rhs = rhs->parent()) { // reaching the same node (or both chains ending) means equal
+        if (!lhs || !rhs || lhs->level() != rhs->level() || lhs->override() != rhs->override() || lhs->dir() != rhs->dir())
+            return false; // one chain ended early, or this pair of levels differs
+    }
+    return true;
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/BidiContext.h b/WebCore/platform/text/BidiContext.h
new file mode 100644
index 0000000..89123c8
--- /dev/null
+++ b/WebCore/platform/text/BidiContext.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2003, 2004, 2006, 2007 Apple Inc. All right reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef BidiContext_h
+#define BidiContext_h
+
+#include <wtf/Assertions.h>
+#include <wtf/RefPtr.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// Used to keep track of explicit embeddings.
+class BidiContext { // one embedding level; each context holds a counted reference to its parent
+public:
+ BidiContext(unsigned char level, WTF::Unicode::Direction direction, bool override = false, BidiContext* parent = 0)
+ : m_level(level)
+ , m_direction(direction)
+ , m_override(override)
+ , m_parent(parent)
+ , m_refCount(0) // the count starts at zero; the first ref() makes it 1
+ {
+ ASSERT(direction == WTF::Unicode::LeftToRight || direction == WTF::Unicode::RightToLeft); // only the two strong directions are valid here
+ }
+
+ void ref() const { m_refCount++; }
+ void deref() const // drops one reference and deletes the object when none remain
+ {
+ m_refCount--;
+ if (m_refCount <= 0)
+ delete this;
+ }
+
+ BidiContext* parent() const { return m_parent.get(); } // null when the context was built without a parent
+ unsigned char level() const { return m_level; }
+ WTF::Unicode::Direction dir() const { return static_cast<WTF::Unicode::Direction>(m_direction); }
+ bool override() const { return m_override; }
+
+private:
+ unsigned char m_level;
+ unsigned m_direction : 5; // Direction
+ bool m_override : 1;
+ RefPtr<BidiContext> m_parent;
+ mutable int m_refCount; // mutable so that ref()/deref() can be const
+};
+
+bool operator==(const BidiContext&, const BidiContext&);
+
+} // namespace WebCore
+
+#endif // BidiContext_h
diff --git a/WebCore/platform/text/BidiResolver.h b/WebCore/platform/text/BidiResolver.h
new file mode 100644
index 0000000..3e545d9
--- /dev/null
+++ b/WebCore/platform/text/BidiResolver.h
@@ -0,0 +1,837 @@
+/*
+ * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef BidiResolver_h
+#define BidiResolver_h
+
+#include "BidiContext.h"
+#include <wtf/PassRefPtr.h>
+
+namespace WebCore {
+
+// The BidiStatus at a given position (typically the end of a line) can
+// be cached and then used to restart bidi resolution at that position.
+struct BidiStatus {
+ BidiStatus() // All three directions start out as OtherNeutral; context is left null.
+ : eor(WTF::Unicode::OtherNeutral)
+ , lastStrong(WTF::Unicode::OtherNeutral)
+ , last(WTF::Unicode::OtherNeutral)
+ {
+ }
+
+ BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext) // Builds a status from explicit values; the context reference is stored in |context|.
+ : eor(eorDir)
+ , lastStrong(lastStrongDir)
+ , last(lastDir)
+ , context(bidiContext)
+ {
+ }
+
+ WTF::Unicode::Direction eor; // Direction associated with the resolver's end-of-run position.
+ WTF::Unicode::Direction lastStrong; // Most recent strong direction recorded by the resolver.
+ WTF::Unicode::Direction last; // Direction recorded for the previously processed character.
+ RefPtr<BidiContext> context; // Current embedding context; null after default construction.
+};
+
+// Two statuses are equal when their three recorded directions match and their
+// contexts compare equal. A default-constructed BidiStatus has a null context,
+// so the contexts are only dereferenced when both are present: two null
+// contexts compare equal, a null and a non-null context compare unequal.
+inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
+{
+    if (status1.eor != status2.eor || status1.last != status2.last || status1.lastStrong != status2.lastStrong)
+        return false;
+
+    const BidiContext* context1 = status1.context.get();
+    const BidiContext* context2 = status2.context.get();
+    if (!context1 || !context2)
+        return context1 == context2;
+
+    return *context1 == *context2;
+}
+
+// Exact negation of operator== for BidiStatus.
+inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
+{
+    const bool same = status1 == status2;
+    return !same;
+}
+
+// A run of characters between two offsets that share one resolved embedding
+// level. Runs are chained into a singly linked list through m_next.
+struct BidiCharacterRun {
+    // Creates the run for offsets |start| .. |stop| inside |context|. Passing
+    // OtherNeutral as |dir| means "use the direction of the context".
+    BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir)
+        : m_level(context->level())
+        , m_start(start)
+        , m_stop(stop)
+        , m_override(context->override())
+        , m_next(0)
+    {
+        if (dir == WTF::Unicode::OtherNeutral)
+            dir = context->dir();
+
+        // Raise the level of the run above the context level where required
+        // (cases I1 & I2).
+        const bool isNumber = dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber;
+        if (m_level % 2) {
+            // Odd level: left-to-right text and numbers move up by one.
+            if (isNumber || dir == WTF::Unicode::LeftToRight)
+                ++m_level;
+        } else if (dir == WTF::Unicode::RightToLeft) {
+            // Even level: right-to-left text moves up by one ...
+            ++m_level;
+        } else if (isNumber) {
+            // ... and numbers move up by two.
+            m_level += 2;
+        }
+    }
+
+    void destroy() { delete this; }
+
+    int start() const { return m_start; }
+    int stop() const { return m_stop; }
+    unsigned char level() const { return m_level; }
+
+    // A run is reversed when its level is odd, unless the text is visually ordered.
+    bool reversed(bool visuallyOrdered)
+    {
+        return m_level % 2 && !visuallyOrdered;
+    }
+
+    // Visually ordered text is always treated as if a direction override were in effect.
+    bool dirOverride(bool visuallyOrdered)
+    {
+        return m_override || visuallyOrdered;
+    }
+
+    BidiCharacterRun* next() const { return m_next; }
+
+    unsigned char m_level;
+    int m_start;
+    int m_stop;
+    bool m_override;
+    BidiCharacterRun* m_next;
+};
+
+// Resolves bidirectional text for one line at a time and collects the result
+// as a singly linked list of |Run| objects.
+//
+// Requirements on the template arguments, as used by the member functions in
+// this header:
+//  - Iterator: default constructible, copyable, comparable with == and !=,
+//    and provides offset(), atEnd(), direction() and increment(resolver).
+//  - Run: constructible from (start, stop, BidiContext*, Direction) and
+//    provides next(), destroy() and accessible m_next and m_level members.
+template <class Iterator, class Run> class BidiResolver {
+public:
+    // Starts with no runs, a neutral direction and no context; a context has
+    // to be supplied with setContext() or setStatus() before resolving.
+    BidiResolver()
+        : m_direction(WTF::Unicode::OtherNeutral)
+        , m_adjustEmbedding(false)
+        , reachedEndOfLine(false)
+        , emptyRun(true)
+        , m_firstRun(0)
+        , m_lastRun(0)
+        , m_logicallyLastRun(0)
+        , m_runCount(0)
+    {
+    }
+
+    BidiContext* context() const { return m_status.context.get(); }
+    void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
+
+    void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
+    void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
+    void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
+
+    WTF::Unicode::Direction dir() const { return m_direction; }
+    void setDir(WTF::Unicode::Direction d) { m_direction = d; }
+
+    const BidiStatus& status() const { return m_status; }
+    // Taken by const reference so that the status (and the RefPtr inside it)
+    // is copied once, by the assignment, rather than twice.
+    void setStatus(const BidiStatus& s) { m_status = s; }
+
+    bool adjustEmbedding() const { return m_adjustEmbedding; }
+    void setAdjustEmbedding(bool adjustEmbedding) { m_adjustEmbedding = adjustEmbedding; }
+
+    // Applies one explicit embedding, override or pop code.
+    void embed(WTF::Unicode::Direction);
+    // Builds the run list for the text between |start| and |end|.
+    void createBidiRunsForLine(const Iterator& start, const Iterator& end, bool visualOrder = false, bool hardLineBreak = false);
+
+    Run* firstRun() const { return m_firstRun; }
+    Run* lastRun() const { return m_lastRun; }
+    // Last run in the order the runs were created, recorded by
+    // createBidiRunsForLine() before any reordering.
+    Run* logicallyLastRun() const { return m_logicallyLastRun; }
+    unsigned runCount() const { return m_runCount; }
+
+    // Appends |run| to the end of the run list.
+    void addRun(Run*);
+    // Destroys every run in the list and resets the list to empty.
+    void deleteRuns();
+
+protected:
+    // Closes the pending run [sor, eor] and starts a new one after it.
+    void appendRun();
+    // Reverses the order of the runs with list indices |start| .. |end|.
+    void reverseRuns(unsigned start, unsigned end);
+
+    Iterator current;
+    Iterator sor; // Start of the pending run.
+    Iterator eor; // End of the pending run.
+    Iterator last;
+    BidiStatus m_status;
+    WTF::Unicode::Direction m_direction; // Direction of the pending run.
+    bool m_adjustEmbedding;
+    Iterator endOfLine;
+    bool reachedEndOfLine;
+    Iterator lastBeforeET;
+    bool emptyRun; // True while no character has been added to the pending run.
+
+    Run* m_firstRun;
+    Run* m_lastRun;
+    Run* m_logicallyLastRun;
+    unsigned m_runCount;
+};
+
+// Links |run| in as the new tail of the run list and counts it.
+template <class Iterator, class Run>
+inline void BidiResolver<Iterator, Run>::addRun(Run* run)
+{
+    if (m_firstRun) {
+        // Non-empty list: hang the run off the current tail.
+        m_lastRun->m_next = run;
+    } else {
+        // Empty list: the run is also the head.
+        m_firstRun = run;
+    }
+
+    m_lastRun = run;
+    ++m_runCount;
+}
+
+// Emits the pending run, covering sor through eor inclusive, with the current
+// direction, then positions sor and eor on the character after it and resets
+// the run direction. Does nothing while no run is pending or when eor is at
+// the end.
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::appendRun()
+{
+    if (!emptyRun && !eor.atEnd()) {
+        // The run's stop offset is one past the offset of eor.
+        addRun(new Run(sor.offset(), eor.offset() + 1, context(), m_direction));
+
+        eor.increment(*this);
+        sor = eor;
+
+        m_status.eor = WTF::Unicode::OtherNeutral;
+        m_direction = WTF::Unicode::OtherNeutral;
+    }
+}
+/*
+ * Handles one explicit directional formatting code |d|.
+ *
+ * PopDirectionalFormat: when the current context has a parent, the pending run
+ * is closed (appendRun) and the parent becomes the current context. Without a
+ * parent the code is ignored.
+ *
+ * Any other value is treated as an embedding or override; values other than
+ * RightToLeftEmbedding/RightToLeftOverride are taken as left-to-right. The
+ * next level of matching parity is computed and, if it is below 61, the pending
+ * run is closed and a new BidiContext is pushed on top of the current one.
+ *
+ * m_adjustEmbedding is cleared for the duration of the call and restored on
+ * exit. context() is dereferenced unconditionally, so a context must be set.
+ */
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction d)
+{
+ using namespace WTF::Unicode;
+
+ bool b = m_adjustEmbedding; // Saved so that it can be restored on exit.
+ m_adjustEmbedding = false;
+ if (d == PopDirectionalFormat) {
+ BidiContext* c = context()->parent();
+ if (c) { // A pop without an enclosing context is ignored.
+ if (!emptyRun && eor != last) { // Characters after eor have not been assigned to a run yet.
+ ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
+ // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
+ ASSERT(m_status.last == EuropeanNumberSeparator
+ || m_status.last == EuropeanNumberTerminator
+ || m_status.last == CommonNumberSeparator
+ || m_status.last == BoundaryNeutral
+ || m_status.last == BlockSeparator
+ || m_status.last == SegmentSeparator
+ || m_status.last == WhiteSpaceNeutral
+ || m_status.last == OtherNeutral);
+ if (m_direction == OtherNeutral)
+ m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
+ if (context()->dir() == LeftToRight) {
+ // bidi.sor ... bidi.eor ... bidi.last L
+ if (m_status.eor == EuropeanNumber) {
+ if (m_status.lastStrong != LeftToRight) {
+ m_direction = EuropeanNumber;
+ appendRun();
+ }
+ } else if (m_status.eor == ArabicNumber) {
+ m_direction = ArabicNumber;
+ appendRun();
+ } else if (m_status.lastStrong != LeftToRight) {
+ if (context()->dir() == RightToLeft) // NOTE(review): enclosed by the context()->dir() == LeftToRight test above, so this looks unreachable - confirm.
+ m_direction = RightToLeft;
+ else {
+ appendRun();
+ m_direction = LeftToRight;
+ }
+ }
+ } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
+ appendRun();
+ m_direction = RightToLeft;
+ }
+ eor = last; // Extend the pending run through the last processed character.
+ }
+ appendRun();
+ emptyRun = true;
+ // sor for the new run is determined by the higher level (rule X10)
+ setLastDir(context()->dir()); // Uses the direction of the context being closed; setContext(c) comes afterwards.
+ setLastStrongDir(context()->dir());
+ setContext(c);
+ eor = Iterator();
+ }
+ } else {
+ Direction runDir;
+ if (d == RightToLeftEmbedding || d == RightToLeftOverride)
+ runDir = RightToLeft;
+ else
+ runDir = LeftToRight;
+ bool override = d == LeftToRightOverride || d == RightToLeftOverride;
+
+ unsigned char level = context()->level(); // Raised below to the next odd level for RTL, the next even level for LTR.
+ if (runDir == RightToLeft) {
+ if (level % 2) // we have an odd level
+ level += 2;
+ else
+ level++;
+ } else {
+ if (level % 2) // we have an odd level
+ level++;
+ else
+ level += 2;
+ }
+
+ if (level < 61) { // Embeddings that would reach level 61 or higher are ignored.
+ if (!emptyRun && eor != last) { // Characters after eor have not been assigned to a run yet.
+ ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
+ // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
+ ASSERT(m_status.last == EuropeanNumberSeparator
+ || m_status.last == EuropeanNumberTerminator
+ || m_status.last == CommonNumberSeparator
+ || m_status.last == BoundaryNeutral
+ || m_status.last == BlockSeparator
+ || m_status.last == SegmentSeparator
+ || m_status.last == WhiteSpaceNeutral
+ || m_status.last == OtherNeutral);
+ if (m_direction == OtherNeutral)
+ m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
+ if (runDir == LeftToRight) {
+ // bidi.sor ... bidi.eor ... bidi.last L
+ if (m_status.eor == EuropeanNumber) {
+ if (m_status.lastStrong != LeftToRight) {
+ m_direction = EuropeanNumber;
+ appendRun();
+ }
+ } else if (m_status.eor == ArabicNumber) {
+ m_direction = ArabicNumber;
+ appendRun();
+ } else if (m_status.lastStrong != LeftToRight && context()->dir() == LeftToRight) {
+ appendRun();
+ m_direction = LeftToRight;
+ }
+ } else if (m_status.eor == ArabicNumber // && binds tighter than ||: this is a disjunction of three alternatives.
+ || m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || context()->dir() == RightToLeft)
+ || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft) {
+ appendRun();
+ m_direction = RightToLeft;
+ }
+ eor = last; // Extend the pending run through the last processed character.
+ }
+ appendRun();
+ emptyRun = true;
+ setContext(new BidiContext(level, runDir, override, context())); // The previous context becomes the parent of the new one.
+ setLastDir(runDir);
+ setLastStrongDir(runDir);
+ eor = Iterator();
+ }
+ }
+ m_adjustEmbedding = b;
+}
+
+// Destroys every run in the list and resets the resolver to the "no runs"
+// state. All three run pointers are cleared, including m_logicallyLastRun,
+// so that logicallyLastRun() can never return a pointer to a destroyed run.
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::deleteRuns()
+{
+    emptyRun = true;
+
+    Run* run = m_firstRun;
+    while (run) {
+        // Fetch the successor first: destroy() invalidates |run|.
+        Run* following = run->next();
+        run->destroy();
+        run = following;
+    }
+
+    m_firstRun = 0;
+    m_lastRun = 0;
+    m_logicallyLastRun = 0;
+    m_runCount = 0;
+}
+
+// Reverses, in place, the order of the runs whose zero-based list positions lie
+// in [start, end] (both inclusive). Nothing happens when start >= end. |end|
+// must be a valid position (asserted). m_firstRun and m_lastRun are updated
+// when the reversed range touches either end of the list.
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::reverseRuns(unsigned start, unsigned end)
+{
+    if (start >= end)
+        return;
+
+    ASSERT(end < m_runCount);
+
+    // Walk to the first run of the range, remembering its predecessor (null
+    // when the range starts at the head of the list).
+    Run* beforeStart = 0;
+    Run* startRun = m_firstRun;
+    for (unsigned skipped = 0; skipped < start; ++skipped) {
+        beforeStart = startRun;
+        startRun = startRun->next();
+    }
+
+    // Walk on to the last run of the range and note its successor.
+    Run* endRun = startRun;
+    for (unsigned position = start; position < end; ++position)
+        endRun = endRun->next();
+    Run* afterEnd = endRun->next();
+
+    // Re-point every run in the range at its former predecessor; the first
+    // run of the range ends up pointing at |afterEnd|.
+    Run* reversedHead = afterEnd;
+    Run* run = startRun;
+    for (unsigned position = start; position <= end; ++position) {
+        Run* following = run->next();
+        run->m_next = reversedHead;
+        reversedHead = run;
+        run = following;
+    }
+
+    // Splice the reversed range back between its old neighbours.
+    if (!beforeStart)
+        m_firstRun = endRun;
+    else
+        beforeStart->m_next = endRun;
+
+    startRun->m_next = afterEnd;
+    if (!afterEnd)
+        m_lastRun = startRun;
+}
+/*
+ * Builds the run list for the text from |start| up to |end|.
+ *
+ * The loop classifies one character per iteration (current.direction()),
+ * closes runs with appendRun() wherever the resolved direction changes, and
+ * hands explicit embedding codes to embed(). When |current| reaches |end| the
+ * resolver state is snapshotted in |stateAtEnd| and scanning continues past
+ * the end until the direction of the pending run is settled; the final run is
+ * then closed at |endOfLine| and the snapshot is restored.
+ *
+ * With |hardLineBreak| set, text beyond the end is not examined: the direction
+ * of the outermost context is used instead and the snapshot is reset to that
+ * context. Unless |visualOrder| is set, the finished runs are reordered by
+ * level with reverseRuns().
+ *
+ * Preconditions visible here: m_direction must be OtherNeutral (asserted) and
+ * context() must be non-null (it is dereferenced without a check).
+ */
+template <class Iterator, class Run>
+void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& start, const Iterator& end, bool visualOrder, bool hardLineBreak)
+{
+ using namespace WTF::Unicode;
+
+ ASSERT(m_direction == OtherNeutral);
+
+ emptyRun = true;
+
+ eor = Iterator();
+
+ current = start;
+ last = current;
+ bool pastEnd = false; // Becomes true once |current| has reached |end| (or run out of text).
+ BidiResolver<Iterator, Run> stateAtEnd; // Snapshot of this resolver taken at the moment pastEnd becomes true.
+
+ while (true) {
+ Direction dirCurrent;
+ if (pastEnd && (hardLineBreak || current.atEnd())) { // Nothing more to read: use the direction of the outermost context.
+ BidiContext* c = context();
+ while (c->parent())
+ c = c->parent();
+ dirCurrent = c->dir();
+ if (hardLineBreak) {
+ // A deviation from the Unicode Bidi Algorithm in order to match
+ // Mac OS X text and WinIE: a hard line break resets bidi state.
+ stateAtEnd.setContext(c);
+ stateAtEnd.setEorDir(dirCurrent);
+ stateAtEnd.setLastDir(dirCurrent);
+ stateAtEnd.setLastStrongDir(dirCurrent);
+ }
+ } else {
+ dirCurrent = current.direction();
+ if (context()->override() // Inside an override every character except the explicit codes takes the context direction.
+ && dirCurrent != RightToLeftEmbedding
+ && dirCurrent != LeftToRightEmbedding
+ && dirCurrent != RightToLeftOverride
+ && dirCurrent != LeftToRightOverride
+ && dirCurrent != PopDirectionalFormat)
+ dirCurrent = context()->dir();
+ else if (dirCurrent == NonSpacingMark) // A non-spacing mark takes the type recorded for the previous character.
+ dirCurrent = m_status.last;
+ }
+
+ ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
+ switch (dirCurrent) {
+
+ // embedding and overrides (X1-X9 in the Bidi specs)
+ case RightToLeftEmbedding:
+ case LeftToRightEmbedding:
+ case RightToLeftOverride:
+ case LeftToRightOverride:
+ case PopDirectionalFormat:
+ embed(dirCurrent);
+ break;
+
+ // strong types
+ case LeftToRight:
+ switch(m_status.last) {
+ case RightToLeft:
+ case RightToLeftArabic:
+ case EuropeanNumber:
+ case ArabicNumber:
+ if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
+ appendRun();
+ break;
+ case LeftToRight:
+ break;
+ case EuropeanNumberSeparator:
+ case EuropeanNumberTerminator:
+ case CommonNumberSeparator:
+ case BoundaryNeutral:
+ case BlockSeparator:
+ case SegmentSeparator:
+ case WhiteSpaceNeutral:
+ case OtherNeutral:
+ if (m_status.eor == EuropeanNumber) {
+ if (m_status.lastStrong != LeftToRight) {
+ // the numbers need to be on a higher embedding level, so let's close that run
+ m_direction = EuropeanNumber;
+ appendRun();
+ if (context()->dir() != LeftToRight) {
+ // the neutrals take the embedding direction, which is R
+ eor = last;
+ m_direction = RightToLeft;
+ appendRun();
+ }
+ }
+ } else if (m_status.eor == ArabicNumber) {
+ // Arabic numbers are always on a higher embedding level, so let's close that run
+ m_direction = ArabicNumber;
+ appendRun();
+ if (context()->dir() != LeftToRight) {
+ // the neutrals take the embedding direction, which is R
+ eor = last;
+ m_direction = RightToLeft;
+ appendRun();
+ }
+ } else if (m_status.lastStrong != LeftToRight) {
+ // the trailing neutrals take the embedding direction
+ if (context()->dir() == RightToLeft) {
+ eor = last;
+ m_direction = RightToLeft;
+ }
+ appendRun();
+ }
+ default: // The neutral cases above fall through to here.
+ break;
+ }
+ eor = current; // The pending run now ends at this L character.
+ m_status.eor = LeftToRight;
+ m_status.lastStrong = LeftToRight;
+ m_direction = LeftToRight;
+ break;
+ case RightToLeftArabic:
+ case RightToLeft:
+ switch (m_status.last) {
+ case LeftToRight:
+ case EuropeanNumber:
+ case ArabicNumber:
+ appendRun(); // Falls through to the break below.
+ case RightToLeft:
+ case RightToLeftArabic:
+ break;
+ case EuropeanNumberSeparator:
+ case EuropeanNumberTerminator:
+ case CommonNumberSeparator:
+ case BoundaryNeutral:
+ case BlockSeparator:
+ case SegmentSeparator:
+ case WhiteSpaceNeutral:
+ case OtherNeutral:
+ if (m_status.eor == EuropeanNumber) {
+ if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
+ eor = last;
+ appendRun();
+ } else if (m_status.eor == ArabicNumber)
+ appendRun();
+ else if (m_status.lastStrong == LeftToRight) {
+ if (context()->dir() == LeftToRight)
+ eor = last;
+ appendRun();
+ }
+ default: // The neutral cases above fall through to here.
+ break;
+ }
+ eor = current; // The pending run now ends at this R/AL character.
+ m_status.eor = RightToLeft;
+ m_status.lastStrong = dirCurrent; // Keeps the R versus AL distinction for the EuropeanNumber case below.
+ m_direction = RightToLeft;
+ break;
+
+ // weak types:
+
+ case EuropeanNumber:
+ if (m_status.lastStrong != RightToLeftArabic) {
+ // if last strong was AL change EN to AN
+ switch (m_status.last) {
+ case EuropeanNumber:
+ case LeftToRight:
+ break;
+ case RightToLeft:
+ case RightToLeftArabic:
+ case ArabicNumber:
+ eor = last;
+ appendRun();
+ m_direction = EuropeanNumber;
+ break;
+ case EuropeanNumberSeparator:
+ case CommonNumberSeparator:
+ if (m_status.eor == EuropeanNumber)
+ break; // Otherwise falls through to the neutral handling below.
+ case EuropeanNumberTerminator:
+ case BoundaryNeutral:
+ case BlockSeparator:
+ case SegmentSeparator:
+ case WhiteSpaceNeutral:
+ case OtherNeutral:
+ if (m_status.eor == EuropeanNumber) {
+ if (m_status.lastStrong == RightToLeft) {
+ // ENs on both sides behave like Rs, so the neutrals should be R.
+ // Terminate the EN run.
+ appendRun();
+ // Make an R run.
+ eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
+ m_direction = RightToLeft;
+ appendRun();
+ // Begin a new EN run.
+ m_direction = EuropeanNumber;
+ }
+ } else if (m_status.eor == ArabicNumber) {
+ // Terminate the AN run.
+ appendRun();
+ if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
+ // Make an R run.
+ eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
+ m_direction = RightToLeft;
+ appendRun();
+ // Begin a new EN run.
+ m_direction = EuropeanNumber;
+ }
+ } else if (m_status.lastStrong == RightToLeft) {
+ // Extend the R run to include the neutrals.
+ eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
+ m_direction = RightToLeft;
+ appendRun();
+ // Begin a new EN run.
+ m_direction = EuropeanNumber;
+ }
+ default: // The neutral cases above fall through to here.
+ break;
+ }
+ eor = current;
+ m_status.eor = EuropeanNumber;
+ if (m_direction == OtherNeutral)
+ m_direction = LeftToRight;
+ break;
+ } // lastStrong was RightToLeftArabic: fall through and handle the EN as an ArabicNumber.
+ case ArabicNumber:
+ dirCurrent = ArabicNumber;
+ switch (m_status.last) {
+ case LeftToRight:
+ if (context()->dir() == LeftToRight)
+ appendRun();
+ break;
+ case ArabicNumber:
+ break;
+ case RightToLeft:
+ case RightToLeftArabic:
+ case EuropeanNumber:
+ eor = last;
+ appendRun();
+ break;
+ case CommonNumberSeparator:
+ if (m_status.eor == ArabicNumber)
+ break; // Otherwise falls through to the neutral handling below.
+ case EuropeanNumberSeparator:
+ case EuropeanNumberTerminator:
+ case BoundaryNeutral:
+ case BlockSeparator:
+ case SegmentSeparator:
+ case WhiteSpaceNeutral:
+ case OtherNeutral:
+ if (m_status.eor == ArabicNumber // && binds tighter than ||: this is a disjunction of three alternatives.
+ || m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft)
+ || m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft) {
+ // Terminate the run before the neutrals.
+ appendRun();
+ // Begin an R run for the neutrals.
+ m_direction = RightToLeft;
+ } else if (m_direction == OtherNeutral)
+ m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
+ eor = last;
+ appendRun();
+ default: // The neutral cases above fall through to here.
+ break;
+ }
+ eor = current;
+ m_status.eor = ArabicNumber;
+ if (m_direction == OtherNeutral)
+ m_direction = ArabicNumber;
+ break;
+ case EuropeanNumberSeparator:
+ case CommonNumberSeparator:
+ break;
+ case EuropeanNumberTerminator:
+ if (m_status.last == EuropeanNumber) { // A terminator directly after a European number is treated as part of the number.
+ dirCurrent = EuropeanNumber;
+ eor = current;
+ m_status.eor = dirCurrent;
+ } else if (m_status.last != EuropeanNumberTerminator) // First terminator of a sequence: remember the position before it.
+ lastBeforeET = emptyRun ? eor : last;
+ break;
+
+ // boundary neutrals should be ignored
+ case BoundaryNeutral:
+ if (eor == last)
+ eor = current;
+ break;
+ // neutrals
+ case BlockSeparator:
+ // ### what do we do with newline and paragraph separators that come to here?
+ break;
+ case SegmentSeparator:
+ // ### implement rule L1
+ break;
+ case WhiteSpaceNeutral:
+ break;
+ case OtherNeutral:
+ break;
+ default:
+ break;
+ }
+
+ if (pastEnd) {
+ if (eor == current) { // A character beyond the end has settled the direction of the pending run.
+ if (!reachedEndOfLine) { // NOTE(review): reachedEndOfLine is never set to true in this header; presumably the iterator does it - confirm.
+ eor = endOfLine; // Close the final run at the last character of the line, not at the lookahead character.
+ switch (m_status.eor) {
+ case LeftToRight:
+ case RightToLeft:
+ case ArabicNumber:
+ m_direction = m_status.eor;
+ break;
+ case EuropeanNumber:
+ m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber;
+ break;
+ default:
+ ASSERT(false);
+ }
+ appendRun();
+ }
+ m_status = stateAtEnd.m_status; // Restore the state captured at the end of the line; the run list itself is kept.
+ current = stateAtEnd.current;
+ sor = stateAtEnd.sor;
+ eor = stateAtEnd.eor;
+ last = stateAtEnd.last;
+ m_adjustEmbedding = stateAtEnd.m_adjustEmbedding;
+ reachedEndOfLine = stateAtEnd.reachedEndOfLine;
+ lastBeforeET = stateAtEnd.lastBeforeET;
+ emptyRun = stateAtEnd.emptyRun;
+ m_direction = OtherNeutral;
+ break;
+ }
+ }
+
+ // set m_status.last as needed.
+ switch (dirCurrent) {
+ case EuropeanNumberTerminator:
+ if (m_status.last != EuropeanNumber)
+ m_status.last = EuropeanNumberTerminator;
+ break;
+ case EuropeanNumberSeparator:
+ case CommonNumberSeparator:
+ case SegmentSeparator:
+ case WhiteSpaceNeutral:
+ case OtherNeutral:
+ switch(m_status.last) {
+ case LeftToRight:
+ case RightToLeft:
+ case RightToLeftArabic:
+ case EuropeanNumber:
+ case ArabicNumber:
+ m_status.last = dirCurrent;
+ break;
+ default:
+ m_status.last = OtherNeutral; // A neutral or separator that follows another one is recorded as a plain neutral.
+ }
+ break;
+ case NonSpacingMark:
+ case BoundaryNeutral:
+ case RightToLeftEmbedding:
+ case LeftToRightEmbedding:
+ case RightToLeftOverride:
+ case LeftToRightOverride:
+ case PopDirectionalFormat:
+ // ignore these
+ break;
+ case EuropeanNumber:
+ // fall through
+ default:
+ m_status.last = dirCurrent;
+ }
+
+ last = current;
+
+ if (emptyRun && !(dirCurrent == RightToLeftEmbedding // The first character that is not an explicit code opens the pending run.
+ || dirCurrent == LeftToRightEmbedding
+ || dirCurrent == RightToLeftOverride
+ || dirCurrent == LeftToRightOverride
+ || dirCurrent == PopDirectionalFormat)) {
+ sor = current;
+ emptyRun = false;
+ }
+
+ // this causes the operator ++ to open and close embedding levels as needed
+ // for the CSS unicode-bidi property
+ m_adjustEmbedding = true;
+ current.increment(*this);
+ m_adjustEmbedding = false;
+ if (emptyRun && (dirCurrent == RightToLeftEmbedding
+ || dirCurrent == LeftToRightEmbedding
+ || dirCurrent == RightToLeftOverride
+ || dirCurrent == LeftToRightOverride
+ || dirCurrent == PopDirectionalFormat)) {
+ // exclude the embedding char itself from the new run so that ATSUI will never see it
+ eor = Iterator();
+ last = current;
+ sor = current;
+ }
+
+ if (!pastEnd && (current == end || current.atEnd())) {
+ if (emptyRun) // No run is pending, so there is nothing left to resolve.
+ break;
+ stateAtEnd = *this; // Snapshot, then keep scanning to settle the direction of the pending run.
+ endOfLine = last;
+ pastEnd = true;
+ }
+ }
+
+ m_logicallyLastRun = m_lastRun; // Recorded before the runs are reordered below.
+
+ // reorder line according to run structure...
+ // do not reverse for visually ordered web sites
+ if (!visualOrder) {
+
+ // first find highest and lowest levels
+ unsigned char levelLow = 128; // Start value above every level that occurs in a run.
+ unsigned char levelHigh = 0;
+ Run* r = firstRun();
+ while (r) {
+ if (r->m_level > levelHigh)
+ levelHigh = r->m_level;
+ if (r->m_level < levelLow)
+ levelLow = r->m_level;
+ r = r->next();
+ }
+
+ // implements reordering of the line (L2 according to Bidi spec):
+ // L2. From the highest level found in the text to the lowest odd level on each line,
+ // reverse any contiguous sequence of characters that are at that level or higher.
+
+ // reversing is only done up to the lowest odd level
+ if (!(levelLow % 2))
+ levelLow++;
+
+ unsigned count = runCount() - 1; // Index of the last run; wraps when there are no runs, but then levelHigh (0) < levelLow and the loop below is skipped.
+
+ while (levelHigh >= levelLow) {
+ unsigned i = 0;
+ Run* currRun = firstRun();
+ while (i < count) {
+ while (i < count && currRun && currRun->m_level < levelHigh) { // Skip runs below the current level.
+ i++;
+ currRun = currRun->next();
+ }
+ unsigned start = i;
+ while (i <= count && currRun && currRun->m_level >= levelHigh) { // Collect the contiguous runs at or above the current level.
+ i++;
+ currRun = currRun->next();
+ }
+ unsigned end = i - 1;
+ reverseRuns(start, end);
+ }
+ levelHigh--;
+ }
+ }
+ endOfLine = Iterator();
+}
+
+} // namespace WebCore
+
+#endif // BidiResolver_h
diff --git a/WebCore/platform/text/CString.cpp b/WebCore/platform/text/CString.cpp
new file mode 100644
index 0000000..f1434ad
--- /dev/null
+++ b/WebCore/platform/text/CString.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2003, 2006, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "config.h"
+#include "CString.h"
+
+using std::min;
+
+namespace WebCore {
+
+// Constructs a CString holding a copy of the NUL-terminated string |str|.
+// A null |str| produces a null CString. The check has to happen here, before
+// strlen() is called, because strlen() must not be given a null pointer.
+CString::CString(const char* str)
+{
+    if (!str)
+        return;
+
+    init(str, strlen(str));
+}
+
+CString::CString(const char* str, unsigned length) // Copies |length| bytes starting at |str|; init() leaves the CString null when |str| is null.
+{
+ init(str, length);
+}
+
+// Shared constructor helper: stores a copy of the |length| bytes at |str|,
+// followed by a terminating NUL. A null |str| leaves m_buffer unset.
+void CString::init(const char* str, unsigned length)
+{
+    if (str) {
+        // One extra byte for the terminator.
+        m_buffer = CStringBuffer::create(length + 1);
+        char* bytes = m_buffer->data();
+        memcpy(bytes, str, length);
+        bytes[length] = '\0';
+    }
+}
+
+// Read-only access to the characters; returns null for a null CString.
+const char* CString::data() const
+{
+    if (!m_buffer)
+        return 0;
+    return m_buffer->data();
+}
+
+// Writable access to the characters; returns null for a null CString. A buffer
+// that is shared with other CStrings is replaced by a private copy first.
+char* CString::mutableData()
+{
+    copyBufferIfNeeded();
+    return m_buffer ? m_buffer->data() : 0;
+}
+
+// Number of characters, not counting the terminating NUL that is stored in
+// the buffer; 0 for a null CString.
+unsigned CString::length() const
+{
+    if (!m_buffer)
+        return 0;
+    return m_buffer->length() - 1;
+}
+
+// Creates a CString with room for |length| characters plus the terminating
+// NUL, which is written here. |characterBuffer| is set to the start of the
+// storage so that the caller can fill in the characters.
+CString CString::newUninitialized(size_t length, char*& characterBuffer)
+{
+    CString result;
+    result.m_buffer = CStringBuffer::create(length + 1);
+
+    characterBuffer = result.m_buffer->data();
+    characterBuffer[length] = '\0';
+
+    return result;
+}
+
+// Gives this CString a private copy of its buffer when the buffer is shared
+// with other CStrings. Nothing is done for a null CString or when this
+// CString already holds the only reference.
+void CString::copyBufferIfNeeded()
+{
+    if (!m_buffer || m_buffer->hasOneRef())
+        return;
+
+    // Keep the shared buffer alive while m_buffer is replaced.
+    RefPtr<CStringBuffer> sharedBuffer = m_buffer;
+
+    // The buffer length already includes the terminating NUL. It is kept as
+    // size_t, the type CStringBuffer::length() returns, so that it is not
+    // squeezed through a signed int on its way to memcpy().
+    const size_t byteCount = sharedBuffer->length();
+    m_buffer = CStringBuffer::create(byteCount);
+    memcpy(m_buffer->data(), sharedBuffer->data(), byteCount);
+}
+
+// Two CStrings are equal when both are null, or when neither is null and they
+// hold the same number of identical bytes. A null CString never equals a
+// non-null one, not even an empty one.
+bool operator==(const CString& a, const CString& b)
+{
+    if (a.isNull() != b.isNull())
+        return false;
+
+    // Both null: equal, and data() would only hand null pointers to the C
+    // library, so stop here.
+    if (a.isNull())
+        return true;
+
+    if (a.length() != b.length())
+        return false;
+
+    // The lengths match at this point. Compare the raw bytes so that the
+    // comparison does not stop early at a NUL stored inside the strings.
+    return !memcmp(a.data(), b.data(), a.length());
+}
+
+}
diff --git a/WebCore/platform/text/CString.h b/WebCore/platform/text/CString.h
new file mode 100644
index 0000000..fcb4c8c
--- /dev/null
+++ b/WebCore/platform/text/CString.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2003, 2006, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CString_h
+#define CString_h
+
+#include <wtf/PassRefPtr.h>
+#include <wtf/RefCounted.h>
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+ class CStringBuffer : public RefCounted<CStringBuffer> { // Reference-counted char storage that backs CString.
+ public:
+ static PassRefPtr<CStringBuffer> create(unsigned length) { return adoptRef(new CStringBuffer(length)); } // |length| is the total size of the buffer in bytes.
+
+ char* data() { return m_vector.data(); }
+ size_t length() const { return m_vector.size(); } // Size of the whole buffer, i.e. the value passed to create().
+
+ private:
+ CStringBuffer(unsigned length) : m_vector(length) { } // Private: instances are only made through create().
+
+ Vector<char> m_vector;
+ };
+
+ // A container for a null-terminated char array supporting copy-on-write
+ // assignment. The contained char array may be null.
+ class CString {
+ public:
+ CString() { } // Creates a null CString (no buffer).
+ CString(const char*); // Copies a NUL-terminated string.
+ CString(const char*, unsigned length); // Copies |length| bytes and appends a terminating NUL.
+ static CString newUninitialized(size_t length, char*& characterBuffer); // Allocates room for |length| characters; the caller fills them in through |characterBuffer|.
+
+ const char* data() const; // Null for a null CString.
+ char* mutableData(); // Like data(), but copies a shared buffer first.
+ unsigned length() const; // Excludes the terminating NUL; 0 for a null CString.
+
+ bool isNull() const { return !m_buffer; }
+
+ private:
+ void copyBufferIfNeeded(); // Replaces a shared buffer with a private copy.
+ void init(const char*, unsigned length); // Common implementation of the copying constructors.
+ RefPtr<CStringBuffer> m_buffer; // Unset for a null CString; may be shared between copies.
+ };
+
+ bool operator==(const CString& a, const CString& b); // A null CString only equals another null CString.
+ inline bool operator!=(const CString& a, const CString& b) { return !(a == b); } // Exact negation of operator==.
+
+} // namespace WebCore
+
+#endif // CString_h
diff --git a/WebCore/platform/text/CharacterNames.h b/WebCore/platform/text/CharacterNames.h
new file mode 100644
index 0000000..bfbb5b0
--- /dev/null
+++ b/WebCore/platform/text/CharacterNames.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CharacterNames_h
+#define CharacterNames_h
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+ // Names here are taken from the Unicode standard.
+
+ // Note, these are UChar constants, not UChar32, which makes them
+ // more convenient for WebCore code that mostly uses UTF-16.
+
+ const UChar blackSquare = 0x25A0; // U+25A0 BLACK SQUARE
+ const UChar bullet = 0x2022; // U+2022 BULLET
+ const UChar horizontalEllipsis = 0x2026; // U+2026 HORIZONTAL ELLIPSIS
+ const UChar ideographicSpace = 0x3000; // U+3000 IDEOGRAPHIC SPACE
+ const UChar ideographicComma = 0x3001; // U+3001 IDEOGRAPHIC COMMA
+ const UChar ideographicFullStop = 0x3002; // U+3002 IDEOGRAPHIC FULL STOP
+ const UChar leftToRightMark = 0x200E; // U+200E LEFT-TO-RIGHT MARK
+ const UChar leftToRightEmbed = 0x202A; // U+202A LEFT-TO-RIGHT EMBEDDING
+ const UChar leftToRightOverride = 0x202D; // U+202D LEFT-TO-RIGHT OVERRIDE
+ const UChar newlineCharacter = 0x000A; // U+000A LINE FEED
+ const UChar noBreakSpace = 0x00A0; // U+00A0 NO-BREAK SPACE
+ const UChar objectReplacementCharacter = 0xFFFC; // U+FFFC OBJECT REPLACEMENT CHARACTER
+ const UChar popDirectionalFormatting = 0x202C; // U+202C POP DIRECTIONAL FORMATTING
+ const UChar rightToLeftMark = 0x200F; // U+200F RIGHT-TO-LEFT MARK
+ const UChar rightToLeftEmbed = 0x202B; // U+202B RIGHT-TO-LEFT EMBEDDING
+ const UChar rightToLeftOverride = 0x202E; // U+202E RIGHT-TO-LEFT OVERRIDE
+ const UChar softHyphen = 0x00AD; // U+00AD SOFT HYPHEN
+ const UChar whiteBullet = 0x25E6; // U+25E6 WHITE BULLET
+ const UChar zeroWidthSpace = 0x200B; // U+200B ZERO WIDTH SPACE
+
+}
+
+#endif // CharacterNames_h
diff --git a/WebCore/platform/text/PlatformString.h b/WebCore/platform/text/PlatformString.h
new file mode 100644
index 0000000..9399fdd
--- /dev/null
+++ b/WebCore/platform/text/PlatformString.h
@@ -0,0 +1,314 @@
+/*
+ * (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef PlatformString_h
+#define PlatformString_h
+
+// This file would be called String.h, but that conflicts with <string.h>
+// on systems without case-sensitive file systems.
+
+#include "StringImpl.h"
+
+#if PLATFORM(CF)
+typedef const struct __CFString * CFStringRef;
+#endif
+
+#if PLATFORM(QT)
+class QString;
+#endif
+
+#if PLATFORM(WX)
+class wxString;
+#endif
+
+
+namespace WebCore {
+
+class CString;
+struct StringHash;
+
+// Value-type handle to a reference-counted StringImpl (held in m_impl).
+// A default-constructed String has no StringImpl at all and is "null";
+// isNull() tests exactly that. The inline members below that forward to
+// m_impl all check for null first and supply a fallback result.
+class String {
+public:
+ String() { } // gives null string, distinguishable from an empty string
+ String(const UChar*, unsigned length);
+ String(const UChar*); // Specifically for null terminated UTF-16
+ String(const KJS::Identifier&);
+ String(const KJS::UString&);
+ String(const char*);
+ String(const char*, unsigned length);
+ String(StringImpl* i) : m_impl(i) { }
+ String(PassRefPtr<StringImpl> i) : m_impl(i) { }
+ String(RefPtr<StringImpl> i) : m_impl(i) { }
+
+ // Both overloads forward to StringImpl::adopt() with the same argument.
+ static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); }
+ static String adopt(Vector<UChar>& vector) { return StringImpl::adopt(vector); }
+
+ operator KJS::Identifier() const;
+ operator KJS::UString() const;
+
+ unsigned length() const;
+ const UChar* characters() const;
+ const UChar* charactersWithNullTermination();
+
+ UChar operator[](unsigned i) const; // if i >= length(), returns 0
+ UChar32 characterStartingAt(unsigned) const; // Ditto.
+
+ // contains() is find() != -1 starting from offset 0.
+ bool contains(UChar c) const { return find(c) != -1; }
+ bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
+ bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
+
+ // Forward searches. A null String always yields -1; otherwise the result
+ // is whatever StringImpl::find() returns.
+ int find(UChar c, int start = 0) const
+ { return m_impl ? m_impl->find(c, start) : -1; }
+ int find(const char* str, int start = 0, bool caseSensitive = true) const
+ { return m_impl ? m_impl->find(str, start, caseSensitive) : -1; }
+ int find(const String& str, int start = 0, bool caseSensitive = true) const
+ { return m_impl ? m_impl->find(str.impl(), start, caseSensitive) : -1; }
+
+ // Backward searches; same null handling as find(). The default start is -1.
+ int reverseFind(UChar c, int start = -1) const
+ { return m_impl ? m_impl->reverseFind(c, start) : -1; }
+ int reverseFind(const String& str, int start = -1, bool caseSensitive = true) const
+ { return m_impl ? m_impl->reverseFind(str.impl(), start, caseSensitive) : -1; }
+
+ // For a null String these return true only when s.isEmpty().
+ bool startsWith(const String& s, bool caseSensitive = true) const
+ { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); }
+ bool endsWith(const String& s, bool caseSensitive = true) const
+ { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); }
+
+ void append(const String&);
+ void append(char);
+ void append(UChar);
+ void append(const UChar*, unsigned length);
+ void insert(const String&, unsigned pos);
+ void insert(const UChar*, unsigned length, unsigned pos);
+
+ // Each replace() rebinds m_impl to the StringImpl returned by
+ // StringImpl::replace() and returns *this; a null String is left untouched.
+ String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; }
+ String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; }
+ String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; }
+ String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; }
+
+ void truncate(unsigned len);
+ void remove(unsigned pos, int len = 1);
+
+ String substring(unsigned pos, unsigned len = UINT_MAX) const;
+ String left(unsigned len) const { return substring(0, len); }
+ // NOTE(review): length() - len is unsigned arithmetic, so len > length()
+ // wraps to a very large pos; the result then depends on how substring()
+ // treats an out-of-range pos -- confirm against String.cpp.
+ String right(unsigned len) const { return substring(length() - len, len); }
+
+ // Returns a lowercase/uppercase version of the string
+ String lower() const;
+ String upper() const;
+
+ String stripWhiteSpace() const;
+ String simplifyWhiteSpace() const;
+
+ // Return the string with case folded for case insensitive comparison.
+ String foldCase() const;
+
+ static String number(int);
+ static String number(unsigned);
+ static String number(long);
+ static String number(unsigned long);
+ static String number(long long);
+ static String number(unsigned long long);
+ static String number(double);
+
+ static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
+
+ void split(const String& separator, Vector<String>& result) const;
+ void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const;
+ void split(UChar separator, Vector<String>& result) const;
+ void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const;
+
+ // Numeric conversions. |ok| is optional (defaults to 0); the *Strict
+ // variants additionally take a numeric base (default 10).
+ int toIntStrict(bool* ok = 0, int base = 10) const;
+ unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
+ int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
+ uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
+
+ int toInt(bool* ok = 0) const;
+ unsigned toUInt(bool* ok = 0) const;
+ int64_t toInt64(bool* ok = 0) const;
+ uint64_t toUInt64(bool* ok = 0) const;
+ double toDouble(bool* ok = 0) const;
+ float toFloat(bool* ok = 0) const;
+ Length* toLengthArray(int& len) const;
+ Length* toCoordsArray(int& len) const;
+ bool percentage(int& percentage) const;
+
+ // Makes a deep copy. Helpful only if you need to use a String on another thread.
+ // Since the underlying StringImpl objects are immutable, there's no other reason
+ // to ever prefer copy() over plain old assignment.
+ String copy() const;
+
+ bool isNull() const { return !m_impl; }
+ bool isEmpty() const;
+
+ // Raw, non-owning access to the underlying StringImpl (0 for a null String).
+ StringImpl* impl() const { return m_impl.get(); }
+
+#if PLATFORM(CF)
+ String(CFStringRef);
+ CFStringRef createCFString() const;
+#endif
+
+#ifdef __OBJC__
+ String(NSString*);
+
+ // This conversion maps NULL to "", which loses the meaning of NULL, but we
+ // need this mapping because AppKit crashes when passed nil NSStrings.
+ operator NSString*() const { if (!m_impl) return @""; return *m_impl; }
+#endif
+
+#if PLATFORM(QT)
+ String(const QString&);
+ String(const QStringRef&);
+ operator QString() const;
+#endif
+
+#if PLATFORM(SYMBIAN)
+ String(const TDesC&);
+ operator TPtrC() const { return des(); }
+ TPtrC des() const { if (!m_impl) return KNullDesC(); return m_impl->des(); }
+#endif
+
+#if PLATFORM(WX)
+ String(const wxString&);
+ operator wxString() const;
+#endif
+
+ // ascii() is only declared in builds where NDEBUG is not defined.
+#ifndef NDEBUG
+ Vector<char> ascii() const;
+#endif
+
+ CString latin1() const;
+ CString utf8() const;
+
+ static String fromUTF8(const char*, size_t);
+ static String fromUTF8(const char*);
+
+ // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
+ // A null String reports LeftToRight.
+ WTF::Unicode::Direction defaultWritingDirection() const { return m_impl ? m_impl->defaultWritingDirection() : WTF::Unicode::LeftToRight; }
+
+private:
+ // Null (0) for a default-constructed String.
+ RefPtr<StringImpl> m_impl;
+};
+
+// Concatenation; defined out of line.
+String operator+(const String&, const String&);
+String operator+(const String&, const char*);
+String operator+(const char*, const String&);
+
+// Appends b to a in place via String::append() and returns a.
+inline String& operator+=(String& a, const String& b) { a.append(b); return a; }
+
+// Equality and inequality delegate to the equal() overloads that take
+// StringImpl pointers and/or C strings.
+inline bool operator==(const String& a, const String& b) { return equal(a.impl(), b.impl()); }
+inline bool operator==(const String& a, const char* b) { return equal(a.impl(), b); }
+inline bool operator==(const char* a, const String& b) { return equal(a, b.impl()); }
+
+inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); }
+inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), b); }
+inline bool operator!=(const char* a, const String& b) { return !equal(a, b.impl()); }
+
+// Case-insensitive comparison; forwards to the StringImpl-level overloads.
+inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); }
+inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); }
+inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); }
+
+// !str is true for a null String only (it tests isNull(), not isEmpty()).
+inline bool operator!(const String& str) { return str.isNull(); }
+
+
+// String Operations
+
+// Free functions operating on a raw (pointer, length) UTF-16 buffer rather
+// than on a String object.
+
+bool charactersAreAllASCII(const UChar*, size_t);
+
+// Strict integer parsing; |ok| is optional and |base| defaults to 10.
+int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
+unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
+int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
+uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
+
+int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
+
+double charactersToDouble(const UChar*, size_t, bool* ok = 0);
+float charactersToFloat(const UChar*, size_t, bool* ok = 0);
+
+// Single-character searches over a raw buffer; both return -1 when the
+// character is not found. Defined inline further down in this header.
+int find(const UChar*, size_t, UChar, int startPosition = 0);
+int reverseFind(const UChar*, size_t, UChar, int startPosition = -1);
+
+// Appends the characters of a String to a Vector<UChar>.
+void append(Vector<UChar>&, const String&);
+
+#ifdef __OBJC__
+// Some long-standing WebKit clients rely on getting nil, not an empty
+// NSString, for an empty String. This helper keeps that behaviour: nil when
+// str.isEmpty(), otherwise the usual String -> NSString* conversion.
+inline NSString* nsStringNilIfEmpty(const String& str)
+{
+    if (str.isEmpty())
+        return nil;
+    return (NSString*)str;
+}
+#endif
+
+// Returns true when none of the |length| code units in |characters| has a bit
+// set above the 7-bit ASCII range. All units are OR-ed together first so the
+// range test happens once, after the loop, rather than per character.
+// An empty buffer yields true.
+inline bool charactersAreAllASCII(const UChar* characters, size_t length)
+{
+    UChar combinedBits = 0;
+    const UChar* const end = characters + length;
+    for (const UChar* cursor = characters; cursor != end; ++cursor)
+        combinedBits |= *cursor;
+    return (combinedBits & 0xFF80) == 0;
+}
+
+// Forward search for |character| in the buffer [characters, characters + length),
+// beginning at |startPosition|. Returns the index of the first hit or -1.
+// A start at or beyond the end returns -1 immediately. A negative start
+// converts to a huge size_t, so the loop body never runs and -1 is returned.
+inline int find(const UChar* characters, size_t length, UChar character, int startPosition)
+{
+    if (startPosition >= static_cast<int>(length))
+        return -1;
+
+    size_t index = startPosition;
+    while (index < length) {
+        if (characters[index] == character)
+            return static_cast<int>(index);
+        ++index;
+    }
+    return -1;
+}
+
+// Backward search for |character| in the buffer [characters, characters + length).
+//
+// |startPosition| is the index at which the search begins; the scan then
+// moves towards index 0. A negative value counts from the end of the buffer
+// (-1 is the last code unit, which is the default in the declaration).
+//
+// Returns the index of the match, or -1 when
+//   - the buffer is empty,
+//   - startPosition is at or beyond the end of the buffer,
+//   - a negative startPosition reaches back before the start of the buffer, or
+//   - the character does not occur in [0, startPosition].
+inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition)
+{
+    if (startPosition >= static_cast<int>(length) || !length)
+        return -1;
+    if (startPosition < 0) {
+        startPosition += static_cast<int>(length);
+        // A start further back than -length would still be negative here.
+        // Indexing with it reads before the buffer, and the countdown below
+        // would never hit zero, so treat it as "not found".
+        if (startPosition < 0)
+            return -1;
+    }
+    for (int index = startPosition; index >= 0; --index) {
+        if (characters[index] == character)
+            return index;
+    }
+    return -1;
+}
+
+// Appends all code units of |string| to the end of |vector|.
+inline void append(Vector<UChar>& vector, const String& string)
+{
+    const UChar* source = string.characters();
+    unsigned count = string.length();
+    vector.append(source, count);
+}
+
+} // namespace WebCore
+
+namespace WTF {
+
+ // Specializes DefaultHash so that WebCore::String maps to
+ // WebCore::StringHash. StringHash is only forward-declared in this header.
+
+ // StringHash is the default hash for String
+ template<typename T> struct DefaultHash;
+ template<> struct DefaultHash<WebCore::String> {
+ typedef WebCore::StringHash Hash;
+ };
+
+}
+
+#endif
diff --git a/WebCore/platform/text/RegularExpression.cpp b/WebCore/platform/text/RegularExpression.cpp
new file mode 100644
index 0000000..4213d75
--- /dev/null
+++ b/WebCore/platform/text/RegularExpression.cpp
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2004, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegularExpression.h"
+
+#include "PlatformString.h"
+#include "Logging.h"
+#include <wtf/RefCounted.h>
+#include <pcre/pcre.h>
+#include <sys/types.h>
+
+namespace WebCore {
+
+// Size of the match-offset buffer kept per regular expression: three int
+// slots for each of maxSubstrings entries. maxOffsets is both the array size
+// of Private::lastMatchOffsets and the count passed to jsRegExpExecute().
+const size_t maxSubstrings = 10;
+const size_t maxOffsets = 3 * maxSubstrings;
+
+// Shared, reference-counted state behind a RegularExpression: the pattern
+// text, the compiled expression, and a record of the most recent match.
+// RegularExpression copies share one Private through RefPtr.
+class RegularExpression::Private : public RefCounted<Private> {
+public:
+ Private();
+ Private(const String& pattern, bool caseSensitive);
+ ~Private();
+
+ // Compiles |pattern| into |regex|; regex is null if compilation fails.
+ void compile(bool caseSensitive);
+
+ String pattern;
+ JSRegExp* regex;
+
+ // State recorded by RegularExpression::match().
+ String lastMatchString; // subject of the last match; reset to a null String on failure
+ int lastMatchOffsets[maxOffsets]; // offset buffer filled in by jsRegExpExecute()
+ int lastMatchCount; // raw return value of jsRegExpExecute()
+ int lastMatchPos; // start offset of the last match, or -1
+ int lastMatchLength; // length of the last match, or -1
+};
+
+// Builds the state for a default-constructed RegularExpression: an empty
+// pattern compiled case-sensitively.
+//
+// lastMatchPos and lastMatchLength start at -1 ("no match yet"), the same as
+// in the (pattern, caseSensitive) constructor. Without this, pos() and
+// matchedLength() would read indeterminate values when called before the
+// first match().
+RegularExpression::Private::Private()
+    : RefCounted<Private>(0)
+    , pattern("")
+    , lastMatchPos(-1)
+    , lastMatchLength(-1)
+{
+    compile(true);
+}
+
+// Stores the pattern, marks the match state as "no match yet" (-1 for both
+// position and length), then compiles with the requested case sensitivity.
+RegularExpression::Private::Private(const String& p, bool caseSensitive)
+ : RefCounted<Private>(0)
+ , pattern(p)
+ , lastMatchPos(-1)
+ , lastMatchLength(-1)
+{
+ compile(caseSensitive);
+}
+
+// Compiles |pattern| into |regex| using single-line mode and the requested
+// case sensitivity. On failure regex is left null and the error text reported
+// by jsRegExpCompile() is logged.
+void RegularExpression::Private::compile(bool caseSensitive)
+{
+    const char* errorMessage;
+    const UChar* patternCharacters = pattern.characters();
+    unsigned patternLength = pattern.length();
+
+    regex = jsRegExpCompile(patternCharacters, patternLength,
+        caseSensitive ? JSRegExpDoNotIgnoreCase : JSRegExpIgnoreCase, JSRegExpSingleLine,
+        0, &errorMessage);
+    if (regex)
+        return;
+
+    LOG_ERROR("RegularExpression: pcre_compile failed with '%s'", errorMessage);
+}
+
+// Releases the compiled expression.
+// NOTE(review): regex is null when compile() failed; this assumes
+// jsRegExpFree() accepts a null pointer -- confirm.
+RegularExpression::Private::~Private()
+{
+ jsRegExpFree(regex);
+}
+
+
+// Default construction: empty pattern (see Private::Private()).
+RegularExpression::RegularExpression()
+ : d(new Private)
+{
+}
+
+// Compiles |pattern| with the given case sensitivity.
+RegularExpression::RegularExpression(const String& pattern, bool caseSensitive)
+ : d(new Private(pattern, caseSensitive))
+{
+}
+
+// C-string convenience overload; always case sensitive.
+RegularExpression::RegularExpression(const char* pattern)
+ : d(new Private(pattern, true))
+{
+}
+
+
+// Copies share the same Private object (pattern, compiled expression and
+// last-match state) through the RefPtr; nothing is recompiled.
+RegularExpression::RegularExpression(const RegularExpression& re)
+ : d(re.d)
+{
+}
+
+// Nothing to do explicitly; the RefPtr member drops its reference.
+RegularExpression::~RegularExpression()
+{
+}
+
+// Copy-and-swap assignment: take a copy of |re|, exchange shared-state
+// pointers with it, and let the copy release the previous state when it goes
+// out of scope.
+RegularExpression& RegularExpression::operator=(const RegularExpression& re)
+{
+    RegularExpression copy(re);
+    d.swap(copy.d);
+    return *this;
+}
+
+// Returns the pattern text this expression was constructed with.
+String RegularExpression::pattern() const
+{
+ return d->pattern;
+}
+
+// Runs the expression against |str| starting at offset |startFrom|.
+//
+// Returns the start offset of the match, or -1 if there is none. When
+// |matchLength| is non-null it receives the length of the match, and is left
+// untouched when nothing matches. The outcome is also recorded in the shared
+// state so that pos() and matchedLength() can report it afterwards.
+//
+// An expression whose pattern failed to compile (regex is null, see
+// Private::compile()) never matches. That case is handled here instead of
+// handing a null expression to jsRegExpExecute().
+int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
+{
+    if (!d->regex) {
+        d->lastMatchPos = -1;
+        d->lastMatchLength = -1;
+        d->lastMatchString = String();
+        return -1;
+    }
+
+    d->lastMatchString = str;
+    // First 2 offsets are start and end offsets; 3rd entry is used internally by pcre
+    d->lastMatchCount = jsRegExpExecute(d->regex, d->lastMatchString.characters(),
+        d->lastMatchString.length(), startFrom, d->lastMatchOffsets, maxOffsets);
+    if (d->lastMatchCount < 0) {
+        // "No match" is an expected outcome; only other negative codes are logged.
+        if (d->lastMatchCount != JSRegExpErrorNoMatch)
+            LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", d->lastMatchCount);
+        d->lastMatchPos = -1;
+        d->lastMatchLength = -1;
+        d->lastMatchString = String();
+        return -1;
+    }
+
+    // 1 means 1 match; 0 means more than one match. First match is recorded in offsets.
+    d->lastMatchPos = d->lastMatchOffsets[0];
+    d->lastMatchLength = d->lastMatchOffsets[1] - d->lastMatchOffsets[0];
+    if (matchLength)
+        *matchLength = d->lastMatchLength;
+    return d->lastMatchPos;
+}
+
+// Searches |str| for the first match at or after |startFrom| and returns its
+// offset, or -1 if there is none.
+//
+// A negative |startFrom| counts back from the end of the string: -1 is the
+// last character, -2 the one before it, and so on. The offset is therefore
+// length + startFrom. The previous code computed length - startFrom, which
+// lands past the end of the string for every negative value. An offset that
+// reaches back beyond the start is clamped to 0, so a negative offset is
+// never handed to match().
+int RegularExpression::search(const String& str, int startFrom) const
+{
+    if (startFrom < 0) {
+        startFrom += static_cast<int>(str.length());
+        if (startFrom < 0)
+            startFrom = 0;
+    }
+    return match(str, startFrom, 0);
+}
+
+// Emulates a reverse search by matching forward repeatedly, restarting one
+// position after each hit, and remembering the hit that ends furthest to the
+// right. Returns that hit's start offset, or -1 when nothing matched, and
+// stores the winning position and length as the last-match state.
+int RegularExpression::searchRev(const String& str) const
+{
+    // FIXME: Total hack for now. Search forward, return the last, greedy match
+    int bestPos = -1;
+    int bestLength = -1;
+    int searchStart = 0;
+
+    for (;;) {
+        int currentLength;
+        int currentPos = match(str, searchStart, &currentLength);
+        if (currentPos >= 0) {
+            // Keep this hit only if it ends later than the best one so far,
+            // i.e. it is not contained in the previously recorded match.
+            if (currentPos + currentLength > bestPos + bestLength) {
+                bestPos = currentPos;
+                bestLength = currentLength;
+            }
+            searchStart = currentPos + 1;
+        }
+        if (currentPos == -1)
+            break;
+    }
+
+    d->lastMatchPos = bestPos;
+    d->lastMatchLength = bestLength;
+    return bestPos;
+}
+
+// Returns the start offset recorded by the most recent match, or -1 if it
+// failed. Only n == 0 (the whole match) is supported; other values assert.
+int RegularExpression::pos(int n)
+{
+ ASSERT(n == 0);
+ return d->lastMatchPos;
+}
+
+// Returns the length recorded by the most recent match, or -1 if it failed.
+int RegularExpression::matchedLength() const
+{
+ return d->lastMatchLength;
+}
+
+// Replaces every match of |target| in |string| with |replacement|, scanning
+// left to right. After each substitution the scan resumes just past the
+// inserted text, so the replacement itself is never re-matched.
+void replace(String& string, const RegularExpression& target, const String& replacement)
+{
+    int cursor = 0;
+    while (cursor < static_cast<int>(string.length())) {
+        int matchedLength;
+        int matchStart = target.match(string, cursor, &matchedLength);
+        if (matchStart < 0)
+            break;
+
+        string.replace(matchStart, matchedLength, replacement);
+        if (!matchedLength)
+            break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
+
+        cursor = matchStart + replacement.length();
+    }
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/RegularExpression.h b/WebCore/platform/text/RegularExpression.h
new file mode 100644
index 0000000..5d1991e
--- /dev/null
+++ b/WebCore/platform/text/RegularExpression.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2003, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegularExpression_h
+#define RegularExpression_h
+
+#include <wtf/RefPtr.h>
+
+namespace WebCore {
+
+class String;
+
+// Regular expression with cheap copies: all state lives in a shared,
+// reference-counted Private object, so copies of a RegularExpression also
+// share the record of the last match.
+class RegularExpression {
+public:
+ RegularExpression();
+ // Note the differing defaults: this overload defaults to case-insensitive
+ // matching, while the const char* overload below is always case sensitive.
+ RegularExpression(const String&, bool caseSensitive = false);
+ RegularExpression(const char*);
+ ~RegularExpression();
+
+ RegularExpression(const RegularExpression&);
+ RegularExpression& operator=(const RegularExpression&);
+
+ String pattern() const;
+ // Returns the offset of the match or -1; optionally reports its length.
+ int match(const String&, int startFrom = 0, int* matchLength = 0) const;
+
+ int search(const String&, int startFrom = 0) const;
+ int searchRev(const String&) const;
+
+ // Position and length recorded by the most recent match/search call.
+ int pos(int n = 0);
+ int matchedLength() const;
+
+private:
+ class Private;
+ RefPtr<Private> d;
+};
+
+// Replaces matches of the expression in the first argument (modified in
+// place) with the string given as the last argument.
+void replace(String&, const RegularExpression&, const String&);
+
+} // namespace WebCore
+
+#endif // RegularExpression_h
diff --git a/WebCore/platform/text/SegmentedString.cpp b/WebCore/platform/text/SegmentedString.cpp
new file mode 100644
index 0000000..9f5eb26
--- /dev/null
+++ b/WebCore/platform/text/SegmentedString.cpp
@@ -0,0 +1,202 @@
+/*
+ Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+*/
+
+#include "config.h"
+#include "SegmentedString.h"
+
+namespace WebCore {
+
+// Copy constructor. m_currentChar may point at one of the source object's own
+// pushed-character members; in that case it is redirected to the matching
+// member of this object instead of being copied verbatim.
+SegmentedString::SegmentedString(const SegmentedString& other)
+    : m_pushedChar1(other.m_pushedChar1)
+    , m_pushedChar2(other.m_pushedChar2)
+    , m_currentString(other.m_currentString)
+    , m_substrings(other.m_substrings)
+    , m_composite(other.m_composite)
+{
+    const UChar* sourceChar = other.m_currentChar;
+    if (sourceChar == &other.m_pushedChar1)
+        sourceChar = &m_pushedChar1;
+    else if (sourceChar == &other.m_pushedChar2)
+        sourceChar = &m_pushedChar2;
+    m_currentChar = sourceChar;
+}
+
+// Copy assignment. Copies every member, then fixes up m_currentChar so that
+// it points into this object when the source's pointer referred to one of the
+// source's own pushed-character members.
+const SegmentedString& SegmentedString::operator=(const SegmentedString& other)
+{
+    m_pushedChar1 = other.m_pushedChar1;
+    m_pushedChar2 = other.m_pushedChar2;
+    m_currentString = other.m_currentString;
+    m_substrings = other.m_substrings;
+    m_composite = other.m_composite;
+
+    const UChar* sourceChar = other.m_currentChar;
+    if (sourceChar == &other.m_pushedChar1)
+        sourceChar = &m_pushedChar1;
+    else if (sourceChar == &other.m_pushedChar2)
+        sourceChar = &m_pushedChar2;
+    m_currentChar = sourceChar;
+
+    return *this;
+}
+
+// Total number of characters still to be consumed: what is left of the
+// current substring, plus any pushed-back characters, plus every queued
+// substring. The second pushed character only counts when the first is set.
+unsigned SegmentedString::length() const
+{
+    unsigned total = m_currentString.m_length;
+    if (m_pushedChar1)
+        total += m_pushedChar2 ? 2 : 1;
+
+    if (!m_composite)
+        return total;
+
+    typedef Deque<SegmentedSubstring>::const_iterator SubstringIterator;
+    const SubstringIterator end = m_substrings.end();
+    for (SubstringIterator it = m_substrings.begin(); it != end; ++it)
+        total += it->m_length;
+    return total;
+}
+
+// Marks every segment of this string as excluded from line counting.
+//
+// The current substring is flagged unconditionally. It is the segment the
+// advance() functions and excludeLineNumbers() consult, and it is never part
+// of m_substrings. Previously it was skipped whenever queued substrings
+// existed, so a composite string kept counting newlines in its current
+// segment. Queued substrings are flagged as well when present.
+void SegmentedString::setExcludeLineNumbers()
+{
+    m_currentString.setExcludeLineNumbers();
+    if (m_composite) {
+        Deque<SegmentedSubstring>::iterator it = m_substrings.begin();
+        Deque<SegmentedSubstring>::iterator e = m_substrings.end();
+        for (; it != e; ++it)
+            it->setExcludeLineNumbers();
+    }
+}
+
+// Resets the string to the empty state: no pushed-back characters, no current
+// character, no current substring and no queued substrings.
+void SegmentedString::clear()
+{
+    // Character-level state.
+    m_currentChar = 0;
+    m_pushedChar1 = 0;
+    m_pushedChar2 = 0;
+
+    // Segment-level state.
+    m_composite = false;
+    m_substrings.clear();
+    m_currentString.clear();
+}
+
+// Adds one substring at the end. Empty substrings are ignored. If nothing is
+// current the substring becomes the current one; otherwise it is queued and
+// the string is marked composite.
+void SegmentedString::append(const SegmentedSubstring& s)
+{
+    if (!s.m_length)
+        return;
+
+    if (m_currentString.m_length) {
+        m_substrings.append(s);
+        m_composite = true;
+        return;
+    }
+    m_currentString = s;
+}
+
+// Adds one substring at the front. Empty substrings are ignored. Must not be
+// called while a pushed-back character is pending (asserted).
+void SegmentedString::prepend(const SegmentedSubstring& s)
+{
+    ASSERT(!escaped());
+    if (!s.m_length)
+        return;
+
+    if (m_currentString.m_length) {
+        // Move the current substring to the head of the queue so the new
+        // substring can take its place.
+        m_substrings.prepend(m_currentString);
+        m_composite = true;
+    }
+    m_currentString = s;
+}
+
+// Appends every segment of |s| (its current substring first, then its queued
+// substrings in order) and refreshes the current-character pointer.
+// |s| must not have a pushed-back character pending (asserted).
+void SegmentedString::append(const SegmentedString& s)
+{
+    ASSERT(!s.escaped());
+
+    append(s.m_currentString);
+    if (s.m_composite) {
+        typedef Deque<SegmentedSubstring>::const_iterator SubstringIterator;
+        SubstringIterator it = s.m_substrings.begin();
+        const SubstringIterator end = s.m_substrings.end();
+        while (it != end) {
+            append(*it);
+            ++it;
+        }
+    }
+
+    if (m_pushedChar1)
+        m_currentChar = &m_pushedChar1;
+    else
+        m_currentChar = m_currentString.m_current;
+}
+
+// Prepends every segment of |s|. The queued substrings are pushed in reverse
+// order and the current substring of |s| last, so the segments of |s| end up
+// at the front in their original order. Neither string may have a pushed-back
+// character pending (asserted).
+void SegmentedString::prepend(const SegmentedString& s)
+{
+    ASSERT(!escaped());
+    ASSERT(!s.escaped());
+
+    if (s.m_composite) {
+        typedef Deque<SegmentedSubstring>::const_reverse_iterator ReverseIterator;
+        ReverseIterator it = s.m_substrings.rbegin();
+        const ReverseIterator end = s.m_substrings.rend();
+        while (it != end) {
+            prepend(*it);
+            ++it;
+        }
+    }
+    prepend(s.m_currentString);
+
+    if (m_pushedChar1)
+        m_currentChar = &m_pushedChar1;
+    else
+        m_currentChar = m_currentString.m_current;
+}
+
+// Called when the current substring is used up: promotes the first queued
+// substring to current, or clears the current substring if none is queued.
+void SegmentedString::advanceSubstring()
+{
+    if (!m_composite) {
+        m_currentString.clear();
+        return;
+    }
+
+    m_currentString = m_substrings.first();
+    m_substrings.removeFirst();
+    // Still composite only while something remains queued.
+    m_composite = !m_substrings.isEmpty();
+}
+
+// Flattens the remaining content into one String: pushed-back characters
+// first, then the rest of the current substring, then each queued substring.
+String SegmentedString::toString() const
+{
+    String flattened;
+
+    if (m_pushedChar1) {
+        flattened.append(m_pushedChar1);
+        if (m_pushedChar2)
+            flattened.append(m_pushedChar2);
+    }
+
+    m_currentString.appendTo(flattened);
+
+    if (!m_composite)
+        return flattened;
+
+    typedef Deque<SegmentedSubstring>::const_iterator SubstringIterator;
+    const SubstringIterator end = m_substrings.end();
+    for (SubstringIterator it = m_substrings.begin(); it != end; ++it)
+        it->appendTo(flattened);
+    return flattened;
+}
+
+// Out-of-line part of advance(): handles a pending pushed-back character or
+// the last character of the current substring, then recomputes m_currentChar.
+void SegmentedString::advanceSlowCase()
+{
+    if (m_pushedChar1) {
+        // Consume the first pushed character; the second one moves up.
+        m_pushedChar1 = m_pushedChar2;
+        m_pushedChar2 = 0;
+    } else if (m_currentString.m_current) {
+        ++m_currentString.m_current;
+        --m_currentString.m_length;
+        if (m_currentString.m_length == 0)
+            advanceSubstring();
+    }
+
+    if (m_pushedChar1)
+        m_currentChar = &m_pushedChar1;
+    else
+        m_currentChar = m_currentString.m_current;
+}
+
+// Line-counting variant of advanceSlowCase(). |lineNumber| is incremented
+// when the consumed character comes from the current substring, is '\n', and
+// that substring is not excluded from line counting. Consuming a pushed-back
+// character never changes |lineNumber|.
+void SegmentedString::advanceSlowCase(int& lineNumber)
+{
+    if (m_pushedChar1) {
+        m_pushedChar1 = m_pushedChar2;
+        m_pushedChar2 = 0;
+    } else if (m_currentString.m_current) {
+        const bool consumedNewline = *m_currentString.m_current == '\n';
+        ++m_currentString.m_current;
+        if (consumedNewline && m_currentString.doNotExcludeLineNumbers())
+            ++lineNumber;
+        --m_currentString.m_length;
+        if (m_currentString.m_length == 0)
+            advanceSubstring();
+    }
+
+    if (m_pushedChar1)
+        m_currentChar = &m_pushedChar1;
+    else
+        m_currentChar = m_currentString.m_current;
+}
+
+}
diff --git a/WebCore/platform/text/SegmentedString.h b/WebCore/platform/text/SegmentedString.h
new file mode 100644
index 0000000..79ed1f0
--- /dev/null
+++ b/WebCore/platform/text/SegmentedString.h
@@ -0,0 +1,176 @@
+/*
+ Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this library; see the file COPYING.LIB. If not, write to
+ the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA.
+*/
+
+#ifndef SegmentedString_h
+#define SegmentedString_h
+
+#include "PlatformString.h"
+#include <wtf/Deque.h>
+
+namespace WebCore {
+
+class SegmentedString;
+
+// One contiguous run of characters inside a SegmentedString: a cursor
+// (m_current) plus the number of characters remaining from it (m_length).
+// When built from a String, that String is also kept in m_string.
+class SegmentedSubstring {
+public:
+ SegmentedSubstring() : m_length(0), m_current(0), m_doNotExcludeLineNumbers(true) {}
+ // m_current is null for an empty String, otherwise it points at the
+ // String's characters.
+ SegmentedSubstring(const String& str)
+ : m_length(str.length())
+ , m_current(str.isEmpty() ? 0 : str.characters())
+ , m_string(str)
+ , m_doNotExcludeLineNumbers(true)
+ {
+ }
+
+ // Stores the raw pointer only; m_string stays null, so nothing is copied.
+ // NOTE(review): presumably the caller must keep the buffer alive for as
+ // long as this substring is in use -- confirm.
+ SegmentedSubstring(const UChar* str, int length) : m_length(length), m_current(length == 0 ? 0 : str), m_doNotExcludeLineNumbers(true) {}
+
+ // Resets the cursor and remaining length; m_string is left untouched.
+ void clear() { m_length = 0; m_current = 0; }
+
+ bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
+ bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
+
+ // One-way switch: there is no call to turn line counting back on.
+ void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
+
+ // Appends the remaining characters to |str|. If the cursor still sits at
+ // the start of m_string, the String object itself is assigned or appended;
+ // otherwise a new String is built from the remaining characters.
+ void appendTo(String& str) const
+ {
+ if (m_string.characters() == m_current) {
+ if (str.isEmpty())
+ str = m_string;
+ else
+ str.append(m_string);
+ } else {
+ str.append(String(m_current, m_length));
+ }
+ }
+
+public:
+ int m_length; // characters remaining from m_current
+ const UChar* m_current; // next character to consume, or null when empty
+
+private:
+ String m_string;
+ bool m_doNotExcludeLineNumbers; // true while newlines in this run count as lines
+};
+
+// A character stream assembled from several substrings. The substring being
+// consumed is m_currentString; further ones wait in m_substrings, and
+// m_composite is true while that queue is in use. Up to two characters can be
+// pushed back in front of the stream (m_pushedChar1, m_pushedChar2).
+// m_currentChar always points at the next character to be read: either at
+// m_pushedChar1 or into the current substring.
+class SegmentedString {
+public:
+ SegmentedString()
+ : m_pushedChar1(0), m_pushedChar2(0), m_currentChar(0), m_composite(false) {}
+ SegmentedString(const UChar* str, int length) : m_pushedChar1(0), m_pushedChar2(0)
+ , m_currentString(str, length), m_currentChar(m_currentString.m_current), m_composite(false) {}
+ SegmentedString(const String& str)
+ : m_pushedChar1(0), m_pushedChar2(0), m_currentString(str)
+ , m_currentChar(m_currentString.m_current), m_composite(false) {}
+ SegmentedString(const SegmentedString&);
+
+ const SegmentedString& operator=(const SegmentedString&);
+
+ void clear();
+
+ void append(const SegmentedString&);
+ void prepend(const SegmentedString&);
+
+ // Reports the flag of the current substring only.
+ bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
+ void setExcludeLineNumbers();
+
+ // Pushes a character back in front of the stream. The first pushed
+ // character becomes the current one; a second may be stored behind it.
+ // Pushing a third is a programming error (asserted).
+ void push(UChar c)
+ {
+ if (!m_pushedChar1) {
+ m_pushedChar1 = c;
+ m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
+ } else {
+ ASSERT(!m_pushedChar2);
+ m_pushedChar2 = c;
+ }
+ }
+
+ bool isEmpty() const { return !current(); }
+ unsigned length() const;
+
+ // The advance functions share a fast path: with no pushed-back character
+ // and more than one character left in the current substring, only the
+ // cursor is bumped. Everything else goes through advanceSlowCase().
+ void advance()
+ {
+ if (!m_pushedChar1 && m_currentString.m_length > 1) {
+ --m_currentString.m_length;
+ m_currentChar = ++m_currentString.m_current;
+ return;
+ }
+ advanceSlowCase();
+ }
+
+ // Caller guarantees the current character is '\n' (asserted), so the fast
+ // path adds the line-counting flag to lineNumber without re-testing it.
+ void advancePastNewline(int& lineNumber)
+ {
+ ASSERT(*current() == '\n');
+ if (!m_pushedChar1 && m_currentString.m_length > 1) {
+ lineNumber += m_currentString.doNotExcludeLineNumbers();
+ --m_currentString.m_length;
+ m_currentChar = ++m_currentString.m_current;
+ return;
+ }
+ advanceSlowCase(lineNumber);
+ }
+
+ // Caller guarantees the current character is not '\n' (asserted), so no
+ // line counting is needed.
+ void advancePastNonNewline()
+ {
+ ASSERT(*current() != '\n');
+ if (!m_pushedChar1 && m_currentString.m_length > 1) {
+ --m_currentString.m_length;
+ m_currentChar = ++m_currentString.m_current;
+ return;
+ }
+ advanceSlowCase();
+ }
+
+ // General line-counting advance: lineNumber grows by one when the consumed
+ // character is '\n' and the current substring counts lines.
+ void advance(int& lineNumber)
+ {
+ if (!m_pushedChar1 && m_currentString.m_length > 1) {
+ lineNumber += (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers();
+ --m_currentString.m_length;
+ m_currentChar = ++m_currentString.m_current;
+ return;
+ }
+ advanceSlowCase(lineNumber);
+ }
+
+ // True while a pushed-back character is pending.
+ bool escaped() const { return m_pushedChar1; }
+
+ String toString() const;
+
+ // Access to the current character; current() is null when the stream is
+ // empty, so callers must check isEmpty() before dereferencing.
+ const UChar& operator*() const { return *current(); }
+ const UChar* operator->() const { return current(); }
+
+private:
+ void append(const SegmentedSubstring&);
+ void prepend(const SegmentedSubstring&);
+
+ void advanceSlowCase();
+ void advanceSlowCase(int& lineNumber);
+ void advanceSubstring();
+ const UChar* current() const { return m_currentChar; }
+
+ UChar m_pushedChar1; // next character when non-zero
+ UChar m_pushedChar2; // character after m_pushedChar1 when non-zero
+ SegmentedSubstring m_currentString;
+ const UChar* m_currentChar;
+ Deque<SegmentedSubstring> m_substrings;
+ bool m_composite;
+};
+
+}
+
+#endif
diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp
new file mode 100644
index 0000000..ee4cef4
--- /dev/null
+++ b/WebCore/platform/text/String.cpp
@@ -0,0 +1,799 @@
+/*
+ * (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+
+#include "CString.h"
+#include "FloatConversion.h"
+#include "StringBuffer.h"
+#include "TextEncoding.h"
+#include <kjs/dtoa.h>
+#include <kjs/identifier.h>
+#include <limits>
+#include <stdarg.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/StringExtras.h>
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+using KJS::Identifier;
+using KJS::UString;
+
+using namespace WTF;
+
+namespace WebCore {
+
+// Wraps len UTF-16 code units starting at str. A null str creates no
+// StringImpl, leaving m_impl unset.
+String::String(const UChar* str, unsigned len)
+{
+    if (str)
+        m_impl = StringImpl::create(str, len);
+}
+
+// Builds a string from a zero-terminated UTF-16 buffer. A null str creates no
+// StringImpl, leaving m_impl unset.
+String::String(const UChar* str)
+{
+    if (!str)
+        return;
+
+    // Count code units up to, but not including, the terminating zero. The
+    // counter is unsigned to match the length parameter of StringImpl::create
+    // and to avoid signed overflow on very long input.
+    unsigned len = 0;
+    while (str[len] != UChar(0))
+        ++len;
+
+    m_impl = StringImpl::create(str, len);
+}
+
+// Builds a string from a zero-terminated char buffer via StringImpl::create.
+// A null str creates no StringImpl.
+String::String(const char* str)
+{
+    if (str)
+        m_impl = StringImpl::create(str);
+}
+
+// Builds a string from the first `length` chars at str via StringImpl::create.
+// A null str creates no StringImpl.
+String::String(const char* str, unsigned length)
+{
+    if (str)
+        m_impl = StringImpl::create(str, length);
+}
+
+// Appends str. A null str is a no-op; appending to a null string simply shares
+// str's StringImpl. Otherwise a new buffer holding both halves is built and
+// adopted.
+void String::append(const String& str)
+{
+    // FIXME: This is extremely inefficient. So much so that we might want to take this
+    // out of String's API. We can make it better by optimizing the case where exactly
+    // one String is pointing at this StringImpl, but even then it's going to require a
+    // call to fastMalloc every single time.
+    if (!str.m_impl)
+        return;
+    if (!m_impl) {
+        m_impl = str.m_impl;
+        return;
+    }
+
+    const unsigned oldLength = m_impl->length();
+    const unsigned addedLength = str.length();
+    StringBuffer buffer(oldLength + addedLength);
+    memcpy(buffer.characters(), m_impl->characters(), oldLength * sizeof(UChar));
+    memcpy(buffer.characters() + oldLength, str.characters(), addedLength * sizeof(UChar));
+    m_impl = StringImpl::adopt(buffer);
+}
+
+// Appends a single 8-bit character. A null string becomes a one-character
+// string; otherwise a buffer one unit longer is built and adopted.
+void String::append(char c)
+{
+    // FIXME: This is extremely inefficient. So much so that we might want to take this
+    // out of String's API. We can make it better by optimizing the case where exactly
+    // one String is pointing at this StringImpl, but even then it's going to require a
+    // call to fastMalloc every single time.
+    if (m_impl) {
+        StringBuffer buffer(m_impl->length() + 1);
+        memcpy(buffer.characters(), m_impl->characters(), m_impl->length() * sizeof(UChar));
+        // Widen through unsigned char: where plain char is signed, a byte
+        // >= 0x80 would otherwise be sign-extended to 0xFFxx, making the
+        // stored code unit depend on the platform's char signedness.
+        buffer[m_impl->length()] = static_cast<unsigned char>(c);
+        m_impl = StringImpl::adopt(buffer);
+    } else
+        m_impl = StringImpl::create(&c, 1);
+}
+
+// Appends a single UTF-16 code unit. A null string becomes a one-unit string;
+// otherwise a buffer one unit longer is built and adopted.
+void String::append(UChar c)
+{
+    // FIXME: This is extremely inefficient. So much so that we might want to take this
+    // out of String's API. We can make it better by optimizing the case where exactly
+    // one String is pointing at this StringImpl, but even then it's going to require a
+    // call to fastMalloc every single time.
+    if (!m_impl) {
+        m_impl = StringImpl::create(&c, 1);
+        return;
+    }
+
+    const unsigned oldLength = m_impl->length();
+    StringBuffer buffer(oldLength + 1);
+    memcpy(buffer.characters(), m_impl->characters(), oldLength * sizeof(UChar));
+    buffer[oldLength] = c;
+    m_impl = StringImpl::adopt(buffer);
+}
+
+// Concatenation. When either operand is empty the other one is returned
+// unchanged, so no new buffer is built.
+String operator+(const String& a, const String& b)
+{
+    if (a.isEmpty())
+        return b;
+    if (b.isEmpty())
+        return a;
+
+    String joined(a);
+    joined += b;
+    return joined;
+}
+
+// Concatenates a String with a C string by first wrapping cs in a String.
+String operator+(const String& s, const char* cs)
+{
+ return s + String(cs);
+}
+
+// Concatenates a C string with a String by first wrapping cs in a String.
+String operator+(const char* cs, const String& s)
+{
+ return String(cs) + s;
+}
+
+// Inserts str at pos. Inserting an empty-but-non-null string into a null
+// string makes this string share str's impl; any other empty insert is a
+// no-op. Non-empty input goes to the UChar-buffer overload.
+void String::insert(const String& str, unsigned pos)
+{
+    if (!str.isEmpty()) {
+        insert(str.characters(), str.length(), pos);
+        return;
+    }
+
+    if (!str.isNull() && isNull())
+        m_impl = str.impl();
+}
+
+// Appends lengthToAppend code units from charactersToAppend. On a null string
+// this creates a fresh StringImpl (unless the source pointer is null too);
+// otherwise a zero length is a no-op and anything longer rebuilds the buffer.
+void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
+{
+    if (!m_impl) {
+        if (charactersToAppend)
+            m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
+        return;
+    }
+
+    if (!lengthToAppend)
+        return;
+
+    ASSERT(charactersToAppend);
+    const unsigned oldLength = length();
+    StringBuffer combined(oldLength + lengthToAppend);
+    memcpy(combined.characters(), characters(), oldLength * sizeof(UChar));
+    memcpy(combined.characters() + oldLength, charactersToAppend, lengthToAppend * sizeof(UChar));
+    m_impl = StringImpl::adopt(combined);
+}
+
+// Inserts lengthToInsert code units before `position`. A position at or past
+// the end turns into an append; a zero length is a no-op. Otherwise the result
+// is assembled as head + inserted + tail in a new buffer.
+void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
+{
+    const unsigned oldLength = length();
+    if (position >= oldLength) {
+        append(charactersToInsert, lengthToInsert);
+        return;
+    }
+
+    ASSERT(m_impl);
+
+    if (!lengthToInsert)
+        return;
+
+    ASSERT(charactersToInsert);
+    StringBuffer buffer(oldLength + lengthToInsert);
+    UChar* out = buffer.characters();
+    memcpy(out, characters(), position * sizeof(UChar));
+    memcpy(out + position, charactersToInsert, lengthToInsert * sizeof(UChar));
+    memcpy(out + position + lengthToInsert, characters() + position, (oldLength - position) * sizeof(UChar));
+    m_impl = StringImpl::adopt(buffer);
+}
+
+// Bounds-checked read of code unit i; yields 0 for a null string or an index
+// at or past the end.
+UChar String::operator[](unsigned i) const
+{
+    if (m_impl && i < m_impl->length())
+        return m_impl->characters()[i];
+    return 0;
+}
+
+// Forwards to StringImpl::characterStartingAt(i); yields 0 for a null string
+// or an index at or past the end.
+UChar32 String::characterStartingAt(unsigned i) const
+{
+    if (m_impl && i < m_impl->length())
+        return m_impl->characterStartingAt(i);
+    return 0;
+}
+
+// Length reported by the StringImpl; a null string has length 0.
+unsigned String::length() const
+{
+    if (m_impl)
+        return m_impl->length();
+    return 0;
+}
+
+// Keeps only the first `position` code units. Does nothing when position is
+// at or past the current length.
+void String::truncate(unsigned position)
+{
+    if (position < length()) {
+        StringBuffer head(position);
+        memcpy(head.characters(), characters(), position * sizeof(UChar));
+        m_impl = StringImpl::adopt(head);
+    }
+}
+
+// Removes up to lengthToRemove code units starting at position. Non-positive
+// counts and positions at or past the end are ignored; the count is clamped
+// to what remains after position.
+void String::remove(unsigned position, int lengthToRemove)
+{
+    if (lengthToRemove <= 0)
+        return;
+    const unsigned oldLength = length();
+    if (position >= oldLength)
+        return;
+
+    const unsigned available = oldLength - position;
+    unsigned count = static_cast<unsigned>(lengthToRemove);
+    if (count > available)
+        count = available;
+
+    const unsigned newLength = oldLength - count;
+    StringBuffer buffer(newLength);
+    memcpy(buffer.characters(), characters(), position * sizeof(UChar));
+    memcpy(buffer.characters() + position, characters() + position + count,
+        (newLength - position) * sizeof(UChar));
+    m_impl = StringImpl::adopt(buffer);
+}
+
+// Forwards to StringImpl::substring(pos, len); a null string yields a
+// default-constructed String.
+String String::substring(unsigned pos, unsigned len) const
+{
+    if (m_impl)
+        return m_impl->substring(pos, len);
+    return String();
+}
+
+// Forwards to StringImpl::lower(); a null string yields a default-constructed
+// String.
+String String::lower() const
+{
+    if (m_impl)
+        return m_impl->lower();
+    return String();
+}
+
+// Forwards to StringImpl::upper(); a null string yields a default-constructed
+// String.
+String String::upper() const
+{
+    if (m_impl)
+        return m_impl->upper();
+    return String();
+}
+
+// Forwards to StringImpl::stripWhiteSpace(); a null string yields a
+// default-constructed String.
+String String::stripWhiteSpace() const
+{
+    if (m_impl)
+        return m_impl->stripWhiteSpace();
+    return String();
+}
+
+// Forwards to StringImpl::simplifyWhiteSpace(); a null string yields a
+// default-constructed String.
+String String::simplifyWhiteSpace() const
+{
+    if (m_impl)
+        return m_impl->simplifyWhiteSpace();
+    return String();
+}
+
+// Forwards to StringImpl::foldCase(); a null string yields a
+// default-constructed String.
+String String::foldCase() const
+{
+    if (m_impl)
+        return m_impl->foldCase();
+    return String();
+}
+
+// Returns true when the string is non-empty and ends in '%'. In that case
+// result receives charactersToIntStrict() of everything before the '%'; the
+// success of that parse is not reflected in the return value.
+bool String::percentage(int& result) const
+{
+    const unsigned len = m_impl ? m_impl->length() : 0;
+    if (!len)
+        return false;
+
+    const unsigned lastIndex = len - 1;
+    if ((*m_impl)[lastIndex] != '%')
+        return false;
+
+    result = charactersToIntStrict(m_impl->characters(), lastIndex);
+    return true;
+}
+
+// Pointer to the StringImpl's code units, or null for a null string.
+const UChar* String::characters() const
+{
+    if (m_impl)
+        return m_impl->characters();
+    return 0;
+}
+
+// Returns the code units of a StringImpl that reports a terminating null
+// character, replacing m_impl with such a StringImpl first when the current
+// one does not. A null string yields null.
+const UChar* String::charactersWithNullTermination()
+{
+    if (!m_impl)
+        return 0;
+    if (!m_impl->hasTerminatingNullCharacter())
+        m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);
+    return m_impl->characters();
+}
+
+// printf-style formatting into a String.
+//
+// The arguments are formatted twice: once to measure the output, once into a
+// buffer of exactly that size. Returns an empty (non-null) string when the
+// output has zero length and a null String when the measuring pass reports an
+// error.
+String String::format(const char *format, ...)
+{
+    va_list args;
+    va_start(args, format);
+
+    Vector<char, 256> buffer;
+
+    // Do the format once to get the length.
+#if COMPILER(MSVC)
+    int result = _vscprintf(format, args);
+#else
+    char ch;
+    int result = vsnprintf(&ch, 1, format, args);
+    // We need to call va_end() and then va_start() again here, as the
+    // contents of args are undefined after the call to vsnprintf
+    // according to http://man.cx/snprintf(3)
+    //
+    // Not calling va_end/va_start here happens to work on lots of
+    // systems, but fails e.g. on 64bit Linux.
+    va_end(args);
+    va_start(args, format);
+#endif
+
+    // args is live at this point on every platform, so each early return has
+    // to close it with va_end() just like the normal path does.
+    if (result == 0) {
+        va_end(args);
+        return String("");
+    }
+    if (result < 0) {
+        va_end(args);
+        return String();
+    }
+    unsigned len = result;
+    buffer.grow(len + 1);
+
+    // Now do the formatting again, guaranteed to fit.
+    vsnprintf(buffer.data(), buffer.size(), format, args);
+
+    va_end(args);
+
+    return StringImpl::create(buffer.data(), len);
+}
+
+// String::number overloads: each one renders its argument through
+// String::format with the conversion specifier matching the argument type.
+String String::number(int n)
+{
+ return String::format("%d", n);
+}
+
+String String::number(unsigned n)
+{
+ return String::format("%u", n);
+}
+
+String String::number(long n)
+{
+ return String::format("%ld", n);
+}
+
+String String::number(unsigned long n)
+{
+ return String::format("%lu", n);
+}
+
+// The 64-bit overloads use a different length modifier under PLATFORM(WIN_OS)
+// ("I64") than elsewhere ("ll").
+String String::number(long long n)
+{
+#if PLATFORM(WIN_OS)
+ return String::format("%I64i", n);
+#else
+ return String::format("%lli", n);
+#endif
+}
+
+String String::number(unsigned long long n)
+{
+#if PLATFORM(WIN_OS)
+ return String::format("%I64u", n);
+#else
+ return String::format("%llu", n);
+#endif
+}
+
+// Doubles are rendered with "%.6lg", i.e. at most six significant digits.
+String String::number(double n)
+{
+ return String::format("%.6lg", n);
+}
+
+// Forwards to StringImpl::toIntStrict. A null string is a failed conversion:
+// *ok (when supplied) is set to false and 0 is returned.
+int String::toIntStrict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toIntStrict(ok, base);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toUIntStrict. A null string is a failed conversion:
+// *ok (when supplied) is set to false and 0 is returned.
+unsigned String::toUIntStrict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toUIntStrict(ok, base);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toInt64Strict. A null string is a failed conversion:
+// *ok (when supplied) is set to false and 0 is returned.
+int64_t String::toInt64Strict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toInt64Strict(ok, base);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toUInt64Strict. A null string is a failed
+// conversion: *ok (when supplied) is set to false and 0 is returned.
+uint64_t String::toUInt64Strict(bool* ok, int base) const
+{
+    if (m_impl)
+        return m_impl->toUInt64Strict(ok, base);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toInt. A null string is a failed conversion: *ok
+// (when supplied) is set to false and 0 is returned.
+int String::toInt(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toInt(ok);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toUInt. A null string is a failed conversion: *ok
+// (when supplied) is set to false and 0 is returned.
+unsigned String::toUInt(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toUInt(ok);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toInt64. A null string is a failed conversion: *ok
+// (when supplied) is set to false and 0 is returned.
+int64_t String::toInt64(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toInt64(ok);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toUInt64. A null string is a failed conversion: *ok
+// (when supplied) is set to false and 0 is returned.
+uint64_t String::toUInt64(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toUInt64(ok);
+    if (ok)
+        *ok = false;
+    return 0;
+}
+
+// Forwards to StringImpl::toDouble. A null string is a failed conversion: *ok
+// (when supplied) is set to false and 0.0 is returned.
+double String::toDouble(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toDouble(ok);
+    if (ok)
+        *ok = false;
+    return 0.0;
+}
+
+// Forwards to StringImpl::toFloat. A null string is a failed conversion: *ok
+// (when supplied) is set to false and 0.0f is returned.
+float String::toFloat(bool* ok) const
+{
+    if (m_impl)
+        return m_impl->toFloat(ok);
+    if (ok)
+        *ok = false;
+    return 0.0f;
+}
+
+// Forwards to StringImpl::copy(); a null string yields a default-constructed
+// String.
+String String::copy() const
+{
+    if (m_impl)
+        return m_impl->copy();
+    return String();
+}
+
+// A string is empty when it is null or its StringImpl has zero length.
+bool String::isEmpty() const
+{
+    if (!m_impl)
+        return true;
+    return !m_impl->length();
+}
+
+// Forwards to StringImpl::toCoordsArray(len). For a null string there is
+// nothing to parse: the result is a null pointer and len is set to 0, so the
+// caller never pairs a null array with a stale element count.
+Length* String::toCoordsArray(int& len) const
+{
+    if (m_impl)
+        return m_impl->toCoordsArray(len);
+    len = 0;
+    return 0;
+}
+
+// Forwards to StringImpl::toLengthArray(len). For a null string there is
+// nothing to parse: the result is a null pointer and len is set to 0, so the
+// caller never pairs a null array with a stale element count.
+Length* String::toLengthArray(int& len) const
+{
+    if (m_impl)
+        return m_impl->toLengthArray(len);
+    len = 0;
+    return 0;
+}
+
+// Splits this string at every occurrence of separator and stores the pieces in
+// result (which is cleared first). Empty pieces are kept only when
+// allowEmptyEntries is true.
+void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
+{
+    result.clear();
+
+    int startPos = 0;
+    // An empty separator has length 0, so a match would leave startPos where
+    // it was and the loop could never finish. Treat it as "no separator
+    // present": the whole string becomes the only piece.
+    if (!separator.isEmpty()) {
+        int endPos;
+        while ((endPos = find(separator, startPos)) != -1) {
+            if (allowEmptyEntries || startPos != endPos)
+                result.append(substring(startPos, endPos - startPos));
+            startPos = endPos + separator.length();
+        }
+    }
+    // Whatever follows the last separator is the final piece.
+    if (allowEmptyEntries || startPos != static_cast<int>(length()))
+        result.append(substring(startPos));
+}
+
+// Convenience overload: splits at separator and drops empty pieces.
+void String::split(const String& separator, Vector<String>& result) const
+{
+    const bool allowEmptyEntries = false;
+    split(separator, allowEmptyEntries, result);
+}
+
+// Splits this string at every occurrence of the code unit separator and stores
+// the pieces in result (which is cleared first). Empty pieces are kept only
+// when allowEmptyEntries is true.
+void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
+{
+    result.clear();
+
+    int pieceStart = 0;
+    for (int hit = find(separator, pieceStart); hit != -1; hit = find(separator, pieceStart)) {
+        if (allowEmptyEntries || pieceStart != hit)
+            result.append(substring(pieceStart, hit - pieceStart));
+        pieceStart = hit + 1;
+    }
+
+    // Whatever follows the last separator is the final piece.
+    if (allowEmptyEntries || pieceStart != static_cast<int>(length()))
+        result.append(substring(pieceStart));
+}
+
+// Convenience overload: splits at the code unit separator and drops empty
+// pieces. Uses the UChar overload directly rather than wrapping the separator
+// in a temporary one-character String first.
+void String::split(UChar separator, Vector<String>& result) const
+{
+    split(separator, false, result);
+}
+
+#ifndef NDEBUG
+// Debug-only helper. Returns StringImpl::ascii() when there is an impl, and
+// otherwise a zero-terminated placeholder message.
+Vector<char> String::ascii() const
+{
+    if (m_impl)
+        return m_impl->ascii();
+
+    Vector<char, 2048> buffer;
+    for (const char* p = "(null impl)"; *p; ++p)
+        buffer.append(*p);
+    buffer.append('\0');
+    return buffer;
+}
+#endif
+
+// Encodes the string's code units with Latin1Encoding().
+CString String::latin1() const
+{
+ return Latin1Encoding().encode(characters(), length());
+}
+
+// Encodes the string's code units with UTF8Encoding().
+CString String::utf8() const
+{
+ return UTF8Encoding().encode(characters(), length());
+}
+
+// Decodes `size` bytes at `string` with UTF8Encoding().
+String String::fromUTF8(const char* string, size_t size)
+{
+ return UTF8Encoding().decode(string, size);
+}
+
+// Decodes a zero-terminated byte sequence with UTF8Encoding(). A null pointer
+// yields a null String instead of being handed to strlen().
+String String::fromUTF8(const char* string)
+{
+    if (!string)
+        return String();
+    return UTF8Encoding().decode(string, strlen(string));
+}
+
+// Copies the characters of a KJS Identifier; a null Identifier creates no
+// StringImpl.
+String::String(const Identifier& str)
+{
+    if (!str.isNull())
+        m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.data()), str.size());
+}
+
+// Copies the characters of a KJS UString; a null UString creates no
+// StringImpl.
+String::String(const UString& str)
+{
+    if (!str.isNull())
+        m_impl = StringImpl::create(reinterpret_cast<const UChar*>(str.data()), str.size());
+}
+
+// Conversion to a KJS Identifier built from this string's code units; a null
+// string converts to a default-constructed Identifier.
+String::operator Identifier() const
+{
+    if (m_impl)
+        return Identifier(reinterpret_cast<const KJS::UChar*>(m_impl->characters()), m_impl->length());
+    return Identifier();
+}
+
+// Conversion to a KJS UString built from this string's code units; a null
+// string converts to a default-constructed UString.
+String::operator UString() const
+{
+    if (m_impl)
+        return UString(reinterpret_cast<const KJS::UChar*>(m_impl->characters()), m_impl->length());
+    return UString();
+}
+
+// String Operations
+
+// Reports whether c is a valid digit in the given base: an ASCII decimal digit
+// whose value is below base, or an ASCII letter (either case) among the first
+// base - 10 letters. Bases above 36 are treated as 36 for letters.
+static bool isCharacterAllowedInBase(UChar c, int base)
+{
+    if (c > 0x7F)
+        return false;
+    if (isASCIIDigit(c))
+        return c - '0' < base;
+    if (!isASCIIAlpha(c))
+        return false;
+
+    // How many letters count as digits in this base.
+    const int letterCount = (base > 36 ? 36 : base) - 10;
+    const bool isLowerDigit = c >= 'a' && c < 'a' + letterCount;
+    const bool isUpperDigit = c >= 'A' && c < 'A' + letterCount;
+    return isLowerDigit || isUpperDigit;
+}
+
+// Strict integer parser shared by the charactersTo*() helpers.
+//
+// Accepted input: optional leading whitespace, an optional sign ('-' only for
+// signed IntegralType, '+' for any), at least one digit valid in `base`, then
+// optional trailing whitespace and nothing else. On success *ok (when
+// supplied) is set to true and the value is returned; on any failure,
+// including overflow, *ok is set to false and 0 is returned.
+//
+// NOTE(review): base is used as a divisor below, so it is presumably expected
+// to be non-zero (callers in this file pass 10 or forward their own base).
+template <typename IntegralType>
+static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base)
+{
+ static const IntegralType integralMax = std::numeric_limits<IntegralType>::max();
+ static const bool isSigned = std::numeric_limits<IntegralType>::is_signed;
+ // Largest accumulated value that can still be multiplied by base.
+ const IntegralType maxMultiplier = integralMax / base;
+
+ IntegralType value = 0;
+ bool isOk = false;
+ bool isNegative = false;
+
+ if (!data)
+ goto bye;
+
+ // skip leading whitespace
+ while (length && isSpaceOrNewline(*data)) {
+ length--;
+ data++;
+ }
+
+ if (isSigned && length && *data == '-') {
+ length--;
+ data++;
+ isNegative = true;
+ } else if (length && *data == '+') {
+ length--;
+ data++;
+ }
+
+ // At least one digit is required.
+ if (!length || !isCharacterAllowedInBase(*data, base))
+ goto bye;
+
+ while (length && isCharacterAllowedInBase(*data, base)) {
+ length--;
+ IntegralType digitValue;
+ UChar c = *data;
+ if (isASCIIDigit(c))
+ digitValue = c - '0';
+ else if (c >= 'a')
+ digitValue = c - 'a' + 10;
+ else
+ digitValue = c - 'A' + 10;
+
+ // Overflow check before accumulating. "+ isNegative" admits a magnitude
+ // one larger than integralMax for negative input.
+ // NOTE(review): for the most negative value the accumulation below then
+ // exceeds integralMax before the negation, which is signed overflow for
+ // signed IntegralType — confirm this is acceptable on all targets.
+ if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
+ goto bye;
+
+ value = base * value + digitValue;
+ data++;
+ }
+
+ if (isNegative)
+ value = -value;
+
+ // skip trailing space
+ while (length && isSpaceOrNewline(*data)) {
+ length--;
+ data++;
+ }
+
+ // Anything left over means there were trailing non-space characters.
+ if (!length)
+ isOk = true;
+bye:
+ if (ok)
+ *ok = isOk;
+ return isOk ? value : 0;
+}
+
+// Returns the length of the longest prefix of data that looks like an integer:
+// leading whitespace, at most one sign character, then a run of characters
+// accepted by Unicode::isDigit.
+static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length)
+{
+    size_t pos = 0;
+
+    // Leading whitespace.
+    while (pos != length && isSpaceOrNewline(data[pos]))
+        ++pos;
+
+    // Optional sign.
+    if (pos != length && (data[pos] == '+' || data[pos] == '-'))
+        ++pos;
+
+    // Digits.
+    while (pos != length && Unicode::isDigit(data[pos]))
+        ++pos;
+
+    return pos;
+}
+
+// Strict conversions: the whole buffer (apart from surrounding whitespace)
+// must be a number in the given base. Each function instantiates
+// toIntegralType for its result type.
+int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<int>(data, length, ok, base);
+}
+
+unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<unsigned>(data, length, ok, base);
+}
+
+int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<int64_t>(data, length, ok, base);
+}
+
+uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
+{
+ return toIntegralType<uint64_t>(data, length, ok, base);
+}
+
+// Lenient base-10 conversions: only the integer-looking prefix found by
+// lengthOfCharactersAsInteger() is handed to toIntegralType, so trailing
+// non-numeric characters are ignored rather than treated as an error.
+int charactersToInt(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
+{
+ return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
+}
+
+// Parses a double with kjs_strtod. The code units are first narrowed into a
+// zero-terminated char buffer, with every unit that is not below 0x7F replaced
+// by '?'. *ok (when supplied) is true only if the parser consumed the whole
+// buffer; empty input fails and yields 0.0.
+double charactersToDouble(const UChar* data, size_t length, bool* ok)
+{
+    if (!length) {
+        if (ok)
+            *ok = false;
+        return 0.0;
+    }
+
+    Vector<char, 256> bytes(length + 1);
+    unsigned i = 0;
+    while (i < length) {
+        const UChar unit = data[i];
+        bytes[i] = unit < 0x7F ? unit : '?';
+        ++i;
+    }
+    bytes[length] = '\0';
+
+    char* end;
+    const double val = kjs_strtod(bytes.data(), &end);
+    if (ok)
+        *ok = !end || !*end;
+    return val;
+}
+
+// Parses via charactersToDouble() and narrows the result with
+// narrowPrecisionToFloat(); *ok is whatever the double parse reported.
+float charactersToFloat(const UChar* data, size_t length, bool* ok)
+{
+ // FIXME: This will return ok even when the string fits into a double but not a float.
+ return narrowPrecisionToFloat(charactersToDouble(data, length, ok));
+}
+
+} // namespace WebCore
+
+#ifndef NDEBUG
+// For debugging only -- leaks memory
+// Debug-build helper that heap-allocates a WebCore::String from a C string.
+// The returned object is never freed here (see the note above).
+WebCore::String* string(const char* s)
+{
+ return new WebCore::String(s);
+}
+#endif
diff --git a/WebCore/platform/text/StringBuffer.h b/WebCore/platform/text/StringBuffer.h
new file mode 100644
index 0000000..28d4e89
--- /dev/null
+++ b/WebCore/platform/text/StringBuffer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Inc. ("Apple") nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef StringBuffer_h
+#define StringBuffer_h
+
+#include <wtf/Assertions.h>
+#include <wtf/Noncopyable.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// Owns a fastMalloc'ed array of UChar with an explicit length. The storage is
+// released in the destructor unless release() has handed it over first.
+class StringBuffer : Noncopyable {
+public:
+    // Allocates room for `length` code units; the contents are uninitialized.
+    explicit StringBuffer(unsigned length)
+        : m_length(length)
+        , m_data(static_cast<UChar*>(fastMalloc(byteSizeFor(length))))
+    {
+    }
+    ~StringBuffer()
+    {
+        fastFree(m_data);
+    }
+
+    // Reduces the logical length without touching the allocation.
+    void shrink(unsigned newLength)
+    {
+        ASSERT(newLength <= m_length);
+        m_length = newLength;
+    }
+
+    // Sets the logical length, reallocating only when the buffer has to grow.
+    void resize(unsigned newLength)
+    {
+        if (newLength > m_length)
+            m_data = static_cast<UChar*>(fastRealloc(m_data, byteSizeFor(newLength)));
+        m_length = newLength;
+    }
+
+    unsigned length() const { return m_length; }
+    UChar* characters() { return m_data; }
+
+    UChar& operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
+
+    // Hands the storage to the caller; the destructor then frees nothing.
+    UChar* release() { UChar* data = m_data; m_data = 0; return data; }
+
+private:
+    // Byte size of `length` code units. Where size_t is 32 bits the
+    // multiplication could wrap and produce an undersized allocation, so an
+    // unrepresentable size crashes instead of allocating.
+    static size_t byteSizeFor(unsigned length)
+    {
+        if (static_cast<size_t>(length) > static_cast<size_t>(-1) / sizeof(UChar))
+            CRASH();
+        return static_cast<size_t>(length) * sizeof(UChar);
+    }
+
+    unsigned m_length;
+    UChar* m_data;
+};
+
+}
+
+#endif
diff --git a/WebCore/platform/text/StringHash.h b/WebCore/platform/text/StringHash.h
new file mode 100644
index 0000000..375b2e4
--- /dev/null
+++ b/WebCore/platform/text/StringHash.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2006, 2007 Apple Inc. All rights reserved
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef StringHash_h
+#define StringHash_h
+
+#include "AtomicStringImpl.h"
+#include "PlatformString.h"
+#include <wtf/HashTraits.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+ // Case-sensitive hash functions for StringImpl*, RefPtr<StringImpl> and
+ // String keys. The hash() overloads dereference the key without a null
+ // check; equal() accepts null pointers.
+ struct StringHash {
+ static unsigned hash(StringImpl* key) { return key->hash(); }
+ // Exact code-unit equality. Two nulls (or the same pointer) are equal; a
+ // null never equals a non-null.
+ static bool equal(StringImpl* a, StringImpl* b)
+ {
+ if (a == b)
+ return true;
+ if (!a || !b)
+ return false;
+
+ unsigned aLength = a->length();
+ unsigned bLength = b->length();
+ if (aLength != bLength)
+ return false;
+
+ // Compare two UTF-16 code units at a time by reading them as one
+ // 32-bit word.
+ // NOTE(review): assumes characters() is suitably aligned for 32-bit
+ // loads — confirm for all allocation paths.
+ const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters());
+ const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters());
+
+ unsigned halfLength = aLength >> 1;
+ for (unsigned i = 0; i != halfLength; ++i)
+ if (*aChars++ != *bChars++)
+ return false;
+
+ // Odd length: one 16-bit code unit is left over.
+ if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars))
+ return false;
+
+ return true;
+ }
+
+ static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); }
+ static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
+ {
+ return equal(a.get(), b.get());
+ }
+
+ static unsigned hash(const String& key) { return key.impl()->hash(); }
+ static bool equal(const String& a, const String& b)
+ {
+ return equal(a.impl(), b.impl());
+ }
+
+ static const bool safeToCompareToEmptyOrDeleted = false;
+ };
+
+ // Hash functions that ignore case: the hash is computed over case-folded
+ // code units and equality is decided by WTF::Unicode::umemcasecmp.
+ class CaseFoldingHash {
+ private:
+     // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
+     static const unsigned PHI = 0x9e3779b9U;
+ public:
+     // Paul Hsieh's SuperFastHash
+     // http://www.azillionmonkeys.com/qed/hash.html
+     static unsigned hash(StringImpl* str)
+     {
+         unsigned l = str->length();
+         const UChar* s = str->characters();
+         uint32_t hash = PHI;
+         uint32_t tmp;
+
+         int rem = l & 1;
+         l >>= 1;
+
+         // Main loop
+         for (; l > 0; l--) {
+             hash += WTF::Unicode::foldCase(s[0]);
+             tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
+             hash = (hash << 16) ^ tmp;
+             s += 2;
+             hash += hash >> 11;
+         }
+
+         // Handle end case
+         if (rem) {
+             hash += WTF::Unicode::foldCase(s[0]);
+             hash ^= hash << 11;
+             hash += hash >> 17;
+         }
+
+         // Force "avalanching" of final 127 bits
+         hash ^= hash << 3;
+         hash += hash >> 5;
+         hash ^= hash << 2;
+         hash += hash >> 15;
+         hash ^= hash << 10;
+
+         // this avoids ever returning a hash code of 0, since that is used to
+         // signal "hash not computed yet", using a value that is likely to be
+         // effectively the same as 0 when the low bits are masked
+         if (hash == 0)
+             hash = 0x80000000;
+
+         return hash;
+     }
+
+     static unsigned hash(const char* str, unsigned length)
+     {
+         // This hash is designed to work on 16-bit chunks at a time. But since the normal case
+         // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
+         // were 16-bit chunks, which will give matching results.
+         //
+         // The bytes are read as unsigned char: where plain char is signed, a
+         // byte >= 0x80 would otherwise be sign-extended before folding and the
+         // result would no longer match the UTF-16 hash of the same text.
+
+         unsigned l = length;
+         const unsigned char* s = reinterpret_cast<const unsigned char*>(str);
+         uint32_t hash = PHI;
+         uint32_t tmp;
+
+         int rem = l & 1;
+         l >>= 1;
+
+         // Main loop
+         for (; l > 0; l--) {
+             hash += WTF::Unicode::foldCase(s[0]);
+             tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
+             hash = (hash << 16) ^ tmp;
+             s += 2;
+             hash += hash >> 11;
+         }
+
+         // Handle end case
+         if (rem) {
+             hash += WTF::Unicode::foldCase(s[0]);
+             hash ^= hash << 11;
+             hash += hash >> 17;
+         }
+
+         // Force "avalanching" of final 127 bits
+         hash ^= hash << 3;
+         hash += hash >> 5;
+         hash ^= hash << 2;
+         hash += hash >> 15;
+         hash ^= hash << 10;
+
+         // this avoids ever returning a hash code of 0, since that is used to
+         // signal "hash not computed yet", using a value that is likely to be
+         // effectively the same as 0 when the low bits are masked
+         if (hash == 0)
+             hash = 0x80000000;
+
+         return hash;
+     }
+
+     // Case-insensitive equality. Two nulls (or the same pointer) are equal; a
+     // null never equals a non-null; lengths must match exactly.
+     static bool equal(StringImpl* a, StringImpl* b)
+     {
+         if (a == b)
+             return true;
+         if (!a || !b)
+             return false;
+         unsigned length = a->length();
+         if (length != b->length())
+             return false;
+         return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0;
+     }
+
+     static unsigned hash(const RefPtr<StringImpl>& key)
+     {
+         return hash(key.get());
+     }
+
+     static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
+     {
+         return equal(a.get(), b.get());
+     }
+
+     static unsigned hash(const String& key)
+     {
+         return hash(key.impl());
+     }
+     static bool equal(const String& a, const String& b)
+     {
+         return equal(a.impl(), b.impl());
+     }
+
+     static const bool safeToCompareToEmptyOrDeleted = false;
+ };
+
+}
+
+namespace WTF {
+
+ // store WebCore::String as StringImpl*
+
+ // Hash traits for WebCore::String keys. Storage uses the same traits as a
+ // raw StringImpl*, so the table has to ref and deref the stored pointer
+ // itself (needsRef).
+ template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> {
+ typedef HashTraits<WebCore::StringImpl*>::StorageTraits StorageTraits;
+ typedef StorageTraits::TraitType StorageType;
+ static const bool emptyValueIsZero = true;
+ static const bool needsRef = true;
+
+ // Lets a stored StorageType value be read back as the StringImpl* it holds.
+ typedef union {
+ WebCore::StringImpl* m_p;
+ StorageType m_s;
+ } UnionType;
+
+ static void ref(const StorageType& s) { ref(reinterpret_cast<const UnionType*>(&s)->m_p); }
+ static void deref(const StorageType& s) { deref(reinterpret_cast<const UnionType*>(&s)->m_p); }
+
+ // Null-tolerant ref/deref on the underlying StringImpl.
+ static void ref(const WebCore::StringImpl* str) { if (str) const_cast<WebCore::StringImpl*>(str)->ref(); }
+ static void deref(const WebCore::StringImpl* str) { if (str) const_cast<WebCore::StringImpl*>(str)->deref(); }
+ };
+
+ // share code between StringImpl*, RefPtr<StringImpl>, and String
+
+ // For both hashers, keys of type RefPtr<StringImpl> and String are mapped
+ // onto the traits of a plain StringImpl*.
+ template<> struct HashKeyStorageTraits<WebCore::StringHash, HashTraits<RefPtr<WebCore::StringImpl> > > {
+ typedef WebCore::StringHash Hash;
+ typedef HashTraits<WebCore::StringImpl*> Traits;
+ };
+ template<> struct HashKeyStorageTraits<WebCore::StringHash, HashTraits<WebCore::String> > {
+ typedef WebCore::StringHash Hash;
+ typedef HashTraits<WebCore::StringImpl*> Traits;
+ };
+
+ template<> struct HashKeyStorageTraits<WebCore::CaseFoldingHash, HashTraits<RefPtr<WebCore::StringImpl> > > {
+ typedef WebCore::CaseFoldingHash Hash;
+ typedef HashTraits<WebCore::StringImpl*> Traits;
+ };
+ template<> struct HashKeyStorageTraits<WebCore::CaseFoldingHash, HashTraits<WebCore::String> > {
+ typedef WebCore::CaseFoldingHash Hash;
+ typedef HashTraits<WebCore::StringImpl*> Traits;
+ };
+
+}
+
+#endif
diff --git a/WebCore/platform/text/StringImpl.cpp b/WebCore/platform/text/StringImpl.cpp
new file mode 100644
index 0000000..f9087b5
--- /dev/null
+++ b/WebCore/platform/text/StringImpl.cpp
@@ -0,0 +1,1041 @@
+/*
+ * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
+ * (C) 1999 Antti Koivisto (koivisto@kde.org)
+ * (C) 2001 Dirk Mueller ( mueller@kde.org )
+ * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "StringImpl.h"
+
+#include "AtomicString.h"
+#include "CString.h"
+#include "CharacterNames.h"
+#include "FloatConversion.h"
+#include "Length.h"
+#include "StringBuffer.h"
+#include "StringHash.h"
+#include "TextBreakIterator.h"
+#include "TextEncoding.h"
+#include <kjs/dtoa.h>
+#include <kjs/identifier.h>
+#include <wtf/Assertions.h>
+#include <wtf/unicode/Unicode.h>
+
+using namespace WTF;
+using namespace Unicode;
+
+using KJS::Identifier;
+using KJS::UString;
+
+namespace WebCore {
+
+// Allocates room for n UChars with fastMalloc(); release it with deleteUCharVector().
+static inline UChar* newUCharVector(unsigned n)
+{
+    void* storage = fastMalloc(sizeof(UChar) * n);
+    return static_cast<UChar*>(storage);
+}
+
+// Frees a character buffer with fastFree().
+static inline void deleteUCharVector(const UChar* p)
+{
+    // The buffer is held as const UChar*; drop the const before freeing it.
+    UChar* buffer = const_cast<UChar*>(p);
+    fastFree(buffer);
+}
+
+// This constructor is used only to create the empty string.
+// It leaves the object with zero length, a null data pointer and no cached hash.
+StringImpl::StringImpl()
+ : m_length(0)
+ , m_data(0)
+ , m_hash(0)
+ , m_inTable(false)
+ , m_hasTerminatingNullCharacter(false)
+{
+}
+
+// This is one of the most common constructors, but it's also used for the copy()
+// operation. Because of that, it's the one constructor that doesn't assert the
+// length is non-zero, since we support copying the empty string.
+// The characters are copied into a newly allocated buffer owned by this object.
+inline StringImpl::StringImpl(const UChar* characters, unsigned length)
+ : m_length(length)
+ , m_hash(0)
+ , m_inTable(false)
+ , m_hasTerminatingNullCharacter(false)
+{
+ UChar* data = newUCharVector(length);
+ memcpy(data, characters, length * sizeof(UChar));
+ m_data = data;
+}
+
+// Copies str into a buffer one element longer than its length and stores a 0
+// in the extra slot. m_length excludes that terminator; the hash is carried over.
+inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacter)
+ : m_length(str.m_length)
+ , m_hash(str.m_hash)
+ , m_inTable(false)
+ , m_hasTerminatingNullCharacter(true)
+{
+ UChar* data = newUCharVector(str.m_length + 1);
+ memcpy(data, str.m_data, str.m_length * sizeof(UChar));
+ data[str.m_length] = 0;
+ m_data = data;
+}
+
+// Builds a string from `length` 8-bit characters, widening each byte to a UChar.
+// Requires a non-null pointer and a non-zero length (asserted).
+inline StringImpl::StringImpl(const char* characters, unsigned length)
+ : m_length(length)
+ , m_hash(0)
+ , m_inTable(false)
+ , m_hasTerminatingNullCharacter(false)
+{
+ ASSERT(characters);
+ ASSERT(length);
+
+ UChar* data = newUCharVector(length);
+ for (unsigned i = 0; i != length; ++i) {
+ // Go through unsigned char so bytes >= 0x80 map to 0x80-0xFF rather than being sign-extended.
+ unsigned char c = characters[i];
+ data[i] = c;
+ }
+ m_data = data;
+}
+
+// Takes over an existing character buffer without copying it; the destructor
+// later frees m_data with deleteUCharVector(). Requires a non-null buffer and a
+// non-zero length (asserted).
+inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
+ : m_length(length)
+ , m_data(characters)
+ , m_hash(0)
+ , m_inTable(false)
+ , m_hasTerminatingNullCharacter(false)
+{
+ ASSERT(characters);
+ ASSERT(length);
+}
+
+// This constructor is only for use by AtomicString.
+// It copies the characters, stores the caller-supplied (non-zero) hash and marks
+// the string as being in the table, so the destructor calls AtomicString::remove().
+StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
+ : m_length(length)
+ , m_hash(hash)
+ , m_inTable(true)
+ , m_hasTerminatingNullCharacter(false)
+{
+ ASSERT(hash);
+ ASSERT(characters);
+ ASSERT(length);
+
+ UChar* data = newUCharVector(length);
+ memcpy(data, characters, length * sizeof(UChar));
+ m_data = data;
+}
+
+// This constructor is only for use by AtomicString.
+// 8-bit variant: widens each byte to a UChar, stores the caller-supplied
+// (non-zero) hash and marks the string as being in the table.
+StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash)
+ : m_length(length)
+ , m_hash(hash)
+ , m_inTable(true)
+ , m_hasTerminatingNullCharacter(false)
+{
+ ASSERT(hash);
+ ASSERT(characters);
+ ASSERT(length);
+
+ UChar* data = newUCharVector(length);
+ for (unsigned i = 0; i != length; ++i) {
+ // Go through unsigned char so bytes >= 0x80 are not sign-extended.
+ unsigned char c = characters[i];
+ data[i] = c;
+ }
+ m_data = data;
+}
+
+// Unregisters the string from AtomicString if it was marked as in the table,
+// then frees the character buffer.
+StringImpl::~StringImpl()
+{
+ if (m_inTable)
+ AtomicString::remove(this);
+ deleteUCharVector(m_data);
+}
+
+// Returns the shared empty string, a function-local static built with the
+// default constructor.
+StringImpl* StringImpl::empty()
+{
+ static StringImpl e;
+ return &e;
+}
+
+// Returns true when every character satisfies isASCIISpace(); an empty string
+// therefore yields true.
+bool StringImpl::containsOnlyWhitespace()
+{
+    // FIXME: The definition of whitespace here includes a number of characters
+    // that are not whitespace from the point of view of RenderText; it is
+    // unclear whether that is a problem in practice.
+    const UChar* cursor = m_data;
+    const UChar* const end = m_data + m_length;
+    while (cursor != end) {
+        if (!isASCIISpace(*cursor))
+            return false;
+        ++cursor;
+    }
+    return true;
+}
+
+// Returns a new string holding up to len characters starting at pos.
+// A pos at or beyond the end yields the shared empty string.
+PassRefPtr<StringImpl> StringImpl::substring(unsigned pos, unsigned len)
+{
+    if (pos >= m_length)
+        return empty();
+
+    // Clamp the requested length to what is available after pos.
+    const unsigned available = m_length - pos;
+    return create(m_data + pos, len > available ? available : len);
+}
+
+// Returns the code point that starts at index i: the code unit itself when it
+// is not a surrogate, the combined supplementary code point for a valid
+// lead/trail pair, and 0 otherwise (unpaired or truncated surrogate).
+// NOTE(review): i itself is not range-checked here; callers must pass i < length().
+UChar32 StringImpl::characterStartingAt(unsigned i)
+{
+ if (U16_IS_SINGLE(m_data[i]))
+ return m_data[i];
+ if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
+ return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
+ return 0;
+}
+
+// Parses one length token into a Length.
+//   empty input                 -> Length(1, Relative)
+//   integer followed by '%'     -> Percent
+//   integer followed by '*'     -> Relative
+//   integer alone               -> Fixed
+//   no integer, then '*' or '%' -> Length(1, Relative)
+//   anything else               -> Length(0, Relative)
+static Length parseLength(const UChar* data, unsigned length)
+{
+ if (length == 0)
+ return Length(1, Relative);
+
+ // Find the end of the integer prefix: leading whitespace, optional sign, digits.
+ unsigned i = 0;
+ while (i < length && isSpaceOrNewline(data[i]))
+ ++i;
+ if (i < length && (data[i] == '+' || data[i] == '-'))
+ ++i;
+ while (i < length && Unicode::isDigit(data[i]))
+ ++i;
+
+ // Convert just that prefix; ok tells whether it was a valid integer.
+ bool ok;
+ int r = charactersToIntStrict(data, i, &ok);
+
+ /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */
+ while (i < length && (Unicode::isDigit(data[i]) || data[i] == '.'))
+ ++i;
+
+ /* IE Quirk: Skip any whitespace (20 % => 20%) */
+ while (i < length && isSpaceOrNewline(data[i]))
+ ++i;
+
+ if (ok) {
+ // The character after the number (if any) selects the unit.
+ if (i < length) {
+ UChar next = data[i];
+ if (next == '%')
+ return Length(static_cast<double>(r), Percent);
+ if (next == '*')
+ return Length(r, Relative);
+ }
+ return Length(r, Fixed);
+ } else {
+ // No usable number: a bare '*' or '%' both map to a relative length of 1.
+ if (i < length) {
+ UChar next = data[i];
+ if (next == '*')
+ return Length(1, Relative);
+ if (next == '%')
+ return Length(1, Relative);
+ }
+ }
+ return Length(0, Relative);
+}
+
+// Parses the whole string as a single length; see parseLength() for the rules.
+Length StringImpl::toLength()
+{
+ return parseLength(m_data, m_length);
+}
+
+// Counts how many code units of the string are equal to the given character.
+static int countCharacter(StringImpl* string, UChar character)
+{
+    int matches = 0;
+    for (int index = 0, end = string->length(); index < end; ++index) {
+        if ((*string)[index] == character)
+            ++matches;
+    }
+    return matches;
+}
+
+// Splits the string into length values and returns them in an array allocated
+// with new[]; the element count is written to len.
+// Every character other than a digit, '-', '*' or '.' is treated as a separator.
+// NOTE(review): ownership of the returned array is not visible here; presumably
+// the caller must delete[] it.
+Length* StringImpl::toCoordsArray(int& len)
+{
+ // Turn every separator character into a space.
+ StringBuffer spacified(m_length);
+ for (unsigned i = 0; i < m_length; i++) {
+ UChar cc = m_data[i];
+ if (cc > '9' || (cc < '0' && cc != '-' && cc != '*' && cc != '.'))
+ spacified[i] = ' ';
+ else
+ spacified[i] = cc;
+ }
+ RefPtr<StringImpl> str = adopt(spacified);
+
+ // Collapse runs of spaces so tokens are separated by exactly one space.
+ str = str->simplifyWhiteSpace();
+
+ len = countCharacter(str.get(), ' ') + 1;
+ Length* r = new Length[len];
+
+ int i = 0;
+ int pos = 0;
+ int pos2;
+
+ // Parse each space-delimited token, then the final token after the last space.
+ while ((pos2 = str->find(' ', pos)) != -1) {
+ r[i++] = parseLength(str->characters() + pos, pos2 - pos);
+ pos = pos2+1;
+ }
+ r[i] = parseLength(str->characters() + pos, str->length() - pos);
+
+ ASSERT(i == len - 1);
+
+ return r;
+}
+
+// Splits the whitespace-simplified string on ',' and returns the parsed lengths
+// in an array allocated with new[]; the element count is written to len.
+// For a string that is empty after simplification this returns 0 (no array)
+// while setting len to 1.
+// NOTE(review): ownership of the returned array is not visible here; presumably
+// the caller must delete[] it.
+Length* StringImpl::toLengthArray(int& len)
+{
+ RefPtr<StringImpl> str = simplifyWhiteSpace();
+ if (!str->length()) {
+ len = 1;
+ return 0;
+ }
+
+ len = countCharacter(str.get(), ',') + 1;
+ Length* r = new Length[len];
+
+ int i = 0;
+ int pos = 0;
+ int pos2;
+
+ // Parse each comma-terminated token.
+ while ((pos2 = str->find(',', pos)) != -1) {
+ r[i++] = parseLength(str->characters() + pos, pos2 - pos);
+ pos = pos2+1;
+ }
+
+ ASSERT(i == len - 1);
+
+ /* IE Quirk: If the last comma is the last char skip it and reduce len by one */
+ if (str->length()-pos > 0)
+ r[i] = parseLength(str->characters() + pos, str->length() - pos);
+ else
+ len--;
+
+ return r;
+}
+
+// Returns true when every character is lowercase. For an all-ASCII string this
+// means every character satisfies isASCIILower(); otherwise every code point
+// must satisfy Unicode::isLower().
+bool StringImpl::isLower()
+{
+ // Do a faster loop for the case where all the characters are ASCII.
+ bool allLower = true;
+ UChar ored = 0;
+ for (unsigned i = 0; i < m_length; i++) {
+ UChar c = m_data[i];
+ allLower = allLower && isASCIILower(c);
+ ored |= c;
+ }
+ // ored has a bit above 0x7F set only if some character was non-ASCII.
+ if (!(ored & ~0x7F))
+ return allLower;
+
+ // Do a slower check for cases that include non-ASCII characters.
+ allLower = true;
+ unsigned i = 0;
+ while (i < m_length) {
+ UChar32 character;
+ // U16_NEXT reads one code point and advances i past it.
+ U16_NEXT(m_data, i, m_length, character)
+ allLower = allLower && Unicode::isLower(character);
+ }
+ return allLower;
+}
+
+// Returns a lowercased copy of this string. If the Unicode conversion reports
+// an error on the retry, this string itself is returned unchanged.
+PassRefPtr<StringImpl> StringImpl::lower()
+{
+    StringBuffer data(m_length);
+    int32_t length = m_length;
+
+    // Do a faster loop for the case where all the characters are ASCII.
+    UChar ored = 0;
+    for (int i = 0; i < length; i++) {
+        UChar c = m_data[i];
+        ored |= c;
+        data[i] = toASCIILower(c);
+    }
+    if (!(ored & ~0x7F))
+        return adopt(data);
+
+    // Do a slower implementation for cases that include non-ASCII characters.
+    bool error;
+    int32_t realLength = Unicode::toLower(data.characters(), length, m_data, m_length, &error);
+    if (!error && realLength == length)
+        return adopt(data);
+
+    // The lowercased text has a different length. Resize the buffer and convert
+    // again, passing the resized capacity (realLength). Passing the old length
+    // here made a result longer than the source fail a second time, so the
+    // string came back unlowercased.
+    data.resize(realLength);
+    Unicode::toLower(data.characters(), realLength, m_data, m_length, &error);
+    if (error)
+        return this;
+    return adopt(data);
+}
+
+// Returns an uppercased copy of this string, or this string itself if the
+// conversion reports an error.
+PassRefPtr<StringImpl> StringImpl::upper()
+{
+ bool error;
+ // First call with no output buffer: only its return value (the length used
+ // to size the buffer) matters; error is overwritten by the second call.
+ int32_t length = Unicode::toUpper(0, 0, m_data, m_length, &error);
+ StringBuffer data(length);
+ Unicode::toUpper(data.characters(), length, m_data, m_length, &error);
+ if (error)
+ return this;
+ return adopt(data);
+}
+
+// Returns a string of the same length in which every character is aChar.
+PassRefPtr<StringImpl> StringImpl::secure(UChar aChar)
+{
+    const int count = m_length;
+    StringBuffer masked(count);
+    int filled = 0;
+    while (filled < count)
+        masked[filled++] = aChar;
+    return adopt(masked);
+}
+
+// Returns a case-folded copy of this string. If the Unicode conversion reports
+// an error on the retry, this string itself is returned unchanged.
+PassRefPtr<StringImpl> StringImpl::foldCase()
+{
+    StringBuffer data(m_length);
+    int32_t length = m_length;
+
+    // Do a faster loop for the case where all the characters are ASCII.
+    UChar ored = 0;
+    for (int i = 0; i < length; i++) {
+        UChar c = m_data[i];
+        ored |= c;
+        data[i] = toASCIILower(c);
+    }
+    if (!(ored & ~0x7F))
+        return adopt(data);
+
+    // Do a slower implementation for cases that include non-ASCII characters.
+    bool error;
+    int32_t realLength = Unicode::foldCase(data.characters(), length, m_data, m_length, &error);
+    if (!error && realLength == length)
+        return adopt(data);
+
+    // The folded text has a different length. Resize the buffer and convert
+    // again, passing the resized capacity (realLength). Passing the old length
+    // here made a result longer than the source fail a second time, so the
+    // string came back unfolded.
+    data.resize(realLength);
+    Unicode::foldCase(data.characters(), realLength, m_data, m_length, &error);
+    if (error)
+        return this;
+    return adopt(data);
+}
+
+// Returns a copy with leading and trailing whitespace (per isSpaceOrNewline())
+// removed; an empty or all-whitespace string yields the shared empty string.
+PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
+{
+ if (!m_length)
+ return empty();
+
+ unsigned start = 0;
+ unsigned end = m_length - 1;
+
+ // skip white space from start
+ while (start <= end && isSpaceOrNewline(m_data[start]))
+ start++;
+
+ // only white space
+ if (start > end)
+ return empty();
+
+ // skip white space from end
+ // (m_data[start] is known to be non-whitespace here, so end stops at or after start)
+ while (end && isSpaceOrNewline(m_data[end]))
+ end--;
+
+ return create(m_data + start, end + 1 - start);
+}
+
+// Returns a copy in which leading/trailing whitespace is removed and every
+// internal run of whitespace (per isSpaceOrNewline()) becomes a single ' '.
+PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
+{
+ StringBuffer data(m_length);
+
+ const UChar* from = m_data;
+ const UChar* fromend = from + m_length;
+ int outc = 0;
+
+ UChar* to = data.characters();
+
+ while (true) {
+ // Skip a whitespace run, copy the following word, then emit one separator
+ // if more input remains.
+ while (from != fromend && isSpaceOrNewline(*from))
+ from++;
+ while (from != fromend && !isSpaceOrNewline(*from))
+ to[outc++] = *from++;
+ if (from != fromend)
+ to[outc++] = ' ';
+ else
+ break;
+ }
+
+ // Drop the separator written before trailing whitespace, if any.
+ if (outc > 0 && to[outc - 1] == ' ')
+ outc--;
+
+ data.shrink(outc);
+
+ return adopt(data);
+}
+
+// Returns a copy in which the first character of each word is title-cased.
+// `previous` is the character preceding this string; it is prepended to the
+// text given to the word-break iterator and is not part of the result.
+// Returns this string itself when no word-break iterator is available.
+PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous)
+{
+ // Build "previous + this string", with no-break spaces turned into plain spaces.
+ StringBuffer stringWithPrevious(m_length + 1);
+ stringWithPrevious[0] = previous == noBreakSpace ? ' ' : previous;
+ for (unsigned i = 1; i < m_length + 1; i++) {
+ // Replace &nbsp with a real space since ICU no longer treats &nbsp as a word separator.
+ if (m_data[i - 1] == noBreakSpace)
+ stringWithPrevious[i] = ' ';
+ else
+ stringWithPrevious[i] = m_data[i - 1];
+ }
+
+ TextBreakIterator* boundary = wordBreakIterator(stringWithPrevious.characters(), m_length + 1);
+ if (!boundary)
+ return this;
+
+ StringBuffer data(m_length);
+
+ // Indices from the iterator refer to stringWithPrevious, so subtract 1 to
+ // address m_data / data.
+ int32_t endOfWord;
+ int32_t startOfWord = textBreakFirst(boundary);
+ for (endOfWord = textBreakNext(boundary); endOfWord != TextBreakDone; startOfWord = endOfWord, endOfWord = textBreakNext(boundary)) {
+ if (startOfWord != 0) // Ignore first char of previous string
+ data[startOfWord - 1] = m_data[startOfWord - 1] == noBreakSpace ? noBreakSpace : toTitleCase(stringWithPrevious[startOfWord]);
+ // The rest of the segment is copied from the original, keeping any no-break spaces.
+ for (int i = startOfWord + 1; i < endOfWord; i++)
+ data[i - 1] = m_data[i - 1];
+ }
+
+ return adopt(data);
+}
+
+// Strict integer conversions: each forwards the string's characters, the ok
+// out-parameter and the base to the matching charactersTo...Strict() helper.
+int StringImpl::toIntStrict(bool* ok, int base)
+{
+ return charactersToIntStrict(m_data, m_length, ok, base);
+}
+
+unsigned StringImpl::toUIntStrict(bool* ok, int base)
+{
+ return charactersToUIntStrict(m_data, m_length, ok, base);
+}
+
+int64_t StringImpl::toInt64Strict(bool* ok, int base)
+{
+ return charactersToInt64Strict(m_data, m_length, ok, base);
+}
+
+uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
+{
+ return charactersToUInt64Strict(m_data, m_length, ok, base);
+}
+
+// Non-strict numeric conversions: each forwards the string's characters and
+// the ok out-parameter to the matching charactersTo...() helper.
+int StringImpl::toInt(bool* ok)
+{
+ return charactersToInt(m_data, m_length, ok);
+}
+
+unsigned StringImpl::toUInt(bool* ok)
+{
+ return charactersToUInt(m_data, m_length, ok);
+}
+
+int64_t StringImpl::toInt64(bool* ok)
+{
+ return charactersToInt64(m_data, m_length, ok);
+}
+
+uint64_t StringImpl::toUInt64(bool* ok)
+{
+ return charactersToUInt64(m_data, m_length, ok);
+}
+
+double StringImpl::toDouble(bool* ok)
+{
+ return charactersToDouble(m_data, m_length, ok);
+}
+
+float StringImpl::toFloat(bool* ok)
+{
+ return charactersToFloat(m_data, m_length, ok);
+}
+
+// Compares `length` UTF-16 code units against `length` 8-bit characters.
+static bool equal(const UChar* a, const char* b, int length)
+{
+    ASSERT(length >= 0);
+    for (int remaining = length; remaining; --remaining, ++a, ++b) {
+        // Widen through unsigned char so bytes >= 0x80 are not sign-extended.
+        unsigned char byte = *b;
+        if (*a != byte)
+            return false;
+    }
+    return true;
+}
+
+// Case-insensitive variant of equal(): both sides are passed through
+// foldCase() before being compared.
+static bool equalIgnoringCase(const UChar* a, const char* b, int length)
+{
+    ASSERT(length >= 0);
+    for (int remaining = length; remaining; --remaining, ++a, ++b) {
+        // Widen through unsigned char so bytes >= 0x80 are not sign-extended.
+        unsigned char byte = *b;
+        if (foldCase(*a) != foldCase(byte))
+            return false;
+    }
+    return true;
+}
+
+// Case-insensitive comparison of two UTF-16 buffers of the same length,
+// delegated to umemcasecmp().
+static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
+{
+ ASSERT(length >= 0);
+ return umemcasecmp(a, b, length) == 0;
+}
+
+// Searches for the 8-bit string chs starting at index and returns the position
+// of the first match, or -1 when there is none. A null chs, a negative index,
+// or an index past the end all yield -1. An empty chs matches at index, which
+// is what the StringImpl* overload returns for an empty needle.
+int StringImpl::find(const char* chs, int index, bool caseSensitive)
+{
+    if (!chs || index < 0)
+        return -1;
+
+    int chsLength = strlen(chs);
+    int n = m_length - index;
+    if (n < 0)
+        return -1;
+    // Without this guard an empty needle scanned one element past the buffer
+    // and called the comparison helpers with a length of -1.
+    if (!chsLength)
+        return index;
+    // n becomes the number of candidate start positions.
+    n -= chsLength - 1;
+    if (n <= 0)
+        return -1;
+
+    const char* chsPlusOne = chs + 1;
+    int chsLengthMinusOne = chsLength - 1;
+
+    // Widen the first needle byte through unsigned char, exactly as the
+    // helpers do for the remaining bytes; a plain char would sign-extend bytes
+    // >= 0x80 where char is signed and never match.
+    const unsigned char first = *chs;
+
+    const UChar* ptr = m_data + index - 1;
+    if (caseSensitive) {
+        UChar c = first;
+        do {
+            if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
+                return m_length - chsLength - n + 1;
+        } while (--n);
+    } else {
+        UChar lc = Unicode::foldCase(first);
+        do {
+            if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
+                return m_length - chsLength - n + 1;
+        } while (--n);
+    }
+
+    return -1;
+}
+
+// Single-character search; forwards to the free function WebCore::find().
+int StringImpl::find(UChar c, int start)
+{
+ return WebCore::find(m_data, m_length, c, start);
+}
+
+// Searches for str starting at index and returns the position of the first
+// match, or -1. A negative index counts from the end of the string.
+// NOTE(review): in the case-insensitive branch the rolling sums use
+// toASCIILower() while the comparison uses umemcasecmp(); non-ASCII characters
+// that differ only in case have different sums and so are never compared.
+int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
+{
+ /*
+ We use a simple trick for efficiency's sake. Instead of
+ comparing strings, we compare the sum of str with that of
+ a part of this string. Only if that matches, we call memcmp
+ or equalIgnoringCase.
+ */
+ ASSERT(str);
+ if (index < 0)
+ index += m_length;
+ int lstr = str->m_length;
+ int lthis = m_length - index;
+ // Rejects an index that is still negative or lies beyond the end.
+ if ((unsigned)lthis > m_length)
+ return -1;
+ // delta is the last offset (relative to index) at which str could still fit.
+ int delta = lthis - lstr;
+ if (delta < 0)
+ return -1;
+
+ const UChar* uthis = m_data + index;
+ const UChar* ustr = str->m_data;
+ unsigned hthis = 0;
+ unsigned hstr = 0;
+ if (caseSensitive) {
+ // Sum of the needle and of the first window.
+ for (int i = 0; i < lstr; i++) {
+ hthis += uthis[i];
+ hstr += ustr[i];
+ }
+ int i = 0;
+ while (1) {
+ if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
+ return index + i;
+ if (i == delta)
+ return -1;
+ // Slide the window one character to the right.
+ hthis += uthis[i + lstr];
+ hthis -= uthis[i];
+ i++;
+ }
+ } else {
+ for (int i = 0; i < lstr; i++ ) {
+ hthis += toASCIILower(uthis[i]);
+ hstr += toASCIILower(ustr[i]);
+ }
+ int i = 0;
+ while (1) {
+ if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
+ return index + i;
+ if (i == delta)
+ return -1;
+ // Slide the window one character to the right.
+ hthis += toASCIILower(uthis[i + lstr]);
+ hthis -= toASCIILower(uthis[i]);
+ i++;
+ }
+ }
+}
+
+// Single-character backward search; forwards to the free function WebCore::reverseFind().
+int StringImpl::reverseFind(UChar c, int index)
+{
+ return WebCore::reverseFind(m_data, m_length, c, index);
+}
+
+// Searches backwards for str, starting at index (clamped to the last position
+// where str fits), and returns the position of the match or -1.
+// A negative index counts from the end of the string.
+int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
+{
+ /*
+ See StringImpl::find() for explanations.
+ */
+ ASSERT(str);
+ int lthis = m_length;
+ if (index < 0)
+ index += lthis;
+
+ int lstr = str->m_length;
+ // delta is the last position at which str could still fit.
+ int delta = lthis - lstr;
+ if ( index < 0 || index > lthis || delta < 0 )
+ return -1;
+ if ( index > delta )
+ index = delta;
+
+ const UChar *uthis = m_data;
+ const UChar *ustr = str->m_data;
+ unsigned hthis = 0;
+ unsigned hstr = 0;
+ int i;
+ if (caseSensitive) {
+ // Sum of the needle and of the window starting at index.
+ for ( i = 0; i < lstr; i++ ) {
+ hthis += uthis[index + i];
+ hstr += ustr[i];
+ }
+ i = index;
+ while (1) {
+ if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
+ return i;
+ if (i == 0)
+ return -1;
+ // Slide the window one character to the left.
+ i--;
+ hthis -= uthis[i + lstr];
+ hthis += uthis[i];
+ }
+ } else {
+ for (i = 0; i < lstr; i++) {
+ hthis += toASCIILower(uthis[index + i]);
+ hstr += toASCIILower(ustr[i]);
+ }
+ i = index;
+ while (1) {
+ if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
+ return i;
+ if (i == 0)
+ return -1;
+ // Slide the window one character to the left.
+ i--;
+ hthis -= toASCIILower(uthis[i + lstr]);
+ hthis += toASCIILower(uthis[i]);
+ }
+ }
+
+ // Should never get here.
+ return -1;
+}
+
+// Returns true when this string ends with the given suffix.
+// NOTE(review): the parameter is named m_data and hides the member of the same
+// name inside this function; every m_data below refers to the suffix argument.
+bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
+{
+ ASSERT(m_data);
+ // start is where the suffix would have to begin; negative if it is longer than this string.
+ int start = m_length - m_data->m_length;
+ if (start >= 0)
+ return (find(m_data, start, caseSensitive) == start);
+ return false;
+}
+
+// Returns a copy with every occurrence of oldC replaced by newC. When the two
+// characters are equal, or oldC does not occur, this string itself is returned.
+PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
+{
+    if (oldC == newC)
+        return this;
+
+    // Look for the first occurrence; without one no new string is needed.
+    unsigned firstMatch = 0;
+    while (firstMatch != m_length && m_data[firstMatch] != oldC)
+        ++firstMatch;
+    if (firstMatch == m_length)
+        return this;
+
+    StringBuffer data(m_length);
+    for (unsigned i = 0; i != m_length; ++i)
+        data[i] = m_data[i] == oldC ? newC : m_data[i];
+    return adopt(data);
+}
+
+// Returns a copy in which lengthToReplace characters starting at position are
+// replaced by str (a null str inserts nothing). position and lengthToReplace
+// are clamped to the string; if nothing would be removed or inserted, this
+// string itself is returned.
+PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
+{
+ position = min(position, length());
+ lengthToReplace = min(lengthToReplace, length() - position);
+ unsigned lengthToInsert = str ? str->length() : 0;
+ if (!lengthToReplace && !lengthToInsert)
+ return this;
+ StringBuffer buffer(length() - lengthToReplace + lengthToInsert);
+ // Copy the prefix, the inserted text, then the part after the replaced range.
+ memcpy(buffer.characters(), characters(), position * sizeof(UChar));
+ if (str)
+ memcpy(buffer.characters() + position, str->characters(), lengthToInsert * sizeof(UChar));
+ memcpy(buffer.characters() + position + lengthToInsert, characters() + position + lengthToReplace,
+ (length() - position - lengthToReplace) * sizeof(UChar));
+ return adopt(buffer);
+}
+
+// Returns a copy in which every occurrence of the character pattern is
+// replaced by the string replacement. A null replacement or a string without
+// matches returns this string itself.
+PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
+{
+ if (!replacement)
+ return this;
+
+ int repStrLength = replacement->length();
+ int srcSegmentStart = 0;
+ int matchCount = 0;
+
+ // Count the matches
+ while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+ ++matchCount;
+ ++srcSegmentStart;
+ }
+
+ // If we have 0 matches, we don't have to do any more work
+ if (!matchCount)
+ return this;
+
+ // Each match removes one character and adds repStrLength characters.
+ StringBuffer data(m_length - matchCount + (matchCount * repStrLength));
+
+ // Construct the new data
+ int srcSegmentEnd;
+ int srcSegmentLength;
+ srcSegmentStart = 0;
+ int dstOffset = 0;
+
+ // Alternate: copy the text before a match, then the replacement.
+ while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+ srcSegmentLength = srcSegmentEnd - srcSegmentStart;
+ memcpy(data.characters() + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
+ dstOffset += srcSegmentLength;
+ memcpy(data.characters() + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
+ dstOffset += repStrLength;
+ srcSegmentStart = srcSegmentEnd + 1;
+ }
+
+ // Copy whatever follows the last match.
+ srcSegmentLength = m_length - srcSegmentStart;
+ memcpy(data.characters() + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
+
+ ASSERT(dstOffset + srcSegmentLength == static_cast<int>(data.length()));
+
+ return adopt(data);
+}
+
+// Returns a copy in which every non-overlapping occurrence of pattern is
+// replaced by replacement. A null or empty pattern, a null replacement, or a
+// string without matches returns this string itself.
+PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
+{
+ if (!pattern || !replacement)
+ return this;
+
+ int patternLength = pattern->length();
+ if (!patternLength)
+ return this;
+
+ int repStrLength = replacement->length();
+ int srcSegmentStart = 0;
+ int matchCount = 0;
+
+ // Count the matches
+ while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
+ ++matchCount;
+ srcSegmentStart += patternLength;
+ }
+
+ // If we have 0 matches, we don't have to do any more work
+ if (!matchCount)
+ return this;
+
+ // Each match changes the length by (repStrLength - patternLength).
+ StringBuffer data(m_length + matchCount * (repStrLength - patternLength));
+
+ // Construct the new data
+ int srcSegmentEnd;
+ int srcSegmentLength;
+ srcSegmentStart = 0;
+ int dstOffset = 0;
+
+ // Alternate: copy the text before a match, then the replacement.
+ while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
+ srcSegmentLength = srcSegmentEnd - srcSegmentStart;
+ memcpy(data.characters() + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
+ dstOffset += srcSegmentLength;
+ memcpy(data.characters() + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
+ dstOffset += repStrLength;
+ srcSegmentStart = srcSegmentEnd + patternLength;
+ }
+
+ // Copy whatever follows the last match.
+ srcSegmentLength = m_length - srcSegmentStart;
+ memcpy(data.characters() + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
+
+ ASSERT(dstOffset + srcSegmentLength == static_cast<int>(data.length()));
+
+ return adopt(data);
+}
+
+// Case-sensitive equality of two strings, delegated to StringHash::equal().
+bool equal(StringImpl* a, StringImpl* b)
+{
+ return StringHash::equal(a, b);
+}
+
+// Compares a string with a null-terminated 8-bit string. Two null pointers
+// compare equal; a null pointer never equals a non-null one.
+bool equal(StringImpl* a, const char* b)
+{
+    if (!a || !b)
+        return !a && !b;
+
+    const UChar* as = a->characters();
+    const unsigned length = a->length();
+    unsigned i = 0;
+    for (; i != length; ++i) {
+        // Widen through unsigned char so bytes >= 0x80 are not sign-extended.
+        unsigned char bc = b[i];
+        // b ending early or a differing character both mean "not equal".
+        if (!bc || as[i] != bc)
+            return false;
+    }
+
+    // Equal only if b ends exactly where a does.
+    return !b[i];
+}
+
+// Case-insensitive equality of two strings, delegated to CaseFoldingHash::equal().
+bool equalIgnoringCase(StringImpl* a, StringImpl* b)
+{
+ return CaseFoldingHash::equal(a, b);
+}
+
+// Case-insensitive comparison of a string with a null-terminated 8-bit string.
+// Two null pointers compare equal; a null pointer never equals a non-null one.
+bool equalIgnoringCase(StringImpl* a, const char* b)
+{
+ if (!a)
+ return !b;
+ if (!b)
+ return !a;
+
+ unsigned length = a->length();
+ const UChar* as = a->characters();
+
+ // Do a faster loop for the case where all the characters are ASCII.
+ UChar ored = 0;
+ bool equal = true;
+ for (unsigned i = 0; i != length; ++i) {
+ char bc = b[i];
+ // b is shorter than a.
+ if (!bc)
+ return false;
+ UChar ac = as[i];
+ ored |= ac;
+ equal = equal && (toASCIILower(ac) == toASCIILower(bc));
+ }
+
+ // Do a slower implementation for cases that include non-ASCII characters.
+ // (The loop above already established that b has at least `length` characters.)
+ if (ored & ~0x7F) {
+ equal = true;
+ for (unsigned i = 0; i != length; ++i) {
+ unsigned char bc = b[i];
+ equal = equal && (foldCase(as[i]) == foldCase(bc));
+ }
+ }
+
+ // b must also end exactly where a does.
+ return equal && !b[length];
+}
+
+// Returns a null-terminated 8-bit rendering of the string. Characters in
+// 0x20..0x7E and embedded nulls are kept; everything else becomes '?'.
+Vector<char> StringImpl::ascii()
+{
+    Vector<char> buffer(m_length + 1);
+    for (unsigned i = 0; i != m_length; ++i) {
+        const UChar c = m_data[i];
+        const bool keep = !c || (c >= 0x20 && c < 0x7F);
+        buffer[i] = keep ? static_cast<char>(c) : '?';
+    }
+    buffer[m_length] = '\0';
+    return buffer;
+}
+
+// Returns the direction of the first character whose direction is
+// LeftToRight, RightToLeft or RightToLeftArabic (the latter reported as
+// RightToLeft). Falls back to LeftToRight when there is no such character.
+WTF::Unicode::Direction StringImpl::defaultWritingDirection()
+{
+    for (unsigned i = 0; i < m_length; ++i) {
+        switch (WTF::Unicode::direction(m_data[i])) {
+        case WTF::Unicode::LeftToRight:
+            return WTF::Unicode::LeftToRight;
+        case WTF::Unicode::RightToLeft:
+        case WTF::Unicode::RightToLeftArabic:
+            return WTF::Unicode::RightToLeft;
+        default:
+            break;
+        }
+    }
+    return WTF::Unicode::LeftToRight;
+}
+
+// This is a hot function because it's used when parsing HTML.
+// Creates a string from the given characters with all null (0) characters
+// removed. Requires a non-null pointer and a non-zero length (asserted).
+PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
+{
+    ASSERT(characters);
+    ASSERT(length);
+
+    // First pass: copy everything and record whether any null was seen.
+    StringBuffer strippedCopy(length);
+    int foundNull = 0;
+    for (unsigned i = 0; i < length; i++) {
+        int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
+        strippedCopy[i] = c;
+        // !c is 1 only for a null character. The previous "~c" was non-zero for
+        // every character, so the fast path below was never taken.
+        foundNull |= !c;
+    }
+    // Fast path: no nulls, so the straight copy is already the result.
+    if (!foundNull)
+        return adoptRef(new StringImpl(strippedCopy.release(), length, AdoptBuffer()));
+
+    // Slow path: rewrite the buffer, skipping null characters.
+    unsigned strippedLength = 0;
+    for (unsigned i = 0; i < length; i++) {
+        if (int c = characters[i])
+            strippedCopy[strippedLength++] = c;
+    }
+    strippedCopy.shrink(strippedLength);
+    return adopt(strippedCopy);
+}
+
+// Creates a string that takes over the StringBuffer's storage; a zero-length
+// buffer yields the shared empty string and is left untouched.
+PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
+{
+    if (unsigned length = buffer.length())
+        return adoptRef(new StringImpl(buffer.release(), length, AdoptBuffer()));
+    return empty();
+}
+
+// Creates a string that takes over the vector's buffer; an empty vector
+// yields the shared empty string and is left untouched.
+PassRefPtr<StringImpl> StringImpl::adopt(Vector<UChar>& vector)
+{
+    if (size_t size = vector.size())
+        return adoptRef(new StringImpl(vector.releaseBuffer(), size, AdoptBuffer()));
+    return empty();
+}
+
+// Creates a string by copying length UTF-16 code units; a null pointer or a
+// zero length yields the shared empty string.
+PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
+{
+    if (characters && length)
+        return adoptRef(new StringImpl(characters, length));
+    return empty();
+}
+
+// Creates a string from length 8-bit characters; a null pointer or a zero
+// length yields the shared empty string.
+PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
+{
+    if (characters && length)
+        return adoptRef(new StringImpl(characters, length));
+    return empty();
+}
+
+// Creates a string from a null-terminated 8-bit string; a null pointer or an
+// empty string yields the shared empty string.
+PassRefPtr<StringImpl> StringImpl::create(const char* string)
+{
+    const unsigned length = string ? strlen(string) : 0;
+    if (!length)
+        return empty();
+    return adoptRef(new StringImpl(string, length));
+}
+
+// Creates a copy of string whose buffer carries an extra terminating 0 after
+// the characters (see the WithTerminatingNullCharacter constructor).
+PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
+{
+ return adoptRef(new StringImpl(string, WithTerminatingNullCharacter()));
+}
+
+// Makes a deep copy with its own character buffer. Unlike create(), this goes
+// straight to the constructor, so copying an empty string produces a new
+// object rather than the shared empty string.
+PassRefPtr<StringImpl> StringImpl::copy()
+{
+ return adoptRef(new StringImpl(m_data, m_length));
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/StringImpl.h b/WebCore/platform/text/StringImpl.h
new file mode 100644
index 0000000..1fb1e95
--- /dev/null
+++ b/WebCore/platform/text/StringImpl.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
+ * Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef StringImpl_h
+#define StringImpl_h
+
+#include <kjs/identifier.h>
+#include <limits.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/Forward.h>
+#include <wtf/RefCounted.h>
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+#if PLATFORM(CF)
+typedef const struct __CFString * CFStringRef;
+#endif
+
+#ifdef __OBJC__
+@class NSString;
+#endif
+
+namespace WebCore {
+
+class AtomicString;
+class StringBuffer;
+
+struct CStringTranslator;
+struct Length;
+struct StringHash;
+struct UCharBufferTranslator;
+
+// Reference-counted UTF-16 string storage. Instances are obtained through the
+// static create()/adopt() factories; the constructors are private and also
+// available to the friend types listed below.
+class StringImpl : public RefCounted<StringImpl> {
+ friend class AtomicString;
+ friend struct UCharBufferTranslator;
+ friend struct CStringTranslator;
+private:
+ StringImpl();
+ StringImpl(const UChar*, unsigned length);
+ StringImpl(const char*, unsigned length);
+
+ // Tag type selecting the constructor that takes over an existing buffer.
+ struct AdoptBuffer { };
+ StringImpl(UChar*, unsigned length, AdoptBuffer);
+
+ // Tag type selecting the constructor that appends a terminating null character.
+ struct WithTerminatingNullCharacter { };
+ StringImpl(const StringImpl&, WithTerminatingNullCharacter);
+
+ // For AtomicString.
+ StringImpl(const UChar*, unsigned length, unsigned hash);
+ StringImpl(const char*, unsigned length, unsigned hash);
+
+public:
+ ~StringImpl();
+
+ static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
+ static PassRefPtr<StringImpl> create(const char*, unsigned length);
+ static PassRefPtr<StringImpl> create(const char*);
+
+ static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
+
+ static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
+ static PassRefPtr<StringImpl> adopt(StringBuffer&);
+ static PassRefPtr<StringImpl> adopt(Vector<UChar>&);
+
+ const UChar* characters() { return m_data; }
+ unsigned length() { return m_length; }
+
+ bool hasTerminatingNullCharacter() { return m_hasTerminatingNullCharacter; }
+
+ // The hash is computed on first use and cached; 0 means "not computed yet".
+ unsigned hash() { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; }
+ static unsigned computeHash(const UChar*, unsigned len);
+ static unsigned computeHash(const char*);
+
+ // Makes a deep copy. Helpful only if you need to use a String on another thread.
+ // Since StringImpl objects are immutable, there's no other reason to make a copy.
+ PassRefPtr<StringImpl> copy();
+
+ PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
+
+ UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
+ UChar32 characterStartingAt(unsigned);
+
+ Length toLength();
+
+ bool containsOnlyWhitespace();
+
+ int toIntStrict(bool* ok = 0, int base = 10);
+ unsigned toUIntStrict(bool* ok = 0, int base = 10);
+ int64_t toInt64Strict(bool* ok = 0, int base = 10);
+ uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
+
+ int toInt(bool* ok = 0); // ignores trailing garbage
+ unsigned toUInt(bool* ok = 0); // ignores trailing garbage
+ int64_t toInt64(bool* ok = 0); // ignores trailing garbage
+ uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
+
+ double toDouble(bool* ok = 0);
+ float toFloat(bool* ok = 0);
+
+ // Both return an array allocated with new[] and write its element count to len.
+ Length* toCoordsArray(int& len);
+ Length* toLengthArray(int& len);
+ bool isLower();
+ PassRefPtr<StringImpl> lower();
+ PassRefPtr<StringImpl> upper();
+ PassRefPtr<StringImpl> secure(UChar aChar);
+ PassRefPtr<StringImpl> capitalize(UChar previousCharacter);
+ PassRefPtr<StringImpl> foldCase();
+
+ PassRefPtr<StringImpl> stripWhiteSpace();
+ PassRefPtr<StringImpl> simplifyWhiteSpace();
+
+ // The find functions return the index of the match, or -1 if there is none.
+ int find(const char*, int index = 0, bool caseSensitive = true);
+ int find(UChar, int index = 0);
+ int find(StringImpl*, int index, bool caseSensitive = true);
+
+ int reverseFind(UChar, int index);
+ int reverseFind(StringImpl*, int index, bool caseSensitive = true);
+
+ // NOTE(review): the parameter name m_data hides the data member inside this inline body.
+ bool startsWith(StringImpl* m_data, bool caseSensitive = true) { return find(m_data, 0, caseSensitive) == 0; }
+ bool endsWith(StringImpl*, bool caseSensitive = true);
+
+ PassRefPtr<StringImpl> replace(UChar, UChar);
+ PassRefPtr<StringImpl> replace(UChar, StringImpl*);
+ PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
+ PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
+
+ static StringImpl* empty();
+
+ Vector<char> ascii();
+
+ WTF::Unicode::Direction defaultWritingDirection();
+
+#if PLATFORM(CF)
+ CFStringRef createCFString();
+#endif
+#ifdef __OBJC__
+ operator NSString*();
+#endif
+
+private:
+ unsigned m_length;
+ const UChar* m_data;
+ mutable unsigned m_hash;
+ bool m_inTable;
+ bool m_hasTerminatingNullCharacter;
+};
+
+// Free comparison functions. The (const char*, StringImpl*) overloads simply
+// swap their arguments and forward to the (StringImpl*, const char*) versions.
+bool equal(StringImpl*, StringImpl*);
+bool equal(StringImpl*, const char*);
+inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
+
+bool equalIgnoringCase(StringImpl*, StringImpl*);
+bool equalIgnoringCase(StringImpl*, const char*);
+inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
+
+// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
+// or anything like that.
+const unsigned phi = 0x9e3779b9U;
+
+// Paul Hsieh's SuperFastHash
+// http://www.azillionmonkeys.com/qed/hash.html
+// Hashes `length` UTF-16 code units. The result is never 0, because 0 is used
+// by hash() to mean "not computed yet".
+inline unsigned StringImpl::computeHash(const UChar* data, unsigned length)
+{
+ unsigned hash = phi;
+
+ // Main loop.
+ // Consumes two code units per iteration.
+ for (unsigned pairCount = length >> 1; pairCount; pairCount--) {
+ hash += data[0];
+ unsigned tmp = (data[1] << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2;
+ hash += hash >> 11;
+ }
+
+ // Handle end case.
+ // (an odd length leaves one trailing code unit)
+ if (length & 1) {
+ hash += data[0];
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ }
+
+ // Force "avalanching" of final 127 bits.
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 2;
+ hash += hash >> 15;
+ hash ^= hash << 10;
+
+ // This avoids ever returning a hash code of 0, since that is used to
+ // signal "hash not computed yet", using a value that is likely to be
+ // effectively the same as 0 when the low bits are masked.
+ // (!hash is 1 only when hash is 0, so this sets the top bit in that case alone.)
+ hash |= !hash << 31;
+
+ return hash;
+}
+
+// Paul Hsieh's SuperFastHash
+// http://www.azillionmonkeys.com/qed/hash.html
+// Hashes a null-terminated 8-bit string using the same steps as the UChar
+// overload, treating each byte as one 16-bit unit. The result is never 0.
+inline unsigned StringImpl::computeHash(const char* data)
+{
+ // This hash is designed to work on 16-bit chunks at a time. But since the normal case
+ // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
+ // were 16-bit chunks, which should give matching results
+
+ unsigned hash = phi;
+
+ // Main loop
+ for (;;) {
+ unsigned char b0 = data[0];
+ // Terminator at an even position: nothing left to mix in.
+ if (!b0)
+ break;
+ unsigned char b1 = data[1];
+ // Terminator after a single trailing character: apply the odd-length end case.
+ if (!b1) {
+ hash += b0;
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ }
+ hash += b0;
+ unsigned tmp = (b1 << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2;
+ hash += hash >> 11;
+ }
+
+ // Force "avalanching" of final 127 bits.
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 2;
+ hash += hash >> 15;
+ hash ^= hash << 10;
+
+ // This avoids ever returning a hash code of 0, since that is used to
+ // signal "hash not computed yet", using a value that is likely to be
+ // effectively the same as 0 when the low bits are masked.
+ // (!hash is 1 only when hash is 0, so this sets the top bit in that case alone.)
+ hash |= !hash << 31;
+
+ return hash;
+}
+
+// Returns true for characters treated as whitespace by the string helpers.
+static inline bool isSpaceOrNewline(UChar c)
+{
+    // ASCII range (<= 0x7F): use isASCIISpace(). This will include newlines,
+    // which aren't included in Unicode DirWS.
+    if (c <= 0x7F)
+        return WTF::isASCIISpace(c);
+    // Everything else: whitespace means the WhiteSpaceNeutral direction class.
+    return WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
+}
+
+}
+
+namespace WTF {
+
+ // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
+ // Declaration of the primary template, so it can be specialized below.
+ template<typename T> struct DefaultHash;
+ // StringImpl* keys hash with StringHash by default.
+ template<> struct DefaultHash<WebCore::StringImpl*> {
+ typedef WebCore::StringHash Hash;
+ };
+ // RefPtr<StringImpl> keys hash with StringHash by default.
+ template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
+ typedef WebCore::StringHash Hash;
+ };
+
+}
+
+#endif
diff --git a/WebCore/platform/text/TextBoundaries.h b/WebCore/platform/text/TextBoundaries.h
new file mode 100644
index 0000000..118dd1a
--- /dev/null
+++ b/WebCore/platform/text/TextBoundaries.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextBoundaries_h
+#define TextBoundaries_h
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+ void findWordBoundary(const UChar*, int len, int position, int* start, int* end);  // Writes the word-break offsets around position to *start and *end.
+ int findNextWordFromIndex(const UChar*, int len, int position, bool forward);  // Next (forward) or previous word break beside an alphanumeric character; len or 0 if there is none.
+
+}  // namespace WebCore
+
+#endif
diff --git a/WebCore/platform/text/TextBoundariesICU.cpp b/WebCore/platform/text/TextBoundariesICU.cpp
new file mode 100644
index 0000000..d226048
--- /dev/null
+++ b/WebCore/platform/text/TextBoundariesICU.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextBoundaries.h"
+
+#include <unicode/ubrk.h>
+
+#include "StringImpl.h"
+#include "TextBreakIterator.h"
+
+namespace WebCore {
+
+// Finds the nearest word break after (forward) or before (!forward) position
+// that sits next to an alphanumeric character.
+//
+// Forward: the character just before the break must be alphanumeric and the
+// break must lie before the end of the text; returns len if there is none.
+// Backward: the character just after the break must be alphanumeric and the
+// break must lie after the start of the text; returns 0 if there is none.
+int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
+{
+    UBreakIterator* it = wordBreakIterator(chars, len);
+
+    // wordBreakIterator() yields 0 when no iterator could be set up (for
+    // example for a null string). Report "no word found" rather than handing
+    // a null iterator to ICU.
+    if (!it)
+        return forward ? len : 0;
+
+    if (forward) {
+        for (position = ubrk_following(it, position); position != UBRK_DONE; position = ubrk_following(it, position)) {
+            // We stop searching when the character preceding the break
+            // is alphanumeric. The position > 0 test keeps the
+            // chars[position - 1] read inside the buffer.
+            if (position > 0 && position < len && u_isalnum(chars[position - 1]))
+                return position;
+        }
+
+        return len;
+    }
+
+    for (position = ubrk_preceding(it, position); position != UBRK_DONE; position = ubrk_preceding(it, position)) {
+        // We stop searching when the character following the break
+        // is alphanumeric. The position < len test keeps the
+        // chars[position] read inside the buffer.
+        if (position > 0 && position < len && u_isalnum(chars[position]))
+            return position;
+    }
+
+    return 0;
+}
+
+// Reports the word boundaries around position: *end receives the first break
+// after position (or the last break of the text when there is none), and
+// *start receives the break immediately before that one.
+// NOTE(review): the iterator returned by wordBreakIterator() is used unchecked.
+void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
+{
+    UBreakIterator* wordIterator = wordBreakIterator(chars, len);
+
+    int boundaryAfter = ubrk_following(wordIterator, position);
+    if (boundaryAfter < 0)
+        boundaryAfter = ubrk_last(wordIterator);
+    *end = boundaryAfter;
+
+    // Step back from wherever the calls above left the iterator.
+    *start = ubrk_previous(wordIterator);
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/TextBreakIterator.h b/WebCore/platform/text/TextBreakIterator.h
new file mode 100644
index 0000000..64717a4
--- /dev/null
+++ b/WebCore/platform/text/TextBreakIterator.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef TextBreakIterator_h
+#define TextBreakIterator_h
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+ class TextBreakIterator;  // Only declared here; this interface passes it around by pointer.
+
+ // Note: The returned iterator is good only until you get another iterator. (The ICU implementation re-targets one shared iterator per kind and returns 0 on failure.)
+ TextBreakIterator* characterBreakIterator(const UChar*, int length);
+ TextBreakIterator* wordBreakIterator(const UChar*, int length);
+ TextBreakIterator* lineBreakIterator(const UChar*, int length);
+ TextBreakIterator* sentenceBreakIterator(const UChar*, int length);
+
+ int textBreakFirst(TextBreakIterator*);
+ int textBreakNext(TextBreakIterator*);
+ int textBreakCurrent(TextBreakIterator*);
+ int textBreakPreceding(TextBreakIterator*, int);
+ int textBreakFollowing(TextBreakIterator*, int);
+ bool isTextBreak(TextBreakIterator*, int);
+
+ const int TextBreakDone = -1;  // NOTE(review): presumably the "no further break" result, mirroring ICU's UBRK_DONE -- confirm.
+
+}  // namespace WebCore
+
+#endif
diff --git a/WebCore/platform/text/TextBreakIteratorICU.cpp b/WebCore/platform/text/TextBreakIteratorICU.cpp
new file mode 100644
index 0000000..9941f58
--- /dev/null
+++ b/WebCore/platform/text/TextBreakIteratorICU.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIterator.h"
+
+#include "TextBreakIteratorInternalICU.h"
+
+#include <unicode/ubrk.h>
+#include <wtf/Assertions.h>
+
+namespace WebCore {
+
+// Opens an ICU break iterator of the given type on first use and points it at
+// the given text. createdIterator and iterator are the caller's cache slots.
+// Returns 0 if string is null, if the iterator could not be opened, or if ICU
+// rejects the text.
+static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
+    UBreakIteratorType type, const UChar* string, int length)
+{
+    if (!string)
+        return 0;
+
+    // Only the first call tries to open the iterator; a failed attempt is
+    // remembered as well, so it is not retried.
+    if (!createdIterator) {
+        UErrorCode openStatus = U_ZERO_ERROR;
+        iterator = static_cast<TextBreakIterator*>(ubrk_open(type, currentTextBreakLocaleID(), 0, 0, &openStatus));
+        createdIterator = true;
+        ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
+    }
+
+    if (!iterator)
+        return 0;
+
+    UErrorCode textStatus = U_ZERO_ERROR;
+    ubrk_setText(iterator, string, length, &textStatus);
+    return U_FAILURE(textStatus) ? 0 : iterator;
+}
+
+// Returns the shared character-boundary iterator re-targeted at the given
+// text, or 0 if setUpIterator() fails.
+TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+{
+    static bool opened = false;
+    static TextBreakIterator* sharedIterator;
+    return setUpIterator(opened, sharedIterator, UBRK_CHARACTER, string, length);
+}
+
+// Returns the shared word-boundary iterator re-targeted at the given text,
+// or 0 if setUpIterator() fails.
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    static bool opened = false;
+    static TextBreakIterator* sharedIterator;
+    return setUpIterator(opened, sharedIterator, UBRK_WORD, string, length);
+}
+
+// Returns the shared line-break iterator re-targeted at the given text,
+// or 0 if setUpIterator() fails.
+TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+{
+    static bool opened = false;
+    static TextBreakIterator* sharedIterator;
+    return setUpIterator(opened, sharedIterator, UBRK_LINE, string, length);
+}
+
+// Returns the shared sentence-boundary iterator re-targeted at the given
+// text, or 0 if setUpIterator() fails.
+TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+{
+    static bool opened = false;
+    static TextBreakIterator* sharedIterator;
+    return setUpIterator(opened, sharedIterator, UBRK_SENTENCE, string, length);
+}
+
+int textBreakFirst(TextBreakIterator* bi)  // Forwards to ICU's ubrk_first().
+{
+ return ubrk_first(bi);
+}
+
+int textBreakNext(TextBreakIterator* bi)  // Forwards to ICU's ubrk_next().
+{
+ return ubrk_next(bi);
+}
+
+int textBreakPreceding(TextBreakIterator* bi, int pos)  // Forwards to ICU's ubrk_preceding().
+{
+ return ubrk_preceding(bi, pos);
+}
+
+int textBreakFollowing(TextBreakIterator* bi, int pos)  // Forwards to ICU's ubrk_following().
+{
+ return ubrk_following(bi, pos);
+}
+
+int textBreakCurrent(TextBreakIterator* bi)  // Forwards to ICU's ubrk_current().
+{
+ return ubrk_current(bi);
+}
+
+bool isTextBreak(TextBreakIterator* bi, int pos)  // Forwards to ICU's ubrk_isBoundary(), converting its result to bool.
+{
+ return ubrk_isBoundary(bi, pos);
+}
+
+}
diff --git a/WebCore/platform/text/TextBreakIteratorInternalICU.h b/WebCore/platform/text/TextBreakIteratorInternalICU.h
new file mode 100644
index 0000000..d4b25e7
--- /dev/null
+++ b/WebCore/platform/text/TextBreakIteratorInternalICU.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef TextBreakIteratorInternalICU_h
+#define TextBreakIteratorInternalICU_h
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+ const char* currentTextBreakLocaleID();  // Locale ID handed to ubrk_open() when the ICU break iterators are created.
+
+}  // namespace WebCore
+
+#endif
diff --git a/WebCore/platform/text/TextCodec.cpp b/WebCore/platform/text/TextCodec.cpp
new file mode 100644
index 0000000..1985c49
--- /dev/null
+++ b/WebCore/platform/text/TextCodec.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodec.h"
+
+#include "PlatformString.h"
+
+namespace WebCore {
+
+const UChar BOM = 0xFEFF;  // The byte order mark character that appendOmittingBOM() leaves out.
+
+TextCodec::~TextCodec()  // Out-of-line definition of the virtual destructor; nothing to release here.
+{
+}
+
+// Appends characters[0..length) to v, leaving out every BOM character.
+// BOMs can show up both at the start of content and inside it, and we never
+// want them to end up in the decoded text.
+void TextCodec::appendOmittingBOM(Vector<UChar>& v, const UChar* characters, size_t length)
+{
+    const UChar* const end = characters + length;
+
+    // runStart marks the beginning of the current BOM-free run.
+    const UChar* runStart = characters;
+    for (const UChar* current = characters; current != end; ++current) {
+        if (*current != BOM)
+            continue;
+
+        // Flush the run that ends just before this BOM, then skip the BOM.
+        if (runStart != current)
+            v.append(runStart, current - runStart);
+        runStart = current + 1;
+    }
+
+    // Flush whatever follows the last BOM.
+    if (runStart != end)
+        v.append(runStart, end - runStart);
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodec.h b/WebCore/platform/text/TextCodec.h
new file mode 100644
index 0000000..77ffcf4
--- /dev/null
+++ b/WebCore/platform/text/TextCodec.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodec_h
+#define TextCodec_h
+
+#include <memory>
+#include <wtf/Noncopyable.h>
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+ class CString;
+ class String;
+ class TextEncoding;
+
+ class TextCodec : Noncopyable {  // Abstract interface for converting between encoded bytes and UChar text.
+ public:
+ virtual ~TextCodec();
+
+ virtual String decode(const char*, size_t length, bool flush = false) = 0;  // Encoded bytes -> String.
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false) = 0;  // UChar text -> encoded bytes.
+
+ protected:
+ static void appendOmittingBOM(Vector<UChar>&, const UChar*, size_t length);  // Appends the characters to the vector, skipping each U+FEFF.
+ };
+
+ typedef void (*EncodingNameRegistrar)(const char* alias, const char* name);  // Callback that receives one alias/name pair.
+
+ typedef std::auto_ptr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData);  // Factory that creates a codec for an encoding.
+ typedef void (*TextCodecRegistrar)(const char* name, NewTextCodecFunction, const void* additionalData);  // Callback that receives the factory for one codec name.
+
+} // namespace WebCore
+
+#endif // TextCodec_h
diff --git a/WebCore/platform/text/TextCodecICU.cpp b/WebCore/platform/text/TextCodecICU.cpp
new file mode 100644
index 0000000..0299b8f
--- /dev/null
+++ b/WebCore/platform/text/TextCodecICU.cpp
@@ -0,0 +1,357 @@
+/*
+ * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecICU.h"
+
+#include "CharacterNames.h"
+#include "CString.h"
+#include "PlatformString.h"
+#include <unicode/ucnv.h>
+#include <unicode/ucnv_cb.h>
+#include <wtf/Assertions.h>
+
+using std::auto_ptr;
+using std::min;
+
+namespace WebCore {
+
+const size_t ConversionBufferSize = 16384;  // Element count of the stack buffers that decode() and encode() convert through.
+
+static UConverter* cachedConverterICU;  // One-slot cache: the most recently released converter, reused when the encoding matches.
+
+static auto_ptr<TextCodec> newTextCodecICU(const TextEncoding& encoding, const void*)  // Factory passed to the codec registrars; the additionalData argument is ignored.
+{
+ return auto_ptr<TextCodec>(new TextCodecICU(encoding));
+}
+
+void TextCodecICU::registerBaseEncodingNames(EncodingNameRegistrar registrar)  // Registers UTF-8, the only base encoding name.
+{
+ registrar("UTF-8", "UTF-8");
+}
+
+void TextCodecICU::registerBaseCodecs(TextCodecRegistrar registrar)  // Registers the ICU codec factory for UTF-8 only.
+{
+ registrar("UTF-8", newTextCodecICU, 0);
+}
+
+// FIXME: Registering all the encodings we get from ucnv_getAvailableName
+// includes encodings we don't want or need. For example: UTF16_PlatformEndian,
+// UTF16_OppositeEndian, UTF32_PlatformEndian, UTF32_OppositeEndian, and all
+// the encodings with commas and version numbers.
+
+// Registers, through registrar, the IANA standard name and every alias of each
+// converter ICU reports as available, followed by a fixed list of extra aliases.
+void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
+{
+    // We register Hebrew with logical ordering using a separate name.
+    // Otherwise, this would share the same canonical name as the
+    // visual ordering case, and then TextEncoding could not tell them
+    // apart; ICU works with either name.
+    registrar("ISO-8859-8-I", "ISO-8859-8-I");
+
+    const int32_t converterCount = ucnv_countAvailable();
+    for (int32_t index = 0; index < converterCount; ++index) {
+        const char* name = ucnv_getAvailableName(index);
+        UErrorCode error = U_ZERO_ERROR;
+        // FIXME: Should we use the "MIME" standard instead of "IANA"?
+        const char* standardName = ucnv_getStandardName(name, "IANA", &error);
+        if (U_FAILURE(error) || !standardName)
+            continue;
+
+        // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
+        // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
+        //    for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
+        const bool isGB2312 = !strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80");
+        if (isGB2312)
+            standardName = "GBK";
+        else
+            registrar(standardName, standardName);
+
+        const uint16_t numAliases = ucnv_countAliases(name, &error);
+        ASSERT(U_SUCCESS(error));
+        if (U_FAILURE(error))
+            continue;
+
+        for (uint16_t j = 0; j < numAliases; ++j) {
+            error = U_ZERO_ERROR;
+            const char* alias = ucnv_getAlias(name, j, &error);
+            ASSERT(U_SUCCESS(error));
+            // The alias is compared with the standard name by pointer, not by content.
+            if (U_FAILURE(error) || alias == standardName)
+                continue;
+            registrar(alias, standardName);
+        }
+    }
+
+    // The first two entries are additional aliases; perhaps we can get these
+    // added to ICU.
+    //
+    // The remaining entries are aliases that historically were present in the
+    // encoding table in WebKit on Macintosh that don't seem to be present in
+    // ICU. Perhaps we can prove these are not used on the web and remove them.
+    // Or perhaps we can get them added to ICU.
+    static const struct {
+        const char* alias;
+        const char* name;
+    } extraAliases[] = {
+        { "macroman", "macintosh" },
+        { "xmacroman", "macintosh" },
+        { "cnbig5", "Big5" },
+        { "cngb", "EUC-CN" },
+        { "csISO88598I", "ISO_8859-8-I" },
+        { "csgb231280", "EUC-CN" },
+        { "dos720", "cp864" },
+        { "dos874", "cp874" },
+        { "jis7", "ISO-2022-JP" },
+        { "koi", "KOI8-R" },
+        { "logical", "ISO-8859-8-I" },
+        { "unicode11utf8", "UTF-8" },
+        { "unicode20utf8", "UTF-8" },
+        { "visual", "ISO-8859-8" },
+        { "winarabic", "windows-1256" },
+        { "winbaltic", "windows-1257" },
+        { "wincyrillic", "windows-1251" },
+        { "windows874", "cp874" },
+        { "wingreek", "windows-1253" },
+        { "winhebrew", "windows-1255" },
+        { "winlatin2", "windows-1250" },
+        { "winturkish", "windows-1254" },
+        { "winvietnamese", "windows-1258" },
+        { "xcp1250", "windows-1250" },
+        { "xcp1251", "windows-1251" },
+        { "xeuc", "EUC-JP" },
+        { "xeuccn", "EUC-CN" },
+        { "xgbk", "EUC-CN" },
+        { "xunicode20utf8", "UTF-8" },
+        { "xxbig5", "Big5" },
+    };
+    for (size_t k = 0; k < sizeof(extraAliases) / sizeof(extraAliases[0]); ++k)
+        registrar(extraAliases[k].alias, extraAliases[k].name);
+}
+
+// Registers the ICU codec factory under the IANA standard name of every
+// converter ICU reports as available, plus logical-order Hebrew.
+void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar)
+{
+    // Logical-order Hebrew goes under its own name; see the comment in
+    // registerExtendedEncodingNames.
+    registrar("ISO-8859-8-I", newTextCodecICU, 0);
+
+    const int32_t converterCount = ucnv_countAvailable();
+    for (int32_t index = 0; index < converterCount; ++index) {
+        const char* name = ucnv_getAvailableName(index);
+        UErrorCode error = U_ZERO_ERROR;
+        // FIXME: Should we use the "MIME" standard instead of "IANA"?
+        const char* standardName = ucnv_getStandardName(name, "IANA", &error);
+        if (U_SUCCESS(error) && standardName)
+            registrar(standardName, newTextCodecICU, 0);
+    }
+}
+
+TextCodecICU::TextCodecICU(const TextEncoding& encoding)  // Stores the encoding only; no ICU converter is opened here.
+ : m_encoding(encoding)
+ , m_numBufferedBytes(0)
+ , m_converterICU(0)
+ , m_needsGBKFallbacks(false)
+{
+}
+
+TextCodecICU::~TextCodecICU()
+{
+ releaseICUConverter();  // Parks the converter, if any, in the one-slot cache rather than closing it.
+}
+
+// Hands this codec's converter over to the one-slot cache, closing whichever
+// converter was cached before. Does nothing if no converter is held.
+void TextCodecICU::releaseICUConverter() const
+{
+    if (!m_converterICU)
+        return;
+
+    if (cachedConverterICU)
+        ucnv_close(cachedConverterICU);
+
+    cachedConverterICU = m_converterICU;
+    m_converterICU = 0;
+}
+
+// Obtains a converter for m_encoding: the cached one if its name matches,
+// otherwise a newly opened one. m_converterICU stays 0 if ICU cannot open it.
+// Also records whether the encoding is exactly "GBK".
+void TextCodecICU::createICUConverter() const
+{
+    ASSERT(!m_converterICU);
+
+    const char* name = m_encoding.name();
+    m_needsGBKFallbacks = !strcmp(name, "GBK");
+
+    // Take the cached converter when it was opened for the same encoding.
+    if (cachedConverterICU) {
+        UErrorCode nameStatus = U_ZERO_ERROR;
+        const char* cachedName = ucnv_getName(cachedConverterICU, &nameStatus);
+        if (U_SUCCESS(nameStatus) && m_encoding == cachedName) {
+            m_converterICU = cachedConverterICU;
+            cachedConverterICU = 0;
+            return;
+        }
+    }
+
+    UErrorCode err = U_ZERO_ERROR;
+    m_converterICU = ucnv_open(m_encoding.name(), &err);
+#if !LOG_DISABLED
+    if (err == U_AMBIGUOUS_ALIAS_WARNING)
+        LOG_ERROR("ICU ambiguous alias warning for encoding: %s", m_encoding.name());
+#endif
+    if (m_converterICU)
+        ucnv_setFallback(m_converterICU, TRUE);
+}
+
+// Decodes length bytes through the ICU converter and returns the text with
+// BOM characters removed. Returns a null String if no converter can be
+// created or if ICU reports a conversion failure.
+String TextCodecICU::decode(const char* bytes, size_t length, bool flush)
+{
+    // Get a converter for the passed-in encoding.
+    if (!m_converterICU) {
+        createICUConverter();
+        ASSERT(m_converterICU);
+        if (!m_converterICU) {
+            LOG_ERROR("error creating ICU encoder even though encoding was in table");
+            return String();
+        }
+    }
+
+    Vector<UChar> result;
+
+    UChar buffer[ConversionBufferSize];
+    const UChar* const bufferLimit = buffer + ConversionBufferSize;
+    const char* source = bytes;
+    const char* const sourceLimit = bytes + length;
+    UErrorCode err;
+
+    // Convert one buffer-full at a time for as long as ICU reports that the
+    // output buffer overflowed.
+    do {
+        UChar* target = buffer;
+        err = U_ZERO_ERROR;
+        ucnv_toUnicode(m_converterICU, &target, bufferLimit, &source, sourceLimit, 0, flush, &err);
+        appendOmittingBOM(result, buffer, target - buffer);
+    } while (err == U_BUFFER_OVERFLOW_ERROR);
+
+    if (U_FAILURE(err)) {
+        // flush the converter so it can be reused, and not be bothered by this error.
+        do {
+            UChar* target = buffer;
+            err = U_ZERO_ERROR;
+            ucnv_toUnicode(m_converterICU, &target, bufferLimit, &source, sourceLimit, 0, true, &err);
+        } while (source < sourceLimit);
+        LOG_ERROR("ICU conversion error");
+        return String();
+    }
+
+    String resultString = String::adopt(result);
+
+    // <http://bugs.webkit.org/show_bug.cgi?id=17014>
+    // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
+    if (m_encoding == "GBK" || m_encoding == "gb18030")
+        resultString.replace(0xE5E5, ideographicSpace);
+
+    return resultString;
+}
+
+// We need to apply these fallbacks ourselves as they are not currently supported by ICU and
+// they were provided by the old TEC encoding path
+// Needed to fix <rdar://problem/4708689>
+//
+// Returns the replacement character for the four code points handled here,
+// or 0 for any other code point.
+static UChar getGbkEscape(UChar32 codePoint)
+{
+    if (codePoint == 0x01F9)
+        return 0xE7C8;
+    if (codePoint == 0x1E3F)
+        return 0xE7C7;
+    if (codePoint == 0x22EF)
+        return 0x2026;
+    if (codePoint == 0x301C)
+        return 0xFF5E;
+    return 0;
+}
+
+// From-Unicode callback for GBK when entities are allowed: unassigned code
+// points that have a GBK fallback are written as that fallback; everything
+// else is passed on to ICU's escape callback.
+static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
+    UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+{
+    if (reason == UCNV_UNASSIGNED) {
+        const UChar replacement = getGbkEscape(codePoint);
+        if (replacement) {
+            const UChar* source = &replacement;
+            *err = U_ZERO_ERROR;
+            ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
+            return;
+        }
+    }
+
+    UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+}
+
+// From-Unicode callback for GBK when entities are not allowed: unassigned code
+// points that have a GBK fallback are written as that fallback; everything
+// else is passed on to ICU's substitute callback.
+static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
+    UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
+{
+    if (reason == UCNV_UNASSIGNED) {
+        const UChar replacement = getGbkEscape(codePoint);
+        if (replacement) {
+            const UChar* source = &replacement;
+            *err = U_ZERO_ERROR;
+            ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
+            return;
+        }
+    }
+
+    UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
+}
+
+// Encodes length UChars through the ICU converter. Unencodable characters
+// go to the escape callback (UCNV_ESCAPE_XML_DEC) when allowEntities is
+// true, otherwise to the substitute callback with "?". Returns "" for empty
+// input and a null CString if the converter or its callbacks cannot be set up.
+CString TextCodecICU::encode(const UChar* characters, size_t length, bool allowEntities)
+{
+    if (!length)
+        return "";
+
+    if (!m_converterICU) {
+        createICUConverter();
+        if (!m_converterICU)
+            return CString();
+    }
+
+    // FIXME: We should see if there is "force ASCII range" mode in ICU;
+    // until then, we change the backslash into a yen sign.
+    // Encoding will change the yen sign back into a backslash.
+    String copy(characters, length);
+    copy.replace('\\', m_encoding.backslashAsCurrencySymbol());
+
+    const UChar* source = copy.characters();
+    const UChar* const sourceLimit = source + copy.length();
+
+    UErrorCode err = U_ZERO_ERROR;
+
+    if (!allowEntities) {
+        ucnv_setSubstChars(m_converterICU, "?", 1, &err);
+        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
+    } else
+        ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
+
+    ASSERT(U_SUCCESS(err));
+    if (U_FAILURE(err))
+        return CString();
+
+    Vector<char> result;
+    size_t size = 0;
+    char buffer[ConversionBufferSize];
+
+    // Convert one buffer-full at a time, appending each chunk to result,
+    // until ICU stops reporting that the output buffer overflowed.
+    for (;;) {
+        char* target = buffer;
+        err = U_ZERO_ERROR;
+        ucnv_fromUnicode(m_converterICU, &target, buffer + ConversionBufferSize, &source, sourceLimit, 0, true, &err);
+
+        const size_t count = target - buffer;
+        result.grow(size + count);
+        memcpy(result.data() + size, buffer, count);
+        size += count;
+
+        if (err != U_BUFFER_OVERFLOW_ERROR)
+            break;
+    }
+
+    return CString(result.data(), size);
+}
+
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecICU.h b/WebCore/platform/text/TextCodecICU.h
new file mode 100644
index 0000000..c2a30b1
--- /dev/null
+++ b/WebCore/platform/text/TextCodecICU.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecICU_h
+#define TextCodecICU_h
+
+#include "TextCodec.h"
+#include "TextEncoding.h"
+
+typedef struct UConverter UConverter;
+
+namespace WebCore {
+
+ class TextCodecICU : public TextCodec {  // TextCodec implemented on top of an ICU UConverter.
+ public:
+ static void registerBaseEncodingNames(EncodingNameRegistrar);  // Registers UTF-8 only.
+ static void registerBaseCodecs(TextCodecRegistrar);  // Registers the UTF-8 codec factory only.
+
+ static void registerExtendedEncodingNames(EncodingNameRegistrar);  // Registers names and aliases of all converters ICU reports.
+ static void registerExtendedCodecs(TextCodecRegistrar);  // Registers a codec factory for each of those converters.
+
+ TextCodecICU(const TextEncoding&);
+ virtual ~TextCodecICU();
+
+ virtual String decode(const char*, size_t length, bool flush = false);
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+
+ private:
+ void createICUConverter() const;  // Const because the converter is created lazily, hence the mutable members below.
+ void releaseICUConverter() const;  // Hands the converter to the one-slot cache.
+ bool needsGBKFallbacks() const { return m_needsGBKFallbacks; }
+ void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; }
+
+ TextEncoding m_encoding;
+ unsigned m_numBufferedBytes;  // NOTE(review): zeroed in the constructor; no other use is visible in TextCodecICU.cpp -- confirm it is still needed.
+ unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+ mutable UConverter* m_converterICU;  // 0 until createICUConverter() obtains a converter.
+ mutable bool m_needsGBKFallbacks;  // Set by createICUConverter() when the encoding name is exactly "GBK".
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecICU_h
diff --git a/WebCore/platform/text/TextCodecLatin1.cpp b/WebCore/platform/text/TextCodecLatin1.cpp
new file mode 100644
index 0000000..2e9d116
--- /dev/null
+++ b/WebCore/platform/text/TextCodecLatin1.cpp
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecLatin1.h"
+
+#include "CString.h"
+#include "PlatformString.h"
+#include "StringBuffer.h"
+#include <stdio.h>
+
+using std::auto_ptr;
+
+namespace WebCore {
+
+static const UChar table[256] = { // byte value -> decoded UChar; identity except for some entries in rows 80-9F
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00-07
+ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08-0F
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, // 10-17
+ 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, // 18-1F
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, // 20-27
+ 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, // 28-2F
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 30-37
+ 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, // 38-3F
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, // 40-47
+ 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, // 48-4F
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // 50-57
+ 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, // 58-5F
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, // 60-67
+ 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, // 68-6F
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, // 70-77
+ 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, // 78-7F
+ 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
+ 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F
+ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, // A0-A7
+ 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, // A8-AF
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, // B0-B7
+ 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, // B8-BF
+ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, // C0-C7
+ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, // C8-CF
+ 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, // D0-D7
+ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, // D8-DF
+ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, // E0-E7
+ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, // E8-EF
+ 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, // F0-F7
+ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF // F8-FF
+};
+
+void TextCodecLatin1::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    // { alias, canonical name } pairs, handed to the registrar in exactly the
+    // order listed. The three canonical names map to themselves and come
+    // first; the remaining rows are grouped by the canonical name they target.
+    static const char* const aliasTable[][2] = {
+        { "windows-1252", "windows-1252" },
+        { "ISO-8859-1", "ISO-8859-1" },
+        { "US-ASCII", "US-ASCII" },
+
+        { "WinLatin1", "windows-1252" },
+        { "ibm-1252", "windows-1252" },
+        { "ibm-1252_P100-2000", "windows-1252" },
+
+        { "8859-1", "ISO-8859-1" },
+        { "CP819", "ISO-8859-1" },
+        { "IBM819", "ISO-8859-1" },
+        { "csISOLatin1", "ISO-8859-1" },
+        { "iso-ir-100", "ISO-8859-1" },
+        { "iso_8859-1:1987", "ISO-8859-1" },
+        { "l1", "ISO-8859-1" },
+        { "latin1", "ISO-8859-1" },
+
+        { "ANSI_X3.4-1968", "US-ASCII" },
+        { "ANSI_X3.4-1986", "US-ASCII" },
+        { "ASCII", "US-ASCII" },
+        { "IBM367", "US-ASCII" },
+        { "ISO646-US", "US-ASCII" },
+        { "ISO_646.irv:1991", "US-ASCII" },
+        { "cp367", "US-ASCII" },
+        { "csASCII", "US-ASCII" },
+        { "ibm-367_P100-1995", "US-ASCII" },
+        { "iso-ir-6", "US-ASCII" },
+        { "iso-ir-6-us", "US-ASCII" },
+        { "us", "US-ASCII" },
+        { "x-ansi", "US-ASCII" }
+    };
+
+    const size_t aliasCount = sizeof(aliasTable) / sizeof(aliasTable[0]);
+    for (size_t row = 0; row < aliasCount; ++row)
+        registrar(aliasTable[row][0], aliasTable[row][1]);
+}
+
+static auto_ptr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*)
+{
+    // Factory callback handed to the codec registrar; neither argument is
+    // used because TextCodecLatin1 is default-constructed.
+    auto_ptr<TextCodec> codec(new TextCodecLatin1);
+    return codec;
+}
+
+void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar)
+{
+    // ASCII and Latin-1 both decode as Windows Latin-1 although they retain unique identities,
+    // so the same factory is registered under all three canonical names, in this order.
+    static const char* const canonicalNames[] = { "windows-1252", "ISO-8859-1", "US-ASCII" };
+
+    const size_t nameCount = sizeof(canonicalNames) / sizeof(canonicalNames[0]);
+    for (size_t n = 0; n < nameCount; ++n)
+        registrar(canonicalNames[n], newStreamingTextDecoderWindowsLatin1, 0);
+}
+
+String TextCodecLatin1::decode(const char* bytes, size_t length, bool)
+{
+    // One output character per input byte. The third (flush) argument is unused.
+    StringBuffer decoded(length);
+    const unsigned char* source = reinterpret_cast<const unsigned char*>(bytes);
+
+    // First pass: copy every byte through unchanged while OR-ing them together,
+    // which tells us cheaply whether any byte has its high bit set.
+    unsigned char combinedBits = 0;
+    for (size_t n = 0; n < length; ++n) {
+        decoded[n] = source[n];
+        combinedBits |= source[n];
+    }
+
+    // Second pass, only when a non-ASCII byte was seen: redo the conversion
+    // through the lookup table.
+    if (combinedBits & 0x80) {
+        for (size_t n = 0; n < length; ++n)
+            decoded[n] = table[source[n]];
+    }
+
+    return String::adopt(decoded);
+}
+
+static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, bool allowEntities)
+{
+    // General-purpose encoder used when the input is not pure ASCII. Walks the
+    // input one code point at a time. A code point with no byte form becomes a
+    // decimal entity ("&#N;") when allowEntities is true and '?' otherwise.
+    Vector<char> output(length);
+    char* out = output.data();
+    size_t outLength = 0;
+
+    size_t position = 0;
+    while (position < length) {
+        UChar32 codePoint;
+        U16_NEXT(characters, position, length, codePoint);
+
+        unsigned char encodedByte = codePoint;
+        // Code points 00-7F and A0-FF are stored as the byte with the same value.
+        const bool sameValueByte = encodedByte == codePoint && (codePoint & 0xE0) != 0x80;
+        if (!sameValueByte) {
+            // Anything else is encodable only if it appears in rows 80-9F of the table.
+            bool foundInTable = false;
+            for (encodedByte = 0x80; encodedByte < 0xA0; ++encodedByte) {
+                if (table[encodedByte] == codePoint) {
+                    foundInTable = true;
+                    break;
+                }
+            }
+
+            if (!foundInTable && allowEntities) {
+                char entity[16];
+                sprintf(entity, "&#%u;", codePoint);
+                const size_t entityLength = strlen(entity);
+                // Room for the entity plus one byte for every code unit still unread.
+                output.grow(outLength + entityLength + length - position);
+                out = output.data();
+                memcpy(out + outLength, entity, entityLength);
+                outLength += entityLength;
+                continue;
+            }
+
+            if (!foundInTable)
+                encodedByte = '?';
+        }
+
+        out[outLength++] = encodedByte;
+    }
+
+    return CString(out, outLength);
+}
+
+CString TextCodecLatin1::encode(const UChar* characters, size_t length, bool allowEntities)
+{
+    {
+        // Optimistic path: narrow every code unit to a byte and, in the same
+        // pass, OR all code units together so one test afterwards shows
+        // whether the input was entirely ASCII.
+        char* out;
+        CString asciiResult = CString::newUninitialized(length, out);
+
+        UChar combinedBits = 0;
+        size_t n = 0;
+        while (n < length) {
+            const UChar unit = characters[n];
+            out[n] = unit;
+            combinedBits |= unit;
+            ++n;
+        }
+
+        if ((combinedBits & 0xFF80) == 0)
+            return asciiResult;
+    }
+
+    // Something outside 00-7F was present; the optimistic buffer has been
+    // released, so start over with the encoder that handles every case.
+    return encodeComplexWindowsLatin1(characters, length, allowEntities);
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecLatin1.h b/WebCore/platform/text/TextCodecLatin1.h
new file mode 100644
index 0000000..46d6e66
--- /dev/null
+++ b/WebCore/platform/text/TextCodecLatin1.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecLatin1_h
+#define TextCodecLatin1_h
+
+#include "TextCodec.h"
+
+namespace WebCore {
+
+ class TextCodecLatin1 : public TextCodec { // one codec type registered for windows-1252, ISO-8859-1 and US-ASCII
+ public:
+ static void registerEncodingNames(EncodingNameRegistrar); // registers the three canonical names and their aliases
+ static void registerCodecs(TextCodecRegistrar); // registers the same factory under each canonical name
+ /* decode(): the implementation leaves the flush argument unnamed and unused.
+  * encode(): a character with no byte form becomes "&#N;" when allowEntities is true, '?' otherwise. */
+ virtual String decode(const char*, size_t length, bool flush = false);
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecLatin1_h
diff --git a/WebCore/platform/text/TextCodecUTF16.cpp b/WebCore/platform/text/TextCodecUTF16.cpp
new file mode 100644
index 0000000..9ecd2a9
--- /dev/null
+++ b/WebCore/platform/text/TextCodecUTF16.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecUTF16.h"
+
+#include "CString.h"
+#include "PlatformString.h"
+#include "StringBuffer.h"
+
+using std::auto_ptr;
+
+namespace WebCore {
+
+const UChar BOM = 0xFEFF; // byte order mark; TextCodecUTF16::decode() leaves out every decoded unit equal to this
+
+void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    // { alias, canonical name } pairs, handed to the registrar in exactly the
+    // order listed: the two canonical names first, then the aliases that
+    // resolve to little-endian, then the one that resolves to big-endian.
+    static const char* const aliasTable[][2] = {
+        { "UTF-16LE", "UTF-16LE" },
+        { "UTF-16BE", "UTF-16BE" },
+
+        { "ISO-10646-UCS-2", "UTF-16LE" },
+        { "UCS-2", "UTF-16LE" },
+        { "UTF-16", "UTF-16LE" },
+        { "Unicode", "UTF-16LE" },
+        { "csUnicode", "UTF-16LE" },
+        { "unicodeFEFF", "UTF-16LE" },
+
+        { "unicodeFFFE", "UTF-16BE" }
+    };
+
+    const size_t aliasCount = sizeof(aliasTable) / sizeof(aliasTable[0]);
+    for (size_t row = 0; row < aliasCount; ++row)
+        registrar(aliasTable[row][0], aliasTable[row][1]);
+}
+
+static auto_ptr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*)
+{
+    // Factory callback for "UTF-16LE"; both arguments are ignored.
+    const bool littleEndian = true;
+    auto_ptr<TextCodec> codec(new TextCodecUTF16(littleEndian));
+    return codec;
+}
+
+static auto_ptr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*)
+{
+    // Factory callback for "UTF-16BE"; both arguments are ignored.
+    const bool littleEndian = false;
+    auto_ptr<TextCodec> codec(new TextCodecUTF16(littleEndian));
+    return codec;
+}
+
+void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar)
+{
+    // One factory per byte order, little-endian registered first. The last
+    // argument of each call is 0; the factories ignore their second parameter.
+    registrar("UTF-16LE", &newStreamingTextDecoderUTF16LE, 0);
+    registrar("UTF-16BE", &newStreamingTextDecoderUTF16BE, 0);
+}
+
+String TextCodecUTF16::decode(const char* bytes, size_t length, bool)
+{
+ if (!length)
+ return String();
+ /*
+  * Streaming decode of 16-bit units in the byte order chosen at construction.
+  * Input may stop in the middle of a unit: the odd byte is kept in m_bufferedByte
+  * and joined with the first byte of the next call. The unnamed third (flush)
+  * argument is not consulted. Every decoded unit equal to BOM is left out of the
+  * result, wherever in the input it occurs.
+  */
+ const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes);
+ size_t numBytes = length + m_haveBufferedByte; // bytes available now, counting a held-over byte
+ size_t numChars = numBytes / 2; // complete units available: an upper bound on the output length
+
+ StringBuffer buffer(numChars);
+ UChar* q = buffer.characters(); // write cursor into buffer
+
+ if (m_haveBufferedByte) { // finish the unit that was started on the previous call
+ UChar c;
+ if (m_littleEndian)
+ c = m_bufferedByte | (p[0] << 8);
+ else
+ c = (m_bufferedByte << 8) | p[0];
+ if (c != BOM)
+ *q++ = c;
+ m_haveBufferedByte = false;
+ p += 1;
+ numChars -= 1; // that unit has been handled; the loops below cover the rest
+ }
+
+ if (m_littleEndian)
+ for (size_t i = 0; i < numChars; ++i) {
+ UChar c = p[0] | (p[1] << 8);
+ p += 2;
+ if (c != BOM)
+ *q++ = c;
+ }
+ else
+ for (size_t i = 0; i < numChars; ++i) {
+ UChar c = (p[0] << 8) | p[1];
+ p += 2;
+ if (c != BOM)
+ *q++ = c;
+ }
+
+ if (numBytes & 1) { // odd total: exactly one byte is left over, keep it for the next call
+ ASSERT(!m_haveBufferedByte);
+ m_haveBufferedByte = true;
+ m_bufferedByte = p[0];
+ }
+
+ buffer.shrink(q - buffer.characters()); // q stopped short of numChars if any BOM unit was skipped
+
+ return String::adopt(buffer);
+}
+
+CString TextCodecUTF16::encode(const UChar* characters, size_t length, bool)
+{
+    // Two output bytes per input code unit; the third argument is unused.
+    char* out;
+    CString encoded = CString::newUninitialized(length * 2, out);
+
+    // FIXME: CString is not a reasonable data structure for encoded UTF-16, which will have
+    // null characters inside it. Perhaps the result of encode should not be a CString?
+
+    // Position, within each output pair, of the low and the high byte of a code unit.
+    const size_t lowByteIndex = m_littleEndian ? 0 : 1;
+    const size_t highByteIndex = 1 - lowByteIndex;
+
+    for (size_t unit = 0; unit < length; ++unit, out += 2) {
+        const UChar codeUnit = characters[unit];
+        out[lowByteIndex] = codeUnit;
+        out[highByteIndex] = codeUnit >> 8;
+    }
+
+    return encoded;
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecUTF16.h b/WebCore/platform/text/TextCodecUTF16.h
new file mode 100644
index 0000000..2bde221
--- /dev/null
+++ b/WebCore/platform/text/TextCodecUTF16.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecUTF16_h
+#define TextCodecUTF16_h
+
+#include "TextCodec.h"
+
+namespace WebCore {
+
+ class TextCodecUTF16 : public TextCodec {
+ public:
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
+
+ // littleEndian selects the byte order used by both decode() and encode().
+ // m_bufferedByte is given a defined value here: decode() only reads it while
+ // m_haveBufferedByte is set, but leaving it indeterminate makes copies and
+ // memory-checker runs report an uninitialized member.
+ TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false), m_bufferedByte(0) { }
+
+ // decode() is a streaming decoder: an odd trailing byte is carried over to the next call.
+ virtual String decode(const char*, size_t length, bool flush = false);
+ // encode() writes two bytes per code unit in the configured byte order.
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+
+ private:
+ bool m_littleEndian; // true: low byte first; false: high byte first
+ bool m_haveBufferedByte; // true while m_bufferedByte holds the first half of an unfinished unit
+ unsigned char m_bufferedByte; // meaningful only while m_haveBufferedByte is true
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecUTF16_h
diff --git a/WebCore/platform/text/TextCodecUserDefined.cpp b/WebCore/platform/text/TextCodecUserDefined.cpp
new file mode 100644
index 0000000..3ef1bc9
--- /dev/null
+++ b/WebCore/platform/text/TextCodecUserDefined.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2007, 2008 Apple, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecUserDefined.h"
+
+#include "CString.h"
+#include "PlatformString.h"
+#include "StringBuffer.h"
+#include <stdio.h>
+
+using std::auto_ptr;
+
+namespace WebCore {
+
+void TextCodecUserDefined::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    // The encoding has a single name, registered as its own canonical name.
+    static const char* const encodingName = "x-user-defined";
+    registrar(encodingName, encodingName);
+}
+
+static auto_ptr<TextCodec> newStreamingTextDecoderUserDefined(const TextEncoding&, const void*)
+{
+    // Factory callback for "x-user-defined"; both arguments are ignored.
+    auto_ptr<TextCodec> codec(new TextCodecUserDefined);
+    return codec;
+}
+
+void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar)
+{
+    // Single codec, single name; the last argument is 0 and the factory
+    // ignores its second parameter.
+    registrar("x-user-defined", &newStreamingTextDecoderUserDefined, 0);
+}
+
+String TextCodecUserDefined::decode(const char* bytes, size_t length, bool)
+{
+    // One output character per input byte; the third (flush) argument is unused.
+    StringBuffer decoded(length);
+
+    size_t position = 0;
+    while (position < length) {
+        // Going through signed char sign-extends bytes 80-FF, so the mask
+        // places them at F780-F7FF while 00-7F keep their own value.
+        const signed char signedByte = bytes[position];
+        decoded[position] = signedByte & 0xF7FF;
+        ++position;
+    }
+
+    return String::adopt(decoded);
+}
+
+static CString encodeComplexUserDefined(const UChar* characters, size_t length, bool allowEntities)
+{
+    // General-purpose encoder used when the input is not pure ASCII. Walks the
+    // input one code point at a time. A code point with no byte form becomes a
+    // decimal entity ("&#N;") when allowEntities is true and '?' otherwise.
+    Vector<char> output(length);
+    char* out = output.data();
+    size_t outLength = 0;
+
+    size_t position = 0;
+    while (position < length) {
+        UChar32 codePoint;
+        U16_NEXT(characters, position, length, codePoint);
+
+        // A code point is encodable exactly when the decode mapping applied to
+        // its low byte gives the code point back.
+        const signed char candidate = codePoint;
+        if ((candidate & 0xf7ff) == codePoint) {
+            out[outLength++] = candidate;
+            continue;
+        }
+
+        // No way to encode this character with x-user-defined.
+        if (!allowEntities) {
+            out[outLength++] = '?';
+            continue;
+        }
+
+        char entity[16];
+        sprintf(entity, "&#%u;", codePoint);
+        const size_t entityLength = strlen(entity);
+        // Room for the entity plus one byte for every code unit still unread.
+        output.grow(outLength + entityLength + length - position);
+        out = output.data();
+        memcpy(out + outLength, entity, entityLength);
+        outLength += entityLength;
+    }
+
+    return CString(out, outLength);
+}
+
+CString TextCodecUserDefined::encode(const UChar* characters, size_t length, bool allowEntities)
+{
+    // Optimistic path: narrow every code unit to a byte and, in the same pass,
+    // OR all code units together so one test afterwards shows whether the
+    // input was entirely ASCII.
+    char* out;
+    CString asciiResult = CString::newUninitialized(length, out);
+
+    UChar combinedBits = 0;
+    size_t n = 0;
+    while (n < length) {
+        const UChar unit = characters[n];
+        out[n] = unit;
+        combinedBits |= unit;
+        ++n;
+    }
+
+    if ((combinedBits & 0xFF80) == 0)
+        return asciiResult;
+
+    // Something outside 00-7F was present: start over with the encoder that
+    // handles every case.
+    return encodeComplexUserDefined(characters, length, allowEntities);
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecUserDefined.h b/WebCore/platform/text/TextCodecUserDefined.h
new file mode 100644
index 0000000..4fba907
--- /dev/null
+++ b/WebCore/platform/text/TextCodecUserDefined.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2007 Apple, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecUserDefined_h
+#define TextCodecUserDefined_h
+
+#include "TextCodec.h"
+
+namespace WebCore {
+
+ class TextCodecUserDefined : public TextCodec { // codec registered under the single name "x-user-defined"
+ public:
+ static void registerEncodingNames(EncodingNameRegistrar); // registers "x-user-defined" as its own canonical name
+ static void registerCodecs(TextCodecRegistrar); // registers the factory for "x-user-defined"
+ /* decode(): the implementation leaves the flush argument unnamed and unused.
+  * encode(): a character with no byte form becomes "&#N;" when allowEntities is true, '?' otherwise. */
+ virtual String decode(const char*, size_t length, bool flush = false);
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecUserDefined_h
diff --git a/WebCore/platform/text/TextDecoder.cpp b/WebCore/platform/text/TextDecoder.cpp
new file mode 100644
index 0000000..8633e9f
--- /dev/null
+++ b/WebCore/platform/text/TextDecoder.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextDecoder.h"
+
+#include "TextEncodingRegistry.h"
+
+// FIXME: Would be nice to also handle BOM for UTF-7 and UTF-32.
+
+namespace WebCore {
+
+TextDecoder::TextDecoder(const TextEncoding& encoding) // no codec is created here; checkForBOM() does that later
+ : m_encoding(encoding) // encoding to use when the input carries no byte order mark
+ , m_checkedForBOM(false) // decode() routes through checkForBOM() while this is false
+ , m_numBufferedBytes(0) // nothing held over yet
+{
+}
+
+void TextDecoder::reset(const TextEncoding& encoding) // puts the decoder back in its just-constructed state for a new encoding
+{
+ m_encoding = encoding;
+ m_codec.clear(); // drop the current codec; checkForBOM() installs a new one
+ m_checkedForBOM = false; // byte-order-mark detection runs again on the next decode()
+ m_numBufferedBytes = 0; // forget any bytes held over from earlier input
+}
+
+String TextDecoder::checkForBOM(const char* data, size_t length, bool flush)
+{
+ /*
+  * Check to see if we found a BOM.
+  *
+  * Reached from decode() for as long as m_checkedForBOM is false. Examines the first
+  * four bytes of the stream (bytes held over from earlier calls, followed by this
+  * call's data), selects the encoding a byte order mark indicates or else m_encoding,
+  * creates the codec and decodes with it.
+  * Returns "" while the decision is postponed and the input is being held, a null
+  * String when no codec could be created, and the decoded text otherwise.
+  */
+ size_t numBufferedBytes = m_numBufferedBytes;
+ size_t buf1Len = numBufferedBytes;
+ size_t buf2Len = length;
+ const unsigned char* buf1 = m_bufferedBytes;
+ const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);
+ unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; // next held-over byte, else next data byte, else 0
+ unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
+ unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
+ unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; // read from data only
+ // Start from the configured encoding; a recognised mark below replaces it.
+ const TextEncoding* encodingConsideringBOM = &m_encoding;
+ bool foundBOM = true;
+ if (c1 == 0xFF && c2 == 0xFE) { // FF FE is the UTF-16LE mark and also the start of the UTF-32LE mark FF FE 00 00
+ if (c3 != 0 || c4 != 0)
+ encodingConsideringBOM = &UTF16LittleEndianEncoding();
+ else if (numBufferedBytes + length > sizeof(m_bufferedBytes)) // c3 and c4 are zero and at least four bytes have been seen
+ encodingConsideringBOM = &UTF32LittleEndianEncoding();
+ else
+ foundBOM = false; // too few bytes so far to tell the two apart
+ }
+ else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF)
+ encodingConsideringBOM = &UTF8Encoding();
+ else if (c1 == 0xFE && c2 == 0xFF)
+ encodingConsideringBOM = &UTF16BigEndianEncoding();
+ else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF)
+ encodingConsideringBOM = &UTF32BigEndianEncoding();
+ else
+ foundBOM = false;
+ if (!foundBOM && numBufferedBytes + length <= sizeof(m_bufferedBytes) && !flush) {
+ // Continue to look for the BOM. Everything seen so far still fits in m_bufferedBytes, so hold it and wait.
+ memcpy(&m_bufferedBytes[numBufferedBytes], data, length);
+ m_numBufferedBytes += length;
+ return "";
+ }
+
+ // Done checking for BOM.
+ m_codec.set(newTextCodec(*encodingConsideringBOM).release());
+ if (!m_codec)
+ return String(); // m_checkedForBOM is still false, so the next decode() call comes back here
+ m_checkedForBOM = true;
+
+ // Handle case where we have some buffered bytes to deal with.
+ if (numBufferedBytes) {
+ char bufferedBytes[sizeof(m_bufferedBytes)];
+ memcpy(bufferedBytes, m_bufferedBytes, numBufferedBytes);
+ m_numBufferedBytes = 0;
+ return m_codec->decode(bufferedBytes, numBufferedBytes, false) // held-over bytes go through the codec first, never flushed
+ + m_codec->decode(data, length, flush);
+ }
+
+ return m_codec->decode(data, length, flush);
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/TextDecoder.h b/WebCore/platform/text/TextDecoder.h
new file mode 100644
index 0000000..3892032
--- /dev/null
+++ b/WebCore/platform/text/TextDecoder.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextDecoder_h
+#define TextDecoder_h
+
+#include "PlatformString.h"
+#include "TextCodec.h"
+#include "TextEncoding.h"
+#include <wtf/OwnPtr.h>
+
+namespace WebCore {
+
+ class TextCodec;
+
+ class TextDecoder { // decodes a byte stream to String; the codec is chosen by a byte-order-mark check on the first bytes
+ public:
+ TextDecoder(const TextEncoding&);
+ void reset(const TextEncoding&); // new encoding, codec dropped, held-over bytes dropped, BOM check re-armed
+ const TextEncoding& encoding() const { return m_encoding; }; // the configured encoding; a detected BOM does not change it
+ // Until checkForBOM() has installed a codec every call goes through it; afterwards the codec is called directly.
+ String decode(const char* data, size_t length, bool flush = false)
+ {
+ if (!m_checkedForBOM)
+ return checkForBOM(data, length, flush);
+ return m_codec->decode(data, length, flush);
+ }
+
+ private:
+ String checkForBOM(const char*, size_t length, bool flush);
+
+ TextEncoding m_encoding; // used when no byte order mark is recognised
+ OwnPtr<TextCodec> m_codec; // null until checkForBOM() succeeds, and again after reset()
+
+ bool m_checkedForBOM; // set by checkForBOM() once a codec has been installed
+ unsigned char m_numBufferedBytes; // number of valid bytes in m_bufferedBytes
+ unsigned char m_bufferedBytes[3]; // input held while the BOM decision is pending; only filled while the total is <= 3 bytes
+ };
+
+} // namespace WebCore
+
+#endif // TextDecoder_h
diff --git a/WebCore/platform/text/TextDirection.h b/WebCore/platform/text/TextDirection.h
new file mode 100644
index 0000000..5be416e
--- /dev/null
+++ b/WebCore/platform/text/TextDirection.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextDirection_h
+#define TextDirection_h
+
+namespace WebCore {
+
+ enum TextDirection { RTL, LTR }; // enumerator order gives RTL == 0 and LTR == 1
+
+}
+
+#endif
diff --git a/WebCore/platform/text/TextEncoding.cpp b/WebCore/platform/text/TextEncoding.cpp
new file mode 100644
index 0000000..c7676e9
--- /dev/null
+++ b/WebCore/platform/text/TextEncoding.cpp
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextEncoding.h"
+
+#include "CString.h"
+#include "PlatformString.h"
+#include "TextCodec.h"
+#include "TextDecoder.h"
+#include "TextEncodingRegistry.h"
+#if USE(ICU_UNICODE)
+#include <unicode/unorm.h>
+#elif USE(QT4_UNICODE)
+#include <QString>
+#endif
+#include <wtf/HashSet.h>
+#include <wtf/OwnPtr.h>
+
+namespace WebCore {
+
+// Inserts the canonical (atomic) form of |name| into |set|. Names for which
+// the registry returns no canonical name are silently ignored.
+static void addEncodingName(HashSet<const char*>& set, const char* name)
+{
+    if (const char* canonicalName = atomicCanonicalTextEncodingName(name))
+        set.add(canonicalName);
+}
+
+TextEncoding::TextEncoding(const char* name) // Looks |name| up in the encoding registry.
+ : m_name(atomicCanonicalTextEncodingName(name)) // 0 when |name| is null, empty or not a registered name
+{
+}
+
+TextEncoding::TextEncoding(const String& name) // Same lookup as the const char* overload, starting from UTF-16 characters.
+ : m_name(atomicCanonicalTextEncodingName(name.characters(), name.length())) // 0 when no registered name matches
+{
+}
+
+// Decodes |length| bytes starting at |data| with a TextDecoder created for
+// this encoding. An invalid encoding yields a default-constructed String.
+String TextEncoding::decode(const char* data, size_t length) const
+{
+    if (m_name) {
+        TextDecoder decoder(*this);
+        return decoder.decode(data, length, true);
+    }
+
+    return String();
+}
+
+CString TextEncoding::encode(const UChar* characters, size_t length, bool allowEntities) const // Normalizes the input to NFC, then encodes it with a codec created for this encoding.
+{
+ if (!m_name) // invalid encoding: return a default-constructed CString
+ return CString();
+
+ if (!length) // empty input: return an empty C string without creating a codec
+ return "";
+
+#if USE(ICU_UNICODE)
+ // FIXME: What's the right place to do normalization?
+ // It's a little strange to do it inside the encode function.
+ // Perhaps normalization should be an explicit step done before calling encode.
+
+ const UChar* source = characters; // source/sourceLength are redirected to the normalized copy below when one is made
+ size_t sourceLength = length;
+
+ Vector<UChar> normalizedCharacters;
+
+ UErrorCode err = U_ZERO_ERROR;
+ if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) { // any answer other than a definite "already NFC" takes the normalizing path
+ // First try using the length of the original string, since normalization to NFC rarely increases length.
+ normalizedCharacters.grow(sourceLength);
+ int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);
+ if (err == U_BUFFER_OVERFLOW_ERROR) { // first buffer was too small; the returned length is used as the size for the retry
+ err = U_ZERO_ERROR; // reset before the retry so the second call starts from a clean error state
+ normalizedCharacters.resize(normalizedLength);
+ normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
+ }
+ ASSERT(U_SUCCESS(err));
+
+ source = normalizedCharacters.data();
+ sourceLength = normalizedLength;
+ }
+ return newTextCodec(*this)->encode(source, sourceLength, allowEntities);
+#elif USE(QT4_UNICODE)
+ QString str(reinterpret_cast<const QChar*>(characters), length); // Qt path: copy into a QString and let Qt do the NFC normalization
+ str = str.normalized(QString::NormalizationForm_C);
+ return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), allowEntities);
+#endif // NOTE(review): there is no #else branch, so a build with neither USE(ICU_UNICODE) nor USE(QT4_UNICODE) reaches the end of the function without a return.
+}
+
+// Reports whether this encoding is the one registered under "ISO-8859-8".
+bool TextEncoding::usesVisualOrdering() const
+{
+    // The guard must come before the lookup: asking the registry for a name
+    // that the base maps do not contain makes it load the extended maps.
+    if (!noExtendedTextEncodingNameUsed()) {
+        // Resolved once; canonical names are compared by pointer.
+        static const char* const visualOrderingName = atomicCanonicalTextEncodingName("ISO-8859-8");
+        return m_name == visualOrderingName;
+    }
+
+    return false;
+}
+
+// Reports whether this encoding's canonical name belongs to a fixed list of
+// Japanese encodings. The lookup set is filled the first time it is needed.
+bool TextEncoding::isJapanese() const
+{
+    if (noExtendedTextEncodingNameUsed())
+        return false;
+
+    static HashSet<const char*> japaneseEncodings;
+    if (japaneseEncodings.isEmpty()) {
+        // Registered in this order; names unknown to the registry are skipped
+        // by addEncodingName.
+        static const char* const names[] = {
+            "x-mac-japanese",
+            "cp932",
+            "JIS_X0201",
+            "JIS_X0208-1983",
+            "JIS_X0208-1990",
+            "JIS_X0212-1990",
+            "JIS_C6226-1978",
+            "Shift_JIS_X0213-2000",
+            "ISO-2022-JP",
+            "ISO-2022-JP-2",
+            "ISO-2022-JP-1",
+            "ISO-2022-JP-3",
+            "EUC-JP",
+            "Shift_JIS"
+        };
+        for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); ++i)
+            addEncodingName(japaneseEncodings, names[i]);
+    }
+
+    return m_name && japaneseEncodings.contains(m_name);
+}
+
+// Returns the character to show for a backslash in this encoding: 0x00A5 for
+// the two encodings named below, a plain backslash for every other encoding.
+UChar TextEncoding::backslashAsCurrencySymbol() const
+{
+    const UChar backslash = '\\';
+    if (noExtendedTextEncodingNameUsed())
+        return backslash;
+
+    // The text encodings below treat backslash as a currency symbol.
+    // See http://blogs.msdn.com/michkap/archive/2005/09/17/469941.aspx for more information.
+    static const char* const shiftJISName = atomicCanonicalTextEncodingName("Shift_JIS_X0213-2000");
+    static const char* const eucJPName = atomicCanonicalTextEncodingName("EUC-JP");
+    if (m_name == shiftJISName || m_name == eucJPName)
+        return 0x00A5;
+    return backslash;
+}
+
+// Maps either UTF-16 encoding to UTF-8; every other encoding is returned
+// unchanged.
+const TextEncoding& TextEncoding::closest8BitEquivalent() const
+{
+    const bool isUTF16 = *this == UTF16BigEndianEncoding() || *this == UTF16LittleEndianEncoding();
+    return isUTF16 ? UTF8Encoding() : *this;
+}
+
+// Shared TextEncoding for the name "ASCII", constructed on first use.
+const TextEncoding& ASCIIEncoding()
+{
+    static TextEncoding encoding("ASCII");
+    return encoding;
+}
+
+// Shared TextEncoding for the name "Latin-1", constructed on first use.
+const TextEncoding& Latin1Encoding()
+{
+    static TextEncoding encoding("Latin-1");
+    return encoding;
+}
+
+// Shared TextEncoding for the name "UTF-16BE", constructed on first use.
+const TextEncoding& UTF16BigEndianEncoding()
+{
+    static TextEncoding encoding("UTF-16BE");
+    return encoding;
+}
+
+// Shared TextEncoding for the name "UTF-16LE", constructed on first use.
+const TextEncoding& UTF16LittleEndianEncoding()
+{
+    static TextEncoding encoding("UTF-16LE");
+    return encoding;
+}
+
+// Shared TextEncoding for the name "UTF-32BE", constructed on first use.
+const TextEncoding& UTF32BigEndianEncoding()
+{
+    static TextEncoding encoding("UTF-32BE");
+    return encoding;
+}
+
+// Shared TextEncoding for the name "UTF-32LE", constructed on first use.
+const TextEncoding& UTF32LittleEndianEncoding()
+{
+    static TextEncoding encoding("UTF-32LE");
+    return encoding;
+}
+
+
+// Shared TextEncoding for the name "UTF-8", constructed on first use.
+const TextEncoding& UTF8Encoding()
+{
+    static TextEncoding encoding("UTF-8");
+    return encoding;
+}
+
+// Shared TextEncoding for the name "WinLatin-1", constructed on first use.
+const TextEncoding& WindowsLatin1Encoding()
+{
+    static TextEncoding encoding("WinLatin-1");
+    return encoding;
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/TextEncoding.h b/WebCore/platform/text/TextEncoding.h
new file mode 100644
index 0000000..59d225c
--- /dev/null
+++ b/WebCore/platform/text/TextEncoding.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextEncoding_h
+#define TextEncoding_h
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+ class CString;
+ class String;
+
+ class TextEncoding { // Identifies a text encoding by its canonical registry name and offers decode/encode entry points.
+ public:
+ TextEncoding() : m_name(0) { } // creates an invalid encoding (null name)
+ TextEncoding(const char* name); // resolves |name| through atomicCanonicalTextEncodingName
+ TextEncoding(const String& name); // same lookup, starting from a String
+
+ bool isValid() const { return m_name; } // true when a canonical name was found
+ const char* name() const { return m_name; } // canonical name, or 0 when invalid
+ bool usesVisualOrdering() const;
+ bool isJapanese() const;
+ UChar backslashAsCurrencySymbol() const;
+ const TextEncoding& closest8BitEquivalent() const;
+
+ String decode(const char*, size_t length) const;
+ CString encode(const UChar*, size_t length, bool allowEntities = false) const;
+
+ private:
+ const char* m_name; // name pointer handed out by the registry; operator== compares it by address
+ };
+
+ inline bool operator==(const TextEncoding& a, const TextEncoding& b) { return a.name() == b.name(); } // compares the name pointers, not the characters they point to
+ inline bool operator!=(const TextEncoding& a, const TextEncoding& b) { return a.name() != b.name(); } // pointer comparison as well
+
+ const TextEncoding& ASCIIEncoding();
+ const TextEncoding& Latin1Encoding();
+ const TextEncoding& UTF16BigEndianEncoding();
+ const TextEncoding& UTF16LittleEndianEncoding();
+ const TextEncoding& UTF32BigEndianEncoding();
+ const TextEncoding& UTF32LittleEndianEncoding();
+ const TextEncoding& UTF8Encoding();
+ const TextEncoding& WindowsLatin1Encoding();
+
+} // namespace WebCore
+
+#endif // TextEncoding_h
diff --git a/WebCore/platform/text/TextEncodingRegistry.cpp b/WebCore/platform/text/TextEncodingRegistry.cpp
new file mode 100644
index 0000000..a7ad879
--- /dev/null
+++ b/WebCore/platform/text/TextEncodingRegistry.cpp
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextEncodingRegistry.h"
+
+#include "PlatformString.h"
+#include "TextCodecLatin1.h"
+#include "TextCodecUserDefined.h"
+#include "TextCodecUTF16.h"
+#include <wtf/ASCIICType.h>
+#include <wtf/Assertions.h>
+#include <wtf/HashMap.h>
+
+#if USE(ICU_UNICODE)
+#include "TextCodecICU.h"
+#endif
+#if PLATFORM(MAC)
+#include "TextCodecMac.h"
+#endif
+#if PLATFORM(QT)
+#include "qt/TextCodecQt.h"
+#endif
+
+using namespace WTF;
+
+namespace WebCore {
+
+const size_t maxEncodingNameLength = 63;
+
+// Hash traits for encoding-name keys. Both equal() and hash() fold ASCII case
+// and skip every character for which isASCIIAlphanumeric() is false, so two
+// spellings that differ only in case or punctuation (for example "UTF-8" and
+// "utf8") are treated as the same key.
+struct TextEncodingNameHash {
+
+    // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
+    // or anything like that.
+    static const unsigned PHI = 0x9e3779b9U;
+
+    // Returns true when both strings produce the same sequence of case-folded
+    // alphanumeric characters.
+    static bool equal(const char* s1, const char* s2)
+    {
+        for (;;) {
+            // Move each cursor to its next alphanumeric character, or to the terminator.
+            while (*s1 && !isASCIIAlphanumeric(*s1))
+                ++s1;
+            while (*s2 && !isASCIIAlphanumeric(*s2))
+                ++s2;
+            if (toASCIILower(*s1) != toASCIILower(*s2))
+                return false;
+            // Stop as soon as either string is exhausted; equal only if both are.
+            if (!*s1 || !*s2)
+                return !*s1 && !*s2;
+            ++s1;
+            ++s2;
+        }
+    }
+
+    // This algorithm is the one-at-a-time hash from:
+    // http://burtleburtle.net/bob/hash/hashfaq.html
+    // http://burtleburtle.net/bob/hash/doobs.html
+    static unsigned hash(const char* s)
+    {
+        unsigned h = PHI;
+        for (char c = *s; c; c = *++s) {
+            if (!isASCIIAlphanumeric(c))
+                continue;
+            // Mixing step, applied once per significant character.
+            h += toASCIILower(c);
+            h += (h << 10);
+            h ^= (h >> 6);
+        }
+        // Final avalanche once the terminator has been reached.
+        h += (h << 3);
+        h ^= (h >> 11);
+        h += (h << 15);
+        return h;
+    }
+
+    static const bool safeToCompareToEmptyOrDeleted = false;
+};
+
+// Pairs a codec-creation function with the opaque data pointer that
+// newTextCodec passes back to that function.
+struct TextCodecFactory {
+    TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0)
+        : function(f)
+        , additionalData(d)
+    {
+    }
+
+    NewTextCodecFunction function;
+    const void* additionalData;
+};
+
+typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
+typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
+
+static TextEncodingNameMap* textEncodingNameMap;
+static TextCodecMap* textCodecMap;
+static bool didExtendTextCodecMaps;
+
+#if ERROR_DISABLED
+
+// Error logging is compiled out, so there is nothing to check.
+static inline void checkExistingName(const char*, const char*) { }
+
+#else
+
+// Logs an error when |alias| is already registered for a canonical name
+// other than |atomicName|.
+static void checkExistingName(const char* alias, const char* atomicName)
+{
+    const char* previousName = textEncodingNameMap->get(alias);
+    if (!previousName || previousName == atomicName)
+        return;
+
+    // Keep the warning silent about one case where we know this will happen.
+    const bool isKnownConflict = !strcmp(alias, "ISO-8859-8-I")
+        && !strcmp(previousName, "ISO-8859-8-I")
+        && !strcmp(atomicName, "ISO_8859-8:1988");
+    if (isKnownConflict)
+        return;
+
+    LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s",
+        alias, previousName, atomicName);
+}
+
+#endif
+
+// Registers |alias| as another spelling of |name|. When |name| is not in the
+// map yet it becomes its own canonical name; the assertion requires
+// alias == name in that case.
+static void addToTextEncodingNameMap(const char* alias, const char* name)
+{
+    ASSERT(strlen(alias) <= maxEncodingNameLength);
+
+    const char* registeredName = textEncodingNameMap->get(name);
+    ASSERT(registeredName || !strcmp(alias, name));
+    const char* atomicName = registeredName ? registeredName : name;
+
+    checkExistingName(alias, atomicName);
+    textEncodingNameMap->add(alias, atomicName);
+}
+
+// Stores the factory for |name| in the codec map, keyed by the canonical
+// name that TextEncoding resolves |name| to.
+static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
+{
+    const TextEncoding encoding(name);
+    ASSERT(encoding.isValid());
+
+    const TextCodecFactory factory(function, additionalData);
+    textCodecMap->add(encoding.name(), factory);
+}
+
+static void buildBaseTextCodecMaps() // Allocates both registry maps and registers the base set of encoding names and codecs.
+{
+ textCodecMap = new TextCodecMap;
+ textEncodingNameMap = new TextEncodingNameMap;
+
+ TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap); // for each codec, names are registered before the codec itself
+ TextCodecLatin1::registerCodecs(addToTextCodecMap);
+
+ TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
+ TextCodecUTF16::registerCodecs(addToTextCodecMap);
+
+ TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
+ TextCodecUserDefined::registerCodecs(addToTextCodecMap);
+
+#if USE(ICU_UNICODE)
+ TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap); // only the ICU "base" subset here; the rest is added by extendTextCodecMaps
+ TextCodecICU::registerBaseCodecs(addToTextCodecMap);
+#endif
+}
+
+static void extendTextCodecMaps() // Adds the extended, build-dependent encoding names and codecs to the existing maps.
+{
+#if USE(ICU_UNICODE)
+ TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap);
+ TextCodecICU::registerExtendedCodecs(addToTextCodecMap);
+#endif
+
+#if USE(QT4_UNICODE)
+ TextCodecQt::registerEncodingNames(addToTextEncodingNameMap);
+ TextCodecQt::registerCodecs(addToTextCodecMap);
+#endif
+
+#if PLATFORM(MAC)
+ TextCodecMac::registerEncodingNames(addToTextEncodingNameMap);
+ TextCodecMac::registerCodecs(addToTextCodecMap);
+#endif
+}
+
+// Creates a codec for |encoding| by calling the factory function registered
+// under the encoding's canonical name.
+std::auto_ptr<TextCodec> newTextCodec(const TextEncoding& encoding)
+{
+    ASSERT(textCodecMap);
+
+    const TextCodecFactory codecFactory = textCodecMap->get(encoding.name());
+    ASSERT(codecFactory.function);
+
+    return codecFactory.function(encoding, codecFactory.additionalData);
+}
+
+// Returns the canonical name registered for |name|, or 0 when |name| is null,
+// empty or unknown. The base maps are built on first use; the extended maps
+// are loaded at most once, by the first lookup the base maps cannot answer.
+const char* atomicCanonicalTextEncodingName(const char* name)
+{
+    if (!name || !*name)
+        return 0;
+
+    if (!textEncodingNameMap)
+        buildBaseTextCodecMaps();
+
+    const char* atomicName = textEncodingNameMap->get(name);
+    // Either the name was found, or the extended maps were already consulted
+    // and the null result is final.
+    if (atomicName || didExtendTextCodecMaps)
+        return atomicName;
+
+    extendTextCodecMaps();
+    didExtendTextCodecMaps = true;
+    return textEncodingNameMap->get(name);
+}
+
+// UTF-16 overload: copies the ASCII alphanumeric characters of the input
+// into a small stack buffer and looks that up. Returns 0 when more than
+// maxEncodingNameLength such characters are present.
+const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length)
+{
+    char asciiName[maxEncodingNameLength + 1];
+    size_t used = 0;
+
+    for (size_t i = 0; i < length; ++i) {
+        const UChar c = characters[i];
+        if (!isASCIIAlphanumeric(c))
+            continue;
+        // Leave room for the terminator written after the loop.
+        if (used == maxEncodingNameLength)
+            return 0;
+        asciiName[used++] = c;
+    }
+    asciiName[used] = 0;
+
+    return atomicCanonicalTextEncodingName(asciiName);
+}
+
+bool noExtendedTextEncodingNameUsed() // True as long as no name lookup has had to load the extended maps.
+{
+ return !didExtendTextCodecMaps;
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/TextEncodingRegistry.h b/WebCore/platform/text/TextEncodingRegistry.h
new file mode 100644
index 0000000..5ca2039
--- /dev/null
+++ b/WebCore/platform/text/TextEncodingRegistry.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextEncodingRegistry_h
+#define TextEncodingRegistry_h
+
+#include <memory>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+ class TextCodec;
+ class TextEncoding;
+
+ // Only TextEncoding and TextDecoder should use this function directly.
+ // - Use TextDecoder::decode to decode, since it handles BOMs.
+ // - Use TextEncoding::decode to decode if you have all the data at once.
+ // It's implemented by calling TextDecoder::decode so works just as well.
+ // - Use TextEncoding::encode to encode, since it takes care of normalization.
+ std::auto_ptr<TextCodec> newTextCodec(const TextEncoding&);
+
+ // Only TextEncoding should use this function directly.
+ const char* atomicCanonicalTextEncodingName(const char* alias);
+ const char* atomicCanonicalTextEncodingName(const UChar* aliasCharacters, size_t aliasLength);
+
+ // Only TextEncoding should use this function directly.
+ bool noExtendedTextEncodingNameUsed();
+
+}
+
+#endif // TextEncodingRegistry_h
diff --git a/WebCore/platform/text/TextStream.cpp b/WebCore/platform/text/TextStream.cpp
new file mode 100644
index 0000000..5aafbc0
--- /dev/null
+++ b/WebCore/platform/text/TextStream.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2004, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextStream.h"
+
+#include "PlatformString.h"
+#include <wtf/StringExtras.h>
+
+namespace WebCore {
+
+static const size_t printBufferSize = 100; // large enough for any integer or float value in string format, including trailing null character; NOTE(review): a double printed with "%.2f" can need up to 314 bytes
+
+// Appends the decimal text of a signed int.
+TextStream& TextStream::operator<<(int i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, sizeof(formatted) - 1, "%d", i);
+    *this << formatted;
+    return *this;
+}
+
+// Appends the decimal text of an unsigned int.
+TextStream& TextStream::operator<<(unsigned i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, sizeof(formatted) - 1, "%u", i);
+    *this << formatted;
+    return *this;
+}
+
+// Appends the decimal text of a signed long.
+TextStream& TextStream::operator<<(long i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, sizeof(formatted) - 1, "%ld", i);
+    *this << formatted;
+    return *this;
+}
+
+// Appends the decimal text of an unsigned long.
+TextStream& TextStream::operator<<(unsigned long i)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, sizeof(formatted) - 1, "%lu", i);
+    *this << formatted;
+    return *this;
+}
+
+// Appends a float formatted with two digits after the decimal point.
+TextStream& TextStream::operator<<(float f)
+{
+    char formatted[printBufferSize];
+    snprintf(formatted, sizeof(formatted) - 1, "%.2f", f);
+    *this << formatted;
+    return *this;
+}
+
+// Appends a double formatted with two digits after the decimal point.
+TextStream& TextStream::operator<<(double d)
+{
+    // "%.2f" prints every integer digit, so the largest finite double needs
+    // 1 sign + 309 digits + ".00" + terminator = 314 bytes. The shared
+    // 100-byte print buffer would silently cut such a number short, so this
+    // overload uses its own, larger buffer.
+    char buffer[320];
+    snprintf(buffer, sizeof(buffer) - 1, "%.2f", d);
+    return *this << buffer;
+}
+
+// Appends a NUL-terminated 8-bit string, widening each byte to one UChar.
+// |string| must not be null: it is passed straight to strlen.
+TextStream& TextStream::operator<<(const char* string)
+{
+    size_t stringLength = strlen(string);
+    size_t textLength = m_text.size();
+    m_text.grow(textLength + stringLength);
+    for (size_t i = 0; i < stringLength; ++i) {
+        // Widen through unsigned char so that bytes >= 0x80 become
+        // U+0080..U+00FF instead of being sign-extended to U+FF80..U+FFFF
+        // on platforms where plain char is signed.
+        m_text[textLength + i] = static_cast<unsigned char>(string[i]);
+    }
+    return *this;
+}
+
+TextStream& TextStream::operator<<(const String& string) // Adds |string| to the accumulated text through the append() helper.
+{
+ append(m_text, string);
+ return *this;
+}
+
+String TextStream::release() // Returns the accumulated text as a String built by String::adopt(m_text).
+{
+ return String::adopt(m_text); // NOTE(review): presumably adopt() takes over the buffer and leaves m_text empty - confirm in String
+}
+
+}
diff --git a/WebCore/platform/text/TextStream.h b/WebCore/platform/text/TextStream.h
new file mode 100644
index 0000000..b4b801c
--- /dev/null
+++ b/WebCore/platform/text/TextStream.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2004, 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextStream_h
+#define TextStream_h
+
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+class String;
+
+class TextStream { // Accumulates text from numbers and strings via operator<<; release() returns the result as a String.
+public:
+ TextStream& operator<<(int); // integers are appended in decimal
+ TextStream& operator<<(unsigned);
+ TextStream& operator<<(long);
+ TextStream& operator<<(unsigned long);
+ TextStream& operator<<(float); // float and double are appended with two digits after the decimal point
+ TextStream& operator<<(double);
+ TextStream& operator<<(const char*); // NUL-terminated string; must not be null
+ TextStream& operator<<(const String&);
+
+ String release();
+
+private:
+ Vector<UChar> m_text; // the text accumulated so far
+};
+
+}
+
+#endif
diff --git a/WebCore/platform/text/UnicodeRange.cpp b/WebCore/platform/text/UnicodeRange.cpp
new file mode 100644
index 0000000..0373441
--- /dev/null
+++ b/WebCore/platform/text/UnicodeRange.cpp
@@ -0,0 +1,462 @@
+/*
+ * Copyright (C) 2007 Apple Computer, Inc.
+ *
+ * Portions are Copyright (C) 1998 Netscape Communications Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Alternatively, the contents of this file may be used under the terms
+ * of either the Mozilla Public License Version 1.1, found at
+ * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
+ * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
+ * (the "GPL"), in which case the provisions of the MPL or the GPL are
+ * applicable instead of those above. If you wish to allow use of your
+ * version of this file only under the terms of one of those two
+ * licenses (the MPL or the GPL) and not to allow others to use your
+ * version of this file under the LGPL, indicate your decision by
+ * deletingthe provisions above and replace them with the notice and
+ * other provisions required by the MPL or the GPL, as the case may be.
+ * If you do not delete the provisions above, a recipient may use your
+ * version of this file under any of the LGPL, the MPL or the GPL.
+ */
+
+#include "config.h"
+#include "UnicodeRange.h"
+
+namespace WebCore {
+
+// Language-group name for each unicode range value.
+// Each item's index must correspond to a unicode range value, so this table
+// depends on the unicode range definitions, e.g. x-cyrillic = LangGroupTable[cRangeCyrillic]
+static const char* gUnicodeRangeToLangGroupTable[] =
+{
+ "x-cyrillic",
+ "el",
+ "tr",
+ "he",
+ "ar",
+ "x-baltic",
+ "th",
+ "ko",
+ "ja",
+ "zh-CN",
+ "zh-TW",
+ "x-devanagari",
+ "x-tamil",
+ "x-armn",
+ "x-beng",
+ "x-cans",
+ "x-ethi",
+ "x-geor",
+ "x-gujr",
+ "x-guru",
+ "x-khmr",
+ "x-mlym"
+};
+
+/**********************************************************************
+ * Unicode subranges as defined in unicode 3.0
+ * x-western, x-central-euro, tr, x-baltic -> latin
+ * 0000 - 036f
+ * 1e00 - 1eff
+ * 2000 - 206f (general punctuation)
+ * 20a0 - 20cf (currency symbols)
+ * 2100 - 214f (letterlike symbols)
+ * 2150 - 218f (Number Forms)
+ * el -> greek
+ * 0370 - 03ff
+ * 1f00 - 1fff
+ * x-cyrillic -> cyrillic
+ * 0400 - 04ff
+ * he -> hebrew
+ * 0590 - 05ff
+ * ar -> arabic
+ * 0600 - 06ff
+ * fb50 - fdff (arabic presentation forms)
+ * fe70 - feff (arabic presentation forms b)
+ * th - thai
+ * 0e00 - 0e7f
+ * ko -> korean
+ * ac00 - d7af (hangul Syllables)
+ * 1100 - 11ff (jamo)
+ * 3130 - 318f (hangul compatibility jamo)
+ * ja
+ * 3040 - 309f (hiragana)
+ * 30a0 - 30ff (katakana)
+ * zh-CN
+ * zh-TW
+ *
+ * CJK
+ * 3100 - 312f (bopomofo)
+ * 31a0 - 31bf (bopomofo extended)
+ * 3000 - 303f (CJK Symbols and Punctuation)
+ * 2e80 - 2eff (CJK radicals supplement)
+ * 2f00 - 2fdf (Kangxi Radicals)
+ * 2ff0 - 2fff (Ideographic Description Characters)
+ * 3190 - 319f (kanbun)
+ * 3200 - 32ff (Enclosed CJK letters and Months)
+ * 3300 - 33ff (CJK compatibility)
+ * 3400 - 4dbf (CJK Unified Ideographs Extension A)
+ * 4e00 - 9faf (CJK Unified Ideographs)
+ * f900 - fa5f (CJK Compatibility Ideographs)
+ * fe30 - fe4f (CJK compatibility Forms)
+ * ff00 - ffef (halfwidth and fullwidth forms)
+ *
+ * Armenian
+ * 0530 - 058f
+ * Syriac
+ * 0700 - 074f
+ * Thaana
+ * 0780 - 07bf
+ * Devanagari
+ * 0900 - 097f
+ * Bengali
+ * 0980 - 09ff
+ * Gurmukhi
+ * 0a00 - 0a7f
+ * Gujarati
+ * 0a80 - 0aff
+ * Oriya
+ * 0b00 - 0b7f
+ * Tamil
+ * 0b80 - 0bff
+ * Telugu
+ * 0c00 - 0c7f
+ * Kannada
+ * 0c80 - 0cff
+ * Malayalam
+ * 0d00 - 0d7f
+ * Sinhala
+ * 0d80 - 0def
+ * Lao
+ * 0e80 - 0eff
+ * Tibetan
+ * 0f00 - 0fbf
+ * Myanmar
+ * 1000 - 109f
+ * Georgian
+ * 10a0 - 10ff
+ * Ethiopic
+ * 1200 - 137f
+ * Cherokee
+ * 13a0 - 13ff
+ * Canadian Aboriginal Syllabics
+ * 1400 - 167f
+ * Ogham
+ * 1680 - 169f
+ * Runic
+ * 16a0 - 16ff
+ * Khmer
+ * 1780 - 17ff
+ * Mongolian
+ * 1800 - 18af
+ * Misc - superscripts and subscripts
+ * 2070 - 209f
+ * Misc - Combining Diacritical Marks for Symbols
+ * 20d0 - 20ff
+ * Misc - Arrows
+ * 2190 - 21ff
+ * Misc - Mathematical Operators
+ * 2200 - 22ff
+ * Misc - Miscellaneous Technical
+ * 2300 - 23ff
+ * Misc - Control picture
+ * 2400 - 243f
+ * Misc - Optical character recognition
+ * 2440 - 2450
+ * Misc - Enclosed Alphanumerics
+ * 2460 - 24ff
+ * Misc - Box Drawing
+ * 2500 - 257f
+ * Misc - Block Elements
+ * 2580 - 259f
+ * Misc - Geometric Shapes
+ * 25a0 - 25ff
+ * Misc - Miscellaneous Symbols
+ * 2600 - 267f
+ * Misc - Dingbats
+ * 2700 - 27bf
+ * Misc - Braille Patterns
+ * 2800 - 28ff
+ * Yi Syllables
+ * a000 - a48f
+ * Yi radicals
+ * a490 - a4cf
+ * Alphabetic Presentation Forms
+ * fb00 - fb4f
+ * Misc - Combining half Marks
+ * fe20 - fe2f
+ * Misc - small form variants
+ * fe50 - fe6f
+ * Misc - Specials
+ * fff0 - ffff
+ *********************************************************************/
+
+static const unsigned cNumSubTables = 9;
+static const unsigned cSubTableSize = 16;
+
+static const unsigned char gUnicodeSubrangeTable[cNumSubTables][cSubTableSize] = // an entry of cRangeTableBase+k redirects the lookup to row k
+{
+ { // row 0: table for X--- (indexed by the top hex digit of the code point)
+ cRangeTableBase+1, //u0xxx
+ cRangeTableBase+2, //u1xxx
+ cRangeTableBase+3, //u2xxx
+ cRangeSetCJK, //u3xxx
+ cRangeSetCJK, //u4xxx
+ cRangeSetCJK, //u5xxx
+ cRangeSetCJK, //u6xxx
+ cRangeSetCJK, //u7xxx
+ cRangeSetCJK, //u8xxx
+ cRangeSetCJK, //u9xxx
+ cRangeTableBase+4, //uaxxx
+ cRangeKorean, //ubxxx
+ cRangeKorean, //ucxxx
+ cRangeTableBase+5, //udxxx
+ cRangePrivate, //uexxx
+ cRangeTableBase+6 //ufxxx
+ },
+ { //row 1: table for 0X-- (indexed by the second hex digit)
+ cRangeSetLatin, //u00xx
+ cRangeSetLatin, //u01xx
+ cRangeSetLatin, //u02xx
+ cRangeGreek, //u03xx XXX 0300-036f is in fact cRangeCombiningDiacriticalMarks
+ cRangeCyrillic, //u04xx
+ cRangeTableBase+7, //u05xx, includes Cyrillic supplement, Hebrew, and Armenian
+ cRangeArabic, //u06xx
+ cRangeTertiaryTable, //u07xx
+ cRangeUnassigned, //u08xx
+ cRangeTertiaryTable, //u09xx
+ cRangeTertiaryTable, //u0axx
+ cRangeTertiaryTable, //u0bxx
+ cRangeTertiaryTable, //u0cxx
+ cRangeTertiaryTable, //u0dxx
+ cRangeTertiaryTable, //u0exx
+ cRangeTibetan, //u0fxx
+ },
+ { //row 2: table for 1x--
+ cRangeTertiaryTable, //u10xx
+ cRangeKorean, //u11xx
+ cRangeEthiopic, //u12xx
+ cRangeTertiaryTable, //u13xx
+ cRangeCanadian, //u14xx
+ cRangeCanadian, //u15xx
+ cRangeTertiaryTable, //u16xx
+ cRangeKhmer, //u17xx
+ cRangeMongolian, //u18xx
+ cRangeUnassigned, //u19xx
+ cRangeUnassigned, //u1axx
+ cRangeUnassigned, //u1bxx
+ cRangeUnassigned, //u1cxx
+ cRangeUnassigned, //u1dxx
+ cRangeSetLatin, //u1exx
+ cRangeGreek, //u1fxx
+ },
+ { //row 3: table for 2x--
+ cRangeSetLatin, //u20xx
+ cRangeSetLatin, //u21xx
+ cRangeMathOperators, //u22xx
+ cRangeMiscTechnical, //u23xx
+ cRangeControlOpticalEnclose, //u24xx
+ cRangeBoxBlockGeometrics, //u25xx
+ cRangeMiscSymbols, //u26xx
+ cRangeDingbats, //u27xx
+ cRangeBraillePattern, //u28xx
+ cRangeUnassigned, //u29xx
+ cRangeUnassigned, //u2axx
+ cRangeUnassigned, //u2bxx
+ cRangeUnassigned, //u2cxx
+ cRangeUnassigned, //u2dxx
+ cRangeSetCJK, //u2exx
+ cRangeSetCJK, //u2fxx
+ },
+ { //row 4: table for ax--
+ cRangeYi, //ua0xx
+ cRangeYi, //ua1xx
+ cRangeYi, //ua2xx
+ cRangeYi, //ua3xx
+ cRangeYi, //ua4xx
+ cRangeUnassigned, //ua5xx
+ cRangeUnassigned, //ua6xx
+ cRangeUnassigned, //ua7xx
+ cRangeUnassigned, //ua8xx
+ cRangeUnassigned, //ua9xx
+ cRangeUnassigned, //uaaxx
+ cRangeUnassigned, //uabxx
+ cRangeKorean, //uacxx
+ cRangeKorean, //uadxx
+ cRangeKorean, //uaexx
+ cRangeKorean, //uafxx
+ },
+ { //row 5: table for dx--
+ cRangeKorean, //ud0xx
+ cRangeKorean, //ud1xx
+ cRangeKorean, //ud2xx
+ cRangeKorean, //ud3xx
+ cRangeKorean, //ud4xx
+ cRangeKorean, //ud5xx
+ cRangeKorean, //ud6xx
+ cRangeKorean, //ud7xx
+ cRangeSurrogate, //ud8xx
+ cRangeSurrogate, //ud9xx
+ cRangeSurrogate, //udaxx
+ cRangeSurrogate, //udbxx
+ cRangeSurrogate, //udcxx
+ cRangeSurrogate, //uddxx
+ cRangeSurrogate, //udexx
+ cRangeSurrogate, //udfxx
+ },
+ { // row 6: table for fx--
+ cRangePrivate, //uf0xx
+ cRangePrivate, //uf1xx
+ cRangePrivate, //uf2xx
+ cRangePrivate, //uf3xx
+ cRangePrivate, //uf4xx
+ cRangePrivate, //uf5xx
+ cRangePrivate, //uf6xx
+ cRangePrivate, //uf7xx
+ cRangePrivate, //uf8xx
+ cRangeSetCJK, //uf9xx
+ cRangeSetCJK, //ufaxx
+ cRangeArabic, //ufbxx, includes alphabetic presentation forms
+ cRangeArabic, //ufcxx
+ cRangeArabic, //ufdxx
+ cRangeArabic, //ufexx, includes Combining half marks,
+ // CJK compatibility forms,
+ // Arabic presentation forms-B,
+ // small form variants
+ cRangeTableBase+8, //uffxx, halfwidth and fullwidth forms, includes Specials
+ },
+ { //row 7: table for 0x0500 - 0x05ff (indexed by the third hex digit)
+ cRangeCyrillic, //u050x
+ cRangeCyrillic, //u051x
+ cRangeCyrillic, //u052x
+ cRangeArmenian, //u053x
+ cRangeArmenian, //u054x
+ cRangeArmenian, //u055x
+ cRangeArmenian, //u056x
+ cRangeArmenian, //u057x
+ cRangeArmenian, //u058x
+ cRangeHebrew, //u059x
+ cRangeHebrew, //u05ax
+ cRangeHebrew, //u05bx
+ cRangeHebrew, //u05cx
+ cRangeHebrew, //u05dx
+ cRangeHebrew, //u05ex
+ cRangeHebrew, //u05fx
+ },
+ { //row 8: table for 0xff00 - 0xffff (indexed by the third hex digit)
+ cRangeSetCJK, //uff0x, fullwidth latin
+ cRangeSetCJK, //uff1x, fullwidth latin
+ cRangeSetCJK, //uff2x, fullwidth latin
+ cRangeSetCJK, //uff3x, fullwidth latin
+ cRangeSetCJK, //uff4x, fullwidth latin
+ cRangeSetCJK, //uff5x, fullwidth latin
+ cRangeSetCJK, //uff6x, halfwidth katakana
+ cRangeSetCJK, //uff7x, halfwidth katakana
+ cRangeSetCJK, //uff8x, halfwidth katakana
+ cRangeSetCJK, //uff9x, halfwidth katakana
+ cRangeSetCJK, //uffax, halfwidth hangul jamo
+ cRangeSetCJK, //uffbx, halfwidth hangul jamo
+ cRangeSetCJK, //uffcx, halfwidth hangul jamo
+ cRangeSetCJK, //uffdx, halfwidth hangul jamo
+ cRangeSetCJK, //uffex, fullwidth symbols
+ cRangeSpecials, //ufffx, Specials
+ },
+};
+
+// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
+// code points so that the number of entries in the tertiary range
+// table for that range is obtained by dividing (0x1700 - 0x0700) by 128.
+// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal
+// syllabaries take multiple chunks and Ogham and Runic share a single chunk.
+static const unsigned cTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);
+
+static const unsigned char gUnicodeTertiaryRangeTable[cTertiaryTableSize] =
+{ //table for 0x0700 - 0x16ff, one entry per 0x80 code points; each comment gives the first code point of the chunk
+ cRangeSyriac, //u070x
+ cRangeThaana, //u078x
+ cRangeUnassigned, //u080x place holder(resolved in the 2ndary tab.)
+ cRangeUnassigned, //u088x place holder(resolved in the 2ndary tab.)
+ cRangeDevanagari, //u090x
+ cRangeBengali, //u098x
+ cRangeGurmukhi, //u0a0x
+ cRangeGujarati, //u0a8x
+ cRangeOriya, //u0b0x
+ cRangeTamil, //u0b8x
+ cRangeTelugu, //u0c0x
+ cRangeKannada, //u0c8x
+ cRangeMalayalam, //u0d0x
+ cRangeSinhala, //u0d8x
+ cRangeThai, //u0e0x
+ cRangeLao, //u0e8x
+ cRangeTibetan, //u0f0x place holder(resolved in the 2ndary tab.)
+ cRangeTibetan, //u0f8x place holder(resolved in the 2ndary tab.)
+ cRangeMyanmar, //u100x
+ cRangeGeorgian, //u108x
+ cRangeKorean, //u110x place holder(resolved in the 2ndary tab.)
+ cRangeKorean, //u118x place holder(resolved in the 2ndary tab.)
+ cRangeEthiopic, //u120x place holder(resolved in the 2ndary tab.)
+ cRangeEthiopic, //u128x place holder(resolved in the 2ndary tab.)
+ cRangeEthiopic, //u130x
+ cRangeCherokee, //u138x
+ cRangeCanadian, //u140x place holder(resolved in the 2ndary tab.)
+ cRangeCanadian, //u148x place holder(resolved in the 2ndary tab.)
+ cRangeCanadian, //u150x place holder(resolved in the 2ndary tab.)
+ cRangeCanadian, //u158x place holder(resolved in the 2ndary tab.)
+ cRangeCanadian, //u160x
+ cRangeOghamRunic, //u168x this contains two scripts, Ogham & Runic
+};
+
+// Returns the cRange* constant for ch. A two level index is almost enough
+// for locating a range, with the exception of u03xx and u05xx. Since we don't
+// care about the range for combining diacritical marks in our font application,
+// u03xx is not discriminated further; u05xx gets a third-level sub-table.
+// For Indic, Southeast Asian and some other scripts between U+0700 and U+16FF
+// the lookup is extended to gUnicodeTertiaryRangeTable (128 code point blocks).
+// Anything outside 0..0xFFFE (negative values included) returns 0, which is
+// numerically cRangeCyrillic; callers should only pass BMP code points.
+unsigned int findCharUnicodeRange(UChar32 ch)
+{
+ if (static_cast<unsigned>(ch) >= 0xFFFF) // unsigned compare also rejects a negative ch, which would index out of bounds below
+ return 0;
+
+ unsigned int range;
+
+ //search the first table
+ range = gUnicodeSubrangeTable[0][ch >> 12];
+
+ if (range < cRangeTableBase)
+ // we try to get a specific range
+ return range;
+
+ // otherwise, we have one more table to look at
+ range = gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x0f00) >> 8];
+ if (range < cRangeTableBase)
+ return range;
+ if (range < cRangeTertiaryTable)
+ return gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x00f0) >> 4];
+
+ // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks
+ return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
+}
+
+const char* langGroupFromUnicodeRange(unsigned char unicodeRange) // looks unicodeRange up in gUnicodeRangeToLangGroupTable
+{
+ if (cRangeSpecificItemNum > unicodeRange)
+ return gUnicodeRangeToLangGroupTable[unicodeRange];
+ return 0; // values at or above cRangeSpecificItemNum yield a null pointer
+}
+
+}
diff --git a/WebCore/platform/text/UnicodeRange.h b/WebCore/platform/text/UnicodeRange.h
new file mode 100644
index 0000000..7ecf03f
--- /dev/null
+++ b/WebCore/platform/text/UnicodeRange.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2007 Apple Computer, Inc.
+ *
+ * Portions are Copyright (C) 1998 Netscape Communications Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Alternatively, the contents of this file may be used under the terms
+ * of either the Mozilla Public License Version 1.1, found at
+ * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
+ * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
+ * (the "GPL"), in which case the provisions of the MPL or the GPL are
+ * applicable instead of those above. If you wish to allow use of your
+ * version of this file only under the terms of one of those two
+ * licenses (the MPL or the GPL) and not to allow others to use your
+ * version of this file under the LGPL, indicate your decision by
+ * deletingthe provisions above and replace them with the notice and
+ * other provisions required by the MPL or the GPL, as the case may be.
+ * If you do not delete the provisions above, a recipient may use your
+ * version of this file under any of the LGPL, the MPL or the GPL.
+ */
+
+#ifndef UnicodeRange_H
+#define UnicodeRange_H
+
+#include <wtf/unicode/Unicode.h>
+
+namespace WebCore {
+
+// The following constants define unicode subranges.
+// Values below cRangeSpecificItemNum must be contiguous so that we can map to
+// a lang group directly (see langGroupFromUnicodeRange).
+// All ranges we care about should fit within 32 bits.
+
+// Frequently used range definitions
+const unsigned char cRangeCyrillic = 0;
+const unsigned char cRangeGreek = 1;
+const unsigned char cRangeTurkish = 2;
+const unsigned char cRangeHebrew = 3;
+const unsigned char cRangeArabic = 4;
+const unsigned char cRangeBaltic = 5;
+const unsigned char cRangeThai = 6;
+const unsigned char cRangeKorean = 7;
+const unsigned char cRangeJapanese = 8;
+const unsigned char cRangeSChinese = 9;
+const unsigned char cRangeTChinese = 10;
+const unsigned char cRangeDevanagari = 11;
+const unsigned char cRangeTamil = 12;
+const unsigned char cRangeArmenian = 13;
+const unsigned char cRangeBengali = 14;
+const unsigned char cRangeCanadian = 15;
+const unsigned char cRangeEthiopic = 16;
+const unsigned char cRangeGeorgian = 17;
+const unsigned char cRangeGujarati = 18;
+const unsigned char cRangeGurmukhi = 19;
+const unsigned char cRangeKhmer = 20;
+const unsigned char cRangeMalayalam = 21;
+
+const unsigned char cRangeSpecificItemNum = 22; // count of the frequently used ranges above (values 0-21)
+
+// values 22-29 are left free for new ranges / range sets
+
+const unsigned char cRangeSetStart = 30; // range set definition starts from here
+const unsigned char cRangeSetLatin = 30;
+const unsigned char cRangeSetCJK = 31;
+const unsigned char cRangeSetEnd = 31; // range set definition ends here
+
+// less frequently used range definition
+const unsigned char cRangeSurrogate = 32;
+const unsigned char cRangePrivate = 33;
+const unsigned char cRangeMisc = 34;
+const unsigned char cRangeUnassigned = 35;
+const unsigned char cRangeSyriac = 36;
+const unsigned char cRangeThaana = 37;
+const unsigned char cRangeOriya = 38;
+const unsigned char cRangeTelugu = 39;
+const unsigned char cRangeKannada = 40;
+const unsigned char cRangeSinhala = 41;
+const unsigned char cRangeLao = 42;
+const unsigned char cRangeTibetan = 43;
+const unsigned char cRangeMyanmar = 44;
+const unsigned char cRangeCherokee = 45;
+const unsigned char cRangeOghamRunic = 46;
+const unsigned char cRangeMongolian = 47;
+const unsigned char cRangeMathOperators = 48;
+const unsigned char cRangeMiscTechnical = 49;
+const unsigned char cRangeControlOpticalEnclose = 50;
+const unsigned char cRangeBoxBlockGeometrics = 51;
+const unsigned char cRangeMiscSymbols = 52;
+const unsigned char cRangeDingbats = 53;
+const unsigned char cRangeBraillePattern = 54;
+const unsigned char cRangeYi = 55;
+const unsigned char cRangeCombiningDiacriticalMarks = 56;
+const unsigned char cRangeSpecials = 57;
+
+const unsigned char cRangeTableBase = 128; //values over 127 are reserved for internal use only
+const unsigned char cRangeTertiaryTable = 145; // leave room for 16 subtable
+ // indices (cRangeTableBase + 1 ..
+ // cRangeTableBase + 16)
+
+
+
+unsigned int findCharUnicodeRange(UChar32 ch); // returns one of the cRange* values above; 0 for ch >= 0xFFFF
+const char* langGroupFromUnicodeRange(unsigned char unicodeRange); // returns 0 (null) for unicodeRange >= cRangeSpecificItemNum
+
+}
+
+#endif // UnicodeRange_H
diff --git a/WebCore/platform/text/cf/StringCF.cpp b/WebCore/platform/text/cf/StringCF.cpp
new file mode 100644
index 0000000..9e0d5f2
--- /dev/null
+++ b/WebCore/platform/text/cf/StringCF.cpp
@@ -0,0 +1,55 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+
+#if PLATFORM(CF)
+
+#include <CoreFoundation/CoreFoundation.h>
+
+namespace WebCore {
+
+String::String(CFStringRef str) // builds a String holding a copy of the UTF-16 characters of str
+{
+ if (!str)
+ return; // NOTE(review): m_impl is not assigned here - presumably this yields a null String; confirm against PlatformString.h
+
+ CFIndex size = CFStringGetLength(str);
+ if (size == 0)
+ m_impl = StringImpl::empty();
+ else {
+ Vector<UChar, 1024> buffer(size); // temporary copy target; presumably 1024 is the inline capacity - confirm against wtf/Vector.h
+ CFStringGetCharacters(str, CFRangeMake(0, size), (UniChar*)buffer.data());
+ m_impl = StringImpl::create(buffer.data(), size);
+ }
+}
+
+CFStringRef String::createCFString() const // delegates to StringImpl::createCFString when there is an impl
+{
+ if (!m_impl)
+ return CFSTR(""); // no impl: return the constant empty CFString instead of creating one
+
+ return m_impl->createCFString();
+}
+
+}
+
+#endif // PLATFORM(CF)
diff --git a/WebCore/platform/text/cf/StringImplCF.cpp b/WebCore/platform/text/cf/StringImplCF.cpp
new file mode 100644
index 0000000..21b43df
--- /dev/null
+++ b/WebCore/platform/text/cf/StringImplCF.cpp
@@ -0,0 +1,37 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "StringImpl.h"
+
+#if PLATFORM(CF)
+
+#include <CoreFoundation/CoreFoundation.h>
+
+namespace WebCore {
+
+CFStringRef StringImpl::createCFString() // returns a new CFString created from m_length characters starting at m_data
+{
+ return CFStringCreateWithCharacters(NULL, reinterpret_cast<const UniChar*>(m_data), m_length); // NULL selects the default CF allocator; "Create" result is owned by the caller
+}
+
+}
+
+#endif // PLATFORM(CF)
diff --git a/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp b/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp
new file mode 100644
index 0000000..9b9bd1f
--- /dev/null
+++ b/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2007 Alp Toker <alp@atoker.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+const char* currentTextBreakLocaleID() // locale ID for text break iteration in the Gtk port
+{
+ return "en_us"; // hard-coded: this implementation does not consult any system or user locale setting
+}
+
+}
diff --git a/WebCore/platform/text/mac/CharsetData.h b/WebCore/platform/text/mac/CharsetData.h
new file mode 100644
index 0000000..458cecb
--- /dev/null
+++ b/WebCore/platform/text/mac/CharsetData.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+namespace WebCore {
+
+ #define kTextEncodingISOLatinThai kCFStringEncodingISOLatinThai /* alias to the CFString encoding constant of the same meaning */
+
+ struct CharsetEntry { // one row of CharsetTable: a charset name paired with an encoding
+ const char* name;
+ ::TextEncoding encoding; // global-namespace TextEncoding type, declared outside this header
+ };
+
+ extern const CharsetEntry CharsetTable[]; // defined elsewhere; NOTE(review): size and terminator are not visible here - confirm at the definition
+
+}
diff --git a/WebCore/platform/text/mac/ShapeArabic.c b/WebCore/platform/text/mac/ShapeArabic.c
new file mode 100644
index 0000000..43e149d
--- /dev/null
+++ b/WebCore/platform/text/mac/ShapeArabic.c
@@ -0,0 +1,550 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+* Copyright (C) 2007 Apple Inc. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy of this
+* software and associated documentation files (the "Software"), to deal in the Software
+* without restriction, including without limitation the rights to use, copy, modify,
+* merge, publish, distribute, and/or sell copies of the Software, and to permit persons
+* to whom the Software is furnished to do so, provided that the above copyright notice(s)
+* and this permission notice appear in all copies of the Software and that both the above
+* copyright notice(s) and this permission notice appear in supporting documentation.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+* PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER
+* OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
+* CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*
+* Except as contained in this notice, the name of a copyright holder shall not be used in
+* advertising or otherwise to promote the sale, use or other dealings in this Software
+* without prior written authorization of the copyright holder.
+*
+******************************************************************************
+*
+* Arabic letter shaping implemented by Ayman Roshdy
+*/
+
+#include "config.h"
+#include "ShapeArabic.h"
+
+#include <unicode/utypes.h>
+#include <unicode/uchar.h>
+#include <unicode/ustring.h>
+#include <unicode/ushape.h>
+#include <wtf/Assertions.h>
+
+/*
+ * ### TODO in general for letter shaping:
+ * - the letter shaping code is UTF-16-unaware; needs update
+ * + especially invertBuffer()?!
+ * - needs to handle the "Arabic Tail" that is used in some legacy codepages
+ * as a glyph fragment of wide-glyph letters
+ * + IBM Unicode conversion tables map it to U+200B (ZWSP)
+ * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
+ */
+
+/* definitions for Arabic letter shaping: bit flags stored in the link values returned by getLink() */
+
+#define IRRELEVANT 4 /* character is skipped when shapeUnicode looks for the neighboring linking character */
+#define LAMTYPE 16 /* set on the araLink entry for 0x0644 */
+#define ALEFTYPE 32 /* set on the araLink entries for 0x0622, 0x0623, 0x0625, 0x0627 (and 0x0672, 0x0673, 0x0675) */
+#define LINKR 1 /* low link bit; presumably "links to the right" - confirm against ICU ushape.c */
+#define LINKL 2 /* second link bit; presumably "links to the left" - confirm against ICU ushape.c */
+
+static const UChar IrrelevantPos[] = { /* indexed by ch - 0x064B for 0x064B-0x0652; offset added to 0xFE70 in shapeUnicode */
+ 0x0, 0x2, 0x4, 0x6,
+ 0x8, 0xA, 0xC, 0xE,
+};
+
+static const UChar araLink[178]= /* link values for 0x0622-0x06D3, indexed by ch - 0x0622 (see getLink) */
+{ /* low byte: flags 1 (LINKR), 2 (LINKL), 4 (IRRELEVANT), 16 (LAMTYPE), 32 (ALEFTYPE); high byte: offset added to 0xFE70 or 0xFB50 by shapeUnicode */
+ 1 + 32 + 256 * 0x11,/*0x0622*/
+ 1 + 32 + 256 * 0x13,/*0x0623*/
+ 1 + 256 * 0x15,/*0x0624*/
+ 1 + 32 + 256 * 0x17,/*0x0625*/
+ 1 + 2 + 256 * 0x19,/*0x0626*/
+ 1 + 32 + 256 * 0x1D,/*0x0627*/
+ 1 + 2 + 256 * 0x1F,/*0x0628*/
+ 1 + 256 * 0x23,/*0x0629*/
+ 1 + 2 + 256 * 0x25,/*0x062A*/
+ 1 + 2 + 256 * 0x29,/*0x062B*/
+ 1 + 2 + 256 * 0x2D,/*0x062C*/
+ 1 + 2 + 256 * 0x31,/*0x062D*/
+ 1 + 2 + 256 * 0x35,/*0x062E*/
+ 1 + 256 * 0x39,/*0x062F*/
+ 1 + 256 * 0x3B,/*0x0630*/
+ 1 + 256 * 0x3D,/*0x0631*/
+ 1 + 256 * 0x3F,/*0x0632*/
+ 1 + 2 + 256 * 0x41,/*0x0633*/
+ 1 + 2 + 256 * 0x45,/*0x0634*/
+ 1 + 2 + 256 * 0x49,/*0x0635*/
+ 1 + 2 + 256 * 0x4D,/*0x0636*/
+ 1 + 2 + 256 * 0x51,/*0x0637*/
+ 1 + 2 + 256 * 0x55,/*0x0638*/
+ 1 + 2 + 256 * 0x59,/*0x0639*/
+ 1 + 2 + 256 * 0x5D,/*0x063A*/
+ 0, 0, 0, 0, 0, /*0x063B-0x063F*/
+ 1 + 2, /*0x0640*/
+ 1 + 2 + 256 * 0x61,/*0x0641*/
+ 1 + 2 + 256 * 0x65,/*0x0642*/
+ 1 + 2 + 256 * 0x69,/*0x0643*/
+ 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/
+ 1 + 2 + 256 * 0x71,/*0x0645*/
+ 1 + 2 + 256 * 0x75,/*0x0646*/
+ 1 + 2 + 256 * 0x79,/*0x0647*/
+ 1 + 256 * 0x7D,/*0x0648*/
+ 1 + 256 * 0x7F,/*0x0649*/
+ 1 + 2 + 256 * 0x81,/*0x064A*/
+ 4, 4, 4, 4, /*0x064B-0x064E*/
+ 4, 4, 4, 4, /*0x064F-0x0652*/
+ 4, 4, 4, 0, 0, /*0x0653-0x0657*/
+ 0, 0, 0, 0, /*0x0658-0x065B*/
+ 1 + 256 * 0x85,/*0x065C*/ /* 0x065C-0x065F are the intermediate LamAlef codes produced by changeLamAlef */
+ 1 + 256 * 0x87,/*0x065D*/
+ 1 + 256 * 0x89,/*0x065E*/
+ 1 + 256 * 0x8B,/*0x065F*/
+ 0, 0, 0, 0, 0, /*0x0660-0x0664*/
+ 0, 0, 0, 0, 0, /*0x0665-0x0669*/
+ 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/
+ 4, /*0x0670*/
+ 0, /*0x0671*/
+ 1 + 32, /*0x0672*/
+ 1 + 32, /*0x0673*/
+ 0, /*0x0674*/
+ 1 + 32, /*0x0675*/
+ 1, 1, /*0x0676-0x0677*/
+ 1+2, /*0x0678*/
+ 1+2 + 256 * 0x16,/*0x0679*/
+ 1+2 + 256 * 0x0E,/*0x067A*/
+ 1+2 + 256 * 0x02,/*0x067B*/
+ 1+2, 1+2, /*0x067C-0x067D*/
+ 1+2 + 256 * 0x06,/*0x067E*/
+ 1+2 + 256 * 0x12,/*0x067F*/
+ 1+2 + 256 * 0x0A,/*0x0680*/
+ 1+2, 1+2, /*0x0681-0x0682*/
+ 1+2 + 256 * 0x26,/*0x0683*/
+ 1+2 + 256 * 0x22,/*0x0684*/
+ 1+2, /*0x0685*/
+ 1+2 + 256 * 0x2A,/*0x0686*/
+ 1+2 + 256 * 0x2E,/*0x0687*/
+ 1 + 256 * 0x38,/*0x0688*/
+ 1, 1, 1, /*0x0689-0x068B*/
+ 1 + 256 * 0x34,/*0x068C*/
+ 1 + 256 * 0x32,/*0x068D*/
+ 1 + 256 * 0x36,/*0x068E*/
+ 1, 1, /*0x068F-0x0690*/
+ 1 + 256 * 0x3C,/*0x0691*/
+ 1, 1, 1, 1, 1, 1, /*0x0692-0x0697*/
+ 1 + 256 * 0x3A,/*0x0698*/
+ 1, /*0x0699*/
+ 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x069F*/
+ 1+2, 1+2, 1+2, 1+2, /*0x06A0-0x06A3*/
+ 1+2 + 256 * 0x2E,/*0x06A4*/
+ 1+2, /*0x06A5*/
+ 1+2 + 256 * 0x1E,/*0x06A6*/
+ 1+2, 1+2, /*0x06A7-0x06A8*/
+ 1+2 + 256 * 0x3E,/*0x06A9*/
+ 1+2, 1+2, 1+2, /*0x06AA-0x06AC*/
+ 1+2 + 256 * 0x83,/*0x06AD*/
+ 1+2, /*0x06AE*/
+ 1+2 + 256 * 0x42,/*0x06AF*/
+ 1+2, /*0x06B0*/
+ 1+2 + 256 * 0x4A,/*0x06B1*/
+ 1+2, /*0x06B2*/
+ 1+2 + 256 * 0x46,/*0x06B3*/
+ 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B4-0x06B9*/
+ 1+2, /*0x06BA*/ // FIXME: Seems to have a final form
+ 1+2 + 256 * 0x50,/*0x06BB*/
+ 1+2, 1+2, /*0x06BC-0x06BD*/
+ 1+2 + 256 * 0x5A,/*0x06BE*/
+ 1+2, /*0x06BF*/
+ 1, /*0x06C0*/
+ 1+2 + 256 * 0x56,/*0x06C1*/
+ 1+2, /*0x06C2*/
+ 1, 1, /*0x06C3-0x06C4*/
+ 1 + 256 * 0x90,/*0x06C5*/
+ 1 + 256 * 0x89,/*0x06C6*/
+ 1 + 256 * 0x87,/*0x06C7*/
+ 1 + 256 * 0x8B,/*0x06C8*/
+ 1 + 256 * 0x92,/*0x06C9*/
+ 1, /*0x06CA*/
+ 1 + 256 * 0x8E,/*0x06CB*/
+ 1+2 + 256 * 0xAC,/*0x06CC*/
+ 1, /*0x06CD*/
+ 1+2, /*0x06CE*/
+ 1, /*0x06CF*/
+ 1+2 + 256 * 0x94,/*0x06D0*/
+ 1+2, /*0x06D1*/
+ 1 + 256 * 0x5E,/*0x06D2*/
+ 1 + 256 * 0x60 /*0x06D3*/
+};
+
+static const UChar presLink[141]= /* link values for 0xFE70-0xFEFC, indexed by ch - 0xFE70 (see getLink) */
+{ /* NOTE(review): entries are consecutive from 0xFE70, so the range labels from the "0xFE78-0xFE81" row on look six code points too high - confirm against ICU ushape.c */
+ 1 + 2, /*0xFE70*/
+ 1 + 2, /*0xFE71*/
+ 1 + 2, 0, 1+ 2, 0, 1+ 2, /*0xFE72-0xFE76*/
+ 1 + 2, /*0xFE77*/
+ 1+ 2, 1 + 2, 1+2, 1 + 2, /*0xFE78-0xFE81*/
+ 1+ 2, 1 + 2, 1+2, 1 + 2, /*0xFE82-0xFE85*/
+ 0, 0 + 32, 1 + 32, 0 + 32, /*0xFE86-0xFE89*/
+ 1 + 32, 0, 1, 0 + 32, /*0xFE8A-0xFE8D*/
+ 1 + 32, 0, 2, 1 + 2, /*0xFE8E-0xFE91*/
+ 1, 0 + 32, 1 + 32, 0, /*0xFE92-0xFE95*/
+ 2, 1 + 2, 1, 0, /*0xFE96-0xFE99*/
+ 1, 0, 2, 1 + 2, /*0xFE9A-0xFE9D*/
+ 1, 0, 2, 1 + 2, /*0xFE9E-0xFEA1*/
+ 1, 0, 2, 1 + 2, /*0xFEA2-0xFEA5*/
+ 1, 0, 2, 1 + 2, /*0xFEA6-0xFEA9*/
+ 1, 0, 2, 1 + 2, /*0xFEAA-0xFEAD*/
+ 1, 0, 1, 0, /*0xFEAE-0xFEB1*/
+ 1, 0, 1, 0, /*0xFEB2-0xFEB5*/
+ 1, 0, 2, 1+2, /*0xFEB6-0xFEB9*/
+ 1, 0, 2, 1+2, /*0xFEBA-0xFEBD*/
+ 1, 0, 2, 1+2, /*0xFEBE-0xFEC1*/
+ 1, 0, 2, 1+2, /*0xFEC2-0xFEC5*/
+ 1, 0, 2, 1+2, /*0xFEC6-0xFEC9*/
+ 1, 0, 2, 1+2, /*0xFECA-0xFECD*/
+ 1, 0, 2, 1+2, /*0xFECE-0xFED1*/
+ 1, 0, 2, 1+2, /*0xFED2-0xFED5*/
+ 1, 0, 2, 1+2, /*0xFED6-0xFED9*/
+ 1, 0, 2, 1+2, /*0xFEDA-0xFEDD*/
+ 1, 0, 2, 1+2, /*0xFEDE-0xFEE1*/
+ 1, 0 + 16, 2 + 16, 1 + 2 +16, /*0xFEE2-0xFEE5*/
+ 1 + 16, 0, 2, 1+2, /*0xFEE6-0xFEE9*/
+ 1, 0, 2, 1+2, /*0xFEEA-0xFEED*/
+ 1, 0, 2, 1+2, /*0xFEEE-0xFEF1*/
+ 1, 0, 1, 0, /*0xFEF2-0xFEF5*/
+ 1, 0, 2, 1+2, /*0xFEF6-0xFEF9*/
+ 1, 0, 1, 0, /*0xFEFA-0xFEFD*/
+ 1, 0, 1, 0,
+ 1
+};
+
+static const UChar convertFEto06[] = /* indexed by ch - 0xFE70 for 0xFE70-0xFEFC; gives the 0x06xx code shapeUnicode works on (0xFEF5-0xFEFC map to 0x065C-0x065F) */
+{
+/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
+/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
+/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
+/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
+/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
+/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
+/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
+/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
+/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
+/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
+};
+
+static const UChar shapeTable[4][4][4]= /* shapeUnicode indexes this as [nextLink & 3][lastLink & 3][currLink & 3] */
+{ /* each value (0-3) is the shape offset added to the character's presentation-form base */
+ { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
+ { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
+ { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
+ { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
+};
+
+/*
+ *Name : changeLamAlef
+ *Function : Maps the Alef characters 0x0622, 0x0623, 0x0625 and 0x0627 to the
+ * intermediate LamAlef codes 0x065C-0x065F in the 0x06xx Range;
+ * shapeUnicode stores that code in place of the Lam and later
+ * converts it into the 0xFExx LamAlefs.
+ * Returns 0 for any other character.
+ */
+static UChar
+changeLamAlef(UChar ch) {
+
+ switch(ch) {
+ case 0x0622 :
+ return(0x065C);
+ break;
+ case 0x0623 :
+ return(0x065D);
+ break;
+ case 0x0625 :
+ return(0x065E);
+ break;
+ case 0x0627 :
+ return(0x065F);
+ break;
+ default :
+ return(0); /* not an Alef that combines with Lam */
+ break;
+ }
+}
+
+/*
+ *Name : specialChar
+ *Function : Classifies characters that need special handling in shapeUnicode:
+ * 1 = the 06xx letters tested below, 2 = 0x064B-0x0652, 3 = 0x0653-0x0655, 0x0670 and 0xFE70-0xFE7F, 0 = anything else
+ */
+static int32_t
+specialChar(UChar ch) {
+
+ if( (ch>0x0621 && ch<0x0626)||(ch==0x0627)||(ch>0x062e && ch<0x0633)||
+ (ch>0x0647 && ch<0x064a)||(ch==0x0629) ) {
+ return (1); /* shapeUnicode reduces the shape of these to 0 or 1 */
+ }
+ else
+ if( ch>=0x064B && ch<= 0x0652 )
+ return (2); /* same range as isTashkeelChar */
+ else
+ if( (ch>=0x0653 && ch<= 0x0655) || ch == 0x0670 ||
+ (ch>=0xFE70 && ch<= 0xFE7F) )
+ return (3);
+ else
+ return (0);
+}
+
+/*
+ *Name : getLink
+ *Function : Returns the link value of ch: araLink[] for 0x0622-0x06D3,
+ * presLink[] for 0xFE70-0xFEFC, 3 (both link bits) for 0x200D,
+ * 4 (IRRELEVANT) for 0x206D-0x206F and 0 for everything else.
+ */
+static UChar
+getLink(UChar ch) {
+
+ if(ch >= 0x0622 && ch <= 0x06D3) {
+ return(araLink[ch-0x0622]);
+ } else if(ch == 0x200D) {
+ return(3);
+ } else if(ch >= 0x206D && ch <= 0x206F) {
+ return(4);
+ } else if(ch >= 0xFE70 && ch <= 0xFEFC) {
+ return(presLink[ch-0xFE70]);
+ } else {
+ return(0);
+ }
+}
+
+/*
+ *Name : isTashkeelChar
+ *Function : Returns 1 for Tashkeel characters (0x064B-0x0652 inclusive), else returns 0
+ */
+static int32_t
+isTashkeelChar(UChar ch) {
+
+ if( ch>=0x064B && ch<= 0x0652 )
+ return (1);
+ else
+ return (0);
+}
+
+/*
+ *Name : shapeUnicode
+ *Function : Shapes dest[0..sourceLength) in place: 06xx letters become FExx/FBxx presentation forms.
+ * Always returns sourceLength; options, pErrorCode and the incoming destSize are not read. tashkeelFlag == 1 allows shaped tashkeel.
+ */
+static int32_t
+shapeUnicode(UChar *dest, int32_t sourceLength,
+ int32_t destSize,uint32_t options,
+ UErrorCode *pErrorCode,
+ int tashkeelFlag) {
+
+ int32_t i, iend;
+ int32_t prevPos, lastPos,Nx, Nw;
+ unsigned int Shape;
+ int32_t flag;
+ int32_t lamalef_found = 0;
+ UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0;
+ UChar wLamalef;
+
+ /*
+ * Converts the input buffer from FExx Range into 06xx Range
+ * to make sure that all characters are in the 06xx range
+ * even the lamalef is converted to the special region in
+ * the 06xx range
+ */
+ for (i = 0; i < sourceLength; i++) {
+ UChar inputChar = dest[i];
+ if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
+ dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
+ }
+ }
+
+ /* start at the beginning of the buffer and walk forward until i reaches sourceLength */
+ i = 0;
+ iend = sourceLength;
+
+ /*
+ * This function resolves the link between the characters .
+ * Arabic characters have four forms :
+ * Isolated Form, Initial Form, Middle Form and Final Form
+ */
+ currLink = getLink(dest[i]); /* reads dest[0]: relies on sourceLength > 0 (shapeArabic returns early for 0) */
+
+ prevPos = i;
+ lastPos = i;
+ Nx = sourceLength + 2, Nw = 0; /* Nx >= sourceLength means the next linking character has not been looked up yet */
+
+ while (i != iend) {
+ /* If high byte of currLink > 0 then more than one shape */
+ if ((currLink & 0xFF00) > 0 || isTashkeelChar(dest[i])) {
+ Nw = i + 1;
+ while (Nx >= sourceLength) { /* we need to know about next char */
+ if(Nw == iend) {
+ nextLink = 0;
+ Nx = -1;
+ } else {
+ nextLink = getLink(dest[Nw]);
+ if((nextLink & IRRELEVANT) == 0) {
+ Nx = Nw;
+ } else {
+ Nw = Nw + 1;
+ }
+ }
+ }
+
+ if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) {
+ lamalef_found = 1; /* written here but never read in this function */
+ wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
+ if ( wLamalef != 0) {
+ dest[i] = ' '; /* The default case is to drop the Alef and replace */
+ dest[lastPos] =wLamalef; /* it by a space. */
+ i=lastPos;
+ }
+ lastLink = prevLink;
+ currLink = getLink(wLamalef);
+ }
+ /*
+ * get the proper shape according to link ability of neighbors
+ * and of character; depends on the order of the shapes
+ * (isolated, initial, middle, final) in the compatibility area
+ */
+ flag = specialChar(dest[i]);
+
+ Shape = shapeTable[nextLink & (LINKR + LINKL)]
+ [lastLink & (LINKR + LINKL)]
+ [currLink & (LINKR + LINKL)];
+
+ if (flag == 1) {
+ Shape = (Shape == 1 || Shape == 3) ? 1 : 0;
+ }
+ else
+ if(flag == 2) {
+ if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
+ dest[i] != 0x064C && dest[i] != 0x064D ) {
+ Shape = 1;
+ if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE )
+ Shape = 0;
+ }
+ else {
+ Shape = 0;
+ }
+ }
+
+ if(flag == 2) {
+ dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape;
+ }
+ else
+ dest[i] = (UChar)((dest[i] < 0x0670 ? 0xFE70 : 0xFB50) + (currLink >> 8) + Shape); /* high byte of currLink is the offset into the FE70 or FB50 block */
+ }
+
+ /* move one notch forward */
+ if ((currLink & IRRELEVANT) == 0) {
+ prevLink = lastLink;
+ lastLink = currLink;
+ prevPos = lastPos; /* prevPos is maintained but never read in this function */
+ lastPos = i;
+ }
+
+ i++;
+ if (i == Nx) {
+ currLink = nextLink;
+ Nx = sourceLength + 2; /* force a fresh look-ahead for the next shaped character */
+ }
+ else if(i != iend) {
+ currLink = getLink(dest[i]);
+ }
+ }
+
+ destSize = sourceLength; /* output length always equals input length: the buffer never grows or shrinks */
+
+ return destSize;
+}
+
+int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode) { /* copies source to dest and shapes its letters; returns the length, or 0 on error or empty input */
+ int32_t destLength; /* assigned by the switch below but never read: the function returns sourceLength */
+
+ /* usual error checking */
+ if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
+ /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
+ if( source==NULL || sourceLength<-1 ||
+ (dest==NULL && destCapacity!=0) || destCapacity<0 ||
+ options>=U_SHAPE_DIGIT_TYPE_RESERVED ||
+ (options&U_SHAPE_DIGITS_MASK)>=U_SHAPE_DIGITS_RESERVED
+ ) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ /* determine the source length */
+ if(sourceLength==-1) {
+ sourceLength=u_strlen(source); /* -1 means source is NUL-terminated */
+ }
+ if(sourceLength==0) {
+ return 0;
+ }
+
+ /* check that source and destination do not overlap */
+ if( dest!=NULL &&
+ ((source<=dest && dest<source+sourceLength) ||
+ (dest<=source && source<dest+destCapacity))
+ ) {
+ *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+
+ if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
+ int32_t outputSize = sourceLength;
+
+ /* calculate destination size */
+ /* TODO: do we ever need to do this pure preflighting? */
+ ASSERT((options&U_SHAPE_LENGTH_MASK) != U_SHAPE_LENGTH_GROW_SHRINK); /* output is always sourceLength long here */
+
+ if(outputSize>destCapacity) {
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ return outputSize; /* required capacity; also the result of a dest==NULL preflight call */
+ }
+
+ /* Start of Arabic letter shaping part */
+ memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR);
+
+ ASSERT((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL);
+
+ switch(options&U_SHAPE_LETTERS_MASK) {
+ case U_SHAPE_LETTERS_SHAPE :
+ /* Call the shaping function with tashkeel flag == 1 */
+ destLength = shapeUnicode(dest,sourceLength,destCapacity,options,pErrorCode,1);
+ break;
+ case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
+ /* Call the shaping function with tashkeel flag == 0 */
+ destLength = shapeUnicode(dest,sourceLength,destCapacity,options,pErrorCode,0);
+ break;
+ case U_SHAPE_LETTERS_UNSHAPE :
+ ASSERT_NOT_REACHED(); /* unshaping is not implemented; dest keeps the plain copy of source */
+ break;
+ default :
+ /* will never occur because of validity checks above */
+ destLength = 0;
+ break;
+ }
+
+ /* End of Arabic letter shaping part */
+ } else
+ ASSERT_NOT_REACHED(); /* U_SHAPE_LETTERS_NOOP is not supported; note that dest is not written on this path */
+
+ ASSERT((options & U_SHAPE_DIGITS_MASK) == U_SHAPE_DIGITS_NOOP); /* digit shaping is not supported */
+
+ return sourceLength;
+}
diff --git a/WebCore/platform/text/mac/ShapeArabic.h b/WebCore/platform/text/mac/ShapeArabic.h
new file mode 100644
index 0000000..2f85ea0
--- /dev/null
+++ b/WebCore/platform/text/mac/ShapeArabic.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ShapeArabic_h
+#define ShapeArabic_h
+
+#include <unicode/ushape.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // ShapeArabic_h
diff --git a/WebCore/platform/text/mac/StringImplMac.mm b/WebCore/platform/text/mac/StringImplMac.mm
new file mode 100644
index 0000000..2180b94
--- /dev/null
+++ b/WebCore/platform/text/mac/StringImplMac.mm
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "StringImpl.h"
+
+namespace WebCore {
+
+// Converts this string to an NSString built from the m_length characters
+// starting at m_data.
+StringImpl::operator NSString *()
+{
+    NSString *converted = [NSString stringWithCharacters:m_data length:m_length];
+    return converted;
+}
+
+}
diff --git a/WebCore/platform/text/mac/StringMac.mm b/WebCore/platform/text/mac/StringMac.mm
new file mode 100644
index 0000000..77942ea
--- /dev/null
+++ b/WebCore/platform/text/mac/StringMac.mm
@@ -0,0 +1,41 @@
+/**
+ * Copyright (C) 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+
+namespace WebCore {
+
+// Builds a String from an NSString. A nil input leaves m_impl untouched (null
+// string); a zero-length input yields the shared empty StringImpl.
+String::String(NSString* str)
+{
+    if (!str)
+        return;
+
+    CFStringRef cfString = reinterpret_cast<CFStringRef>(str);
+    const CFIndex characterCount = CFStringGetLength(cfString);
+    if (characterCount == 0) {
+        m_impl = StringImpl::empty();
+        return;
+    }
+
+    // Copy the characters out through a temporary vector (1024 inline slots)
+    // and let StringImpl::create make its own storage from it.
+    Vector<UChar, 1024> characters(characterCount);
+    CFStringGetCharacters(cfString, CFRangeMake(0, characterCount), characters.data());
+    m_impl = StringImpl::create(characters.data(), characterCount);
+}
+
+}
diff --git a/WebCore/platform/text/mac/TextBoundaries.mm b/WebCore/platform/text/mac/TextBoundaries.mm
new file mode 100644
index 0000000..ff1dfd2
--- /dev/null
+++ b/WebCore/platform/text/mac/TextBoundaries.mm
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#import "config.h"
+#import "TextBoundaries.h"
+
+namespace WebCore {
+
+// Reports the word around |position| in chars[0, len) as the half-open range
+// [*start, *end), as computed by -[NSAttributedString doubleClickAtIndex:].
+// An empty buffer yields the empty range [0, 0).
+void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
+{
+    // doubleClickAtIndex: needs an index inside the string. With no characters
+    // there is no such index (len - 1 would be -1), so answer directly.
+    if (len <= 0) {
+        *start = 0;
+        *end = 0;
+        return;
+    }
+
+    // Clamp the query position into [0, len - 1].
+    int index = position;
+    if (index >= len)
+        index = len - 1;
+    else if (index < 0)
+        index = 0;
+
+    // The NSString only wraps the caller's buffer (no copy, not freed on release).
+    NSString* string = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars)
+        length:len freeWhenDone:NO];
+    NSAttributedString* attr = [[NSAttributedString alloc] initWithString:string];
+    NSRange range = [attr doubleClickAtIndex:index];
+    [attr release];
+    [string release];
+    *start = range.location;
+    *end = range.location + range.length;
+}
+
+// Returns the index -[NSAttributedString nextWordFromIndex:forward:] reports
+// for |position| in chars[0, len), searching in the direction given by |forward|.
+int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
+{
+    // Wrap the caller's buffer without copying it or taking ownership of it.
+    unichar* characters = const_cast<unichar*>(chars);
+    NSString* text = [[NSString alloc] initWithCharactersNoCopy:characters length:len freeWhenDone:NO];
+    NSAttributedString* attributedText = [[NSAttributedString alloc] initWithString:text];
+
+    const int nextWordIndex = [attributedText nextWordFromIndex:position forward:forward];
+
+    [attributedText release];
+    [text release];
+    return nextWordIndex;
+}
+
+}
diff --git a/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm b/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
new file mode 100644
index 0000000..92983eb
--- /dev/null
+++ b/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+static const int maxLocaleStringLength = 32;
+
+// This code was swiped from the CarbonCore UnicodeUtilities. One change from that is to use the empty
+// string instead of the "old locale model" as the ultimate fallback. This change is per the UnicodeUtilities
+// engineer.
+//
+// Writes the locale identifier to use for text breaking into localeStringBuffer
+// (capacity maxLocaleStringLength, including the terminating NUL). The result is
+// the empty string when no usable preference is found.
+static void getTextBreakLocale(char localeStringBuffer[maxLocaleStringLength])
+{
+    // Empty string means "root locale", which is what we use if we can't use a pref.
+    localeStringBuffer[0] = '\0';
+
+    // We get the locale string from the AppleTextBreakLocale pref.
+    // If that fails then look for the first language in the AppleLanguages pref.
+    // Preference values are arbitrary property-list objects, so check the type before use.
+    CFStringRef prefLocaleStr = 0;
+    CFPropertyListRef breakLocalePref = CFPreferencesCopyValue(CFSTR("AppleTextBreakLocale"),
+        kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
+    if (breakLocalePref) {
+        if (CFGetTypeID(breakLocalePref) == CFStringGetTypeID())
+            prefLocaleStr = static_cast<CFStringRef>(breakLocalePref); // keep the reference we own
+        else
+            CFRelease(breakLocalePref);
+    }
+
+    if (!prefLocaleStr) {
+        CFPropertyListRef languagesPref = CFPreferencesCopyValue(CFSTR("AppleLanguages"),
+            kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost);
+        if (languagesPref) {
+            if (CFGetTypeID(languagesPref) == CFArrayGetTypeID()) {
+                CFArrayRef appleLangArr = static_cast<CFArrayRef>(languagesPref);
+                // Index 0 is only valid for a non-empty array.
+                if (CFArrayGetCount(appleLangArr) > 0) {
+                    // Take the topmost language. Retain so that we can blindly release later.
+                    CFTypeRef topLanguage = CFArrayGetValueAtIndex(appleLangArr, 0);
+                    if (topLanguage && CFGetTypeID(topLanguage) == CFStringGetTypeID()) {
+                        prefLocaleStr = static_cast<CFStringRef>(topLanguage);
+                        CFRetain(prefLocaleStr);
+                    }
+                }
+            }
+            CFRelease(languagesPref);
+        }
+    }
+
+    if (prefLocaleStr) {
+        // Canonicalize pref string in case it is not in the canonical format.
+        CFStringRef canonLocaleCFStr = CFLocaleCreateCanonicalLanguageIdentifierFromString(kCFAllocatorDefault, prefLocaleStr);
+        if (canonLocaleCFStr) {
+            // If the identifier does not fit or is not ASCII, fall back to the root locale
+            // rather than leaving whatever the failed conversion wrote.
+            if (!CFStringGetCString(canonLocaleCFStr, localeStringBuffer, maxLocaleStringLength, kCFStringEncodingASCII))
+                localeStringBuffer[0] = '\0';
+            CFRelease(canonLocaleCFStr);
+        }
+        CFRelease(prefLocaleStr);
+    }
+}
+
+// Returns the text-break locale identifier, computing it with
+// getTextBreakLocale() on the first call and reusing the stored result afterwards.
+const char* currentTextBreakLocaleID()
+{
+    static char cachedLocaleID[maxLocaleStringLength];
+    static bool localeIsCached = false;
+
+    if (localeIsCached)
+        return cachedLocaleID;
+
+    getTextBreakLocale(cachedLocaleID);
+    localeIsCached = true;
+    return cachedLocaleID;
+}
+
+}
diff --git a/WebCore/platform/text/mac/TextCodecMac.cpp b/WebCore/platform/text/mac/TextCodecMac.cpp
new file mode 100644
index 0000000..7270a26
--- /dev/null
+++ b/WebCore/platform/text/mac/TextCodecMac.cpp
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecMac.h"
+
+#include "CString.h"
+#include "CharacterNames.h"
+#include "CharsetData.h"
+#include "PlatformString.h"
+#include <wtf/Assertions.h>
+
+using std::auto_ptr;
+using std::min;
+
+namespace WebCore {
+
+// We need to keep this because ICU doesn't support some of the encodings that we need:
+// <http://bugs.webkit.org/show_bug.cgi?id=4195>.
+
+const size_t ConversionBufferSize = 16384;
+
+static TECObjectRef cachedConverterTEC;
+static TECTextEncodingID cachedConverterEncoding = invalidEncoding;
+
+// Walks CharsetTable (terminated by an entry with a null name) and hands every
+// name to the registrar together with the first name seen for the current run
+// of entries sharing the same encoding (presumably the canonical name for
+// that alias; confirm against EncodingNameRegistrar).
+void TextCodecMac::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    TECTextEncodingID runEncoding = invalidEncoding;
+    const char* runFirstName = 0;
+
+    size_t index = 0;
+    while (const char* name = CharsetTable[index].name) {
+        // A change of encoding starts a new run; its first name labels the run.
+        if (CharsetTable[index].encoding != runEncoding) {
+            runEncoding = CharsetTable[index].encoding;
+            runFirstName = name;
+        }
+        registrar(name, runFirstName);
+        ++index;
+    }
+}
+
+// Codec factory handed to the registrar: additionalData points at the
+// TECTextEncodingID the new codec should use.
+static auto_ptr<TextCodec> newTextCodecMac(const TextEncoding&, const void* additionalData)
+{
+    const TECTextEncodingID* encoding = static_cast<const TECTextEncodingID*>(additionalData);
+    return auto_ptr<TextCodec>(new TextCodecMac(*encoding));
+}
+
+// Registers the newTextCodecMac factory once for each run of CharsetTable
+// entries that share an encoding, under the first name of that run. The
+// address of the table's encoding field is passed as the factory's data.
+void TextCodecMac::registerCodecs(TextCodecRegistrar registrar)
+{
+    TECTextEncodingID previousEncoding = invalidEncoding;
+
+    for (size_t index = 0; CharsetTable[index].name; ++index) {
+        // Same encoding as the previous entry: already registered.
+        if (CharsetTable[index].encoding == previousEncoding)
+            continue;
+
+        registrar(CharsetTable[index].name, newTextCodecMac, &CharsetTable[index].encoding);
+        previousEncoding = CharsetTable[index].encoding;
+    }
+}
+
+// Creates a codec for the given TEC encoding. No converter is allocated here;
+// decode() creates one on demand.
+TextCodecMac::TextCodecMac(TECTextEncodingID encoding)
+    : m_encoding(encoding)
+    // encode() substitutes this character for every backslash. Defaulting to the
+    // backslash itself makes that substitution a no-op instead of reading an
+    // indeterminate value.
+    // NOTE(review): encodings that render backslash as a currency sign presumably
+    // want the yen sign here - confirm and set it where the encoding is known.
+    , m_backslashAsCurrencySymbol('\\')
+    , m_error(false)
+    , m_numBufferedBytes(0)
+    , m_converterTEC(0)
+{
+}
+
+// Gives up this codec's converter, if it has one, via releaseTECConverter().
+TextCodecMac::~TextCodecMac()
+{
+    this->releaseTECConverter();
+}
+
+// Moves this codec's converter into the file-level one-entry cache, disposing
+// of whatever converter the cache held before. Does nothing when the codec has
+// no converter.
+void TextCodecMac::releaseTECConverter() const
+{
+    if (!m_converterTEC)
+        return;
+
+    // Only one converter is cached; the previous occupant is disposed of.
+    if (cachedConverterTEC)
+        TECDisposeConverter(cachedConverterTEC);
+
+    cachedConverterTEC = m_converterTEC;
+    cachedConverterEncoding = m_encoding;
+    m_converterTEC = 0;
+}
+
+// Provides m_converterTEC: takes the cached converter when it was left for this
+// same encoding, otherwise creates a new converter to 16-bit Unicode.
+// Returns noErr on success or the TECCreateConverter status on failure.
+// The cached encoding is reset to invalidEncoding in every case.
+OSStatus TextCodecMac::createTECConverter() const
+{
+    // Decide about reuse before the cached encoding is invalidated below.
+    const bool canReuseCachedConverter = cachedConverterEncoding == m_encoding && cachedConverterTEC;
+    cachedConverterEncoding = invalidEncoding;
+
+    if (canReuseCachedConverter) {
+        m_converterTEC = cachedConverterTEC;
+        cachedConverterTEC = 0;
+        TECClearConverterContextInfo(m_converterTEC);
+        return noErr;
+    }
+
+    const OSStatus createStatus = TECCreateConverter(&m_converterTEC, m_encoding,
+        CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat));
+    if (createStatus)
+        return createStatus;
+
+    TECSetBasicOptions(m_converterTEC, kUnicodeForceASCIIRangeMask);
+    return noErr;
+}
+
+OSStatus TextCodecMac::decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength,
+ void *outputBuffer, int outputBufferLength, int& outputLength)
+{ // One TECConvertText pass: returns the TEC status and reports bytes consumed/produced via inputLength/outputLength.
+ OSStatus status;
+ unsigned long bytesRead = 0;
+ unsigned long bytesWritten = 0;
+
+ if (m_numBufferedBytes != 0) { // bytes of an incomplete character were stored in m_bufferedBytes earlier
+ // Finish converting a partial character that's in our buffer.
+
+ // First, fill the partial character buffer with as many bytes as are available.
+ ASSERT(m_numBufferedBytes < sizeof(m_bufferedBytes));
+ const int spaceInBuffer = sizeof(m_bufferedBytes) - m_numBufferedBytes;
+ const int bytesToPutInBuffer = MIN(spaceInBuffer, inputBufferLength);
+ ASSERT(bytesToPutInBuffer != 0);
+ memcpy(m_bufferedBytes + m_numBufferedBytes, inputBuffer, bytesToPutInBuffer);
+
+ // Now, do a conversion on the buffer.
+ status = TECConvertText(m_converterTEC, m_bufferedBytes, m_numBufferedBytes + bytesToPutInBuffer, &bytesRead,
+ reinterpret_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
+ ASSERT(bytesRead <= m_numBufferedBytes + bytesToPutInBuffer);
+
+ if (status == kTECPartialCharErr && bytesRead == 0) {
+ // Handle the case where the partial character was not converted.
+ if (bytesToPutInBuffer >= spaceInBuffer) { // buffer is completely full and still not a whole character
+ LOG_ERROR("TECConvertText gave a kTECPartialCharErr but read none of the %zu bytes in the buffer", sizeof(m_bufferedBytes));
+ m_numBufferedBytes = 0;
+ status = kTECUnmappableElementErr; // should never happen, but use this error code
+ } else {
+ // Tell the caller we read all the source bytes and keep them in the buffer.
+ m_numBufferedBytes += bytesToPutInBuffer;
+ bytesRead = bytesToPutInBuffer;
+ status = noErr;
+ }
+ } else {
+ // We are done with the partial character buffer.
+ // Also, we have read some of the bytes from the main buffer.
+ if (bytesRead > m_numBufferedBytes) {
+ bytesRead -= m_numBufferedBytes; // count only bytes taken from inputBuffer, not the carried-over ones
+ } else {
+ LOG_ERROR("TECConvertText accepted some bytes it previously rejected with kTECPartialCharErr");
+ bytesRead = 0;
+ }
+ m_numBufferedBytes = 0;
+ if (status == kTECPartialCharErr) {
+ // While there may be a partial character problem in the small buffer,
+ // we have to try again and not get confused and think there is a partial
+ // character problem in the large buffer.
+ status = noErr;
+ }
+ }
+ } else { // nothing carried over: convert straight from the caller's input
+ status = TECConvertText(m_converterTEC, inputBuffer, inputBufferLength, &bytesRead,
+ static_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
+ ASSERT(static_cast<int>(bytesRead) <= inputBufferLength);
+ }
+
+ // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus.
+ if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0) {
+ status = kTECOutputBufferFullStatus;
+ }
+
+ inputLength = bytesRead; // bytes consumed from inputBuffer
+ outputLength = bytesWritten; // bytes (not characters) written to outputBuffer
+ return status;
+}
+
+// Decodes |length| bytes in m_encoding into a String.
+//
+// An incomplete character at the end of the input is stored in m_bufferedBytes
+// and completed by the next call. When |flush| is true, TECFlushText is called
+// after the input has been consumed and its output is appended as well.
+// Returns a null String if no converter can be obtained, or if the converter
+// reports an unexpected status (m_error is set in that case).
+String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
+{
+    // Get a converter for the passed-in encoding.
+    if (!m_converterTEC && createTECConverter() != noErr)
+        return String();
+
+    Vector<UChar> result;
+
+    const unsigned char* sourcePointer = reinterpret_cast<const unsigned char*>(bytes);
+    int sourceLength = length;
+    bool bufferWasFull = false;
+    UniChar buffer[ConversionBufferSize];
+
+    // Keep converting while input remains, or while the previous pass filled the
+    // output buffer and the converter may still hold pending output.
+    while (sourceLength || bufferWasFull) {
+        int bytesRead = 0;
+        int bytesWritten = 0;
+        OSStatus status = decode(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten);
+        ASSERT(bytesRead <= sourceLength);
+        sourcePointer += bytesRead;
+        sourceLength -= bytesRead;
+
+        switch (status) {
+            case noErr:
+            case kTECOutputBufferFullStatus:
+                break;
+            case kTextMalformedInputErr:
+            case kTextUndefinedElementErr:
+                // FIXME: Put FFFD character into the output string in this case?
+                // Reset the converter and skip one byte of input.
+                TECClearConverterContextInfo(m_converterTEC);
+                if (sourceLength) {
+                    sourcePointer += 1;
+                    sourceLength -= 1;
+                }
+                break;
+            case kTECPartialCharErr: {
+                // Put the partial character into the buffer.
+                ASSERT(m_numBufferedBytes == 0);
+                // Capacity of the carry-over byte array itself, not of its counter.
+                const int bufferSize = sizeof(m_bufferedBytes);
+                if (sourceLength < bufferSize) {
+                    memcpy(m_bufferedBytes, sourcePointer, sourceLength);
+                    m_numBufferedBytes = sourceLength;
+                } else {
+                    LOG_ERROR("TECConvertText gave a kTECPartialCharErr, but left %u bytes in the buffer", sourceLength);
+                }
+                sourceLength = 0;
+                break;
+            }
+            default:
+                LOG_ERROR("text decoding failed with error %ld", static_cast<long>(status));
+                m_error = true;
+                return String();
+        }
+
+        // bytesWritten is a byte count; the output consists of whole UChars.
+        ASSERT(!(bytesWritten % sizeof(UChar)));
+        appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar));
+
+        bufferWasFull = status == kTECOutputBufferFullStatus;
+    }
+
+    if (flush) {
+        unsigned long bytesWritten = 0;
+        TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten);
+        ASSERT(!(bytesWritten % sizeof(UChar)));
+        appendOmittingBOM(result, buffer, bytesWritten / sizeof(UChar));
+    }
+
+    String resultString = String::adopt(result);
+
+    // <rdar://problem/3225472>
+    // Simplified Chinese pages use the code A3A0 to mean "full-width space".
+    // But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice.
+    // To work around, just change all occurrences of U+E5E5 to U+3000 (ideographic space).
+    if (m_encoding == kCFStringEncodingGB_18030_2000)
+        resultString.replace(0xE5E5, ideographicSpace);
+
+    return resultString;
+}
+
+// Encodes |length| UTF-16 characters into m_encoding using CFString.
+// Characters that cannot be encoded come out as '?' when allowEntities is
+// false, and as decimal numeric character references ("&#N;") when it is true.
+CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowEntities)
+{
+    // FIXME: We should really use TEC here instead of CFString for consistency with the other direction.
+
+    // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
+    // Encoding will change the yen sign back into a backslash.
+    String substituted(characters, length);
+    substituted.replace('\\', m_backslashAsCurrencySymbol);
+    CFStringRef cfString = substituted.createCFString();
+
+    // A loss byte of 0 makes CFStringGetBytes stop at the first character it cannot convert.
+    const UInt8 lossByte = allowEntities ? 0 : '?';
+
+    Vector<char> encoded;
+    size_t encodedSize = 0;
+    CFIndex position = 0;
+    CFIndex remaining = CFStringGetLength(cfString);
+
+    while (remaining > 0) {
+        const CFRange pending = CFRangeMake(position, remaining);
+
+        // First pass with a NULL buffer only measures the bytes needed.
+        CFIndex byteCount;
+        CFStringGetBytes(cfString, pending, m_encoding, lossByte, false, NULL, 0x7FFFFFFF, &byteCount);
+
+        // Second pass writes them directly behind what has been produced so far.
+        encoded.grow(encodedSize + byteCount);
+        unsigned char* destination = reinterpret_cast<unsigned char*>(encoded.data() + encodedSize);
+        CFIndex consumed = CFStringGetBytes(cfString, pending, m_encoding, lossByte, false, destination, byteCount, &byteCount);
+        encodedSize += byteCount;
+
+        if (consumed != remaining) {
+            // Conversion stopped early: the next character is emitted as an entity.
+            unsigned codePoint = CFStringGetCharacterAtIndex(cfString, position + consumed);
+            ++consumed;
+
+            const bool isHighSurrogate = (codePoint & 0xFC00) == 0xD800;
+            if (isHighSurrogate && consumed != remaining) {
+                const UniChar trail = CFStringGetCharacterAtIndex(cfString, position + consumed);
+                const bool isLowSurrogate = (trail & 0xFC00) == 0xDC00;
+                if (isLowSurrogate) {
+                    // Combine the surrogate pair into one code point.
+                    codePoint <<= 10;
+                    codePoint += trail;
+                    codePoint += 0x10000 - (0xD800 << 10) - 0xDC00;
+                    ++consumed;
+                }
+            }
+
+            char entity[16];
+            sprintf(entity, "&#%u;", codePoint);
+            const size_t entityLength = strlen(entity);
+            encoded.grow(encodedSize + entityLength);
+            memcpy(encoded.data() + encodedSize, entity, entityLength);
+            encodedSize += entityLength;
+        }
+
+        position += consumed;
+        remaining -= consumed;
+    }
+
+    CFRelease(cfString);
+    return CString(encoded.data(), encodedSize);
+}
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/mac/TextCodecMac.h b/WebCore/platform/text/mac/TextCodecMac.h
new file mode 100644
index 0000000..639e214
--- /dev/null
+++ b/WebCore/platform/text/mac/TextCodecMac.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecMac_h
+#define TextCodecMac_h
+
+#include "TextCodec.h"
+#include <CoreServices/CoreServices.h>
+
+namespace WebCore {
+
+ typedef ::TextEncoding TECTextEncodingID;
+ const TECTextEncodingID invalidEncoding = kCFStringEncodingInvalidId;
+
+ class TextCodecMac : public TextCodec { // TextCodec that decodes with the Text Encoding Converter (TEC) and encodes with CFString.
+ public:
+ static void registerEncodingNames(EncodingNameRegistrar); // Passes every CharsetTable name to the registrar.
+ static void registerCodecs(TextCodecRegistrar); // Registers a TextCodecMac factory once per run of equal CharsetTable encodings.
+
+ explicit TextCodecMac(TECTextEncodingID);
+ virtual ~TextCodecMac();
+
+ virtual String decode(const char*, size_t length, bool flush = false); // Returns a null String on failure; flush also drains the converter.
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false); // allowEntities: write "&#N;" for unencodable characters instead of '?'.
+
+ private:
+ OSStatus decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength,
+ void* outputBuffer, int outputBufferLength, int& outputLength); // One conversion pass; all lengths are byte counts.
+
+ OSStatus createTECConverter() const; // Takes the cached converter if its encoding matches, else creates one.
+ void releaseTECConverter() const; // Hands m_converterTEC over to the file-level one-entry cache.
+
+ TECTextEncodingID m_encoding; // Encoding this codec decodes from and encodes to.
+ UChar m_backslashAsCurrencySymbol; // Substituted for '\\' by encode().
+ bool m_error; // Set when decode() gets an unexpected converter status.
+ unsigned m_numBufferedBytes; // Number of valid bytes in m_bufferedBytes.
+ unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
+ mutable TECObjectRef m_converterTEC; // Created on demand by decode(); mutable so the const create/release helpers can change it.
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecMac_h
diff --git a/WebCore/platform/text/mac/character-sets.txt b/WebCore/platform/text/mac/character-sets.txt
new file mode 100644
index 0000000..475e78e
--- /dev/null
+++ b/WebCore/platform/text/mac/character-sets.txt
@@ -0,0 +1,1868 @@
+
+===================================================================
+CHARACTER SETS
+
+(last updated 28 January 2005)
+
+These are the official names for character sets that may be used in
+the Internet and may be referred to in Internet documentation. These
+names are expressed in ANSI_X3.4-1968 which is commonly called
+US-ASCII or simply ASCII. The character set most commonly used in the
+Internet and used especially in protocol standards is US-ASCII; this
+is strongly encouraged. The use of the name US-ASCII is also
+encouraged.
+
+The character set names may be up to 40 characters taken from the
+printable characters of US-ASCII. However, no distinction is made
+between use of upper and lower case letters.
+
+The MIBenum value is a unique value for use in MIBs to identify coded
+character sets.
+
+The value space for MIBenum values has been divided into three
+regions. The first region (3-999) consists of coded character sets
+that have been standardized by some standard setting organization.
+This region is intended for standards that do not have subset
+implementations. The second region (1000-1999) is for the Unicode and
+ISO/IEC 10646 coded character sets together with a specification of a
+(set of) sub-repertoires that may occur. The third region (>1999) is
+intended for vendor specific coded character sets.
+
+ Assigned MIB enum Numbers
+ -------------------------
+ 0-2 Reserved
+ 3-999 Set By Standards Organizations
+ 1000-1999 Unicode / 10646
+ 2000-2999 Vendor
+
+The aliases that start with "cs" have been added for use with the
+IANA-CHARSET-MIB as originally defined in RFC3808, and as currently
+maintained by IANA at http://www.iana.org/assignments/ianacharset-mib.
+Note that the ianacharset-mib needs to be kept in sync with this
+registry. These aliases that start with "cs" contain the standard
+numbers along with suggestive names in order to facilitate applications
+that want to display the names in user interfaces. The "cs" stands
+for character set and is provided for applications that need a lower
+case first letter but want to use mixed case thereafter that cannot
+contain any special characters, such as underbar ("_") and dash ("-").
+
+If the character set is from an ISO standard, its cs alias is the ISO
+standard number or name. If the character set is not from an ISO
+standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO
+Registration Authority), the ISO Registry number is specified as
+ISOnnn followed by letters suggestive of the name or standards number
+of the code set. When a national or international standard is
+revised, the year of revision is added to the cs alias of the new
+character set entry in the IANA Registry in order to distinguish the
+revised character set from the original character set.
+
+
+Character Set Reference
+------------- ---------
+
+Name: ANSI_X3.4-1968 [RFC1345,KXS2]
+MIBenum: 3
+Source: ECMA registry
+Alias: iso-ir-6
+Alias: ANSI_X3.4-1986
+Alias: ISO_646.irv:1991
+Alias: ASCII
+Alias: ISO646-US
+Alias: US-ASCII (preferred MIME name)
+Alias: us
+Alias: IBM367
+Alias: cp367
+Alias: csASCII
+
+Name: ISO-10646-UTF-1
+MIBenum: 27
+Source: Universal Transfer Format (1), this is the multibyte
+ encoding, that subsets ASCII-7. It does not have byte
+ ordering issues.
+Alias: csISO10646UTF1
+
+Name: ISO_646.basic:1983 [RFC1345,KXS2]
+MIBenum: 28
+Source: ECMA registry
+Alias: ref
+Alias: csISO646basic1983
+
+Name: INVARIANT [RFC1345,KXS2]
+MIBenum: 29
+Alias: csINVARIANT
+
+Name: ISO_646.irv:1983 [RFC1345,KXS2]
+MIBenum: 30
+Source: ECMA registry
+Alias: iso-ir-2
+Alias: irv
+Alias: csISO2IntlRefVersion
+
+Name: BS_4730 [RFC1345,KXS2]
+MIBenum: 20
+Source: ECMA registry
+Alias: iso-ir-4
+Alias: ISO646-GB
+Alias: gb
+Alias: uk
+Alias: csISO4UnitedKingdom
+
+Name: NATS-SEFI [RFC1345,KXS2]
+MIBenum: 31
+Source: ECMA registry
+Alias: iso-ir-8-1
+Alias: csNATSSEFI
+
+Name: NATS-SEFI-ADD [RFC1345,KXS2]
+MIBenum: 32
+Source: ECMA registry
+Alias: iso-ir-8-2
+Alias: csNATSSEFIADD
+
+Name: NATS-DANO [RFC1345,KXS2]
+MIBenum: 33
+Source: ECMA registry
+Alias: iso-ir-9-1
+Alias: csNATSDANO
+
+Name: NATS-DANO-ADD [RFC1345,KXS2]
+MIBenum: 34
+Source: ECMA registry
+Alias: iso-ir-9-2
+Alias: csNATSDANOADD
+
+Name: SEN_850200_B [RFC1345,KXS2]
+MIBenum: 35
+Source: ECMA registry
+Alias: iso-ir-10
+Alias: FI
+Alias: ISO646-FI
+Alias: ISO646-SE
+Alias: se
+Alias: csISO10Swedish
+
+Name: SEN_850200_C [RFC1345,KXS2]
+MIBenum: 21
+Source: ECMA registry
+Alias: iso-ir-11
+Alias: ISO646-SE2
+Alias: se2
+Alias: csISO11SwedishForNames
+
+Name: KS_C_5601-1987 [RFC1345,KXS2]
+MIBenum: 36
+Source: ECMA registry
+Alias: iso-ir-149
+Alias: KS_C_5601-1989
+Alias: KSC_5601
+Alias: korean
+Alias: csKSC56011987
+
+Name: ISO-2022-KR (preferred MIME name) [RFC1557,Choi]
+MIBenum: 37
+Source: RFC-1557 (see also KS_C_5601-1987)
+Alias: csISO2022KR
+
+Name: EUC-KR (preferred MIME name) [RFC1557,Choi]
+MIBenum: 38
+Source: RFC-1557 (see also KS_C_5861-1992)
+Alias: csEUCKR
+
+Name: ISO-2022-JP (preferred MIME name) [RFC1468,Murai]
+MIBenum: 39
+Source: RFC-1468 (see also RFC-2237)
+Alias: csISO2022JP
+
+Name: ISO-2022-JP-2 (preferred MIME name) [RFC1554,Ohta]
+MIBenum: 40
+Source: RFC-1554
+Alias: csISO2022JP2
+
+Name: ISO-2022-CN [RFC1922]
+MIBenum: 104
+Source: RFC-1922
+
+Name: ISO-2022-CN-EXT [RFC1922]
+MIBenum: 105
+Source: RFC-1922
+
+Name: JIS_C6220-1969-jp [RFC1345,KXS2]
+MIBenum: 41
+Source: ECMA registry
+Alias: JIS_C6220-1969
+Alias: iso-ir-13
+Alias: katakana
+Alias: x0201-7
+Alias: csISO13JISC6220jp
+
+Name: JIS_C6220-1969-ro [RFC1345,KXS2]
+MIBenum: 42
+Source: ECMA registry
+Alias: iso-ir-14
+Alias: jp
+Alias: ISO646-JP
+Alias: csISO14JISC6220ro
+
+Name: IT [RFC1345,KXS2]
+MIBenum: 22
+Source: ECMA registry
+Alias: iso-ir-15
+Alias: ISO646-IT
+Alias: csISO15Italian
+
+Name: PT [RFC1345,KXS2]
+MIBenum: 43
+Source: ECMA registry
+Alias: iso-ir-16
+Alias: ISO646-PT
+Alias: csISO16Portuguese
+
+Name: ES [RFC1345,KXS2]
+MIBenum: 23
+Source: ECMA registry
+Alias: iso-ir-17
+Alias: ISO646-ES
+Alias: csISO17Spanish
+
+Name: greek7-old [RFC1345,KXS2]
+MIBenum: 44
+Source: ECMA registry
+Alias: iso-ir-18
+Alias: csISO18Greek7Old
+
+Name: latin-greek [RFC1345,KXS2]
+MIBenum: 45
+Source: ECMA registry
+Alias: iso-ir-19
+Alias: csISO19LatinGreek
+
+Name: DIN_66003 [RFC1345,KXS2]
+MIBenum: 24
+Source: ECMA registry
+Alias: iso-ir-21
+Alias: de
+Alias: ISO646-DE
+Alias: csISO21German
+
+Name: NF_Z_62-010_(1973) [RFC1345,KXS2]
+MIBenum: 46
+Source: ECMA registry
+Alias: iso-ir-25
+Alias: ISO646-FR1
+Alias: csISO25French
+
+Name: Latin-greek-1 [RFC1345,KXS2]
+MIBenum: 47
+Source: ECMA registry
+Alias: iso-ir-27
+Alias: csISO27LatinGreek1
+
+Name: ISO_5427 [RFC1345,KXS2]
+MIBenum: 48
+Source: ECMA registry
+Alias: iso-ir-37
+Alias: csISO5427Cyrillic
+
+Name: JIS_C6226-1978 [RFC1345,KXS2]
+MIBenum: 49
+Source: ECMA registry
+Alias: iso-ir-42
+Alias: csISO42JISC62261978
+
+Name: BS_viewdata [RFC1345,KXS2]
+MIBenum: 50
+Source: ECMA registry
+Alias: iso-ir-47
+Alias: csISO47BSViewdata
+
+Name: INIS [RFC1345,KXS2]
+MIBenum: 51
+Source: ECMA registry
+Alias: iso-ir-49
+Alias: csISO49INIS
+
+Name: INIS-8 [RFC1345,KXS2]
+MIBenum: 52
+Source: ECMA registry
+Alias: iso-ir-50
+Alias: csISO50INIS8
+
+Name: INIS-cyrillic [RFC1345,KXS2]
+MIBenum: 53
+Source: ECMA registry
+Alias: iso-ir-51
+Alias: csISO51INISCyrillic
+
+Name: ISO_5427:1981 [RFC1345,KXS2]
+MIBenum: 54
+Source: ECMA registry
+Alias: iso-ir-54
+Alias: ISO5427Cyrillic1981
+
+Name: ISO_5428:1980 [RFC1345,KXS2]
+MIBenum: 55
+Source: ECMA registry
+Alias: iso-ir-55
+Alias: csISO5428Greek
+
+Name: GB_1988-80 [RFC1345,KXS2]
+MIBenum: 56
+Source: ECMA registry
+Alias: iso-ir-57
+Alias: cn
+Alias: ISO646-CN
+Alias: csISO57GB1988
+
+Name: GB_2312-80 [RFC1345,KXS2]
+MIBenum: 57
+Source: ECMA registry
+Alias: iso-ir-58
+Alias: chinese
+Alias: csISO58GB231280
+
+Name: NS_4551-1 [RFC1345,KXS2]
+MIBenum: 25
+Source: ECMA registry
+Alias: iso-ir-60
+Alias: ISO646-NO
+Alias: no
+Alias: csISO60DanishNorwegian
+Alias: csISO60Norwegian1
+
+Name: NS_4551-2 [RFC1345,KXS2]
+MIBenum: 58
+Source: ECMA registry
+Alias: ISO646-NO2
+Alias: iso-ir-61
+Alias: no2
+Alias: csISO61Norwegian2
+
+Name: NF_Z_62-010 [RFC1345,KXS2]
+MIBenum: 26
+Source: ECMA registry
+Alias: iso-ir-69
+Alias: ISO646-FR
+Alias: fr
+Alias: csISO69French
+
+Name: videotex-suppl [RFC1345,KXS2]
+MIBenum: 59
+Source: ECMA registry
+Alias: iso-ir-70
+Alias: csISO70VideotexSupp1
+
+Name: PT2 [RFC1345,KXS2]
+MIBenum: 60
+Source: ECMA registry
+Alias: iso-ir-84
+Alias: ISO646-PT2
+Alias: csISO84Portuguese2
+
+Name: ES2 [RFC1345,KXS2]
+MIBenum: 61
+Source: ECMA registry
+Alias: iso-ir-85
+Alias: ISO646-ES2
+Alias: csISO85Spanish2
+
+Name: MSZ_7795.3 [RFC1345,KXS2]
+MIBenum: 62
+Source: ECMA registry
+Alias: iso-ir-86
+Alias: ISO646-HU
+Alias: hu
+Alias: csISO86Hungarian
+
+Name: JIS_C6226-1983 [RFC1345,KXS2]
+MIBenum: 63
+Source: ECMA registry
+Alias: iso-ir-87
+Alias: x0208
+Alias: JIS_X0208-1983
+Alias: csISO87JISX0208
+
+Name: greek7 [RFC1345,KXS2]
+MIBenum: 64
+Source: ECMA registry
+Alias: iso-ir-88
+Alias: csISO88Greek7
+
+Name: ASMO_449 [RFC1345,KXS2]
+MIBenum: 65
+Source: ECMA registry
+Alias: ISO_9036
+Alias: arabic7
+Alias: iso-ir-89
+Alias: csISO89ASMO449
+
+Name: iso-ir-90 [RFC1345,KXS2]
+MIBenum: 66
+Source: ECMA registry
+Alias: csISO90
+
+Name: JIS_C6229-1984-a [RFC1345,KXS2]
+MIBenum: 67
+Source: ECMA registry
+Alias: iso-ir-91
+Alias: jp-ocr-a
+Alias: csISO91JISC62291984a
+
+Name: JIS_C6229-1984-b [RFC1345,KXS2]
+MIBenum: 68
+Source: ECMA registry
+Alias: iso-ir-92
+Alias: ISO646-JP-OCR-B
+Alias: jp-ocr-b
+Alias: csISO92JISC62991984b
+
+Name: JIS_C6229-1984-b-add [RFC1345,KXS2]
+MIBenum: 69
+Source: ECMA registry
+Alias: iso-ir-93
+Alias: jp-ocr-b-add
+Alias: csISO93JIS62291984badd
+
+Name: JIS_C6229-1984-hand [RFC1345,KXS2]
+MIBenum: 70
+Source: ECMA registry
+Alias: iso-ir-94
+Alias: jp-ocr-hand
+Alias: csISO94JIS62291984hand
+
+Name: JIS_C6229-1984-hand-add [RFC1345,KXS2]
+MIBenum: 71
+Source: ECMA registry
+Alias: iso-ir-95
+Alias: jp-ocr-hand-add
+Alias: csISO95JIS62291984handadd
+
+Name: JIS_C6229-1984-kana [RFC1345,KXS2]
+MIBenum: 72
+Source: ECMA registry
+Alias: iso-ir-96
+Alias: csISO96JISC62291984kana
+
+Name: ISO_2033-1983 [RFC1345,KXS2]
+MIBenum: 73
+Source: ECMA registry
+Alias: iso-ir-98
+Alias: e13b
+Alias: csISO2033
+
+Name: ANSI_X3.110-1983 [RFC1345,KXS2]
+MIBenum: 74
+Source: ECMA registry
+Alias: iso-ir-99
+Alias: CSA_T500-1983
+Alias: NAPLPS
+Alias: csISO99NAPLPS
+
+Name: ISO_8859-1:1987 [RFC1345,KXS2]
+MIBenum: 4
+Source: ECMA registry
+Alias: iso-ir-100
+Alias: ISO_8859-1
+Alias: ISO-8859-1 (preferred MIME name)
+Alias: latin1
+Alias: l1
+Alias: IBM819
+Alias: CP819
+Alias: csISOLatin1
+
+Name: ISO_8859-2:1987 [RFC1345,KXS2]
+MIBenum: 5
+Source: ECMA registry
+Alias: iso-ir-101
+Alias: ISO_8859-2
+Alias: ISO-8859-2 (preferred MIME name)
+Alias: latin2
+Alias: l2
+Alias: csISOLatin2
+
+Name: T.61-7bit [RFC1345,KXS2]
+MIBenum: 75
+Source: ECMA registry
+Alias: iso-ir-102
+Alias: csISO102T617bit
+
+Name: T.61-8bit [RFC1345,KXS2]
+MIBenum: 76
+Alias: T.61
+Source: ECMA registry
+Alias: iso-ir-103
+Alias: csISO103T618bit
+
+Name: ISO_8859-3:1988 [RFC1345,KXS2]
+MIBenum: 6
+Source: ECMA registry
+Alias: iso-ir-109
+Alias: ISO_8859-3
+Alias: ISO-8859-3 (preferred MIME name)
+Alias: latin3
+Alias: l3
+Alias: csISOLatin3
+
+Name: ISO_8859-4:1988 [RFC1345,KXS2]
+MIBenum: 7
+Source: ECMA registry
+Alias: iso-ir-110
+Alias: ISO_8859-4
+Alias: ISO-8859-4 (preferred MIME name)
+Alias: latin4
+Alias: l4
+Alias: csISOLatin4
+
+Name: ECMA-cyrillic
+MIBenum: 77
+Source: ISO registry (formerly ECMA registry)
+ http://www.itscj.ipsj.jp/ISO-IR/111.pdf
+Alias: iso-ir-111
+Alias: KOI8-E
+Alias: csISO111ECMACyrillic
+
+Name: CSA_Z243.4-1985-1 [RFC1345,KXS2]
+MIBenum: 78
+Source: ECMA registry
+Alias: iso-ir-121
+Alias: ISO646-CA
+Alias: csa7-1
+Alias: ca
+Alias: csISO121Canadian1
+
+Name: CSA_Z243.4-1985-2 [RFC1345,KXS2]
+MIBenum: 79
+Source: ECMA registry
+Alias: iso-ir-122
+Alias: ISO646-CA2
+Alias: csa7-2
+Alias: csISO122Canadian2
+
+Name: CSA_Z243.4-1985-gr [RFC1345,KXS2]
+MIBenum: 80
+Source: ECMA registry
+Alias: iso-ir-123
+Alias: csISO123CSAZ24341985gr
+
+Name: ISO_8859-6:1987 [RFC1345,KXS2]
+MIBenum: 9
+Source: ECMA registry
+Alias: iso-ir-127
+Alias: ISO_8859-6
+Alias: ISO-8859-6 (preferred MIME name)
+Alias: ECMA-114
+Alias: ASMO-708
+Alias: arabic
+Alias: csISOLatinArabic
+
+Name: ISO_8859-6-E [RFC1556,IANA]
+MIBenum: 81
+Source: RFC1556
+Alias: csISO88596E
+Alias: ISO-8859-6-E (preferred MIME name)
+
+Name: ISO_8859-6-I [RFC1556,IANA]
+MIBenum: 82
+Source: RFC1556
+Alias: csISO88596I
+Alias: ISO-8859-6-I (preferred MIME name)
+
+Name: ISO_8859-7:1987 [RFC1947,RFC1345,KXS2]
+MIBenum: 10
+Source: ECMA registry
+Alias: iso-ir-126
+Alias: ISO_8859-7
+Alias: ISO-8859-7 (preferred MIME name)
+Alias: ELOT_928
+Alias: ECMA-118
+Alias: greek
+Alias: greek8
+Alias: csISOLatinGreek
+
+Name: T.101-G2 [RFC1345,KXS2]
+MIBenum: 83
+Source: ECMA registry
+Alias: iso-ir-128
+Alias: csISO128T101G2
+
+Name: ISO_8859-8:1988 [RFC1345,KXS2]
+MIBenum: 11
+Source: ECMA registry
+Alias: iso-ir-138
+Alias: ISO_8859-8
+Alias: ISO-8859-8 (preferred MIME name)
+Alias: hebrew
+Alias: csISOLatinHebrew
+
+Name: ISO_8859-8-E [RFC1556,Nussbacher]
+MIBenum: 84
+Source: RFC1556
+Alias: csISO88598E
+Alias: ISO-8859-8-E (preferred MIME name)
+
+Name: ISO_8859-8-I [RFC1556,Nussbacher]
+MIBenum: 85
+Source: RFC1556
+Alias: csISO88598I
+Alias: ISO-8859-8-I (preferred MIME name)
+
+Name: CSN_369103 [RFC1345,KXS2]
+MIBenum: 86
+Source: ECMA registry
+Alias: iso-ir-139
+Alias: csISO139CSN369103
+
+Name: JUS_I.B1.002 [RFC1345,KXS2]
+MIBenum: 87
+Source: ECMA registry
+Alias: iso-ir-141
+Alias: ISO646-YU
+Alias: js
+Alias: yu
+Alias: csISO141JUSIB1002
+
+Name: ISO_6937-2-add [RFC1345,KXS2]
+MIBenum: 14
+Source: ECMA registry and ISO 6937-2:1983
+Alias: iso-ir-142
+Alias: csISOTextComm
+
+Name: IEC_P27-1 [RFC1345,KXS2]
+MIBenum: 88
+Source: ECMA registry
+Alias: iso-ir-143
+Alias: csISO143IECP271
+
+Name: ISO_8859-5:1988 [RFC1345,KXS2]
+MIBenum: 8
+Source: ECMA registry
+Alias: iso-ir-144
+Alias: ISO_8859-5
+Alias: ISO-8859-5 (preferred MIME name)
+Alias: cyrillic
+Alias: csISOLatinCyrillic
+
+Name: JUS_I.B1.003-serb [RFC1345,KXS2]
+MIBenum: 89
+Source: ECMA registry
+Alias: iso-ir-146
+Alias: serbian
+Alias: csISO146Serbian
+
+Name: JUS_I.B1.003-mac [RFC1345,KXS2]
+MIBenum: 90
+Source: ECMA registry
+Alias: macedonian
+Alias: iso-ir-147
+Alias: csISO147Macedonian
+
+Name: ISO_8859-9:1989 [RFC1345,KXS2]
+MIBenum: 12
+Source: ECMA registry
+Alias: iso-ir-148
+Alias: ISO_8859-9
+Alias: ISO-8859-9 (preferred MIME name)
+Alias: latin5
+Alias: l5
+Alias: csISOLatin5
+
+Name: greek-ccitt [RFC1345,KXS2]
+MIBenum: 91
+Source: ECMA registry
+Alias: iso-ir-150
+Alias: csISO150
+Alias: csISO150GreekCCITT
+
+Name: NC_NC00-10:81 [RFC1345,KXS2]
+MIBenum: 92
+Source: ECMA registry
+Alias: cuba
+Alias: iso-ir-151
+Alias: ISO646-CU
+Alias: csISO151Cuba
+
+Name: ISO_6937-2-25 [RFC1345,KXS2]
+MIBenum: 93
+Source: ECMA registry
+Alias: iso-ir-152
+Alias: csISO6937Add
+
+Name: GOST_19768-74 [RFC1345,KXS2]
+MIBenum: 94
+Source: ECMA registry
+Alias: ST_SEV_358-88
+Alias: iso-ir-153
+Alias: csISO153GOST1976874
+
+Name: ISO_8859-supp [RFC1345,KXS2]
+MIBenum: 95
+Source: ECMA registry
+Alias: iso-ir-154
+Alias: latin1-2-5
+Alias: csISO8859Supp
+
+Name: ISO_10367-box [RFC1345,KXS2]
+MIBenum: 96
+Source: ECMA registry
+Alias: iso-ir-155
+Alias: csISO10367Box
+
+Name: ISO-8859-10 (preferred MIME name) [RFC1345,KXS2]
+MIBenum: 13
+Source: ECMA registry
+Alias: iso-ir-157
+Alias: l6
+Alias: ISO_8859-10:1992
+Alias: csISOLatin6
+Alias: latin6
+
+Name: latin-lap [RFC1345,KXS2]
+MIBenum: 97
+Source: ECMA registry
+Alias: lap
+Alias: iso-ir-158
+Alias: csISO158Lap
+
+Name: JIS_X0212-1990 [RFC1345,KXS2]
+MIBenum: 98
+Source: ECMA registry
+Alias: x0212
+Alias: iso-ir-159
+Alias: csISO159JISX02121990
+
+Name: DS_2089 [RFC1345,KXS2]
+MIBenum: 99
+Source: Danish Standard, DS 2089, February 1974
+Alias: DS2089
+Alias: ISO646-DK
+Alias: dk
+Alias: csISO646Danish
+
+Name: us-dk [RFC1345,KXS2]
+MIBenum: 100
+Alias: csUSDK
+
+Name: dk-us [RFC1345,KXS2]
+MIBenum: 101
+Alias: csDKUS
+
+Name: JIS_X0201 [RFC1345,KXS2]
+MIBenum: 15
+Source: JIS X 0201-1976. One byte only, this is equivalent to
+ JIS/Roman (similar to ASCII) plus eight-bit half-width
+ Katakana
+Alias: X0201
+Alias: csHalfWidthKatakana
+
+Name: KSC5636 [RFC1345,KXS2]
+MIBenum: 102
+Alias: ISO646-KR
+Alias: csKSC5636
+
+Name: ISO-10646-UCS-2
+MIBenum: 1000
+Source: the 2-octet Basic Multilingual Plane, aka Unicode
+ this needs to specify network byte order: the standard
+ does not specify (it is a 16-bit integer space)
+Alias: csUnicode
+
+Name: ISO-10646-UCS-4
+MIBenum: 1001
+Source: the full code space. (same comment about byte order,
+ these are 31-bit numbers.)
+Alias: csUCS4
+
+Name: DEC-MCS [RFC1345,KXS2]
+MIBenum: 2008
+Source: VAX/VMS User's Manual,
+ Order Number: AI-Y517A-TE, April 1986.
+Alias: dec
+Alias: csDECMCS
+
+Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
+MIBenum: 2004
+Source: LaserJet IIP Printer User's Manual,
+ HP part no 33471-90901, Hewlett-Packard, June 1989.
+Alias: roman8
+Alias: r8
+Alias: csHPRoman8
+
+Name: macintosh [RFC1345,KXS2]
+MIBenum: 2027
+Source: The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991
+Alias: mac
+Alias: csMacintosh
+
+Name: IBM037 [RFC1345,KXS2]
+MIBenum: 2028
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp037
+Alias: ebcdic-cp-us
+Alias: ebcdic-cp-ca
+Alias: ebcdic-cp-wt
+Alias: ebcdic-cp-nl
+Alias: csIBM037
+
+Name: IBM038 [RFC1345,KXS2]
+MIBenum: 2029
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-INT
+Alias: cp038
+Alias: csIBM038
+
+Name: IBM273 [RFC1345,KXS2]
+MIBenum: 2030
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP273
+Alias: csIBM273
+
+Name: IBM274 [RFC1345,KXS2]
+MIBenum: 2031
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-BE
+Alias: CP274
+Alias: csIBM274
+
+Name: IBM275 [RFC1345,KXS2]
+MIBenum: 2032
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-BR
+Alias: cp275
+Alias: csIBM275
+
+Name: IBM277 [RFC1345,KXS2]
+MIBenum: 2033
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-CP-DK
+Alias: EBCDIC-CP-NO
+Alias: csIBM277
+
+Name: IBM278 [RFC1345,KXS2]
+MIBenum: 2034
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP278
+Alias: ebcdic-cp-fi
+Alias: ebcdic-cp-se
+Alias: csIBM278
+
+Name: IBM280 [RFC1345,KXS2]
+MIBenum: 2035
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP280
+Alias: ebcdic-cp-it
+Alias: csIBM280
+
+Name: IBM281 [RFC1345,KXS2]
+MIBenum: 2036
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-JP-E
+Alias: cp281
+Alias: csIBM281
+
+Name: IBM284 [RFC1345,KXS2]
+MIBenum: 2037
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP284
+Alias: ebcdic-cp-es
+Alias: csIBM284
+
+Name: IBM285 [RFC1345,KXS2]
+MIBenum: 2038
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP285
+Alias: ebcdic-cp-gb
+Alias: csIBM285
+
+Name: IBM290 [RFC1345,KXS2]
+MIBenum: 2039
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: cp290
+Alias: EBCDIC-JP-kana
+Alias: csIBM290
+
+Name: IBM297 [RFC1345,KXS2]
+MIBenum: 2040
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp297
+Alias: ebcdic-cp-fr
+Alias: csIBM297
+
+Name: IBM420 [RFC1345,KXS2]
+MIBenum: 2041
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990,
+ IBM NLS RM p 11-11
+Alias: cp420
+Alias: ebcdic-cp-ar1
+Alias: csIBM420
+
+Name: IBM423 [RFC1345,KXS2]
+MIBenum: 2042
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp423
+Alias: ebcdic-cp-gr
+Alias: csIBM423
+
+Name: IBM424 [RFC1345,KXS2]
+MIBenum: 2043
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp424
+Alias: ebcdic-cp-he
+Alias: csIBM424
+
+Name: IBM437 [RFC1345,KXS2]
+MIBenum: 2011
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp437
+Alias: 437
+Alias: csPC8CodePage437
+
+Name: IBM500 [RFC1345,KXS2]
+MIBenum: 2044
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP500
+Alias: ebcdic-cp-be
+Alias: ebcdic-cp-ch
+Alias: csIBM500
+
+Name: IBM775 [HP-PCL5]
+MIBenum: 2087
+Source: HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996
+Alias: cp775
+Alias: csPC775Baltic
+
+Name: IBM850 [RFC1345,KXS2]
+MIBenum: 2009
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp850
+Alias: 850
+Alias: csPC850Multilingual
+
+Name: IBM851 [RFC1345,KXS2]
+MIBenum: 2045
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp851
+Alias: 851
+Alias: csIBM851
+
+Name: IBM852 [RFC1345,KXS2]
+MIBenum: 2010
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp852
+Alias: 852
+Alias: csPCp852
+
+Name: IBM855 [RFC1345,KXS2]
+MIBenum: 2046
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp855
+Alias: 855
+Alias: csIBM855
+
+Name: IBM857 [RFC1345,KXS2]
+MIBenum: 2047
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp857
+Alias: 857
+Alias: csIBM857
+
+Name: IBM860 [RFC1345,KXS2]
+MIBenum: 2048
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp860
+Alias: 860
+Alias: csIBM860
+
+Name: IBM861 [RFC1345,KXS2]
+MIBenum: 2049
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp861
+Alias: 861
+Alias: cp-is
+Alias: csIBM861
+
+Name: IBM862 [RFC1345,KXS2]
+MIBenum: 2013
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp862
+Alias: 862
+Alias: csPC862LatinHebrew
+
+Name: IBM863 [RFC1345,KXS2]
+MIBenum: 2050
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp863
+Alias: 863
+Alias: csIBM863
+
+Name: IBM864 [RFC1345,KXS2]
+MIBenum: 2051
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp864
+Alias: csIBM864
+
+Name: IBM865 [RFC1345,KXS2]
+MIBenum: 2052
+Source: IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987)
+Alias: cp865
+Alias: 865
+Alias: csIBM865
+
+Name: IBM866 [Pond]
+MIBenum: 2086
+Source: IBM NLDG Volume 2 (SE09-8002-03) August 1994
+Alias: cp866
+Alias: 866
+Alias: csIBM866
+
+Name: IBM868 [RFC1345,KXS2]
+MIBenum: 2053
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP868
+Alias: cp-ar
+Alias: csIBM868
+
+Name: IBM869 [RFC1345,KXS2]
+MIBenum: 2054
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp869
+Alias: 869
+Alias: cp-gr
+Alias: csIBM869
+
+Name: IBM870 [RFC1345,KXS2]
+MIBenum: 2055
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP870
+Alias: ebcdic-cp-roece
+Alias: ebcdic-cp-yu
+Alias: csIBM870
+
+Name: IBM871 [RFC1345,KXS2]
+MIBenum: 2056
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP871
+Alias: ebcdic-cp-is
+Alias: csIBM871
+
+Name: IBM880 [RFC1345,KXS2]
+MIBenum: 2057
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp880
+Alias: EBCDIC-Cyrillic
+Alias: csIBM880
+
+Name: IBM891 [RFC1345,KXS2]
+MIBenum: 2058
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp891
+Alias: csIBM891
+
+Name: IBM903 [RFC1345,KXS2]
+MIBenum: 2059
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp903
+Alias: csIBM903
+
+Name: IBM904 [RFC1345,KXS2]
+MIBenum: 2060
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp904
+Alias: 904
+Alias: csIBBM904
+
+Name: IBM905 [RFC1345,KXS2]
+MIBenum: 2061
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: CP905
+Alias: ebcdic-cp-tr
+Alias: csIBM905
+
+Name: IBM918 [RFC1345,KXS2]
+MIBenum: 2062
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP918
+Alias: ebcdic-cp-ar2
+Alias: csIBM918
+
+Name: IBM1026 [RFC1345,KXS2]
+MIBenum: 2063
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP1026
+Alias: csIBM1026
+
+Name: EBCDIC-AT-DE [RFC1345,KXS2]
+MIBenum: 2064
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csIBMEBCDICATDE
+
+Name: EBCDIC-AT-DE-A [RFC1345,KXS2]
+MIBenum: 2065
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICATDEA
+
+Name: EBCDIC-CA-FR [RFC1345,KXS2]
+MIBenum: 2066
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICCAFR
+
+Name: EBCDIC-DK-NO [RFC1345,KXS2]
+MIBenum: 2067
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNO
+
+Name: EBCDIC-DK-NO-A [RFC1345,KXS2]
+MIBenum: 2068
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNOA
+
+Name: EBCDIC-FI-SE [RFC1345,KXS2]
+MIBenum: 2069
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISE
+
+Name: EBCDIC-FI-SE-A [RFC1345,KXS2]
+MIBenum: 2070
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISEA
+
+Name: EBCDIC-FR [RFC1345,KXS2]
+MIBenum: 2071
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFR
+
+Name: EBCDIC-IT [RFC1345,KXS2]
+MIBenum: 2072
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICIT
+
+Name: EBCDIC-PT [RFC1345,KXS2]
+MIBenum: 2073
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICPT
+
+Name: EBCDIC-ES [RFC1345,KXS2]
+MIBenum: 2074
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICES
+
+Name: EBCDIC-ES-A [RFC1345,KXS2]
+MIBenum: 2075
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESA
+
+Name: EBCDIC-ES-S [RFC1345,KXS2]
+MIBenum: 2076
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESS
+
+Name: EBCDIC-UK [RFC1345,KXS2]
+MIBenum: 2077
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUK
+
+Name: EBCDIC-US [RFC1345,KXS2]
+MIBenum: 2078
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUS
+
+Name: UNKNOWN-8BIT [RFC1428]
+MIBenum: 2079
+Alias: csUnknown8BiT
+
+Name: MNEMONIC [RFC1345,KXS2]
+MIBenum: 2080
+Source: RFC 1345, also known as "mnemonic+ascii+38"
+Alias: csMnemonic
+
+Name: MNEM [RFC1345,KXS2]
+MIBenum: 2081
+Source: RFC 1345, also known as "mnemonic+ascii+8200"
+Alias: csMnem
+
+Name: VISCII [RFC1456]
+MIBenum: 2082
+Source: RFC 1456
+Alias: csVISCII
+
+Name: VIQR [RFC1456]
+MIBenum: 2083
+Source: RFC 1456
+Alias: csVIQR
+
+Name: KOI8-R (preferred MIME name) [RFC1489]
+MIBenum: 2084
+Source: RFC 1489, based on GOST-19768-74, ISO-6937/8,
+ INIS-Cyrillic, ISO-5427.
+Alias: csKOI8R
+
+Name: KOI8-U [RFC2319]
+MIBenum: 2088
+Source: RFC 2319
+
+Name: IBM00858
+MIBenum: 2089
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00858) [Mahdi]
+Alias: CCSID00858
+Alias: CP00858
+Alias: PC-Multilingual-850+euro
+
+Name: IBM00924
+MIBenum: 2090
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00924) [Mahdi]
+Alias: CCSID00924
+Alias: CP00924
+Alias: ebcdic-Latin9--euro
+
+Name: IBM01140
+MIBenum: 2091
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01140) [Mahdi]
+Alias: CCSID01140
+Alias: CP01140
+Alias: ebcdic-us-37+euro
+
+Name: IBM01141
+MIBenum: 2092
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01141) [Mahdi]
+Alias: CCSID01141
+Alias: CP01141
+Alias: ebcdic-de-273+euro
+
+Name: IBM01142
+MIBenum: 2093
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01142) [Mahdi]
+Alias: CCSID01142
+Alias: CP01142
+Alias: ebcdic-dk-277+euro
+Alias: ebcdic-no-277+euro
+
+Name: IBM01143
+MIBenum: 2094
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01143) [Mahdi]
+Alias: CCSID01143
+Alias: CP01143
+Alias: ebcdic-fi-278+euro
+Alias: ebcdic-se-278+euro
+
+Name: IBM01144
+MIBenum: 2095
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01144) [Mahdi]
+Alias: CCSID01144
+Alias: CP01144
+Alias: ebcdic-it-280+euro
+
+Name: IBM01145
+MIBenum: 2096
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01145) [Mahdi]
+Alias: CCSID01145
+Alias: CP01145
+Alias: ebcdic-es-284+euro
+
+Name: IBM01146
+MIBenum: 2097
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01146) [Mahdi]
+Alias: CCSID01146
+Alias: CP01146
+Alias: ebcdic-gb-285+euro
+
+Name: IBM01147
+MIBenum: 2098
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01147) [Mahdi]
+Alias: CCSID01147
+Alias: CP01147
+Alias: ebcdic-fr-297+euro
+
+Name: IBM01148
+MIBenum: 2099
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01148) [Mahdi]
+Alias: CCSID01148
+Alias: CP01148
+Alias: ebcdic-international-500+euro
+
+Name: IBM01149
+MIBenum: 2100
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01149) [Mahdi]
+Alias: CCSID01149
+Alias: CP01149
+Alias: ebcdic-is-871+euro
+
+Name: Big5-HKSCS [Yick]
+MIBenum: 2101
+Source: See (http://www.iana.org/assignments/charset-reg/Big5-HKSCS)
+Alias: None
+
+Name: IBM1047 [Robrigado]
+MIBenum: 2102
+Source: IBM1047 (EBCDIC Latin 1/Open Systems)
+ http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
+Alias: IBM-1047
+
+Name: PTCP154 [Uskov]
+MIBenum: 2103
+Source: See (http://www.iana.org/assignments/charset-reg/PTCP154)
+Alias: csPTCP154
+Alias: PT154
+Alias: CP154
+Alias: Cyrillic-Asian
+
+Name: Amiga-1251
+MIBenum: 2104
+Source: See (http://www.amiga.ultranet.ru/Amiga-1251.html)
+Alias: Ami1251
+Alias: Amiga1251
+Alias: Ami-1251
+(Aliases are provided for historical reasons and should not be used)
+ [Malyshev]
+
+Name: KOI7-switched
+MIBenum: 2105
+Source: See <http://www.iana.org/assignments/charset-reg/KOI7-switched>
+Alias: None
+
+Name: UNICODE-1-1 [RFC1641]
+MIBenum: 1010
+Source: RFC 1641
+Alias: csUnicode11
+
+Name: SCSU
+MIBenum: 1011
+Source: SCSU See (http://www.iana.org/assignments/charset-reg/SCSU) [Scherer]
+Alias: None
+
+Name: UTF-7 [RFC2152]
+MIBenum: 1012
+Source: RFC 2152
+Alias: None
+
+Name: UTF-16BE [RFC2781]
+MIBenum: 1013
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16LE [RFC2781]
+MIBenum: 1014
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16 [RFC2781]
+MIBenum: 1015
+Source: RFC 2781
+Alias: None
+
+Name: CESU-8 [Phipps]
+MIBenum: 1016
+Source: <http://www.unicode.org/unicode/reports/tr26>
+Alias: csCESU-8
+
+Name: UTF-32 [Davis]
+MIBenum: 1017
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32BE [Davis]
+MIBenum: 1018
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32LE [Davis]
+MIBenum: 1019
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: BOCU-1 [Scherer]
+MIBenum: 1020
+Source: http://www.unicode.org/notes/tn6/
+Alias: csBOCU-1
+
+Name: UNICODE-1-1-UTF-7 [RFC1642]
+MIBenum: 103
+Source: RFC 1642
+Alias: csUnicode11UTF7
+
+Name: UTF-8 [RFC3629]
+MIBenum: 106
+Source: RFC 3629
+Alias: None
+
+Name: ISO-8859-13
+MIBenum: 109
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-13) [Tumasonis]
+Alias: None
+
+Name: ISO-8859-14
+MIBenum: 110
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-14) [Simonsen]
+Alias: iso-ir-199
+Alias: ISO_8859-14:1998
+Alias: ISO_8859-14
+Alias: latin8
+Alias: iso-celtic
+Alias: l8
+
+Name: ISO-8859-15
+MIBenum: 111
+Source: ISO
+ Please see: <http://www.iana.org/assignments/charset-reg/ISO-8859-15>
+Alias: ISO_8859-15
+Alias: Latin-9
+
+Name: ISO-8859-16
+MIBenum: 112
+Source: ISO
+Alias: iso-ir-226
+Alias: ISO_8859-16:2001
+Alias: ISO_8859-16
+Alias: latin10
+Alias: l10
+
+Name: GBK
+MIBenum: 113
+Source: Chinese IT Standardization Technical Committee
+ Please see: <http://www.iana.org/assignments/charset-reg/GBK>
+Alias: CP936
+Alias: MS936
+Alias: windows-936
+
+Name: GB18030
+MIBenum: 114
+Source: Chinese IT Standardization Technical Committee
+ Please see: <http://www.iana.org/assignments/charset-reg/GB18030>
+Alias: None
+
+Name: OSD_EBCDIC_DF04_15
+MIBenum: 115
+Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
+ Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15>
+Alias: None
+
+Name: OSD_EBCDIC_DF03_IRV
+MIBenum: 116
+Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
+ Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV>
+Alias: None
+
+Name: OSD_EBCDIC_DF04_1
+MIBenum: 117
+Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
+ Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1>
+Alias: None
+
+Name: JIS_Encoding
+MIBenum: 16
+Source: JIS X 0202-1991. Uses ISO 2022 escape sequences to
+ shift code sets as documented in JIS X 0202-1991.
+Alias: csJISEncoding
+
+Name: Shift_JIS (preferred MIME name)
+MIBenum: 17
+Source: This charset is an extension of csHalfWidthKatakana by
+ adding graphic characters in JIS X 0208. The CCS's are
+ JIS X0201:1997 and JIS X0208:1997. The
+ complete definition is shown in Appendix 1 of JIS
+ X0208:1997.
+ This charset can be used for the top-level media type "text".
+Alias: MS_Kanji
+Alias: csShiftJIS
+
+Name: Extended_UNIX_Code_Packed_Format_for_Japanese
+MIBenum: 18
+Source: Standardized by OSF, UNIX International, and UNIX Systems
+ Laboratories Pacific. Uses ISO 2022 rules to select
+ code set 0: US-ASCII (a single 7-bit byte set)
+ code set 1: JIS X0208-1990 (a double 8-bit byte set)
+ restricted to A0-FF in both bytes
+ code set 2: Half Width Katakana (a single 7-bit byte set)
+ requiring SS2 as the character prefix
+ code set 3: JIS X0212-1990 (a double 7-bit byte set)
+ restricted to A0-FF in both bytes
+ requiring SS3 as the character prefix
+Alias: csEUCPkdFmtJapanese
+Alias: EUC-JP (preferred MIME name)
+
+Name: Extended_UNIX_Code_Fixed_Width_for_Japanese
+MIBenum: 19
+Source: Used in Japan. Each character is 2 octets.
+ code set 0: US-ASCII (a single 7-bit byte set)
+ 1st byte = 00
+ 2nd byte = 20-7E
+ code set 1: JIS X0208-1990 (a double 7-bit byte set)
+ restricted to A0-FF in both bytes
+ code set 2: Half Width Katakana (a single 7-bit byte set)
+ 1st byte = 00
+ 2nd byte = A0-FF
+ code set 3: JIS X0212-1990 (a double 7-bit byte set)
+ restricted to A0-FF in
+ the first byte
+ and 21-7E in the second byte
+Alias: csEUCFixWidJapanese
+
+Name: ISO-10646-UCS-Basic
+MIBenum: 1002
+Source: ASCII subset of Unicode. Basic Latin = collection 1
+ See ISO 10646, Appendix A
+Alias: csUnicodeASCII
+
+Name: ISO-10646-Unicode-Latin1
+MIBenum: 1003
+Source: ISO Latin-1 subset of Unicode. Basic Latin and Latin-1
+ Supplement = collections 1 and 2. See ISO 10646,
+ Appendix A. See RFC 1815.
+Alias: csUnicodeLatin1
+Alias: ISO-10646
+
+Name: ISO-10646-J-1
+MIBenum: 1004
+Source: ISO 10646 Japanese, see RFC 1815.
+
+Name: ISO-Unicode-IBM-1261
+MIBenum: 1005
+Source: IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261
+Alias: csUnicodeIBM1261
+
+Name: ISO-Unicode-IBM-1268
+MIBenum: 1006
+Source: IBM Latin-4 Extended Presentation Set, GCSGID: 1268
+Alias: csUnicodeIBM1268
+
+Name: ISO-Unicode-IBM-1276
+MIBenum: 1007
+Source: IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276
+Alias: csUnicodeIBM1276
+
+Name: ISO-Unicode-IBM-1264
+MIBenum: 1008
+Source: IBM Arabic Presentation Set, GCSGID: 1264
+Alias: csUnicodeIBM1264
+
+Name: ISO-Unicode-IBM-1265
+MIBenum: 1009
+Source: IBM Hebrew Presentation Set, GCSGID: 1265
+Alias: csUnicodeIBM1265
+
+Name: ISO-8859-1-Windows-3.0-Latin-1 [HP-PCL5]
+MIBenum: 2000
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.0.
+ PCL Symbol Set id: 9U
+Alias: csWindows30Latin1
+
+Name: ISO-8859-1-Windows-3.1-Latin-1 [HP-PCL5]
+MIBenum: 2001
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.1.
+ PCL Symbol Set id: 19U
+Alias: csWindows31Latin1
+
+Name: ISO-8859-2-Windows-Latin-2 [HP-PCL5]
+MIBenum: 2002
+Source: Extended ISO 8859-2. Latin-2 for Windows 3.1.
+ PCL Symbol Set id: 9E
+Alias: csWindows31Latin2
+
+Name: ISO-8859-9-Windows-Latin-5 [HP-PCL5]
+MIBenum: 2003
+Source: Extended ISO 8859-9. Latin-5 for Windows 3.1
+ PCL Symbol Set id: 5T
+Alias: csWindows31Latin5
+
+Name: Adobe-Standard-Encoding [Adobe]
+MIBenum: 2005
+Source: PostScript Language Reference Manual
+ PCL Symbol Set id: 10J
+Alias: csAdobeStandardEncoding
+
+Name: Ventura-US [HP-PCL5]
+MIBenum: 2006
+Source: Ventura US. ASCII plus characters typically used in
+ publishing, like pilcrow, copyright, registered, trade mark,
+ section, dagger, and double dagger in the range A0 (hex)
+ to FF (hex).
+ PCL Symbol Set id: 14J
+Alias: csVenturaUS
+
+Name: Ventura-International [HP-PCL5]
+MIBenum: 2007
+Source: Ventura International. ASCII plus coded characters similar
+ to Roman8.
+ PCL Symbol Set id: 13J
+Alias: csVenturaInternational
+
+Name: PC8-Danish-Norwegian [HP-PCL5]
+MIBenum: 2012
+Source: PC Danish Norwegian
+ 8-bit PC set for Danish Norwegian
+ PCL Symbol Set id: 11U
+Alias: csPC8DanishNorwegian
+
+Name: PC8-Turkish [HP-PCL5]
+MIBenum: 2014
+Source: PC Latin Turkish. PCL Symbol Set id: 9T
+Alias: csPC8Turkish
+
+Name: IBM-Symbols [IBM-CIDT]
+MIBenum: 2015
+Source: Presentation Set, CPGID: 259
+Alias: csIBMSymbols
+
+Name: IBM-Thai [IBM-CIDT]
+MIBenum: 2016
+Source: Presentation Set, CPGID: 838
+Alias: csIBMThai
+
+Name: HP-Legal [HP-PCL5]
+MIBenum: 2017
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 1U
+Alias: csHPLegal
+
+Name: HP-Pi-font [HP-PCL5]
+MIBenum: 2018
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 15U
+Alias: csHPPiFont
+
+Name: HP-Math8 [HP-PCL5]
+MIBenum: 2019
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 8M
+Alias: csHPMath8
+
+Name: Adobe-Symbol-Encoding [Adobe]
+MIBenum: 2020
+Source: PostScript Language Reference Manual
+ PCL Symbol Set id: 5M
+Alias: csHPPSMath
+
+Name: HP-DeskTop [HP-PCL5]
+MIBenum: 2021
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 7J
+Alias: csHPDesktop
+
+Name: Ventura-Math [HP-PCL5]
+MIBenum: 2022
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 6M
+Alias: csVenturaMath
+
+Name: Microsoft-Publishing [HP-PCL5]
+MIBenum: 2023
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 6J
+Alias: csMicrosoftPublishing
+
+Name: Windows-31J
+MIBenum: 2024
+Source: Windows Japanese. A further extension of Shift_JIS
+ to include NEC special characters (Row 13), NEC
+ selection of IBM extensions (Rows 89 to 92), and IBM
+ extensions (Rows 115 to 119). The CCS's are
+ JIS X0201:1997, JIS X0208:1997, and these extensions.
+ This charset can be used for the top-level media type "text",
+ but it is of limited or specialized use (see RFC2278).
+ PCL Symbol Set id: 19K
+Alias: csWindows31J
+
+Name: GB2312 (preferred MIME name)
+MIBenum: 2025
+Source: Chinese for People's Republic of China (PRC) mixed one byte,
+ two byte set:
+ 20-7E = one byte ASCII
+ A1-FE = two byte PRC Kanji
+ See GB 2312-80
+ PCL Symbol Set Id: 18C
+Alias: csGB2312
+
+Name: Big5 (preferred MIME name)
+MIBenum: 2026
+Source: Chinese for Taiwan Multi-byte set.
+ PCL Symbol Set Id: 18T
+Alias: csBig5
+
+Name: windows-1250
+MIBenum: 2250
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1250) [Lazhintseva]
+Alias: None
+
+Name: windows-1251
+MIBenum: 2251
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1251) [Lazhintseva]
+Alias: None
+
+Name: windows-1252
+MIBenum: 2252
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1252) [Wendt]
+Alias: None
+
+Name: windows-1253
+MIBenum: 2253
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1253) [Lazhintseva]
+Alias: None
+
+Name: windows-1254
+MIBenum: 2254
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1254) [Lazhintseva]
+Alias: None
+
+Name: windows-1255
+MIBenum: 2255
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1255) [Lazhintseva]
+Alias: None
+
+Name: windows-1256
+MIBenum: 2256
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1256) [Lazhintseva]
+Alias: None
+
+Name: windows-1257
+MIBenum: 2257
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1257) [Lazhintseva]
+Alias: None
+
+Name: windows-1258
+MIBenum: 2258
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1258) [Lazhintseva]
+Alias: None
+
+Name: TIS-620
+MIBenum: 2259
+Source: Thai Industrial Standards Institute (TISI) [Tantsetthi]
+
+Name: HZ-GB-2312
+MIBenum: 2085
+Source: RFC 1842, RFC 1843 [RFC1842, RFC1843]
+
+
+REFERENCES
+----------
+
+[RFC1345] Simonsen, K., "Character Mnemonics & Character Sets",
+ RFC 1345, Rationel Almen Planlaegning, Rationel Almen
+ Planlaegning, June 1992.
+
+[RFC1428] Vaudreuil, G., "Transition of Internet Mail from
+ Just-Send-8 to 8bit-SMTP/MIME", RFC1428, CNRI, February
+ 1993.
+
+[RFC1456] Vietnamese Standardization Working Group, "Conventions for
+ Encoding the Vietnamese Language VISCII: VIetnamese
+ Standard Code for Information Interchange VIQR: VIetnamese
+ Quoted-Readable Specification Revision 1.1", RFC 1456, May
+ 1993.
+
+[RFC1468] Murai, J., Crispin, M., and E. van der Poel, "Japanese
+ Character Encoding for Internet Messages", RFC 1468,
+ Keio University, Panda Programming, June 1993.
+
+[RFC1489] Chernov, A., "Registration of a Cyrillic Character Set",
+ RFC1489, RELCOM Development Team, July 1993.
+
+[RFC1554] Ohta, M., and K. Handa, "ISO-2022-JP-2: Multilingual
+ Extension of ISO-2022-JP", RFC1554, Tokyo Institute of
+ Technology, ETL, December 1993.
+
+[RFC1556] Nussbacher, H., "Handling of Bi-directional Texts in MIME",
+ RFC1556, Israeli Inter-University, December 1993.
+
+[RFC1557] Choi, U., Chon, K., and H. Park, "Korean Character Encoding
+ for Internet Messages", KAIST, Solvit Chosun Media,
+ December 1993.
+
+[RFC1641] Goldsmith, D., and M. Davis, "Using Unicode with MIME",
+ RFC1641, Taligent, Inc., July 1994.
+
+[RFC1642] Goldsmith, D., and M. Davis, "UTF-7", RFC1642, Taligent,
+ Inc., July 1994.
+
+[RFC1815] Ohta, M., "Character Sets ISO-10646 and ISO-10646-J-1",
+ RFC 1815, Tokyo Institute of Technology, July 1995.
+
+
+[Adobe] Adobe Systems Incorporated, PostScript Language Reference
+ Manual, second edition, Addison-Wesley Publishing Company,
+ Inc., 1990.
+
+[ECMA Registry] ISO-IR: International Register of Escape Sequences
+ http://www.itscj.ipsj.or.jp/ISO-IR/ Note: The current
+ registration authority is IPSJ/ITSCJ, Japan.
+
+[HP-PCL5] Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
+ (P/N 5021-0329) pp B-13, 1996.
+
+[IBM-CIDT] IBM Corporation, "ABOUT TYPE: IBM's Technical Reference
+ for Core Interchange Digitized Type", Publication number
+ S544-3708-01
+
+[RFC1842] Wei, Y., J. Li, and Y. Jiang, "ASCII Printable
+ Characters-Based Chinese Character Encoding for Internet
+ Messages", RFC 1842, Harvard University, Rice University,
+ University of Maryland, August 1995.
+
+[RFC1843] Lee, F., "HZ - A Data Format for Exchanging Files of
+ Arbitrarily Mixed Chinese and ASCII Characters", RFC 1843,
+ Stanford University, August 1995.
+
+[RFC2152] Goldsmith, D., M. Davis, "UTF-7: A Mail-Safe Transformation
+ Format of Unicode", RFC 2152, Apple Computer, Inc.,
+ Taligent Inc., May 1997.
+
+[RFC2279] Yergeau, F., "UTF-8, A Transformation Format of ISO 10646",
+ RFC 2279, Alis Technologies, January, 1998.
+
+[RFC2781] Hoffman, P., Yergeau, F., "UTF-16, an encoding of ISO 10646",
+ RFC 2781, February 2000.
+
+[RFC3629] Yergeau, F., "UTF-8, a transformation format of ISO 10646",
+ RFC3629, November 2003.
+
+PEOPLE
+------
+
+[KXS2] Keld Simonsen <Keld.Simonsen@dkuug.dk>
+
+[Choi] Woohyong Choi <whchoi@cosmos.kaist.ac.kr>
+
+[Davis] Mark Davis, <mark@unicode.org>, April 2002.
+
+[Lazhintseva] Katya Lazhintseva, <katyal@MICROSOFT.com>, May 1996.
+
+[Mahdi] Tamer Mahdi, <tamer@ca.ibm.com>, August 2000.
+
+[Malyshev] Michael Malyshev, <michael_malyshev@mail.ru>, January 2004
+
+[Murai] Jun Murai <jun@wide.ad.jp>
+
+[Nussbacher] Hank Nussbacher, <hank@vm.tau.ac.il>
+
+[Ohta] Masataka Ohta, <mohta@cc.titech.ac.jp>, July 1995.
+
+[Phipps] Toby Phipps, <tphipps@peoplesoft.com>, March 2002.
+
+[Pond] Rick Pond, <rickpond@vnet.ibm.com>, March 1997.
+
+[Robrigado] Reuel Robrigado, <reuelr@ca.ibm.com>, September 2002.
+
+[Scherer] Markus Scherer, <markus.scherer@jtcsv.com>, August 2000,
+ September 2002.
+
+[Simonsen] Keld Simonsen, <Keld.Simonsen@rap.dk>, August 2000.
+
+[Tantsetthi] Trin Tantsetthi, <trin@mozart.inet.co.th>, September 1998.
+
+[Tumasonis] Vladas Tumasonis, <vladas.tumasonis@maf.vu.lt>, August 2000.
+
+[Uskov] Alexander Uskov, <auskov@idc.kz>, September 2002.
+
+[Wendt] Chris Wendt, <christw@microsoft.com>, December 1999.
+
+[Yick] Nicky Yick, <cliac@itsd.gcn.gov.hk>, October 2000.
+
+[]
+
+
+
+
+
+
+
diff --git a/WebCore/platform/text/mac/mac-encodings.txt b/WebCore/platform/text/mac/mac-encodings.txt
new file mode 100644
index 0000000..270c625
--- /dev/null
+++ b/WebCore/platform/text/mac/mac-encodings.txt
@@ -0,0 +1,49 @@
+# We'd like to eliminate this file.
+# It would be nice to get rid of the dependence on the Text Encoding Converter (TEC) entirely.
+# Perhaps we can prove these are not used on the web and remove them.
+# Or perhaps we can get them added to ICU.
+
+# The items on the left are names of TEC TextEncoding values (without the leading kTextEncoding).
+# The items on the right are IANA character set names. Names listed in character-sets.txt are not
+# repeated here; mentioning any one character set from a group in there pulls in all the aliases in
+# that group.
+
+DOSChineseTrad: cp950
+DOSGreek: cp737, ibm737
+EUC_TW: EUC-TW
+ISOLatin10: ISO-8859-16
+ISOLatin6: ISO-8859-10
+ISOLatin8: ISO-8859-14
+ISOLatinThai: ISO-8859-11
+ISO_2022_JP_3: ISO-2022-JP-3
+JIS_C6226_78: JIS_C6226-1978
+JIS_X0208_83: JIS_X0208-1983
+JIS_X0208_90: JIS_X0208-1990
+JIS_X0212_90: JIS_X0212-1990
+KOI8_U: KOI8-U
+MacArabic: x-mac-arabic
+MacCentralEurRoman: x-mac-centraleurroman, xmacce
+MacChineseSimp: x-mac-chinesesimp, xmacsimpchinese
+MacChineseTrad: x-mac-chinesetrad, xmactradchinese
+MacCroatian: x-mac-croatian
+MacCyrillic: x-mac-cyrillic, maccyrillic, xmacukrainian
+MacDevanagari: x-mac-devanagari
+MacDingbats: x-mac-dingbats
+MacFarsi: x-mac-farsi
+MacGreek: x-mac-greek
+MacGujarati: x-mac-gujarati
+MacGurmukhi: x-mac-gurmukhi
+MacHebrew: x-mac-hebrew
+MacIcelandic: x-mac-icelandic
+MacJapanese: x-mac-japanese
+MacKorean: x-mac-korean
+MacRomanLatin1: x-mac-roman-latin1
+MacRomanian: x-mac-romanian
+MacSymbol: x-mac-symbol
+MacThai: x-mac-thai
+MacTibetan: x-mac-tibetan
+MacTurkish: x-mac-turkish
+MacVT100: x-mac-vt100
+NextStepLatin: x-nextstep
+ShiftJIS_X0213_00: Shift_JIS_X0213-2000
+WindowsKoreanJohab: johab
diff --git a/WebCore/platform/text/mac/make-charset-table.pl b/WebCore/platform/text/mac/make-charset-table.pl
new file mode 100755
index 0000000..16fd25a
--- /dev/null
+++ b/WebCore/platform/text/mac/make-charset-table.pl
@@ -0,0 +1,225 @@
+#!/usr/bin/perl -w
+
+# Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+# its contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+use strict;
+
+my %aliasesFromCharsetsFile;
+my %namesWritten;
+
+my $output = "";
+
+my $error = 0;
+
+sub error ($)
+{
+    print STDERR "$_[0]\n";    # report the problem on stderr, one message per line
+    $error = 1;                # sticky flag; the program body exits with status 1 when it is set
+}
+
+sub emit_line
+{
+    my ($name, $prefix, $encoding, $flags) = @_;    # $flags is accepted but never written out
+    # Each name may be emitted only once; a repeat is reported but the row is still appended.
+    $namesWritten{$name} and error "$name shows up twice in output";
+    $namesWritten{$name} = 1;
+    # Append one initializer row of the form { "name", <prefix><encoding> }, to the table text.
+    $output .= " { \"$name\", $prefix$encoding },\n";
+}
+
+sub process_platform_encodings
+{
+ my ($filename, $PlatformPrefix) = @_;
+ my $baseFilename = $filename;
+ $baseFilename =~ s|.*/||; # strip the directory part; used only in error messages
+ # Reads $filename, whose entries look like "PlatformName[, flags]: iana-name, alias, ...", and emits table rows via emit_line.
+ my %seenPlatformNames;
+ my %seenIANANames;
+
+ open PLATFORM_ENCODINGS, $filename or die;
+
+ while (<PLATFORM_ENCODINGS>) {
+ chomp;
+ s/\#.*$//; # drop comments
+ s/\s+$//; # drop trailing whitespace
+ if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
+ my %aliases;
+ # $flags is optional; the duplicate check below keys on the platform name together with its flags.
+ my $PlatformNameWithFlags = $PlatformName;
+ if ($flags) {
+ $PlatformNameWithFlags .= ", " . $flags;
+ } else {
+ $flags = "NoEncodingFlags";
+ }
+ error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
+ $seenPlatformNames{$PlatformNameWithFlags} = 1;
+
+ # Build the aliases list.
+ # Also check that no two names are part of the same entry in the charsets file.
+ my @IANANames = split ", ", $IANANames;
+ my $firstName = "";
+ my $canonicalFirstName = "";
+ my $prevName = "";
+ for my $name (@IANANames) {
+ if ($firstName eq "") {
+ if ($name !~ /^[-A-Za-z0-9_]+$/) {
+ error "$name, in $baseFilename, has illegal characters in it";
+ next;
+ }
+ $firstName = $name;
+ } else {
+ if ($name !~ /^[a-z0-9]+$/) {
+ error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
+ next;
+ }
+ if ($name le $prevName) {
+ error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
+ }
+ $prevName = $name;
+ }
+ # Canonical form: lowercase with '-' and '_' removed.
+ my $canonicalName = lc $name;
+ $canonicalName =~ tr/-_//d;
+
+ $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
+
+ error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
+ $seenIANANames{$canonicalName} = 1;
+ # Collect this name plus, when the charsets file knows it, every alias in its group.
+ $aliases{$canonicalName} = 1;
+ next if !$aliasesFromCharsetsFile{$canonicalName};
+ for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) {
+ $aliases{$alias} = 1;
+ }
+ for my $otherName (@IANANames) { # NOTE(review): $otherName is the raw spelling from this file, while the hash keys are canonicalized names
+ next if $canonicalName eq $otherName;
+ if ($aliasesFromCharsetsFile{$otherName}
+ && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$otherName}
+ && $canonicalName le $otherName) {
+ error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
+ }
+ }
+ }
+
+ # write out
+ emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
+ for my $alias (sort keys %aliases) {
+ emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
+ }
+ } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) { # a platform name listed without any IANA names: only checked for duplicates
+ my $PlatformName = $1;
+
+ error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
+ $seenPlatformNames{$PlatformName} = 1;
+ } elsif (/./) {
+ error "syntax error in $baseFilename, line $.";
+ }
+ }
+
+ close PLATFORM_ENCODINGS;
+}
+
+sub process_iana_charset
+{
+    # Registers one charset group: the canonical name and all of its aliases each map to the
+    # same sorted list of names (one shared array reference per group).
+    my ($canonical_name, @aliases) = @_;
+    unless ($canonical_name) {
+        return;    # no group to record, e.g. the call made before any "Name:" line was parsed
+    }
+
+    my @names = sort($canonical_name, @aliases);
+    $aliasesFromCharsetsFile{$_} = \@names foreach @names;
+}
+
+sub process_iana_charsets
+{
+ my ($filename) = @_;
+ # Parses the "Name:" / "Alias:" lines of the charsets file and records each group via process_iana_charset.
+ open CHARSETS, $filename or die;
+
+ my %seen; # normalized name => canonical name of the group it was seen in
+
+ my $canonical_name;
+ my @aliases;
+
+ my %exceptions = ( isoir91 => 1, isoir92 => 1 ); # aliases that may appear under more than one Name without an error
+
+ while (<CHARSETS>) {
+ chomp;
+ if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) {
+ $new_canonical_name = lc $new_canonical_name;
+ $new_canonical_name =~ tr/a-z0-9//cd; # keep only a-z and 0-9
+
+ error "saw $new_canonical_name twice in character-sets.txt", if $seen{$new_canonical_name};
+ $seen{$new_canonical_name} = $new_canonical_name;
+ # Flush the group collected so far before starting the new one.
+ process_iana_charset $canonical_name, @aliases;
+
+ $canonical_name = $new_canonical_name;
+ @aliases = ();
+ } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) {
+ $new_alias = lc $new_alias;
+ $new_alias =~ tr/a-z0-9//cd;
+
+ # do this after normalizing the alias, sometimes character-sets.txt
+ # has weird escape characters, e.g. \b after None
+ next if $new_alias eq "none";
+
+ error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name", if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias};
+ push @aliases, $new_alias if !$seen{$new_alias};
+ $seen{$new_alias} = $canonical_name;
+ }
+ }
+ # Record the final group, which has no following "Name:" line to trigger the flush.
+ process_iana_charset $canonical_name, @aliases;
+
+ close CHARSETS;
+}
+
+# Program body: ARGV[0] is the IANA charsets file, ARGV[1] the platform encodings file, ARGV[2] the prefix put before each platform encoding name.
+
+process_iana_charsets($ARGV[0]);
+process_platform_encodings($ARGV[1], $ARGV[2]);
+
+exit 1 if $error; # print nothing when any problem was reported
+
+print <<EOF
+// File generated by make-charset-table.pl. Do not edit!
+
+#include "config.h"
+#include "CharsetData.h"
+
+namespace WebCore {
+
+ const CharsetEntry CharsetTable[] = {
+$output
+ { 0, 0 }
+ };
+
+}
+EOF
diff --git a/WebCore/platform/text/qt/StringQt.cpp b/WebCore/platform/text/qt/StringQt.cpp
new file mode 100644
index 0000000..de9f527
--- /dev/null
+++ b/WebCore/platform/text/qt/StringQt.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2006 Nikolas Zimmermann <zimmermann@kde.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "PlatformString.h"
+
+#include <QString>
+
+namespace WebCore {
+
+// String conversions
+String::String(const QString& qstr)
+{
+    // A null QString leaves m_impl unset; otherwise build it from the QString's 16-bit data and length.
+    if (!qstr.isNull())
+        m_impl = StringImpl::create(reinterpret_cast<const UChar*>(qstr.constData()), qstr.length());
+}
+
+String::String(const QStringRef& ref)
+{
+    // A QStringRef with no underlying string leaves m_impl unset; otherwise use the referenced range.
+    if (ref.string())
+        m_impl = StringImpl::create(reinterpret_cast<const UChar*>(ref.unicode()), ref.length());
+}
+
+String::operator QString() const
+{
+ return QString(reinterpret_cast<const QChar*>(characters()), length()); // builds the QString from this string's characters() buffer and length()
+}
+
+}
+
+// vim: ts=4 sw=4 et
diff --git a/WebCore/platform/text/qt/TextBoundaries.cpp b/WebCore/platform/text/qt/TextBoundaries.cpp
new file mode 100644
index 0000000..bdc851b
--- /dev/null
+++ b/WebCore/platform/text/qt/TextBoundaries.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2006 Zack Rusin <zack@kde.org>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "TextBoundaries.h"
+#include "NotImplemented.h"
+
+#include <QString>
+#include <QChar>
+
+#include <QDebug>
+#include <stdio.h>
+
+#if QT_VERSION >= 0x040400
+#include <qtextboundaryfinder.h>
+
+namespace WebCore
+{
+
+int findNextWordFromIndex(UChar const* buffer, int len, int position, bool forward)
+{
+    // Steps over QTextBoundaryFinder::Word boundaries starting from `position` and returns the
+    // first boundary > 0 that touches a letter or digit: forward, the character just before
+    // the boundary is tested; backward, the character at the boundary. When no boundary
+    // qualifies the result is `len` (forward) or 0 (backward).
+    QString str(reinterpret_cast<QChar const*>(buffer), len);
+    QTextBoundaryFinder iterator(QTextBoundaryFinder::Word, str);
+    iterator.setPosition(position >= len ? len - 1 : position); // positions at/after len start from len - 1
+
+    if (!forward) {
+        for (int pos = iterator.toPreviousBoundary(); pos > 0; pos = iterator.toPreviousBoundary()) {
+            if (QChar(buffer[pos]).isLetterOrNumber())
+                return pos;
+        }
+        return 0;
+    }
+    for (int pos = iterator.toNextBoundary(); pos > 0; pos = iterator.toNextBoundary()) {
+        if (QChar(buffer[pos - 1]).isLetterOrNumber())
+            return pos;
+    }
+    return len;
+}
+
+void findWordBoundary(UChar const* buffer, int len, int position, int* start, int* end)
+{
+ QString str(reinterpret_cast<QChar const*>(buffer), len);
+ QTextBoundaryFinder iterator(QTextBoundaryFinder::Word, str);
+ iterator.setPosition(position);
+ *start = position > 0 ? iterator.toPreviousBoundary() : 0; // word boundary before position (0 when position is at the very start)
+ *end = position == len ? len : iterator.toNextBoundary(); // next boundary, searched from wherever the previous statement left the iterator
+}
+
+}
+
+#else
+namespace WebCore
+{
+
+int findNextWordFromIndex(UChar const* buffer, int len, int position, bool forward)
+{
+    // Unimplemented for Qt < 4.4: calls notImplemented() and returns 0 (no QString copy is built, it was never read).
+    notImplemented();
+    return 0;
+}
+
+void findWordBoundary(UChar const* buffer, int len, int position, int* start, int* end)
+{
+    // Reports in *start / *end the run of letters (QChar::isLetter) around `position`:
+    // *start is the index of the first letter of the run that ends at position - 1, and
+    // *end is one past the last letter of the run that begins at `position`. When neither
+    // neighbour is a letter, both outputs equal `position`.
+    //
+    // A position outside [0, len] yields *start == *end == 0.
+    QString str(reinterpret_cast<QChar const*>(buffer), len);
+    const int length = str.length();
+
+    if (position < 0 || position > length) {
+        *start = 0;
+        *end = 0;
+        return;
+    }
+
+    // Extend left while the preceding character is a letter.
+    int wordStart = position;
+    while (wordStart > 0 && str.at(wordStart - 1).isLetter())
+        --wordStart;
+
+    // Extend right while the current character is a letter. The explicit length bound keeps
+    // the scan inside the string (including position == length) instead of depending on how
+    // QString treats an out-of-range index.
+    int wordEnd = position;
+    while (wordEnd < length && str.at(wordEnd).isLetter())
+        ++wordEnd;
+
+    *start = wordStart;
+    *end = wordEnd;
+}
+
+}
+#endif
diff --git a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
new file mode 100644
index 0000000..88b9680
--- /dev/null
+++ b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
@@ -0,0 +1,297 @@
+/*
+ * This file is part of the DOM implementation for KDE.
+ *
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "TextBreakIterator.h"
+
+#if QT_VERSION >= 0x040400
+#include <QtCore/qtextboundaryfinder.h>
+#include <qdebug.h>
+
+// #define DEBUG_TEXT_ITERATORS
+#ifdef DEBUG_TEXT_ITERATORS
+#define DEBUG qDebug
+#else
+#define DEBUG if (1) {} else qDebug
+#endif
+
+namespace WebCore {
+
+ class TextBreakIterator : public QTextBoundaryFinder // adds no members; gives the Qt finder the TextBreakIterator type name
+ {
+ };
+ static QTextBoundaryFinder* iterator = 0; // lazily created finder reused by the factory functions that do not declare their own
+ static unsigned char buffer[1024]; // scratch storage passed to QTextBoundaryFinder constructors in this file
+
+ TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+ {
+     // Re-targets the file-static finder at `string` in Word mode; a null string yields 0.
+     if (!string)
+         return 0;
+     if (!iterator)
+         iterator = new QTextBoundaryFinder;
+     *iterator = QTextBoundaryFinder(QTextBoundaryFinder::Word, reinterpret_cast<const QChar*>(string), length, buffer, sizeof(buffer));
+     return static_cast<TextBreakIterator*>(iterator);
+ }
+
+ TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+ {
+     // Re-targets the file-static finder at `string` in Grapheme mode; a null string yields 0.
+     if (!string)
+         return 0;
+     if (!iterator)
+         iterator = new QTextBoundaryFinder;
+     *iterator = QTextBoundaryFinder(QTextBoundaryFinder::Grapheme, reinterpret_cast<const QChar*>(string), length, buffer, sizeof(buffer));
+     return static_cast<TextBreakIterator*>(iterator);
+ }
+
+ TextBreakIterator* lineBreakIterator(const UChar* string, int length)
+ {
+     // Line mode uses a function-local finder rather than the file-static one; a null string yields 0.
+     static QTextBoundaryFinder* lineFinder = 0;
+     if (!string)
+         return 0;
+     if (!lineFinder)
+         lineFinder = new QTextBoundaryFinder;
+     *lineFinder = QTextBoundaryFinder(QTextBoundaryFinder::Line, reinterpret_cast<const QChar*>(string), length, buffer, sizeof(buffer));
+     return static_cast<TextBreakIterator*>(lineFinder);
+ }
+
+ TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
+ {
+     // Re-targets the file-static finder at `string` in Sentence mode; a null string yields 0.
+     if (!string)
+         return 0;
+     if (!iterator)
+         iterator = new QTextBoundaryFinder;
+     *iterator = QTextBoundaryFinder(QTextBoundaryFinder::Sentence, reinterpret_cast<const QChar*>(string), length, buffer, sizeof(buffer));
+     return static_cast<TextBreakIterator*>(iterator);
+ }
+
+ int textBreakFirst(TextBreakIterator* bi)
+ {
+ bi->toStart(); // move the finder to its start position
+ DEBUG() << "textBreakFirst" << bi->position(); // no-op unless DEBUG_TEXT_ITERATORS is defined
+ return bi->position(); // the finder's position after toStart()
+ }
+
+ int textBreakNext(TextBreakIterator* bi)
+ {
+ int pos = bi->toNextBoundary(); // advance from the finder's current position
+ DEBUG() << "textBreakNext" << pos; // no-op unless DEBUG_TEXT_ITERATORS is defined
+ return pos; // whatever toNextBoundary() reported
+ }
+
+ int textBreakPreceding(TextBreakIterator* bi, int pos)
+ {
+ bi->setPosition(pos); // start the search from pos
+ int newpos = bi->toPreviousBoundary(); // boundary before pos, as reported by the finder
+ DEBUG() << "textBreakPreceding" << pos << newpos; // no-op unless DEBUG_TEXT_ITERATORS is defined
+ return newpos;
+ }
+
+ int textBreakFollowing(TextBreakIterator* bi, int pos)
+ {
+ bi->setPosition(pos); // start the search from pos
+ int newpos = bi->toNextBoundary(); // boundary after pos, as reported by the finder
+ DEBUG() << "textBreakFollowing" << pos << newpos; // no-op unless DEBUG_TEXT_ITERATORS is defined
+ return newpos;
+ }
+
+ int textBreakCurrent(TextBreakIterator* bi)
+ {
+ return bi->position(); // current finder position; the iterator is not moved
+ }
+
+ bool isTextBreak(TextBreakIterator*, int)
+ {
+ return true; // stub: both arguments are ignored and every position is reported as a break
+ }
+
+}
+#else
+#include <qtextlayout.h>
+
+namespace WebCore {
+
+ class TextBreakIterator
+ {
+ public:
+     // Boundary-stepping interface used when QTextBoundaryFinder is unavailable: subclasses
+     // implement first()/next()/previous() in terms of currentPos, string and length.
+     // The data members are public and are filled in directly by the factory functions.
+     virtual int first() = 0;
+     virtual int next() = 0;
+     virtual int previous() = 0;
+
+     // Reposition to pos, then step forward / backward from there.
+     int following(int pos) { currentPos = pos; return next(); }
+     int preceding(int pos) { currentPos = pos; return previous(); }
+
+     int currentPos;      // current offset into string
+     const UChar *string; // text being iterated
+     int length;          // length supplied together with string
+ };
+
+ class WordBreakIteratorQt : public TextBreakIterator // word stepping based on QChar::isSpace (see the member definitions)
+ {
+ public:
+ virtual int first();
+ virtual int next();
+ virtual int previous();
+ };
+
+ class CharBreakIteratorQt : public TextBreakIterator // character stepping delegated to QTextLayout cursor positions
+ {
+ public:
+ virtual int first();
+ virtual int next();
+ virtual int previous();
+ QTextLayout layout; // given the text by characterBreakIterator(); queried by next()/previous()
+ };
+
+ int WordBreakIteratorQt::first() {
+ currentPos = 0; // rewind to the start of the text
+ return currentPos;
+ }
+
+ int WordBreakIteratorQt::next() {
+     // Advances until the first non-space character that follows a space, or to length.
+     // Sets currentPos to -1 and returns it when already at or beyond the end.
+     if (currentPos >= length)
+         return currentPos = -1;
+     bool haveSpace = false;
+     for (; currentPos < length; ++currentPos) {
+         const bool atSpace = QChar(string[currentPos]).isSpace();
+         if (atSpace)
+             haveSpace = true;
+         else if (haveSpace)
+             break;
+     }
+     return currentPos;
+ }
+ int WordBreakIteratorQt::previous() {
+     // Walks backwards until a non-space character is met after at least one space has been
+     // crossed, or until offset 0. Sets currentPos to -1 and returns it when already at 0.
+     // Offsets at or beyond length (e.g. after preceding(length)) are stepped over unread.
+     if (currentPos <= 0)
+         return currentPos = -1;
+     bool haveSpace = false;
+     for (; currentPos > 0; --currentPos) {
+         const bool atSpace = currentPos < length && QChar(string[currentPos]).isSpace();
+         if (haveSpace && !atSpace)
+             break;
+         haveSpace = haveSpace || atSpace;
+     }
+     return currentPos;
+ }
+
+ int CharBreakIteratorQt::first() {
+ currentPos = 0; // rewind to the start of the text
+ return currentPos;
+ }
+
+ int CharBreakIteratorQt::next() {
+     // Moves to the layout's next cursor position; returns -1 (currentPos unchanged) at or past the end.
+     if (currentPos >= length)
+         return -1;
+     return currentPos = layout.nextCursorPosition(currentPos);
+ }
+ int CharBreakIteratorQt::previous() {
+     // Moves to the layout's previous cursor position; returns -1 (currentPos unchanged) at the start.
+     if (currentPos <= 0)
+         return -1;
+     return currentPos = layout.previousCursorPosition(currentPos);
+ }
+
+
+TextBreakIterator* wordBreakIterator(const UChar* string, int length)
+{
+    // Returns the single, lazily created WordBreakIteratorQt, reset to offset 0 of the given text.
+    static WordBreakIteratorQt* sharedIterator = 0;
+    if (!sharedIterator)
+        sharedIterator = new WordBreakIteratorQt;
+    WordBreakIteratorQt& it = *sharedIterator;
+    it.currentPos = 0;
+    it.string = string;
+    it.length = length;
+    return sharedIterator;
+}
+
+TextBreakIterator* characterBreakIterator(const UChar* string, int length)
+{
+    // Returns the single, lazily created CharBreakIteratorQt, reset to offset 0 with its layout given the text.
+    static CharBreakIteratorQt* sharedIterator = 0;
+    if (!sharedIterator)
+        sharedIterator = new CharBreakIteratorQt;
+    CharBreakIteratorQt& it = *sharedIterator;
+    it.currentPos = 0;
+    it.string = string;
+    it.length = length;
+    it.layout.setText(QString(reinterpret_cast<const QChar*>(string), length));
+    return sharedIterator;
+}
+
+TextBreakIterator* lineBreakIterator(const UChar*, int)
+{
+ // Not yet implemented in this branch: always returns 0. Note the textBreak*() functions in this branch dereference their iterator without a null check.
+ return 0;
+}
+
+TextBreakIterator* sentenceBreakIterator(const UChar*, int)
+{
+ // Not yet implemented in this branch: always returns 0. Note the textBreak*() functions in this branch dereference their iterator without a null check.
+ return 0;
+}
+
+int textBreakFirst(TextBreakIterator* bi)
+{
+ return bi->first(); // forwards to the iterator's virtual first(); bi is not null-checked
+}
+
+int textBreakNext(TextBreakIterator* bi)
+{
+ return bi->next(); // forwards to the iterator's virtual next(); bi is not null-checked
+}
+
+int textBreakPreceding(TextBreakIterator* bi, int pos)
+{
+ return bi->preceding(pos); // sets currentPos to pos, then calls previous()
+}
+
+int textBreakFollowing(TextBreakIterator* bi, int pos)
+{
+ return bi->following(pos); // sets currentPos to pos, then calls next()
+}
+
+int textBreakCurrent(TextBreakIterator* bi)
+{
+ return bi->currentPos; // reads the public member directly; may be -1 after WordBreakIteratorQt ran off either end
+}
+
+bool isTextBreak(TextBreakIterator*, int)
+{
+ return true; // stub: both arguments are ignored and every position is reported as a break
+}
+
+}
+
+#endif
diff --git a/WebCore/platform/text/qt/TextCodecQt.cpp b/WebCore/platform/text/qt/TextCodecQt.cpp
new file mode 100644
index 0000000..888c6af
--- /dev/null
+++ b/WebCore/platform/text/qt/TextCodecQt.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextCodecQt.h"
+#include "PlatformString.h"
+#include "CString.h"
+#include <qset.h>
+// #include <QDebug>
+
+namespace WebCore {
+
+static QSet<QByteArray> *unique_names = 0;
+
+static const char *getAtomicName(const QByteArray &name)
+{
+ if (!unique_names)
+ unique_names = new QSet<QByteArray>; // created on first use; nothing in this file deletes it
+ // Add the name to the set, then return the data pointer of the entry found in the set for it.
+ unique_names->insert(name);
+ return unique_names->find(name)->constData();
+}
+
+void TextCodecQt::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    // For every codec Qt lists in availableMibs(), registers the codec's own name as an
+    // encoding name mapped to itself, then each of its aliases mapped to that name. Every
+    // string passed to the registrar comes from getAtomicName().
+    const QList<int> mibs = QTextCodec::availableMibs();
+    for (int mibIndex = 0; mibIndex < mibs.size(); ++mibIndex) {
+        QTextCodec* codec = QTextCodec::codecForMib(mibs.at(mibIndex));
+        const char* canonicalName = getAtomicName(codec->name());
+        registrar(canonicalName, canonicalName);
+
+        const QList<QByteArray> aliases = codec->aliases();
+        for (int aliasIndex = 0; aliasIndex < aliases.size(); ++aliasIndex) {
+            // Aliases resolve to the codec's primary name, not to themselves.
+            registrar(getAtomicName(aliases.at(aliasIndex)), canonicalName);
+        }
+    }
+}
+
+static std::auto_ptr<TextCodec> newTextCodecQt(const TextEncoding& encoding, const void*)
+{
+ return std::auto_ptr<TextCodec>(new TextCodecQt(encoding)); // factory handed to the registrar in registerCodecs(); the void* argument is unused
+}
+
+void TextCodecQt::registerCodecs(TextCodecRegistrar registrar)
+{
+    // Registers newTextCodecQt as the factory for the name of every codec listed by
+    // QTextCodec::availableMibs(). The third registrar argument is always 0, matching the
+    // unused const void* parameter of newTextCodecQt.
+    const QList<int> mibs = QTextCodec::availableMibs();
+
+    for (int mibIndex = 0; mibIndex < mibs.size(); ++mibIndex) {
+        QTextCodec* codec = QTextCodec::codecForMib(mibs.at(mibIndex));
+        registrar(getAtomicName(codec->name()), newTextCodecQt, 0);
+    }
+}
+
+TextCodecQt::TextCodecQt(const TextEncoding& encoding)
+ : m_encoding(encoding)
+{
+ m_codec = QTextCodec::codecForName(m_encoding.name()); // NOTE(review): the result is not checked here; decode()/encode() dereference m_codec directly
+}
+
+TextCodecQt::~TextCodecQt()
+{ // intentionally empty: m_codec is not deleted here
+}
+
+
+String TextCodecQt::decode(const char* bytes, size_t length, bool flush)
+{
+    // Decodes the bytes with m_codec, handing Qt the member m_state as its converter state.
+    const QString unicode = m_codec->toUnicode(bytes, length, &m_state);
+    if (!flush)
+        return unicode;
+    // On flush, put the converter state back to its defaults before returning the text.
+    m_state.flags = QTextCodec::DefaultConversion;
+    m_state.remainingChars = 0;
+    m_state.invalidChars = 0;
+    return unicode;
+}
+
+CString TextCodecQt::encode(const UChar* characters, size_t length, bool allowEntities)
+{
+    // Converts `length` 16-bit units to bytes with m_codec; zero-length input yields "".
+    // FIXME: do something sensible with allowEntities
+    if (!length)
+        return "";
+    const QChar* source = reinterpret_cast<const QChar*>(characters);
+    const QByteArray encoded = m_codec->fromUnicode(source, length, 0);
+    return CString(encoded.constData(), encoded.length());
+}
+
+
+} // namespace WebCore
diff --git a/WebCore/platform/text/qt/TextCodecQt.h b/WebCore/platform/text/qt/TextCodecQt.h
new file mode 100644
index 0000000..9bbb80b
--- /dev/null
+++ b/WebCore/platform/text/qt/TextCodecQt.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecQt_h
+#define TextCodecQt_h
+
+#include "TextCodec.h"
+#include "TextEncoding.h"
+#include <QTextCodec>
+
+class QTextCodec;
+
+namespace WebCore {
+
+ // TextCodec implementation that delegates conversion to a Qt QTextCodec.
+ class TextCodecQt : public TextCodec {
+ public:
+ // Static registration hooks; each is handed a registrar callback by the caller.
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
+
+ TextCodecQt(const TextEncoding&);
+ virtual ~TextCodecQt();
+
+ // Conversion entry points; flush and allowEntities both default to false.
+ virtual String decode(const char*, size_t length, bool flush = false);
+ virtual CString encode(const UChar*, size_t length, bool allowEntities = false);
+
+ private:
+ // Encoding passed to the constructor.
+ TextEncoding m_encoding;
+ // Qt codec looked up by the encoding's name in the constructor.
+ QTextCodec *m_codec;
+ // Converter state handed to QTextCodec::toUnicode by decode().
+ QTextCodec::ConverterState m_state;
+ };
+
+} // namespace WebCore
+
+#endif // TextCodecQt_h
diff --git a/WebCore/platform/text/symbian/StringImplSymbian.cpp b/WebCore/platform/text/symbian/StringImplSymbian.cpp
new file mode 100644
index 0000000..3a1245f
--- /dev/null
+++ b/WebCore/platform/text/symbian/StringImplSymbian.cpp
@@ -0,0 +1,53 @@
+/*
+* ==============================================================================
+* Copyright (c) 2006, Nokia Corporation
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* * Neither the name of the Nokia Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+* DAMAGE.
+*
+* ==============================================================================
+*/
+
+#include "config.h"
+#include "StringImpl.h"
+#include <e32std.h>
+
+namespace WebCore {
+
+// Builds a StringImpl from a Symbian descriptor by handing the descriptor's
+// data pointer and length to init().
+StringImpl::StringImpl(const TDesC& des)
+{
+    const TInt descriptorLength = des.Length();
+    init(des.Ptr(), descriptorLength);
+}
+
+// Returns a Symbian pointer descriptor built from this string's m_data and
+// m_length; the TPtrC is constructed from the pointer, not from a copy.
+TPtrC StringImpl::des() const
+{
+    return TPtrC((const TUint16 *)m_data, m_length);
+}
+
+}
diff --git a/WebCore/platform/text/symbian/StringSymbian.cpp b/WebCore/platform/text/symbian/StringSymbian.cpp
new file mode 100644
index 0000000..27b6a13
--- /dev/null
+++ b/WebCore/platform/text/symbian/StringSymbian.cpp
@@ -0,0 +1,50 @@
+/*
+* ==============================================================================
+* Copyright (c) 2006, Nokia Corporation
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in
+* the documentation and/or other materials provided with the
+* distribution.
+* * Neither the name of the Nokia Corporation nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+* DAMAGE.
+*
+* ==============================================================================
+*/
+
+#include "config.h"
+#include "PlatformString.h"
+#include <e32std.h>
+
+namespace WebCore {
+
+// Constructs a String from a Symbian descriptor. A zero-length descriptor
+// maps to StringImpl::empty(); anything else gets a new StringImpl.
+String::String(const TDesC& des)
+{
+    if (des.Length())
+        m_impl = new StringImpl(des);
+    else
+        m_impl = StringImpl::empty();
+}
+
+}
\ No newline at end of file
diff --git a/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp b/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp
new file mode 100644
index 0000000..14cf130
--- /dev/null
+++ b/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2007 Apple Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "TextBreakIteratorInternalICU.h"
+
+namespace WebCore {
+
+// Returns the locale identifier used for text break iteration on Windows.
+// The value is hard-coded; no system locale is queried.
+const char* currentTextBreakLocaleID()
+{
+    static const char localeID[] = "en_us";
+    return localeID;
+}
+
+}
diff --git a/WebCore/platform/text/wx/StringWx.cpp b/WebCore/platform/text/wx/StringWx.cpp
new file mode 100644
index 0000000..50919c4
--- /dev/null
+++ b/WebCore/platform/text/wx/StringWx.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2007 Vaclav Slavik, Kevin Ollivier <kevino@theolliviers.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "PlatformString.h"
+
+#include "CString.h"
+#include "unicode/ustring.h"
+
+#include <wx/defs.h>
+#include <wx/string.h>
+
+namespace WebCore {
+
+// String conversions
+// Builds a WebCore String from a wxString. The UTF-16 data is taken directly
+// from wc_str() when wchar_t is 16 bits wide; otherwise the wide string is
+// converted through wxMBConvUTF16 into a temporary buffer. Either way the
+// resulting (str, len) pair is handed to StringImpl::create().
+String::String(const wxString& wxstr)
+{
+#if !wxUSE_UNICODE
+ #error "This code only works in Unicode build of wxWidgets"
+#endif
+
+ // ICU's UChar is 16bit wide, UTF-16, and the code below counts on it, so
+ // it would break if the definition changed:
+ wxCOMPILE_TIME_ASSERT(sizeof(UChar) == 2, UCharSizeMustBe16Bit);
+
+#if SIZEOF_WCHAR_T == 2 // wchar_t==UChar
+
+ // wchar_t and UChar have the same width here, so the wide buffer is used as-is.
+ const UChar* str = wxstr.wc_str();
+ const size_t len = wxstr.length();
+
+#else // SIZEOF_WCHAR_T == 4
+
+ // NB: we can't simply use wxstr.mb_str(wxMBConvUTF16()) here because
+ // the number of characters in UTF-16 encoding of the string may differ
+ // from the number of UTF-32 values and we can't get the length from
+ // returned buffer:
+
+#if defined(wxUSE_UNICODE_UTF8) && wxUSE_UNICODE_UTF8
+ // in wx3's UTF8 mode, wc_str() returns a buffer, not raw pointer
+ wxWCharBuffer widestr(wxstr.wc_str());
+#else
+ const wxChar *widestr = wxstr.wc_str();
+#endif
+ const size_t widelen = wxstr.length();
+
+ // allocate buffer for the UTF-16 string:
+ // (the first FromWChar call, with a NULL destination, only measures the output)
+ wxMBConvUTF16 conv;
+ const size_t utf16bufLen = conv.FromWChar(NULL, 0, widestr, widelen);
+ wxCharBuffer utf16buf(utf16bufLen);
+
+ // and convert wxString to UTF-16 (=UChar*):
+ // The result is divided by 2, presumably turning a byte count into a UChar count.
+ // NOTE(review): neither FromWChar result is checked for a failure value
+ // before being used as a size/length - confirm this cannot fail here.
+ const UChar* str = (const UChar*)utf16buf.data();
+ size_t len = conv.FromWChar(utf16buf.data(), utf16bufLen, widestr, widelen) / 2;
+
+#endif // SIZEOF_WCHAR_T == 4
+
+ // conversion to UTF-16 or getting internal buffer isn't supposed to fail:
+ wxASSERT_MSG(str != NULL, _T("failed string conversion?"));
+
+ m_impl = StringImpl::create(str, len);
+}
+
+// Converts this String to a wxString by way of its UTF-8 representation.
+String::operator wxString() const
+{
+    const CString utf8Bytes = utf8();
+    return wxString(utf8Bytes.data(), wxConvUTF8);
+}
+
+}
+
+// vim: ts=4 sw=4 et