summaryrefslogtreecommitdiffstats
path: root/WebCore/platform/text
diff options
context:
space:
mode:
authorSteve Block <steveblock@google.com>2011-05-06 11:45:16 +0100
committerSteve Block <steveblock@google.com>2011-05-12 13:44:10 +0100
commitcad810f21b803229eb11403f9209855525a25d57 (patch)
tree29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /WebCore/platform/text
parent121b0cf4517156d0ac5111caf9830c51b69bae8f (diff)
downloadexternal_webkit-cad810f21b803229eb11403f9209855525a25d57.zip
external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz
external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2
Merge WebKit at r75315: Initial merge by git.
Change-Id: I570314b346ce101c935ed22a626b48c2af266b84
Diffstat (limited to 'WebCore/platform/text')
-rw-r--r--WebCore/platform/text/AtomicStringKeyedMRUCache.h73
-rw-r--r--WebCore/platform/text/Base64.cpp210
-rw-r--r--WebCore/platform/text/Base64.h46
-rw-r--r--WebCore/platform/text/BidiContext.cpp72
-rw-r--r--WebCore/platform/text/BidiContext.h64
-rw-r--r--WebCore/platform/text/BidiResolver.h968
-rw-r--r--WebCore/platform/text/CharacterNames.h90
-rw-r--r--WebCore/platform/text/Hyphenation.cpp44
-rw-r--r--WebCore/platform/text/Hyphenation.h39
-rw-r--r--WebCore/platform/text/LineEnding.cpp231
-rw-r--r--WebCore/platform/text/LineEnding.h55
-rw-r--r--WebCore/platform/text/ParserUtilities.h54
-rw-r--r--WebCore/platform/text/PlatformString.h45
-rw-r--r--WebCore/platform/text/RegularExpression.cpp168
-rw-r--r--WebCore/platform/text/RegularExpression.h55
-rw-r--r--WebCore/platform/text/SegmentedString.cpp240
-rw-r--r--WebCore/platform/text/SegmentedString.h257
-rw-r--r--WebCore/platform/text/String.cpp77
-rw-r--r--WebCore/platform/text/SuffixTree.h122
-rw-r--r--WebCore/platform/text/TextBoundaries.cpp107
-rw-r--r--WebCore/platform/text/TextBoundaries.h46
-rw-r--r--WebCore/platform/text/TextBreakIterator.h62
-rw-r--r--WebCore/platform/text/TextBreakIteratorICU.cpp242
-rw-r--r--WebCore/platform/text/TextBreakIteratorInternalICU.h34
-rw-r--r--WebCore/platform/text/TextCodec.cpp58
-rw-r--r--WebCore/platform/text/TextCodec.h86
-rw-r--r--WebCore/platform/text/TextCodecICU.cpp490
-rw-r--r--WebCore/platform/text/TextCodecICU.h81
-rw-r--r--WebCore/platform/text/TextCodecLatin1.cpp248
-rw-r--r--WebCore/platform/text/TextCodecLatin1.h44
-rw-r--r--WebCore/platform/text/TextCodecUTF16.cpp150
-rw-r--r--WebCore/platform/text/TextCodecUTF16.h51
-rw-r--r--WebCore/platform/text/TextCodecUserDefined.cpp111
-rw-r--r--WebCore/platform/text/TextCodecUserDefined.h44
-rw-r--r--WebCore/platform/text/TextDirection.h35
-rw-r--r--WebCore/platform/text/TextEncoding.cpp265
-rw-r--r--WebCore/platform/text/TextEncoding.h98
-rw-r--r--WebCore/platform/text/TextEncodingDetector.h48
-rw-r--r--WebCore/platform/text/TextEncodingDetectorICU.cpp129
-rw-r--r--WebCore/platform/text/TextEncodingDetectorNone.cpp44
-rw-r--r--WebCore/platform/text/TextEncodingRegistry.cpp402
-rw-r--r--WebCore/platform/text/TextEncodingRegistry.h54
-rw-r--r--WebCore/platform/text/TextStream.cpp130
-rw-r--r--WebCore/platform/text/TextStream.h60
-rw-r--r--WebCore/platform/text/UnicodeRange.cpp462
-rw-r--r--WebCore/platform/text/UnicodeRange.h124
-rw-r--r--WebCore/platform/text/android/HyphenationAndroid.cpp110
-rw-r--r--WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp43
-rw-r--r--WebCore/platform/text/brew/TextBoundariesBrew.cpp74
-rw-r--r--WebCore/platform/text/brew/TextBreakIteratorBrew.cpp312
-rw-r--r--WebCore/platform/text/brew/TextCodecBrew.cpp214
-rw-r--r--WebCore/platform/text/brew/TextCodecBrew.h61
-rw-r--r--WebCore/platform/text/cf/HyphenationCF.cpp96
-rw-r--r--WebCore/platform/text/cf/StringCF.cpp55
-rw-r--r--WebCore/platform/text/cf/StringImplCF.cpp162
-rw-r--r--WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp50
-rw-r--r--WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp38
-rw-r--r--WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp365
-rw-r--r--WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp37
-rw-r--r--WebCore/platform/text/gtk/TextCodecGtk.cpp578
-rw-r--r--WebCore/platform/text/gtk/TextCodecGtk.h66
-rw-r--r--WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp42
-rw-r--r--WebCore/platform/text/mac/CharsetData.h37
-rw-r--r--WebCore/platform/text/mac/HyphenationMac.mm70
-rw-r--r--WebCore/platform/text/mac/ShapeArabic.c556
-rw-r--r--WebCore/platform/text/mac/ShapeArabic.h44
-rw-r--r--WebCore/platform/text/mac/StringImplMac.mm33
-rw-r--r--WebCore/platform/text/mac/StringMac.mm42
-rw-r--r--WebCore/platform/text/mac/TextBoundaries.mm56
-rw-r--r--WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm108
-rw-r--r--WebCore/platform/text/mac/TextCodecMac.cpp329
-rw-r--r--WebCore/platform/text/mac/TextCodecMac.h73
-rw-r--r--WebCore/platform/text/mac/character-sets.txt1868
-rw-r--r--WebCore/platform/text/mac/mac-encodings.txt45
-rwxr-xr-xWebCore/platform/text/mac/make-charset-table.pl225
-rw-r--r--WebCore/platform/text/qt/TextBoundariesQt.cpp77
-rw-r--r--WebCore/platform/text/qt/TextBreakIteratorQt.cpp146
-rw-r--r--WebCore/platform/text/qt/TextCodecQt.cpp166
-rw-r--r--WebCore/platform/text/qt/TextCodecQt.h54
-rw-r--r--WebCore/platform/text/transcoder/FontTranscoder.cpp106
-rw-r--r--WebCore/platform/text/transcoder/FontTranscoder.h67
-rw-r--r--WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp41
-rw-r--r--WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp303
-rw-r--r--WebCore/platform/text/wince/TextCodecWinCE.cpp389
-rw-r--r--WebCore/platform/text/wince/TextCodecWinCE.h73
85 files changed, 0 insertions, 13826 deletions
diff --git a/WebCore/platform/text/AtomicStringKeyedMRUCache.h b/WebCore/platform/text/AtomicStringKeyedMRUCache.h
deleted file mode 100644
index b3004f7..0000000
--- a/WebCore/platform/text/AtomicStringKeyedMRUCache.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef AtomicStringKeyedMRUCache_h
-#define AtomicStringKeyedMRUCache_h
-
-#include <wtf/text/AtomicString.h>
-
-namespace WebCore {
-
-template<typename T, size_t capacity = 4>
-class AtomicStringKeyedMRUCache {
-public:
- T get(const AtomicString& key)
- {
- if (key.isNull()) {
- DEFINE_STATIC_LOCAL(T, valueForNull, (createValueForNullKey()));
- return valueForNull;
- }
-
- for (size_t i = 0; i < m_cache.size(); ++i) {
- if (m_cache[i].first == key) {
- size_t foundIndex = i;
- if (foundIndex + 1 < m_cache.size()) {
- Entry entry = m_cache[foundIndex];
- m_cache.remove(foundIndex);
- foundIndex = m_cache.size();
- m_cache.append(entry);
- }
- return m_cache[foundIndex].second;
- }
- }
- if (m_cache.size() == capacity)
- m_cache.remove(0);
-
- m_cache.append(std::make_pair(key, createValueForKey(key)));
- return m_cache.last().second;
- }
-
-private:
- T createValueForNullKey();
- T createValueForKey(const AtomicString&);
-
- typedef pair<AtomicString, T> Entry;
- typedef Vector<Entry, capacity> Cache;
- Cache m_cache;
-};
-
-}
-
-#endif // AtomicStringKeyedMRUCache_h
diff --git a/WebCore/platform/text/Base64.cpp b/WebCore/platform/text/Base64.cpp
deleted file mode 100644
index 98b537a..0000000
--- a/WebCore/platform/text/Base64.cpp
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- Copyright (C) 2000-2001 Dawit Alemayehu <adawit@kde.org>
- Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
- Copyright (C) 2007, 2008 Apple Inc. All rights reserved.
- Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License (LGPL)
- version 2 as published by the Free Software Foundation.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
- This code is based on the java implementation in HTTPClient
- package by Ronald Tschalär Copyright (C) 1996-1999.
-*/
-
-#include "config.h"
-#include "Base64.h"
-
-#include <limits.h>
-#include <wtf/StringExtras.h>
-#include <wtf/text/WTFString.h>
-
-namespace WebCore {
-
-static const char base64EncMap[64] = {
- 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
- 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
- 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
- 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66,
- 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E,
- 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76,
- 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33,
- 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F
-};
-
-static const char base64DecMap[128] = {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3F,
- 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
- 0x3C, 0x3D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
- 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
- 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
- 0x17, 0x18, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
- 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
- 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,
- 0x31, 0x32, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00
-};
-
-void base64Encode(const Vector<char>& in, Vector<char>& out, bool insertLFs)
-{
- base64Encode(in.data(), in.size(), out, insertLFs);
-}
-
-void base64Encode(const char* data, unsigned len, Vector<char>& out, bool insertLFs)
-{
- out.clear();
- if (!len)
- return;
-
- // If the input string is pathologically large, just return nothing.
- // Note: Keep this in sync with the "outLength" computation below.
- // Rather than being perfectly precise, this is a bit conservative.
- const unsigned maxInputBufferSize = UINT_MAX / 77 * 76 / 4 * 3 - 2;
- if (len > maxInputBufferSize)
- return;
-
- unsigned sidx = 0;
- unsigned didx = 0;
-
- unsigned outLength = ((len + 2) / 3) * 4;
-
- // Deal with the 76 character per line limit specified in RFC 2045.
- insertLFs = (insertLFs && outLength > 76);
- if (insertLFs)
- outLength += ((outLength - 1) / 76);
-
- int count = 0;
- out.grow(outLength);
-
- // 3-byte to 4-byte conversion + 0-63 to ascii printable conversion
- if (len > 1) {
- while (sidx < len - 2) {
- if (insertLFs) {
- if (count && !(count % 76))
- out[didx++] = '\n';
- count += 4;
- }
- out[didx++] = base64EncMap[(data[sidx] >> 2) & 077];
- out[didx++] = base64EncMap[((data[sidx + 1] >> 4) & 017) | ((data[sidx] << 4) & 077)];
- out[didx++] = base64EncMap[((data[sidx + 2] >> 6) & 003) | ((data[sidx + 1] << 2) & 077)];
- out[didx++] = base64EncMap[data[sidx + 2] & 077];
- sidx += 3;
- }
- }
-
- if (sidx < len) {
- if (insertLFs && (count > 0) && !(count % 76))
- out[didx++] = '\n';
-
- out[didx++] = base64EncMap[(data[sidx] >> 2) & 077];
- if (sidx < len - 1) {
- out[didx++] = base64EncMap[((data[sidx + 1] >> 4) & 017) | ((data[sidx] << 4) & 077)];
- out[didx++] = base64EncMap[(data[sidx + 1] << 2) & 077];
- } else
- out[didx++] = base64EncMap[(data[sidx] << 4) & 077];
- }
-
- // Add padding
- while (didx < out.size()) {
- out[didx] = '=';
- didx++;
- }
-}
-
-bool base64Decode(const Vector<char>& in, Vector<char>& out, Base64DecodePolicy policy)
-{
- out.clear();
-
- // If the input string is pathologically large, just return nothing.
- if (in.size() > UINT_MAX)
- return false;
-
- return base64Decode(in.data(), in.size(), out, policy);
-}
-
-template<typename T>
-static inline bool base64DecodeInternal(const T* data, unsigned len, Vector<char>& out, Base64DecodePolicy policy)
-{
- out.clear();
- if (!len)
- return true;
-
- out.grow(len);
-
- bool sawEqualsSign = false;
- unsigned outLength = 0;
- for (unsigned idx = 0; idx < len; idx++) {
- unsigned ch = data[idx];
- if (ch == '=')
- sawEqualsSign = true;
- else if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ch == '+' || ch == '/') {
- if (sawEqualsSign)
- return false;
- out[outLength] = base64DecMap[ch];
- outLength++;
- } else if (policy == FailOnInvalidCharacter || (policy == IgnoreWhitespace && !isSpaceOrNewline(ch)))
- return false;
- }
-
- if (!outLength)
- return !sawEqualsSign;
-
- // Valid data is (n * 4 + [0,2,3]) characters long.
- if ((outLength % 4) == 1)
- return false;
-
- // 4-byte to 3-byte conversion
- outLength -= (outLength + 3) / 4;
- if (!outLength)
- return false;
-
- unsigned sidx = 0;
- unsigned didx = 0;
- if (outLength > 1) {
- while (didx < outLength - 2) {
- out[didx] = (((out[sidx] << 2) & 255) | ((out[sidx + 1] >> 4) & 003));
- out[didx + 1] = (((out[sidx + 1] << 4) & 255) | ((out[sidx + 2] >> 2) & 017));
- out[didx + 2] = (((out[sidx + 2] << 6) & 255) | (out[sidx + 3] & 077));
- sidx += 4;
- didx += 3;
- }
- }
-
- if (didx < outLength)
- out[didx] = (((out[sidx] << 2) & 255) | ((out[sidx + 1] >> 4) & 003));
-
- if (++didx < outLength)
- out[didx] = (((out[sidx + 1] << 4) & 255) | ((out[sidx + 2] >> 2) & 017));
-
- if (outLength < out.size())
- out.shrink(outLength);
-
- return true;
-}
-
-bool base64Decode(const char* data, unsigned len, Vector<char>& out, Base64DecodePolicy policy)
-{
- return base64DecodeInternal<char>(data, len, out, policy);
-}
-
-bool base64Decode(const String& in, Vector<char>& out, Base64DecodePolicy policy)
-{
- return base64DecodeInternal<UChar>(in.characters(), in.length(), out, policy);
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/Base64.h b/WebCore/platform/text/Base64.h
deleted file mode 100644
index 211bd3c..0000000
--- a/WebCore/platform/text/Base64.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
- * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef Base64_h
-#define Base64_h
-
-#include <wtf/Forward.h>
-#include <wtf/Vector.h>
-
-namespace WebCore {
-
-enum Base64DecodePolicy { FailOnInvalidCharacter, IgnoreWhitespace, IgnoreInvalidCharacters };
-
-void base64Encode(const Vector<char>&, Vector<char>&, bool insertLFs = false);
-void base64Encode(const char*, unsigned, Vector<char>&, bool insertLFs = false);
-
-bool base64Decode(const String&, Vector<char>&, Base64DecodePolicy = FailOnInvalidCharacter);
-bool base64Decode(const Vector<char>&, Vector<char>&, Base64DecodePolicy = FailOnInvalidCharacter);
-bool base64Decode(const char*, unsigned, Vector<char>&, Base64DecodePolicy = FailOnInvalidCharacter);
-
-}
-
-#endif // Base64_h
diff --git a/WebCore/platform/text/BidiContext.cpp b/WebCore/platform/text/BidiContext.cpp
deleted file mode 100644
index fb6b8cf..0000000
--- a/WebCore/platform/text/BidiContext.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include "BidiContext.h"
-
-namespace WebCore {
-
-using namespace WTF::Unicode;
-
-inline PassRefPtr<BidiContext> BidiContext::createUncached(unsigned char level, Direction direction, bool override, BidiContext* parent)
-{
- return adoptRef(new BidiContext(level, direction, override, parent));
-}
-
-PassRefPtr<BidiContext> BidiContext::create(unsigned char level, Direction direction, bool override, BidiContext* parent)
-{
- ASSERT(direction == (level % 2 ? RightToLeft : LeftToRight));
-
- if (parent)
- return createUncached(level, direction, override, parent);
-
- ASSERT(level <= 1);
- if (!level) {
- if (!override) {
- static BidiContext* ltrContext = createUncached(0, LeftToRight, false, 0).releaseRef();
- return ltrContext;
- }
-
- static BidiContext* ltrOverrideContext = createUncached(0, LeftToRight, true, 0).releaseRef();
- return ltrOverrideContext;
- }
-
- if (!override) {
- static BidiContext* rtlContext = createUncached(1, RightToLeft, false, 0).releaseRef();
- return rtlContext;
- }
-
- static BidiContext* rtlOverrideContext = createUncached(1, RightToLeft, true, 0).releaseRef();
- return rtlOverrideContext;
-}
-
-bool operator==(const BidiContext& c1, const BidiContext& c2)
-{
- if (&c1 == &c2)
- return true;
- if (c1.level() != c2.level() || c1.override() != c2.override() || c1.dir() != c2.dir())
- return false;
- if (!c1.parent())
- return !c2.parent();
- return c2.parent() && *c1.parent() == *c2.parent();
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/BidiContext.h b/WebCore/platform/text/BidiContext.h
deleted file mode 100644
index b52815f..0000000
--- a/WebCore/platform/text/BidiContext.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef BidiContext_h
-#define BidiContext_h
-
-#include <wtf/Assertions.h>
-#include <wtf/PassRefPtr.h>
-#include <wtf/RefCounted.h>
-#include <wtf/RefPtr.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
-// Used to keep track of explicit embeddings.
-class BidiContext : public RefCounted<BidiContext> {
-public:
- static PassRefPtr<BidiContext> create(unsigned char level, WTF::Unicode::Direction direction, bool override = false, BidiContext* parent = 0);
-
- BidiContext* parent() const { return m_parent.get(); }
- unsigned char level() const { return m_level; }
- WTF::Unicode::Direction dir() const { return static_cast<WTF::Unicode::Direction>(m_direction); }
- bool override() const { return m_override; }
-
-private:
- BidiContext(unsigned char level, WTF::Unicode::Direction direction, bool override, BidiContext* parent)
- : m_level(level)
- , m_direction(direction)
- , m_override(override)
- , m_parent(parent)
- {
- }
-
- static PassRefPtr<BidiContext> createUncached(unsigned char level, WTF::Unicode::Direction, bool override, BidiContext* parent);
-
- unsigned char m_level;
- unsigned m_direction : 5; // Direction
- bool m_override : 1;
- RefPtr<BidiContext> m_parent;
-};
-
-bool operator==(const BidiContext&, const BidiContext&);
-
-} // namespace WebCore
-
-#endif // BidiContext_h
diff --git a/WebCore/platform/text/BidiResolver.h b/WebCore/platform/text/BidiResolver.h
deleted file mode 100644
index 1f87115..0000000
--- a/WebCore/platform/text/BidiResolver.h
+++ /dev/null
@@ -1,968 +0,0 @@
-/*
- * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef BidiResolver_h
-#define BidiResolver_h
-
-#include "BidiContext.h"
-#include <wtf/Noncopyable.h>
-#include <wtf/PassRefPtr.h>
-#include <wtf/Vector.h>
-
-namespace WebCore {
-
-template <class Iterator> struct MidpointState {
- MidpointState()
- {
- reset();
- }
-
- void reset()
- {
- numMidpoints = 0;
- currentMidpoint = 0;
- betweenMidpoints = false;
- }
-
- // The goal is to reuse the line state across multiple
- // lines so we just keep an array around for midpoints and never clear it across multiple
- // lines. We track the number of items and position using the two other variables.
- Vector<Iterator> midpoints;
- unsigned numMidpoints;
- unsigned currentMidpoint;
- bool betweenMidpoints;
-};
-
-// The BidiStatus at a given position (typically the end of a line) can
-// be cached and then used to restart bidi resolution at that position.
-struct BidiStatus {
- BidiStatus()
- : eor(WTF::Unicode::OtherNeutral)
- , lastStrong(WTF::Unicode::OtherNeutral)
- , last(WTF::Unicode::OtherNeutral)
- {
- }
-
- BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext)
- : eor(eorDir)
- , lastStrong(lastStrongDir)
- , last(lastDir)
- , context(bidiContext)
- {
- }
-
- WTF::Unicode::Direction eor;
- WTF::Unicode::Direction lastStrong;
- WTF::Unicode::Direction last;
- RefPtr<BidiContext> context;
-};
-
-inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
-{
- return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
-}
-
-inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
-{
- return !(status1 == status2);
-}
-
-struct BidiCharacterRun {
- BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir)
- : m_start(start)
- , m_stop(stop)
- , m_override(context->override())
- , m_next(0)
- {
- if (dir == WTF::Unicode::OtherNeutral)
- dir = context->dir();
-
- m_level = context->level();
-
- // add level of run (cases I1 & I2)
- if (m_level % 2) {
- if (dir == WTF::Unicode::LeftToRight || dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
- m_level++;
- } else {
- if (dir == WTF::Unicode::RightToLeft)
- m_level++;
- else if (dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber)
- m_level += 2;
- }
- }
-
- void destroy() { delete this; }
-
- int start() const { return m_start; }
- int stop() const { return m_stop; }
- unsigned char level() const { return m_level; }
- bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
- bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
-
- BidiCharacterRun* next() const { return m_next; }
-
- unsigned char m_level;
- int m_start;
- int m_stop;
- bool m_override;
- BidiCharacterRun* m_next;
-};
-
-template <class Iterator, class Run> class BidiResolver : public Noncopyable {
-public :
- BidiResolver()
- : m_direction(WTF::Unicode::OtherNeutral)
- , reachedEndOfLine(false)
- , emptyRun(true)
- , m_firstRun(0)
- , m_lastRun(0)
- , m_logicallyLastRun(0)
- , m_runCount(0)
- {
- }
-
- const Iterator& position() const { return current; }
- void setPosition(const Iterator& position) { current = position; }
-
- void increment() { current.increment(); }
-
- BidiContext* context() const { return m_status.context.get(); }
- void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
-
- void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; }
- void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; }
- void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; }
-
- WTF::Unicode::Direction dir() const { return m_direction; }
- void setDir(WTF::Unicode::Direction d) { m_direction = d; }
-
- const BidiStatus& status() const { return m_status; }
- void setStatus(const BidiStatus s) { m_status = s; }
-
- MidpointState<Iterator>& midpointState() { return m_midpointState; }
-
- void embed(WTF::Unicode::Direction);
- void commitExplicitEmbedding();
-
- void createBidiRunsForLine(const Iterator& end, bool visualOrder = false, bool hardLineBreak = false);
-
- Run* firstRun() const { return m_firstRun; }
- Run* lastRun() const { return m_lastRun; }
- Run* logicallyLastRun() const { return m_logicallyLastRun; }
- unsigned runCount() const { return m_runCount; }
-
- void addRun(Run*);
- void prependRun(Run*);
-
- void moveRunToEnd(Run*);
- void moveRunToBeginning(Run*);
-
- void deleteRuns();
-
-protected:
- void appendRun();
- void reverseRuns(unsigned start, unsigned end);
-
- Iterator current;
- Iterator sor;
- Iterator eor;
- Iterator last;
- BidiStatus m_status;
- WTF::Unicode::Direction m_direction;
- Iterator endOfLine;
- bool reachedEndOfLine;
- Iterator lastBeforeET;
- bool emptyRun;
-
- Run* m_firstRun;
- Run* m_lastRun;
- Run* m_logicallyLastRun;
- unsigned m_runCount;
- MidpointState<Iterator> m_midpointState;
-
-private:
- void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to);
- void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from);
- void checkDirectionInLowerRaiseEmbeddingLevel();
-
- Vector<WTF::Unicode::Direction, 8> m_currentExplicitEmbeddingSequence;
-};
-
-template <class Iterator, class Run>
-inline void BidiResolver<Iterator, Run>::addRun(Run* run)
-{
- if (!m_firstRun)
- m_firstRun = run;
- else
- m_lastRun->m_next = run;
- m_lastRun = run;
- m_runCount++;
-}
-
-template <class Iterator, class Run>
-inline void BidiResolver<Iterator, Run>::prependRun(Run* run)
-{
- ASSERT(!run->m_next);
-
- if (!m_lastRun)
- m_lastRun = run;
- else
- run->m_next = m_firstRun;
- m_firstRun = run;
- m_runCount++;
-}
-
-template <class Iterator, class Run>
-inline void BidiResolver<Iterator, Run>::moveRunToEnd(Run* run)
-{
- ASSERT(m_firstRun);
- ASSERT(m_lastRun);
- ASSERT(run->m_next);
-
- Run* current = 0;
- Run* next = m_firstRun;
- while (next != run) {
- current = next;
- next = current->next();
- }
-
- if (!current)
- m_firstRun = run->next();
- else
- current->m_next = run->m_next;
-
- run->m_next = 0;
- m_lastRun->m_next = run;
- m_lastRun = run;
-}
-
-template <class Iterator, class Run>
-inline void BidiResolver<Iterator, Run>::moveRunToBeginning(Run* run)
-{
- ASSERT(m_firstRun);
- ASSERT(m_lastRun);
- ASSERT(run != m_firstRun);
-
- Run* current = m_firstRun;
- Run* next = current->next();
- while (next != run) {
- current = next;
- next = current->next();
- }
-
- current->m_next = run->m_next;
- if (run == m_lastRun)
- m_lastRun = current;
-
- run->m_next = m_firstRun;
- m_firstRun = run;
-}
-
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::appendRun()
-{
- if (!emptyRun && !eor.atEnd()) {
- unsigned startOffset = sor.offset();
- unsigned endOffset = eor.offset();
-
- if (!endOfLine.atEnd() && endOffset >= endOfLine.offset()) {
- reachedEndOfLine = true;
- endOffset = endOfLine.offset();
- }
-
- if (endOffset >= startOffset)
- addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
-
- eor.increment();
- sor = eor;
- }
-
- m_direction = WTF::Unicode::OtherNeutral;
- m_status.eor = WTF::Unicode::OtherNeutral;
-}
-
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction d)
-{
- using namespace WTF::Unicode;
-
- ASSERT(d == PopDirectionalFormat || d == LeftToRightEmbedding || d == LeftToRightOverride || d == RightToLeftEmbedding || d == RightToLeftOverride);
- m_currentExplicitEmbeddingSequence.append(d);
-}
-
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
-{
- using namespace WTF::Unicode;
-
- ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
- // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
- // Bidi control characters are included into BidiRun, so last direction
- // could be one of the bidi embeddings when there are nested embeddings.
- // For example: "&#x202a;&#x202b;....."
- ASSERT(m_status.last == EuropeanNumberSeparator
- || m_status.last == EuropeanNumberTerminator
- || m_status.last == CommonNumberSeparator
- || m_status.last == BoundaryNeutral
- || m_status.last == BlockSeparator
- || m_status.last == SegmentSeparator
- || m_status.last == WhiteSpaceNeutral
- || m_status.last == OtherNeutral
- || m_status.last == RightToLeftEmbedding
- || m_status.last == LeftToRightEmbedding
- || m_status.last == RightToLeftOverride
- || m_status.last == LeftToRightOverride
- || m_status.last == PopDirectionalFormat);
- if (m_direction == OtherNeutral)
- m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
-}
-
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from)
-{
- using namespace WTF::Unicode;
-
- if (!emptyRun && eor != last) {
- checkDirectionInLowerRaiseEmbeddingLevel();
- if (from == LeftToRight) {
- // bidi.sor ... bidi.eor ... bidi.last L
- if (m_status.eor == EuropeanNumber) {
- if (m_status.lastStrong != LeftToRight) {
- m_direction = EuropeanNumber;
- appendRun();
- }
- } else if (m_status.eor == ArabicNumber) {
- m_direction = ArabicNumber;
- appendRun();
- } else if (m_status.lastStrong != LeftToRight) {
- appendRun();
- m_direction = LeftToRight;
- }
- } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) {
- appendRun();
- m_direction = RightToLeft;
- }
- eor = last;
- }
- appendRun();
- emptyRun = true;
- // sor for the new run is determined by the higher level (rule X10)
- setLastDir(from);
- setLastStrongDir(from);
- eor = Iterator();
-}
-
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to)
-{
- using namespace WTF::Unicode;
-
- if (!emptyRun && eor != last) {
- checkDirectionInLowerRaiseEmbeddingLevel();
- if (to == LeftToRight) {
- // bidi.sor ... bidi.eor ... bidi.last L
- if (m_status.eor == EuropeanNumber) {
- if (m_status.lastStrong != LeftToRight) {
- m_direction = EuropeanNumber;
- appendRun();
- }
- } else if (m_status.eor == ArabicNumber) {
- m_direction = ArabicNumber;
- appendRun();
- } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) {
- appendRun();
- m_direction = LeftToRight;
- }
- } else if (m_status.eor == ArabicNumber
- || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft))
- || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) {
- appendRun();
- m_direction = RightToLeft;
- }
- eor = last;
- }
- appendRun();
- emptyRun = true;
- setLastDir(to);
- setLastStrongDir(to);
- eor = Iterator();
-}
-
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::commitExplicitEmbedding()
-{
- using namespace WTF::Unicode;
-
- unsigned char fromLevel = context()->level();
- RefPtr<BidiContext> toContext = context();
-
- for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
- Direction embedding = m_currentExplicitEmbeddingSequence[i];
- if (embedding == PopDirectionalFormat) {
- if (BidiContext* parentContext = toContext->parent())
- toContext = parentContext;
- } else {
- Direction direction = (embedding == RightToLeftEmbedding || embedding == RightToLeftOverride) ? RightToLeft : LeftToRight;
- bool override = embedding == LeftToRightOverride || embedding == RightToLeftOverride;
- unsigned char level = toContext->level();
- if (direction == RightToLeft) {
- // Go to the least greater odd integer
- level += 1;
- level |= 1;
- } else {
- // Go to the least greater even integer
- level += 2;
- level &= ~1;
- }
- if (level < 61)
- toContext = BidiContext::create(level, direction, override, toContext.get());
- }
- }
-
- unsigned char toLevel = toContext->level();
-
- if (toLevel > fromLevel)
- raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? RightToLeft : LeftToRight);
- else if (toLevel < fromLevel)
- lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight);
-
- setContext(toContext);
-
- m_currentExplicitEmbeddingSequence.clear();
-}
-
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::deleteRuns()
-{
- emptyRun = true;
- if (!m_firstRun)
- return;
-
- Run* curr = m_firstRun;
- while (curr) {
- Run* s = curr->next();
- curr->destroy();
- curr = s;
- }
-
- m_firstRun = 0;
- m_lastRun = 0;
- m_runCount = 0;
-}
-
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::reverseRuns(unsigned start, unsigned end)
-{
- if (start >= end)
- return;
-
- ASSERT(end < m_runCount);
-
- // Get the item before the start of the runs to reverse and put it in
- // |beforeStart|. |curr| should point to the first run to reverse.
- Run* curr = m_firstRun;
- Run* beforeStart = 0;
- unsigned i = 0;
- while (i < start) {
- i++;
- beforeStart = curr;
- curr = curr->next();
- }
-
- Run* startRun = curr;
- while (i < end) {
- i++;
- curr = curr->next();
- }
- Run* endRun = curr;
- Run* afterEnd = curr->next();
-
- i = start;
- curr = startRun;
- Run* newNext = afterEnd;
- while (i <= end) {
- // Do the reversal.
- Run* next = curr->next();
- curr->m_next = newNext;
- newNext = curr;
- curr = next;
- i++;
- }
-
- // Now hook up beforeStart and afterEnd to the startRun and endRun.
- if (beforeStart)
- beforeStart->m_next = endRun;
- else
- m_firstRun = endRun;
-
- startRun->m_next = afterEnd;
- if (!afterEnd)
- m_lastRun = startRun;
-}
-
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, bool visualOrder, bool hardLineBreak)
-{
- using namespace WTF::Unicode;
-
- ASSERT(m_direction == OtherNeutral);
-
- emptyRun = true;
-
- eor = Iterator();
-
- last = current;
- bool pastEnd = false;
- BidiResolver<Iterator, Run> stateAtEnd;
-
- while (true) {
- Direction dirCurrent;
- if (pastEnd && (hardLineBreak || current.atEnd())) {
- BidiContext* c = context();
- while (c->parent())
- c = c->parent();
- dirCurrent = c->dir();
- if (hardLineBreak) {
- // A deviation from the Unicode Bidi Algorithm in order to match
- // Mac OS X text and WinIE: a hard line break resets bidi state.
- stateAtEnd.setContext(c);
- stateAtEnd.setEorDir(dirCurrent);
- stateAtEnd.setLastDir(dirCurrent);
- stateAtEnd.setLastStrongDir(dirCurrent);
- }
- } else {
- dirCurrent = current.direction();
- if (context()->override()
- && dirCurrent != RightToLeftEmbedding
- && dirCurrent != LeftToRightEmbedding
- && dirCurrent != RightToLeftOverride
- && dirCurrent != LeftToRightOverride
- && dirCurrent != PopDirectionalFormat)
- dirCurrent = context()->dir();
- else if (dirCurrent == NonSpacingMark)
- dirCurrent = m_status.last;
- }
-
- ASSERT(m_status.eor != OtherNeutral || eor.atEnd());
- switch (dirCurrent) {
-
- // embedding and overrides (X1-X9 in the Bidi specs)
- case RightToLeftEmbedding:
- case LeftToRightEmbedding:
- case RightToLeftOverride:
- case LeftToRightOverride:
- case PopDirectionalFormat:
- embed(dirCurrent);
- commitExplicitEmbedding();
- break;
-
- // strong types
- case LeftToRight:
- switch(m_status.last) {
- case RightToLeft:
- case RightToLeftArabic:
- case EuropeanNumber:
- case ArabicNumber:
- if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight)
- appendRun();
- break;
- case LeftToRight:
- break;
- case EuropeanNumberSeparator:
- case EuropeanNumberTerminator:
- case CommonNumberSeparator:
- case BoundaryNeutral:
- case BlockSeparator:
- case SegmentSeparator:
- case WhiteSpaceNeutral:
- case OtherNeutral:
- if (m_status.eor == EuropeanNumber) {
- if (m_status.lastStrong != LeftToRight) {
- // the numbers need to be on a higher embedding level, so let's close that run
- m_direction = EuropeanNumber;
- appendRun();
- if (context()->dir() != LeftToRight) {
- // the neutrals take the embedding direction, which is R
- eor = last;
- m_direction = RightToLeft;
- appendRun();
- }
- }
- } else if (m_status.eor == ArabicNumber) {
- // Arabic numbers are always on a higher embedding level, so let's close that run
- m_direction = ArabicNumber;
- appendRun();
- if (context()->dir() != LeftToRight) {
- // the neutrals take the embedding direction, which is R
- eor = last;
- m_direction = RightToLeft;
- appendRun();
- }
- } else if (m_status.lastStrong != LeftToRight) {
- //last stuff takes embedding dir
- if (context()->dir() == RightToLeft) {
- eor = last;
- m_direction = RightToLeft;
- }
- appendRun();
- }
- default:
- break;
- }
- eor = current;
- m_status.eor = LeftToRight;
- m_status.lastStrong = LeftToRight;
- m_direction = LeftToRight;
- break;
- case RightToLeftArabic:
- case RightToLeft:
- switch (m_status.last) {
- case LeftToRight:
- case EuropeanNumber:
- case ArabicNumber:
- appendRun();
- case RightToLeft:
- case RightToLeftArabic:
- break;
- case EuropeanNumberSeparator:
- case EuropeanNumberTerminator:
- case CommonNumberSeparator:
- case BoundaryNeutral:
- case BlockSeparator:
- case SegmentSeparator:
- case WhiteSpaceNeutral:
- case OtherNeutral:
- if (m_status.eor == EuropeanNumber) {
- if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight)
- eor = last;
- appendRun();
- } else if (m_status.eor == ArabicNumber)
- appendRun();
- else if (m_status.lastStrong == LeftToRight) {
- if (context()->dir() == LeftToRight)
- eor = last;
- appendRun();
- }
- default:
- break;
- }
- eor = current;
- m_status.eor = RightToLeft;
- m_status.lastStrong = dirCurrent;
- m_direction = RightToLeft;
- break;
-
- // weak types:
-
- case EuropeanNumber:
- if (m_status.lastStrong != RightToLeftArabic) {
- // if last strong was AL change EN to AN
- switch (m_status.last) {
- case EuropeanNumber:
- case LeftToRight:
- break;
- case RightToLeft:
- case RightToLeftArabic:
- case ArabicNumber:
- eor = last;
- appendRun();
- m_direction = EuropeanNumber;
- break;
- case EuropeanNumberSeparator:
- case CommonNumberSeparator:
- if (m_status.eor == EuropeanNumber)
- break;
- case EuropeanNumberTerminator:
- case BoundaryNeutral:
- case BlockSeparator:
- case SegmentSeparator:
- case WhiteSpaceNeutral:
- case OtherNeutral:
- if (m_status.eor == EuropeanNumber) {
- if (m_status.lastStrong == RightToLeft) {
- // ENs on both sides behave like Rs, so the neutrals should be R.
- // Terminate the EN run.
- appendRun();
- // Make an R run.
- eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
- m_direction = RightToLeft;
- appendRun();
- // Begin a new EN run.
- m_direction = EuropeanNumber;
- }
- } else if (m_status.eor == ArabicNumber) {
- // Terminate the AN run.
- appendRun();
- if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) {
- // Make an R run.
- eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
- m_direction = RightToLeft;
- appendRun();
- // Begin a new EN run.
- m_direction = EuropeanNumber;
- }
- } else if (m_status.lastStrong == RightToLeft) {
- // Extend the R run to include the neutrals.
- eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last;
- m_direction = RightToLeft;
- appendRun();
- // Begin a new EN run.
- m_direction = EuropeanNumber;
- }
- default:
- break;
- }
- eor = current;
- m_status.eor = EuropeanNumber;
- if (m_direction == OtherNeutral)
- m_direction = LeftToRight;
- break;
- }
- case ArabicNumber:
- dirCurrent = ArabicNumber;
- switch (m_status.last) {
- case LeftToRight:
- if (context()->dir() == LeftToRight)
- appendRun();
- break;
- case ArabicNumber:
- break;
- case RightToLeft:
- case RightToLeftArabic:
- case EuropeanNumber:
- eor = last;
- appendRun();
- break;
- case CommonNumberSeparator:
- if (m_status.eor == ArabicNumber)
- break;
- case EuropeanNumberSeparator:
- case EuropeanNumberTerminator:
- case BoundaryNeutral:
- case BlockSeparator:
- case SegmentSeparator:
- case WhiteSpaceNeutral:
- case OtherNeutral:
- if (m_status.eor == ArabicNumber
- || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft))
- || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) {
- // Terminate the run before the neutrals.
- appendRun();
- // Begin an R run for the neutrals.
- m_direction = RightToLeft;
- } else if (m_direction == OtherNeutral)
- m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft;
- eor = last;
- appendRun();
- default:
- break;
- }
- eor = current;
- m_status.eor = ArabicNumber;
- if (m_direction == OtherNeutral)
- m_direction = ArabicNumber;
- break;
- case EuropeanNumberSeparator:
- case CommonNumberSeparator:
- break;
- case EuropeanNumberTerminator:
- if (m_status.last == EuropeanNumber) {
- dirCurrent = EuropeanNumber;
- eor = current;
- m_status.eor = dirCurrent;
- } else if (m_status.last != EuropeanNumberTerminator)
- lastBeforeET = emptyRun ? eor : last;
- break;
-
- // boundary neutrals should be ignored
- case BoundaryNeutral:
- if (eor == last)
- eor = current;
- break;
- // neutrals
- case BlockSeparator:
- // ### what do we do with newline and paragraph seperators that come to here?
- break;
- case SegmentSeparator:
- // ### implement rule L1
- break;
- case WhiteSpaceNeutral:
- break;
- case OtherNeutral:
- break;
- default:
- break;
- }
-
- if (pastEnd && eor == current) {
- if (!reachedEndOfLine) {
- eor = endOfLine;
- switch (m_status.eor) {
- case LeftToRight:
- case RightToLeft:
- case ArabicNumber:
- m_direction = m_status.eor;
- break;
- case EuropeanNumber:
- m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber;
- break;
- default:
- ASSERT(false);
- }
- appendRun();
- }
- current = end;
- m_status = stateAtEnd.m_status;
- sor = stateAtEnd.sor;
- eor = stateAtEnd.eor;
- last = stateAtEnd.last;
- reachedEndOfLine = stateAtEnd.reachedEndOfLine;
- lastBeforeET = stateAtEnd.lastBeforeET;
- emptyRun = stateAtEnd.emptyRun;
- m_direction = OtherNeutral;
- break;
- }
-
- // set m_status.last as needed.
- switch (dirCurrent) {
- case EuropeanNumberTerminator:
- if (m_status.last != EuropeanNumber)
- m_status.last = EuropeanNumberTerminator;
- break;
- case EuropeanNumberSeparator:
- case CommonNumberSeparator:
- case SegmentSeparator:
- case WhiteSpaceNeutral:
- case OtherNeutral:
- switch(m_status.last) {
- case LeftToRight:
- case RightToLeft:
- case RightToLeftArabic:
- case EuropeanNumber:
- case ArabicNumber:
- m_status.last = dirCurrent;
- break;
- default:
- m_status.last = OtherNeutral;
- }
- break;
- case NonSpacingMark:
- case BoundaryNeutral:
- // ignore these
- break;
- case EuropeanNumber:
- // fall through
- default:
- m_status.last = dirCurrent;
- }
-
- last = current;
-
- if (emptyRun) {
- sor = current;
- emptyRun = false;
- }
-
- increment();
- if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
- commitExplicitEmbedding();
- if (pastEnd) {
- current = end;
- m_status = stateAtEnd.m_status;
- sor = stateAtEnd.sor;
- eor = stateAtEnd.eor;
- last = stateAtEnd.last;
- reachedEndOfLine = stateAtEnd.reachedEndOfLine;
- lastBeforeET = stateAtEnd.lastBeforeET;
- emptyRun = stateAtEnd.emptyRun;
- m_direction = OtherNeutral;
- break;
- }
- }
-
- if (!pastEnd && (current == end || current.atEnd())) {
- if (emptyRun)
- break;
- stateAtEnd.m_status = m_status;
- stateAtEnd.sor = sor;
- stateAtEnd.eor = eor;
- stateAtEnd.last = last;
- stateAtEnd.reachedEndOfLine = reachedEndOfLine;
- stateAtEnd.lastBeforeET = lastBeforeET;
- stateAtEnd.emptyRun = emptyRun;
- endOfLine = last;
- pastEnd = true;
- }
- }
-
- m_logicallyLastRun = m_lastRun;
-
- // reorder line according to run structure...
- // do not reverse for visually ordered web sites
- if (!visualOrder) {
-
- // first find highest and lowest levels
- unsigned char levelLow = 128;
- unsigned char levelHigh = 0;
- Run* r = firstRun();
- while (r) {
- if (r->m_level > levelHigh)
- levelHigh = r->m_level;
- if (r->m_level < levelLow)
- levelLow = r->m_level;
- r = r->next();
- }
-
- // implements reordering of the line (L2 according to Bidi spec):
- // L2. From the highest level found in the text to the lowest odd level on each line,
- // reverse any contiguous sequence of characters that are at that level or higher.
-
- // reversing is only done up to the lowest odd level
- if (!(levelLow % 2))
- levelLow++;
-
- unsigned count = runCount() - 1;
-
- while (levelHigh >= levelLow) {
- unsigned i = 0;
- Run* currRun = firstRun();
- while (i < count) {
- while (i < count && currRun && currRun->m_level < levelHigh) {
- i++;
- currRun = currRun->next();
- }
- unsigned start = i;
- while (i <= count && currRun && currRun->m_level >= levelHigh) {
- i++;
- currRun = currRun->next();
- }
- unsigned end = i - 1;
- reverseRuns(start, end);
- }
- levelHigh--;
- }
- }
- endOfLine = Iterator();
-}
-
-} // namespace WebCore
-
-#endif // BidiResolver_h
diff --git a/WebCore/platform/text/CharacterNames.h b/WebCore/platform/text/CharacterNames.h
deleted file mode 100644
index c4b496e..0000000
--- a/WebCore/platform/text/CharacterNames.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (C) 2007, 2009, 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef CharacterNames_h
-#define CharacterNames_h
-
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
-// Names here are taken from the Unicode standard.
-
-// Most of these are UChar constants, not UChar32, which makes them
-// more convenient for WebCore code that mostly uses UTF-16.
-
-const UChar32 aegeanWordSeparatorLine = 0x10100;
-const UChar32 aegeanWordSeparatorDot = 0x10101;
-const UChar blackCircle = 0x25CF;
-const UChar blackSquare = 0x25A0;
-const UChar blackUpPointingTriangle = 0x25B2;
-const UChar bullet = 0x2022;
-const UChar bullseye = 0x25CE;
-const UChar carriageReturn = 0x000D;
-const UChar ethiopicPrefaceColon = 0x1366;
-const UChar ethiopicWordspace = 0x1361;
-const UChar fisheye = 0x25C9;
-const UChar hebrewPunctuationGeresh = 0x05F3;
-const UChar hebrewPunctuationGershayim = 0x05F4;
-const UChar horizontalEllipsis = 0x2026;
-const UChar hyphen = 0x2010;
-const UChar hyphenMinus = 0x002D;
-const UChar ideographicComma = 0x3001;
-const UChar ideographicFullStop = 0x3002;
-const UChar ideographicSpace = 0x3000;
-const UChar leftDoubleQuotationMark = 0x201C;
-const UChar leftSingleQuotationMark = 0x2018;
-const UChar leftToRightEmbed = 0x202A;
-const UChar leftToRightMark = 0x200E;
-const UChar leftToRightOverride = 0x202D;
-const UChar minusSign = 0x2212;
-const UChar newlineCharacter = 0x000A;
-const UChar noBreakSpace = 0x00A0;
-const UChar objectReplacementCharacter = 0xFFFC;
-const UChar popDirectionalFormatting = 0x202C;
-const UChar replacementCharacter = 0xFFFD;
-const UChar rightDoubleQuotationMark = 0x201D;
-const UChar rightSingleQuotationMark = 0x2019;
-const UChar rightToLeftEmbed = 0x202B;
-const UChar rightToLeftMark = 0x200F;
-const UChar rightToLeftOverride = 0x202E;
-const UChar sesameDot = 0xFE45;
-const UChar softHyphen = 0x00AD;
-const UChar space = 0x0020;
-const UChar tibetanMarkIntersyllabicTsheg = 0x0F0B;
-const UChar tibetanMarkDelimiterTshegBstar = 0x0F0C;
-const UChar32 ugariticWordDivider = 0x1039F;
-const UChar whiteBullet = 0x25E6;
-const UChar whiteCircle = 0x25CB;
-const UChar whiteSesameDot = 0xFE46;
-const UChar whiteUpPointingTriangle = 0x25B3;
-const UChar yenSign = 0x00A5;
-const UChar zeroWidthJoiner = 0x200D;
-const UChar zeroWidthNonJoiner = 0x200C;
-const UChar zeroWidthSpace = 0x200B;
-
-}
-
-#endif // CharacterNames_h
diff --git a/WebCore/platform/text/Hyphenation.cpp b/WebCore/platform/text/Hyphenation.cpp
deleted file mode 100644
index 89f6438..0000000
--- a/WebCore/platform/text/Hyphenation.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "Hyphenation.h"
-
-#include "NotImplemented.h"
-
-namespace WebCore {
-
-bool canHyphenate(const AtomicString& /* localeIdentifier */)
-{
- return false;
-}
-
-size_t lastHyphenLocation(const UChar* /* characters */, size_t /* length */, size_t /* beforeIndex */, const AtomicString& /* localeIdentifier */)
-{
- ASSERT_NOT_REACHED();
- return 0;
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/Hyphenation.h b/WebCore/platform/text/Hyphenation.h
deleted file mode 100644
index a99bff0..0000000
--- a/WebCore/platform/text/Hyphenation.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef Hyphenation_h
-#define Hyphenation_h
-
-#include <wtf/Forward.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
-bool canHyphenate(const AtomicString& localeIdentifier);
-size_t lastHyphenLocation(const UChar*, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier);
-
-} // namespace WebCore
-
-#endif // Hyphenation_h
diff --git a/WebCore/platform/text/LineEnding.cpp b/WebCore/platform/text/LineEnding.cpp
deleted file mode 100644
index 00a90eb..0000000
--- a/WebCore/platform/text/LineEnding.cpp
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
- * Copyright (C) 2010 Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "LineEnding.h"
-
-#include "PlatformString.h"
-#include <wtf/text/CString.h>
-
-namespace {
-
-class OutputBuffer {
-public:
- virtual char* allocate(size_t size) = 0;
- virtual void copy(const CString&) = 0;
- virtual ~OutputBuffer() { }
-};
-
-class CStringBuffer : public OutputBuffer {
-public:
- CStringBuffer(CString& buffer)
- : m_buffer(buffer)
- {
- }
- virtual ~CStringBuffer() { }
-
- virtual char* allocate(size_t size)
- {
- char* ptr;
- m_buffer = CString::newUninitialized(size, ptr);
- return ptr;
- }
-
- virtual void copy(const CString& source)
- {
- m_buffer = source;
- }
-
- const CString& buffer() const { return m_buffer; }
-
-private:
- CString m_buffer;
-};
-
-class VectorCharAppendBuffer : public OutputBuffer {
-public:
- VectorCharAppendBuffer(Vector<char>& buffer)
- : m_buffer(buffer)
- {
- }
- virtual ~VectorCharAppendBuffer() { }
-
- virtual char* allocate(size_t size)
- {
- size_t oldSize = m_buffer.size();
- m_buffer.grow(oldSize + size);
- return m_buffer.data() + oldSize;
- }
-
- virtual void copy(const CString& source)
- {
- m_buffer.append(source.data(), source.length());
- }
-
-private:
- Vector<char>& m_buffer;
-};
-
-void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffer)
-{
- // Compute the new length.
- size_t newLen = 0;
- const char* p = from.data();
- while (char c = *p++) {
- if (c == '\r') {
- // Safe to look ahead because of trailing '\0'.
- if (*p != '\n') {
- // Turn CR into CRLF.
- newLen += 2;
- }
- } else if (c == '\n') {
- // Turn LF into CRLF.
- newLen += 2;
- } else {
- // Leave other characters alone.
- newLen += 1;
- }
- }
- if (newLen < from.length())
- return;
-
- if (newLen == from.length()) {
- buffer.copy(from);
- return;
- }
-
- p = from.data();
- char* q = buffer.allocate(newLen);
-
- // Make a copy of the string.
- while (char c = *p++) {
- if (c == '\r') {
- // Safe to look ahead because of trailing '\0'.
- if (*p != '\n') {
- // Turn CR into CRLF.
- *q++ = '\r';
- *q++ = '\n';
- }
- } else if (c == '\n') {
- // Turn LF into CRLF.
- *q++ = '\r';
- *q++ = '\n';
- } else {
- // Leave other characters alone.
- *q++ = c;
- }
- }
-}
-
-};
-
-namespace WebCore {
-
-void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR);
-
-// Normalize all line-endings to CR or LF.
-void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR)
-{
- // Compute the new length.
- size_t newLen = 0;
- bool needFix = false;
- const char* p = from.data();
- char fromEndingChar = toCR ? '\n' : '\r';
- char toEndingChar = toCR ? '\r' : '\n';
- while (char c = *p++) {
- if (c == '\r' && *p == '\n') {
- // Turn CRLF into CR or LF.
- p++;
- needFix = true;
- } else if (c == fromEndingChar) {
- // Turn CR/LF into LF/CR.
- needFix = true;
- }
- newLen += 1;
- }
-
- // Grow the result buffer.
- p = from.data();
- size_t oldResultSize = result.size();
- result.grow(oldResultSize + newLen);
- char* q = result.data() + oldResultSize;
-
- // If no need to fix the string, just copy the string over.
- if (!needFix) {
- memcpy(q, p, from.length());
- return;
- }
-
- // Make a copy of the string.
- while (char c = *p++) {
- if (c == '\r' && *p == '\n') {
- // Turn CRLF or CR into CR or LF.
- p++;
- *q++ = toEndingChar;
- } else if (c == fromEndingChar) {
- // Turn CR/LF into LF/CR.
- *q++ = toEndingChar;
- } else {
- // Leave other characters alone.
- *q++ = c;
- }
- }
-}
-
-CString normalizeLineEndingsToCRLF(const CString& from)
-{
- CString result;
- CStringBuffer buffer(result);
- internalNormalizeLineEndingsToCRLF(from, buffer);
- return buffer.buffer();
-}
-
-void normalizeLineEndingsToCR(const CString& from, Vector<char>& result)
-{
- normalizeToCROrLF(from, result, true);
-}
-
-void normalizeLineEndingsToLF(const CString& from, Vector<char>& result)
-{
- normalizeToCROrLF(from, result, false);
-}
-
-void normalizeLineEndingsToNative(const CString& from, Vector<char>& result)
-{
-#if OS(WINDOWS)
- VectorCharAppendBuffer buffer(result);
- internalNormalizeLineEndingsToCRLF(from, buffer);
-#else
- normalizeLineEndingsToLF(from, result);
-#endif
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/LineEnding.h b/WebCore/platform/text/LineEnding.h
deleted file mode 100644
index 4306ce8..0000000
--- a/WebCore/platform/text/LineEnding.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
- * Copyright (C) 2010 Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef LineEnding_h
-#define LineEnding_h
-
-#include <wtf/Forward.h>
-#include <wtf/Vector.h>
-
-namespace WebCore {
-
-// Normalize all line-endings in the given string to CRLF and return the result as a new CString.
-CString normalizeLineEndingsToCRLF(const CString& from);
-
-// Normalize all line-endings in the given string to CR and append the result to the given buffer (existing contents are kept).
-void normalizeLineEndingsToCR(const CString& from, Vector<char>& result);
-
-// Normalize all line-endings in the given string to LF and append the result to the given buffer (existing contents are kept).
-void normalizeLineEndingsToLF(const CString& from, Vector<char>& result);
-
-// Normalize all line-endings in the given string to the native line-endings and append the result to the given buffer.
-// (Normalize to CRLF on Windows and normalize to LF on all other platforms.)
-void normalizeLineEndingsToNative(const CString& from, Vector<char>& result);
-
-} // namespace WebCore
-
-#endif // LineEnding_h
diff --git a/WebCore/platform/text/ParserUtilities.h b/WebCore/platform/text/ParserUtilities.h
deleted file mode 100644
index 3105214..0000000
--- a/WebCore/platform/text/ParserUtilities.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2008 Apple Inc. All Rights Reserved.
- * Copyright (C) 2002, 2003 The Karbon Developers
- * Copyright (C) 2006, 2007 Rob Buis <buis@kde.org>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef ParserUtilities_h
-#define ParserUtilities_h
-
-#include "PlatformString.h"
-
-namespace WebCore {
-
- // Consumes |length| UChars at |ptr| when at least that many remain before
- // |end| and they compare equal (memcmp) to |name|. On success |ptr| is
- // advanced past them and true is returned; otherwise |ptr| is untouched
- // and false is returned.
- inline bool skipString(const UChar*& ptr, const UChar* end, const UChar* name, int length)
- {
-     const bool fits = end - ptr >= length;
-     if (!fits || memcmp(name, ptr, sizeof(UChar) * length))
-         return false;
-     ptr += length;
-     return true;
- }
-
- // Same as the UChar overload, but the expected text is a NUL-terminated
- // char string compared element by element against the UChars at |ptr|.
- inline bool skipString(const UChar*& ptr, const UChar* end, const char* str)
- {
-     int length = strlen(str);
-     if (end - ptr < length)
-         return false;
-
-     // Count how many leading characters agree.
-     int matched = 0;
-     while (matched < length && ptr[matched] == str[matched])
-         ++matched;
-     if (matched < length)
-         return false;
-
-     ptr += length;
-     return true;
- }
-
-} // namespace WebCore
-
-#endif // ParserUtilities_h
diff --git a/WebCore/platform/text/PlatformString.h b/WebCore/platform/text/PlatformString.h
deleted file mode 100644
index e525bd4..0000000
--- a/WebCore/platform/text/PlatformString.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * (C) 1999 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef PlatformString_h
-#define PlatformString_h
-
-// This file would be called String.h, but that conflicts with <string.h>
-// on systems without case-sensitive file systems.
-
-#include <wtf/text/WTFString.h>
-
-namespace WebCore {
-
-class SharedBuffer;
-
-PassRefPtr<SharedBuffer> utf8Buffer(const String&); // UTF-8 encoding of the string; null if the UTF-16 to UTF-8 conversion fails.
-// Counts the number of grapheme clusters. A surrogate pair or a sequence
-// of a non-combining character and following combining characters is
-// counted as 1 grapheme cluster.
-unsigned numGraphemeClusters(const String& s);
-// Returns the number of characters covered by the first N grapheme clusters
-// of s, where N is the second argument (s.length() if s has fewer clusters).
-unsigned numCharactersInGraphemeClusters(const String& s, unsigned);
-
-} // namespace WebCore
-
-#endif
diff --git a/WebCore/platform/text/RegularExpression.cpp b/WebCore/platform/text/RegularExpression.cpp
deleted file mode 100644
index 9b063c9..0000000
--- a/WebCore/platform/text/RegularExpression.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2008 Collabora Ltd.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "RegularExpression.h"
-
-#include "Logging.h"
-#include <pcre/pcre.h>
-
-namespace WebCore {
-
-class RegularExpression::Private : public RefCounted<RegularExpression::Private> { // Ref-counted state shared by copies of a RegularExpression.
-public:
- static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity);
- ~Private();
-
- JSRegExp* regexp() const { return m_regexp; } // Null when the pattern failed to compile.
- int lastMatchLength; // Length of the most recent match; starts at -1 and is reset to -1 when a match attempt fails.
-
-private:
- Private(const String& pattern, TextCaseSensitivity);
- static JSRegExp* compile(const String& pattern, TextCaseSensitivity);
-
- JSRegExp* m_regexp; // Compiled pattern; released with jsRegExpFree in the destructor.
-};
-
-// Compiles |pattern| with jsRegExpCompile in single-line mode, ignoring case
-// unless |caseSensitivity| is TextCaseSensitive. On failure the error is
-// logged and null is returned.
-inline JSRegExp* RegularExpression::Private::compile(const String& pattern, TextCaseSensitivity caseSensitivity)
-{
-    const char* errorMessage;
-    const bool caseSensitive = caseSensitivity == TextCaseSensitive;
-    JSRegExp* compiled = jsRegExpCompile(pattern.characters(), pattern.length(),
-        caseSensitive ? JSRegExpDoNotIgnoreCase : JSRegExpIgnoreCase, JSRegExpSingleLine,
-        0, &errorMessage);
-    if (compiled)
-        return compiled;
-
-    LOG_ERROR("RegularExpression: pcre_compile failed with '%s'", errorMessage);
-    return 0;
-}
-
-// Starts with no recorded match (-1) and compiles |pattern| right away.
-inline RegularExpression::Private::Private(const String& pattern, TextCaseSensitivity caseSensitivity)
-    : lastMatchLength(-1), m_regexp(compile(pattern, caseSensitivity))
-{
-}
-
-// Factory: heap-allocates a Private and hands ownership to a PassRefPtr.
-inline PassRefPtr<RegularExpression::Private> RegularExpression::Private::create(const String& pattern, TextCaseSensitivity caseSensitivity)
-{
-    Private* instance = new Private(pattern, caseSensitivity);
-    return adoptRef(instance);
-}
-
-// Releases the compiled pattern.
-RegularExpression::Private::~Private()
-{
-    JSRegExp* compiled = m_regexp;
-    jsRegExpFree(compiled);
-}
-
-// Builds the shared Private state, which compiles |pattern| with the
-// requested case sensitivity.
-RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity)
-    : d(Private::create(pattern, caseSensitivity)) { }
-
-// Copying shares the ref-counted Private (and therefore the compiled
-// pattern and last match length) with |re|.
-RegularExpression::RegularExpression(const RegularExpression& re)
-    : d(re.d) { }
-
-// Nothing to do explicitly: |d| drops its reference on destruction.
-RegularExpression::~RegularExpression() { }
-
-// Assignment re-points |d| at the Private shared by |re|.
-RegularExpression& RegularExpression::operator=(const RegularExpression& re)
-{
-    this->d = re.d;
-    return *this;
-}
-
-// Runs the compiled expression over |str|, starting at |startFrom|.
-// Returns the start offset of the match, or -1 when the pattern did not
-// compile, |str| is null, or jsRegExpExecute reports a negative result.
-// Whenever the expression is actually executed the match length (or -1) is
-// stored for matchedLength(); |matchLength|, if non-null, is written only on
-// success.
-int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
-{
-    if (!d->regexp() || str.isNull())
-        return -1;
-
-    // offsets[0] / offsets[1] receive the start and end of the match; the 3rd entry is used internally by pcre.
-    static const size_t maxOffsets = 3;
-    int offsets[maxOffsets];
-    const int status = jsRegExpExecute(d->regexp(), str.characters(), str.length(), startFrom, offsets, maxOffsets);
-
-    if (status >= 0) {
-        // The first match is recorded in offsets.
-        const int length = offsets[1] - offsets[0];
-        d->lastMatchLength = length;
-        if (matchLength)
-            *matchLength = length;
-        return offsets[0];
-    }
-
-    // Anything other than "no match" is unexpected and worth logging.
-    if (status != JSRegExpErrorNoMatch)
-        LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", status);
-    d->lastMatchLength = -1;
-    return -1;
-}
-
-// Returns the offset of the last match in |str| (-1 if there is none) and
-// records its length for matchedLength().
-int RegularExpression::searchRev(const String& str) const
-{
-    // FIXME: This could be faster if it actually searched backwards.
-    // For now it searches forwards repeatedly, restarting one character after
-    // each hit, until no further match is found.
-
-    int bestPos = -1;
-    int bestLength = -1;
-    int start = 0;
-    for (;;) {
-        int matchLength;
-        const int pos = match(str, start, &matchLength);
-        if (pos == -1)
-            break;
-        if (pos >= 0) {
-            // Keep this hit only if it ends later than the best one so far,
-            // i.e. it is not a subset of the previous match.
-            if (pos + matchLength > bestPos + bestLength) {
-                bestPos = pos;
-                bestLength = matchLength;
-            }
-            start = pos + 1;
-        }
-    }
-    d->lastMatchLength = bestLength;
-    return bestPos;
-}
-
-// Length recorded by the most recent match()/searchRev() on the shared state.
-int RegularExpression::matchedLength() const
-{
-    const int length = d->lastMatchLength;
-    return length;
-}
-
-// Replaces each match of |target| in |string| with |replacement|, resuming
-// the search right after the inserted text. Stops after the first
-// zero-length match.
-void replace(String& string, const RegularExpression& target, const String& replacement)
-{
-    for (int index = 0; index < static_cast<int>(string.length()); ) {
-        int matchLength;
-        index = target.match(string, index, &matchLength);
-        if (index < 0)
-            return;
-        string.replace(index, matchLength, replacement);
-        index += replacement.length();
-        if (!matchLength)
-            return; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
-    }
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/RegularExpression.h b/WebCore/platform/text/RegularExpression.h
deleted file mode 100644
index f1611e5..0000000
--- a/WebCore/platform/text/RegularExpression.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2003, 2008, 2009 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef RegularExpression_h
-#define RegularExpression_h
-
-#include "PlatformString.h"
-
-namespace WebCore {
-
-class RegularExpression : public FastAllocBase { // Regular expression matcher over String; state lives in a ref-counted Private.
-public:
- RegularExpression(const String&, TextCaseSensitivity);
- ~RegularExpression();
-
- RegularExpression(const RegularExpression&);
- RegularExpression& operator=(const RegularExpression&);
-
- int match(const String&, int startFrom = 0, int* matchLength = 0) const; // Offset of the match found when searching from startFrom, or -1.
- int searchRev(const String&) const; // Offset of the last match, or -1 if there is none.
-
- int matchedLength() const; // Length recorded by the most recent match()/searchRev().
-
-private:
- class Private;
- RefPtr<Private> d;
-};
-
-void replace(String&, const RegularExpression&, const String&); // Replaces matches of the expression in the first argument with the third argument, in place.
-
-} // namespace WebCore
-
-#endif // RegularExpression_h
diff --git a/WebCore/platform/text/SegmentedString.cpp b/WebCore/platform/text/SegmentedString.cpp
deleted file mode 100644
index b9ff503..0000000
--- a/WebCore/platform/text/SegmentedString.cpp
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public License
- along with this library; see the file COPYING.LIB. If not, write to
- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA.
-*/
-
-#include "config.h"
-#include "SegmentedString.h"
-
-namespace WebCore {
-
-// Copy constructor. Duplicates every field of |other|, including
-// m_numberOfCharactersConsumedPriorToCurrentString (which operator= also
-// copies; leaving it out of the initializer list would leave the int
-// uninitialized). If |other|'s current-character pointer refers to one of
-// its own pushed-back characters, the copy's pointer is redirected to the
-// copy's corresponding member instead of aliasing |other|.
-SegmentedString::SegmentedString(const SegmentedString &other)
-    : m_pushedChar1(other.m_pushedChar1)
-    , m_pushedChar2(other.m_pushedChar2)
-    , m_currentString(other.m_currentString)
-    , m_numberOfCharactersConsumedPriorToCurrentString(other.m_numberOfCharactersConsumedPriorToCurrentString)
-    , m_substrings(other.m_substrings)
-    , m_composite(other.m_composite)
-    , m_closed(other.m_closed)
-{
-    if (other.m_currentChar == &other.m_pushedChar1)
-        m_currentChar = &m_pushedChar1;
-    else if (other.m_currentChar == &other.m_pushedChar2)
-        m_currentChar = &m_pushedChar2;
-    else
-        m_currentChar = other.m_currentChar;
-}
-
-// Copy assignment. Copies all fields; a current-character pointer that
-// refers to one of |other|'s pushed-back characters is remapped onto this
-// object's corresponding member.
-const SegmentedString& SegmentedString::operator=(const SegmentedString &other)
-{
-    m_pushedChar1 = other.m_pushedChar1;
-    m_pushedChar2 = other.m_pushedChar2;
-    m_currentString = other.m_currentString;
-    m_substrings = other.m_substrings;
-    m_numberOfCharactersConsumedPriorToCurrentString = other.m_numberOfCharactersConsumedPriorToCurrentString;
-    m_composite = other.m_composite;
-    m_closed = other.m_closed;
-
-    const UChar* otherCurrent = other.m_currentChar;
-    if (otherCurrent == &other.m_pushedChar1)
-        otherCurrent = &m_pushedChar1;
-    else if (otherCurrent == &other.m_pushedChar2)
-        otherCurrent = &m_pushedChar2;
-    m_currentChar = otherCurrent;
-
-    return *this;
-}
-
-// Number of characters still to be read: pushed-back characters, the rest
-// of the current substring and, when composite, every queued substring.
-unsigned SegmentedString::length() const
-{
-    unsigned total = m_currentString.m_length;
-    if (m_pushedChar1)
-        total += m_pushedChar2 ? 2 : 1;
-    if (!m_composite)
-        return total;
-
-    Deque<SegmentedSubstring>::const_iterator end = m_substrings.end();
-    for (Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); it != end; ++it)
-        total += it->m_length;
-    return total;
-}
-
-// Marks every segment so that newlines consumed from it no longer advance
-// the caller's line counter. The current substring is always marked, so
-// excludeLineNumbers() (which reads only the current substring) reports
-// true afterwards even when further substrings are queued.
-void SegmentedString::setExcludeLineNumbers()
-{
-    m_currentString.setExcludeLineNumbers();
-    if (m_composite) {
-        Deque<SegmentedSubstring>::iterator it = m_substrings.begin();
-        Deque<SegmentedSubstring>::iterator e = m_substrings.end();
-        for (; it != e; ++it)
-            it->setExcludeLineNumbers();
-    }
-}
-
-// Drops all pending input: pushed-back characters, the current substring
-// and the queue, and reopens the string. The consumed-character counter is
-// not touched here.
-void SegmentedString::clear()
-{
-    m_pushedChar1 = m_pushedChar2 = 0;
-    m_currentChar = 0;
-    m_currentString.clear();
-    m_substrings.clear();
-    m_composite = m_closed = false;
-}
-
-// Appends one substring. Empty substrings are ignored. If the current
-// substring is exhausted, |s| becomes the current one (after folding the
-// old one's consumed count into the running total); otherwise |s| is queued
-// and the string becomes composite.
-void SegmentedString::append(const SegmentedSubstring &s)
-{
-    ASSERT(!m_closed);
-    if (!s.m_length)
-        return;
-
-    if (m_currentString.m_length) {
-        m_substrings.append(s);
-        m_composite = true;
-        return;
-    }
-
-    m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
-    m_currentString = s;
-}
-
-// Puts |s| in front of the remaining input. Empty substrings are ignored.
-void SegmentedString::prepend(const SegmentedSubstring &s)
-{
-    ASSERT(!escaped());
-    ASSERT(!s.numberOfCharactersConsumed());
-    if (!s.m_length)
-        return;
-
-    // FIXME: We're assuming that the prepended characters were originally
-    // consumed by this SegmentedString. We're also ASSERTing that s is a
-    // fresh SegmentedSubstring. These assumptions are sufficient for our
-    // current use, but we might need to handle the more elaborate cases in
-    // the future.
-    m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
-    m_numberOfCharactersConsumedPriorToCurrentString -= s.m_length;
-
-    if (m_currentString.m_length) {
-        // Shift our m_currentString into our list.
-        m_substrings.prepend(m_currentString);
-        m_composite = true;
-    }
-    m_currentString = s;
-}
-
-// Marks the string as closed; append() asserts that it is not closed.
-void SegmentedString::close()
-{
-    // A second close() on the same stream most likely indicates a coding mistake.
-    ASSERT(!m_closed);
-    m_closed = true;
-}
-
-// Appends every substring of |s| (its current one first, then its queue, in
-// order) and refreshes the current-character pointer.
-void SegmentedString::append(const SegmentedString &s)
-{
-    ASSERT(!m_closed);
-    ASSERT(!s.escaped());
-
-    append(s.m_currentString);
-    if (s.m_composite) {
-        Deque<SegmentedSubstring>::const_iterator it = s.m_substrings.begin();
-        const Deque<SegmentedSubstring>::const_iterator stop = s.m_substrings.end();
-        while (it != stop) {
-            append(*it);
-            ++it;
-        }
-    }
-
-    if (m_pushedChar1)
-        m_currentChar = &m_pushedChar1;
-    else
-        m_currentChar = m_currentString.m_current;
-}
-
-// Prepends all of |s|: its queued substrings are pushed to the front in
-// reverse order, then its current substring, so the original order of |s|
-// is preserved. Finally the current-character pointer is refreshed.
-void SegmentedString::prepend(const SegmentedString &s)
-{
-    ASSERT(!escaped());
-    ASSERT(!s.escaped());
-
-    if (s.m_composite) {
-        Deque<SegmentedSubstring>::const_reverse_iterator it = s.m_substrings.rbegin();
-        const Deque<SegmentedSubstring>::const_reverse_iterator stop = s.m_substrings.rend();
-        while (it != stop) {
-            prepend(*it);
-            ++it;
-        }
-    }
-    prepend(s.m_currentString);
-
-    if (m_pushedChar1)
-        m_currentChar = &m_pushedChar1;
-    else
-        m_currentChar = m_currentString.m_current;
-}
-
-// Called when the current substring is used up: moves on to the next queued
-// substring, or clears the current one when nothing is queued.
-void SegmentedString::advanceSubstring()
-{
-    if (!m_composite) {
-        m_currentString.clear();
-        return;
-    }
-
-    m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
-    m_currentString = m_substrings.takeFirst();
-    // Characters of the new current string that were consumed earlier are
-    // accounted for as part of the current string from now on, so take them
-    // back out of the "prior to current string" total.
-    m_numberOfCharactersConsumedPriorToCurrentString -= m_currentString.numberOfCharactersConsumed();
-    if (m_substrings.isEmpty())
-        m_composite = false;
-}
-
-// Builds a String from the unread input: pushed-back characters first, then
-// the rest of the current substring, then the queued substrings.
-String SegmentedString::toString() const
-{
-    String text;
-    if (m_pushedChar1) {
-        text.append(m_pushedChar1);
-        if (m_pushedChar2)
-            text.append(m_pushedChar2);
-    }
-
-    m_currentString.appendTo(text);
-    if (!m_composite)
-        return text;
-
-    const Deque<SegmentedSubstring>::const_iterator stop = m_substrings.end();
-    for (Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); it != stop; ++it)
-        it->appendTo(text);
-    return text;
-}
-
-// Consumes |count| characters, storing each one in |consumedCharacters|
-// before advancing past it.
-void SegmentedString::advance(unsigned count, UChar* consumedCharacters)
-{
-    ASSERT(count <= length());
-    UChar* out = consumedCharacters;
-    for (unsigned remaining = count; remaining; --remaining) {
-        *out++ = *current();
-        advance();
-    }
-}
-
-// Out-of-line part of advance(): consumes a pushed-back character if one is
-// pending, otherwise the next character of the current substring (moving to
-// the next substring when it runs out), then refreshes m_currentChar.
-void SegmentedString::advanceSlowCase()
-{
-    if (m_pushedChar1) {
-        // Shift the second pushed-back character into the first slot.
-        m_pushedChar1 = m_pushedChar2;
-        m_pushedChar2 = 0;
-    } else if (m_currentString.m_current) {
-        ++m_currentString.m_current;
-        --m_currentString.m_length;
-        if (!m_currentString.m_length)
-            advanceSubstring();
-    }
-
-    if (m_pushedChar1)
-        m_currentChar = &m_pushedChar1;
-    else
-        m_currentChar = m_currentString.m_current;
-}
-
-// Line-counting variant of advanceSlowCase(): additionally increments
-// |lineNumber| when the character consumed from the current substring is
-// '\n' and that substring does not exclude line numbers.
-void SegmentedString::advanceSlowCase(int& lineNumber)
-{
-    if (m_pushedChar1) {
-        // Shift the second pushed-back character into the first slot.
-        m_pushedChar1 = m_pushedChar2;
-        m_pushedChar2 = 0;
-    } else if (m_currentString.m_current) {
-        const UChar consumed = *m_currentString.m_current;
-        ++m_currentString.m_current;
-        if (consumed == '\n' && m_currentString.doNotExcludeLineNumbers())
-            ++lineNumber;
-        --m_currentString.m_length;
-        if (!m_currentString.m_length)
-            advanceSubstring();
-    }
-
-    if (m_pushedChar1)
-        m_currentChar = &m_pushedChar1;
-    else
-        m_currentChar = m_currentString.m_current;
-}
-
-}
diff --git a/WebCore/platform/text/SegmentedString.h b/WebCore/platform/text/SegmentedString.h
deleted file mode 100644
index 91c2cbe..0000000
--- a/WebCore/platform/text/SegmentedString.h
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public License
- along with this library; see the file COPYING.LIB. If not, write to
- the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA.
-*/
-
-#ifndef SegmentedString_h
-#define SegmentedString_h
-
-#include "PlatformString.h"
-#include <wtf/Deque.h>
-
-namespace WebCore {
-
-class SegmentedString;
-
-// One run of characters inside a SegmentedString: the backing String, a
-// cursor into it (m_current) and the count of characters not yet consumed
-// (m_length).
-class SegmentedSubstring {
-public:
-    SegmentedSubstring()
-        : m_length(0)
-        , m_current(0)
-        , m_doNotExcludeLineNumbers(true)
-    {
-    }
-
-    SegmentedSubstring(const String& str)
-        : m_length(str.length())
-        , m_current(str.isEmpty() ? 0 : str.characters())
-        , m_string(str)
-        , m_doNotExcludeLineNumbers(true)
-    {
-    }
-
-    // Forgets the remaining characters; the backing string itself is kept.
-    void clear()
-    {
-        m_length = 0;
-        m_current = 0;
-    }
-
-    bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
-    bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
-
-    void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
-
-    // Characters of the backing string that are no longer pending.
-    int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
-
-    // Appends the unread characters to |str|. When nothing has been consumed
-    // the backing String is reused instead of building a new one.
-    void appendTo(String& str) const
-    {
-        if (m_string.characters() != m_current) {
-            str.append(String(m_current, m_length));
-            return;
-        }
-        if (str.isEmpty())
-            str = m_string;
-        else
-            str.append(m_string);
-    }
-
-public:
-    int m_length;
-    const UChar* m_current;
-
-private:
-    String m_string;
-    bool m_doNotExcludeLineNumbers;
-};
-
-class SegmentedString { // Input stream made of SegmentedSubstrings, read one character at a time, with up to two pushed-back characters.
-public:
- SegmentedString()
- : m_pushedChar1(0)
- , m_pushedChar2(0)
- , m_currentChar(0)
- , m_numberOfCharactersConsumedPriorToCurrentString(0)
- , m_composite(false)
- , m_closed(false)
- {
- }
-
- SegmentedString(const String& str)
- : m_pushedChar1(0)
- , m_pushedChar2(0)
- , m_currentString(str)
- , m_currentChar(m_currentString.m_current)
- , m_numberOfCharactersConsumedPriorToCurrentString(0)
- , m_composite(false)
- , m_closed(false)
- {
- }
-
- SegmentedString(const SegmentedString&);
-
- const SegmentedString& operator=(const SegmentedString&);
-
- void clear();
- void close();
-
- void append(const SegmentedString&);
- void prepend(const SegmentedString&);
-
- bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); } // Reflects the current substring only.
- void setExcludeLineNumbers();
-
- void push(UChar c) // Pushes back one character; a third pending character trips the ASSERT below.
- {
- if (!m_pushedChar1) {
- m_pushedChar1 = c;
- m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
- } else {
- ASSERT(!m_pushedChar2);
- m_pushedChar2 = c;
- }
- }
-
- bool isEmpty() const { return !current(); } // True when there is no current character.
- unsigned length() const;
-
- bool isClosed() const { return m_closed; }
-
- enum LookAheadResult {
- DidNotMatch,
- DidMatch,
- NotEnoughCharacters,
- };
-
- LookAheadResult lookAhead(const String& string) { return lookAheadInline<SegmentedString::equalsLiterally>(string); }
- LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline<SegmentedString::equalsIgnoringCase>(string); }
-
- void advance() // Consumes one character.
- {
- if (!m_pushedChar1 && m_currentString.m_length > 1) { // Fast path: nothing pushed back and not the last character of the current substring.
- --m_currentString.m_length;
- m_currentChar = ++m_currentString.m_current;
- return;
- }
- advanceSlowCase();
- }
-
- void advanceAndASSERT(UChar expectedCharacter)
- {
- ASSERT_UNUSED(expectedCharacter, *current() == expectedCharacter);
- advance();
- }
-
- void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
- {
- ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(*current()) == WTF::Unicode::foldCase(expectedCharacter));
- advance();
- }
-
- void advancePastNewline(int& lineNumber) // For callers that know the current character is '\n'.
- {
- ASSERT(*current() == '\n');
- if (!m_pushedChar1 && m_currentString.m_length > 1) {
- lineNumber += m_currentString.doNotExcludeLineNumbers();
- --m_currentString.m_length;
- m_currentChar = ++m_currentString.m_current;
- return;
- }
- advanceSlowCase(lineNumber);
- }
-
- void advancePastNonNewline() // For callers that know the current character is not '\n'.
- {
- ASSERT(*current() != '\n');
- if (!m_pushedChar1 && m_currentString.m_length > 1) {
- --m_currentString.m_length;
- m_currentChar = ++m_currentString.m_current;
- return;
- }
- advanceSlowCase();
- }
-
- void advance(int& lineNumber) // Consumes one character, counting a '\n' unless the current substring excludes line numbers.
- {
- if (!m_pushedChar1 && m_currentString.m_length > 1) {
- lineNumber += (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers();
- --m_currentString.m_length;
- m_currentChar = ++m_currentString.m_current;
- return;
- }
- advanceSlowCase(lineNumber);
- }
-
- // Writes the consumed characters into consumedCharacters, which must
- // have space for at least |count| characters.
- void advance(unsigned count, UChar* consumedCharacters);
-
- bool escaped() const { return m_pushedChar1; } // True while a pushed-back character is pending.
-
- int numberOfCharactersConsumed()
- {
- // We don't currently handle the case when there are pushed characters.
- ASSERT(!m_pushedChar1);
- return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
- }
-
- String toString() const;
-
- const UChar& operator*() const { return *current(); }
- const UChar* operator->() const { return current(); }
-
-private:
- void append(const SegmentedSubstring&);
- void prepend(const SegmentedSubstring&);
-
- void advanceSlowCase();
- void advanceSlowCase(int& lineNumber);
- void advanceSubstring();
- const UChar* current() const { return m_currentChar; }
-
- static bool equalsLiterally(const UChar* str1, const UChar* str2, size_t count) { return !memcmp(str1, str2, count * sizeof(UChar)); }
- static bool equalsIgnoringCase(const UChar* str1, const UChar* str2, size_t count) { return !WTF::Unicode::umemcasecmp(str1, str2, count); }
-
- template<bool equals(const UChar* str1, const UChar* str2, size_t count)>
- inline LookAheadResult lookAheadInline(const String& string)
- {
- if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) { // Fast path: the candidate fits inside the current substring.
- if (equals(string.characters(), m_currentString.m_current, string.length()))
- return DidMatch;
- return DidNotMatch;
- }
- return lookAheadSlowCase<equals>(string);
- }
-
- template<bool equals(const UChar* str1, const UChar* str2, size_t count)>
- LookAheadResult lookAheadSlowCase(const String& string)
- {
- unsigned count = string.length();
- if (count > length())
- return NotEnoughCharacters;
- UChar* consumedCharacters;
- String consumedString = String::createUninitialized(count, consumedCharacters);
- advance(count, consumedCharacters); // Temporarily consume |count| characters into a scratch string...
- LookAheadResult result = DidNotMatch;
- if (equals(string.characters(), consumedCharacters, count))
- result = DidMatch;
- prepend(SegmentedString(consumedString)); // ...then put them back in front of the remaining input.
- return result;
- }
-
- UChar m_pushedChar1; // First pushed-back character; 0 when none is pending.
- UChar m_pushedChar2; // Second pushed-back character; 0 when none is pending.
- SegmentedSubstring m_currentString; // Substring currently being read.
- const UChar* m_currentChar; // Current character; null when the string is empty (see isEmpty()).
- int m_numberOfCharactersConsumedPriorToCurrentString;
- Deque<SegmentedSubstring> m_substrings; // Substrings queued after m_currentString.
- bool m_composite; // Set when substrings are queued in m_substrings.
- bool m_closed;
-};
-
-}
-
-#endif
diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp
deleted file mode 100644
index f2f8d2e..0000000
--- a/WebCore/platform/text/String.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * (C) 1999 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
- * Copyright (C) 2007-2009 Torch Mobile, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "config.h"
-#include "PlatformString.h"
-
-#include "SharedBuffer.h"
-#include "TextBreakIterator.h"
-#include <wtf/unicode/UTF8.h>
-#include <wtf/unicode/Unicode.h>
-
-using namespace WTF;
-using namespace WTF::Unicode;
-
-namespace WebCore {
-
-// Encodes |string| as UTF-8 into a SharedBuffer. Returns null when
-// convertUTF16ToUTF8 reports anything other than conversionOK.
-PassRefPtr<SharedBuffer> utf8Buffer(const String& string)
-{
-    // Reserve three output bytes for every input UChar.
-    const int length = string.length();
-    Vector<char> buffer(length * 3);
-
-    // Convert to runs of 8-bit characters.
-    char* out = buffer.data();
-    const UChar* in = string.characters();
-    if (convertUTF16ToUTF8(&in, in + length, &out, out + buffer.size(), true) != conversionOK)
-        return 0;
-
-    // Trim the buffer to the bytes actually written.
-    buffer.shrink(out - buffer.data());
-    return SharedBuffer::adoptVector(buffer);
-}
-
-// Counts the character-break boundaries in |s|. Falls back to s.length()
-// when no break iterator is available.
-unsigned numGraphemeClusters(const String& s)
-{
-    TextBreakIterator* it = characterBreakIterator(s.characters(), s.length());
-    if (!it)
-        return s.length();
-
-    unsigned clusters = 0;
-    for (; textBreakNext(it) != TextBreakDone; ++clusters) { }
-    return clusters;
-}
-
-// Returns the break-iterator position reached after stepping over
-// |numGraphemeClusters| boundaries of |s|, or s.length() if the iterator is
-// exhausted first. Without a break iterator the smaller of s.length() and
-// |numGraphemeClusters| is returned.
-unsigned numCharactersInGraphemeClusters(const String& s, unsigned numGraphemeClusters)
-{
-    TextBreakIterator* it = characterBreakIterator(s.characters(), s.length());
-    if (!it)
-        return min(s.length(), numGraphemeClusters);
-
-    unsigned remaining = numGraphemeClusters;
-    while (remaining) {
-        if (textBreakNext(it) == TextBreakDone)
-            return s.length();
-        --remaining;
-    }
-    return textBreakCurrent(it);
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/SuffixTree.h b/WebCore/platform/text/SuffixTree.h
deleted file mode 100644
index f11fd23..0000000
--- a/WebCore/platform/text/SuffixTree.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (C) 2010 Adam Barth. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef SuffixTree_h
-#define SuffixTree_h
-
-#include "PlatformString.h"
-#include <wtf/Vector.h>
-
-namespace WebCore {
-
-// Codebook whose code word is the UChar value itself.
-class UnicodeCodebook {
-public:
-    // One slot for every value a UChar can hold.
-    enum { codeSize = 1 << (8 * sizeof(UChar)) };
-    static int codeWord(UChar c) { return c; }
-};
-
-// Codebook that keeps only the low bits of each UChar (mask: codeSize - 1).
-class ASCIICodebook {
-public:
-    enum { codeSize = 1 << (8 * sizeof(char) - 1) };
-    static int codeWord(UChar c) { return c & (codeSize - 1); }
-};
-
-// Trie of every run of up to |depth| code words found in |text|.
-// mightContain() walks the trie to reject queries whose prefix never occurs.
-template<typename Codebook>
-class SuffixTree {
-public:
-    SuffixTree(const String& text, unsigned depth)
-        : m_depth(depth)
-        , m_leaf(true)
-    {
-        build(text);
-    }
-
-    // Follows at most m_depth code words of |query| from the root.
-    // Returns false as soon as a step has no child, true otherwise.
-    bool mightContain(const String& query)
-    {
-        Node* node = &m_root;
-        int steps = std::min(m_depth, query.length());
-        int index = 0;
-        while (index < steps) {
-            node = node->at(Codebook::codeWord(query[index]));
-            if (!node)
-                return false;
-            ++index;
-        }
-        return true;
-    }
-
-private:
-    class Node {
-    public:
-        Node(bool isLeaf = false)
-            : m_isLeaf(isLeaf)
-        {
-            // Start with one null child slot per possible code word.
-            m_children.resize(Codebook::codeSize);
-            m_children.fill(0);
-        }
-
-        ~Node()
-        {
-            // Children flagged as leaves are not deleted here; only
-            // non-leaf children are owned by this node.
-            const unsigned slotCount = m_children.size();
-            for (unsigned slot = 0; slot < slotCount; ++slot) {
-                Node* child = m_children.at(slot);
-                if (!child || child->m_isLeaf)
-                    continue;
-                delete child;
-            }
-        }
-
-        Node*& at(int codeWord) { return m_children.at(codeWord); }
-
-    private:
-        typedef Vector<Node*, Codebook::codeSize> ChildrenVector;
-
-        ChildrenVector m_children;
-        bool m_isLeaf;
-    };
-
-    // Inserts, for every start offset in |text|, the run of up to m_depth
-    // code words that begins there.
-    void build(const String& text)
-    {
-        const unsigned textLength = text.length();
-        for (unsigned start = 0; start < textLength; ++start) {
-            Node* node = &m_root;
-            const unsigned end = std::min(start + m_depth, textLength);
-            for (unsigned position = start; position < end; ++position) {
-                ASSERT(node != &m_leaf);
-                Node*& slot = node->at(Codebook::codeWord(text[position]));
-                if (!slot) {
-                    // The last code word of a run points at the shared leaf.
-                    if (position + 1 == end)
-                        slot = &m_leaf;
-                    else
-                        slot = new Node();
-                }
-                node = slot;
-            }
-        }
-    }
-
-    Node m_root;
-    unsigned m_depth;
-
-    // Rather than allocating a fresh empty leaf node for every leaf in the
-    // tree (there can be a lot of these), all leaves alias this one "static"
-    // leaf node.
-    Node m_leaf;
-};
-
-} // namespace WebCore
-
-#endif // SuffixTree_h
diff --git a/WebCore/platform/text/TextBoundaries.cpp b/WebCore/platform/text/TextBoundaries.cpp
deleted file mode 100644
index fbb261b..0000000
--- a/WebCore/platform/text/TextBoundaries.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
- * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextBoundaries.h"
-
-#include "TextBreakIterator.h"
-#include <wtf/text/StringImpl.h>
-#include <wtf/unicode/Unicode.h>
-
-using namespace WTF;
-using namespace Unicode;
-
-namespace WebCore {
-
-// Scans forward and returns the start index of the first code point that does
-// not require context for word boundaries; returns |length| if none is found.
-int endOfFirstWordBoundaryContext(const UChar* characters, int length)
-{
-    int index = 0;
-    while (index < length) {
-        const int codePointStart = index;
-        UChar32 codePoint;
-        // U16_NEXT advances |index| past the code point it reads.
-        U16_NEXT(characters, index, length, codePoint);
-        if (!requiresContextForWordBoundary(codePoint))
-            return codePointStart;
-    }
-    return length;
-}
-
-// Scans backward and returns the index just past the last code point that
-// does not require context for word boundaries; returns 0 if none is found.
-int startOfLastWordBoundaryContext(const UChar* characters, int length)
-{
-    int index = length;
-    while (index > 0) {
-        const int codePointEnd = index;
-        UChar32 codePoint;
-        // U16_PREV moves |index| back to the start of the code point it reads.
-        U16_PREV(characters, 0, index, codePoint);
-        if (!requiresContextForWordBoundary(codePoint))
-            return codePointEnd;
-    }
-    return 0;
-}
-
-#if !PLATFORM(BREWMP) && !PLATFORM(MAC) && !PLATFORM(QT)
-
-// Walks word breaks from |position| in the requested direction and returns
-// the first break that is adjacent to an alphanumeric character.
-// Returns |len| (forward) or 0 (backward) when no such break exists.
-int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
-{
-    TextBreakIterator* breaker = wordBreakIterator(chars, len);
-
-    if (!forward) {
-        // Going backward: stop when the character following the break
-        // is alphanumeric.
-        for (position = textBreakPreceding(breaker, position); position != TextBreakDone; position = textBreakPreceding(breaker, position)) {
-            if (position > 0 && isAlphanumeric(chars[position]))
-                return position;
-        }
-        return 0;
-    }
-
-    // Going forward: stop when the character preceding the break
-    // is alphanumeric.
-    for (position = textBreakFollowing(breaker, position); position != TextBreakDone; position = textBreakFollowing(breaker, position)) {
-        if (position < len && isAlphanumeric(chars[position - 1]))
-            return position;
-    }
-    return len;
-}
-
-// Stores in |*end| the word break following |position| (or the last break
-// when that lookup returns a negative value) and in |*start| the break
-// immediately before it.
-void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
-{
-    TextBreakIterator* breaker = wordBreakIterator(chars, len);
-    int following = textBreakFollowing(breaker, position);
-    if (following < 0)
-        following = textBreakLast(breaker);
-    *end = following;
-    // The iterator now sits on the end break, so stepping back yields the start.
-    *start = textBreakPrevious(breaker);
-}
-
-#endif // !PLATFORM(BREWMP) && !PLATFORM(MAC) && !PLATFORM(QT)
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/TextBoundaries.h b/WebCore/platform/text/TextBoundaries.h
deleted file mode 100644
index 870ab62..0000000
--- a/WebCore/platform/text/TextBoundaries.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextBoundaries_h
-#define TextBoundaries_h
-
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
- // Forwards to WTF::Unicode::hasLineBreakingPropertyComplexContext for |ch|.
- inline bool requiresContextForWordBoundary(UChar32 ch)
- {
-     using WTF::Unicode::hasLineBreakingPropertyComplexContext;
-     return hasLineBreakingPropertyComplexContext(ch);
- }
-
- int endOfFirstWordBoundaryContext(const UChar* characters, int length);
- int startOfLastWordBoundaryContext(const UChar* characters, int length);
-
- void findWordBoundary(const UChar*, int len, int position, int* start, int* end);
- int findNextWordFromIndex(const UChar*, int len, int position, bool forward);
-
-}
-
-#endif
diff --git a/WebCore/platform/text/TextBreakIterator.h b/WebCore/platform/text/TextBreakIterator.h
deleted file mode 100644
index 17cf5f0..0000000
--- a/WebCore/platform/text/TextBreakIterator.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- * Copyright (C) 2007 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef TextBreakIterator_h
-#define TextBreakIterator_h
-
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
- class TextBreakIterator;
-
- // Note: The returned iterator is good only until you get another iterator.
-
- // Iterates over "extended grapheme clusters", as defined in UAX #29.
- // Note that platform implementations may be less sophisticated - e.g. ICU prior to
- // version 4.0 only supports "legacy grapheme clusters".
- // Use this for general text processing, e.g. string truncation.
- TextBreakIterator* characterBreakIterator(const UChar*, int length);
-
- // This is similar to character break iterator in most cases, but is subject to
- // platform UI conventions. One notable example where this can be different
- // from character break iterator is Thai prepend characters, see bug 24342.
- // Use this for insertion point and selection manipulations.
- TextBreakIterator* cursorMovementIterator(const UChar*, int length);
-
- TextBreakIterator* wordBreakIterator(const UChar*, int length);
- TextBreakIterator* lineBreakIterator(const UChar*, int length);
- TextBreakIterator* sentenceBreakIterator(const UChar*, int length);
-
- int textBreakFirst(TextBreakIterator*);
- int textBreakLast(TextBreakIterator*);
- int textBreakNext(TextBreakIterator*);
- int textBreakPrevious(TextBreakIterator*);
- int textBreakCurrent(TextBreakIterator*);
- int textBreakPreceding(TextBreakIterator*, int);
- int textBreakFollowing(TextBreakIterator*, int);
- bool isTextBreak(TextBreakIterator*, int);
-
- const int TextBreakDone = -1;
-
-}
-
-#endif
diff --git a/WebCore/platform/text/TextBreakIteratorICU.cpp b/WebCore/platform/text/TextBreakIteratorICU.cpp
deleted file mode 100644
index f5575ee..0000000
--- a/WebCore/platform/text/TextBreakIteratorICU.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- * Copyright (C) 2007 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include "TextBreakIterator.h"
-
-#include "PlatformString.h"
-#include "TextBreakIteratorInternalICU.h"
-#include <unicode/ubrk.h>
-#include <wtf/Assertions.h>
-
-namespace WebCore {
-
-// Opens the ICU break iterator of the given |type| on first use (tracked by
-// |createdIterator|), then points it at |string|.
-// Returns 0 for a null string, a null iterator, or a failed ubrk_setText.
-static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
-    UBreakIteratorType type, const UChar* string, int length)
-{
-    if (!string)
-        return 0;
-
-    if (!createdIterator) {
-        UErrorCode openStatus = U_ZERO_ERROR;
-        UBreakIterator* opened = ubrk_open(type, currentTextBreakLocaleID(), 0, 0, &openStatus);
-        iterator = reinterpret_cast<TextBreakIterator*>(opened);
-        // Set the flag even if the open failed, so it is attempted only once.
-        createdIterator = true;
-        ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
-    }
-    if (!iterator)
-        return 0;
-
-    UErrorCode textStatus = U_ZERO_ERROR;
-    ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &textStatus);
-    return U_FAILURE(textStatus) ? 0 : iterator;
-}
-
-// Returns the function-static UBRK_CHARACTER iterator, set to |string|.
-TextBreakIterator* characterBreakIterator(const UChar* string, int length)
-{
-    static TextBreakIterator* sharedIterator;
-    static bool didCreate = false;
-    return setUpIterator(didCreate, sharedIterator, UBRK_CHARACTER, string, length);
-}
-
-// Returns the function-static UBRK_WORD iterator, set to |string|.
-TextBreakIterator* wordBreakIterator(const UChar* string, int length)
-{
-    static TextBreakIterator* sharedIterator;
-    static bool didCreate = false;
-    return setUpIterator(didCreate, sharedIterator, UBRK_WORD, string, length);
-}
-
-// Returns the function-static UBRK_LINE iterator, set to |string|.
-TextBreakIterator* lineBreakIterator(const UChar* string, int length)
-{
-    static TextBreakIterator* sharedIterator;
-    static bool didCreate = false;
-    return setUpIterator(didCreate, sharedIterator, UBRK_LINE, string, length);
-}
-
-// Returns the function-static UBRK_SENTENCE iterator, set to |string|.
-TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
-{
-    static TextBreakIterator* sharedIterator;
-    static bool didCreate = false;
-    return setUpIterator(didCreate, sharedIterator, UBRK_SENTENCE, string, length);
-}
-
-// Forwards to ubrk_first on the underlying ICU iterator.
-int textBreakFirst(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_first(icuIterator);
-}
-
-// Forwards to ubrk_last on the underlying ICU iterator.
-int textBreakLast(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_last(icuIterator);
-}
-
-// Forwards to ubrk_next on the underlying ICU iterator.
-int textBreakNext(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_next(icuIterator);
-}
-
-// Forwards to ubrk_previous on the underlying ICU iterator.
-int textBreakPrevious(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_previous(icuIterator);
-}
-
-// Forwards to ubrk_preceding(|pos|) on the underlying ICU iterator.
-int textBreakPreceding(TextBreakIterator* iterator, int pos)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_preceding(icuIterator, pos);
-}
-
-// Forwards to ubrk_following(|pos|) on the underlying ICU iterator.
-int textBreakFollowing(TextBreakIterator* iterator, int pos)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_following(icuIterator, pos);
-}
-
-// Forwards to ubrk_current on the underlying ICU iterator.
-int textBreakCurrent(TextBreakIterator* iterator)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_current(icuIterator);
-}
-
-// Forwards to ubrk_isBoundary(|position|) on the underlying ICU iterator.
-bool isTextBreak(TextBreakIterator* iterator, int position)
-{
-    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
-    return ubrk_isBoundary(icuIterator, position);
-}
-
-#ifndef BUILDING_ON_TIGER
-// Like setUpIterator, but builds the iterator from the rule source in
-// |breakRules| via ubrk_openRules instead of a predefined iterator type.
-// Returns 0 for a null string, a null iterator, or a failed ubrk_setText.
-static TextBreakIterator* setUpIteratorWithRules(bool& createdIterator, TextBreakIterator*& iterator,
-    const char* breakRules, const UChar* string, int length)
-{
-    if (!string)
-        return 0;
-
-    if (!createdIterator) {
-        UParseError parseStatus;
-        UErrorCode openStatus = U_ZERO_ERROR;
-        // ubrk_openRules is handed the rules as UChars, so go through a String.
-        String rules(breakRules);
-        UBreakIterator* opened = ubrk_openRules(rules.characters(), rules.length(), 0, 0, &parseStatus, &openStatus);
-        iterator = reinterpret_cast<TextBreakIterator*>(opened);
-        // Set the flag even if the open failed, so it is attempted only once.
-        createdIterator = true;
-        ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
-    }
-    if (!iterator)
-        return 0;
-
-    UErrorCode textStatus = U_ZERO_ERROR;
-    ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &textStatus);
-    return U_FAILURE(textStatus) ? 0 : iterator;
-}
-#endif // BUILDING_ON_TIGER
-
-TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
-{
-#ifdef BUILDING_ON_TIGER
- // ICU 3.2 cannot compile the rules below, so fall back to the character break iterator.
- return characterBreakIterator(string, length);
-#else
- // This rule set is based on character-break iterator rules of ICU 4.0
- // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
- // The major differences from the original ones are listed below:
- // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
- // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
- // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
- // * Added rules that prevent a cursor from moving before Japanese half-width katakana voiced marks.
- static const char* kRules =
- "$CR = [\\p{Grapheme_Cluster_Break = CR}];"
- "$LF = [\\p{Grapheme_Cluster_Break = LF}];"
- "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
- "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks
- "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
- "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
- "$L = [\\p{Grapheme_Cluster_Break = L}];"
- "$V = [\\p{Grapheme_Cluster_Break = V}];"
- "$T = [\\p{Grapheme_Cluster_Break = T}];"
- "$LV = [\\p{Grapheme_Cluster_Break = LV}];"
- "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];"
- "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha
- "$HinV = \\u094D;" // Devanagari Sign Virama
- "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha
- "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha
- "$BenV = \\u09CD;" // Bengali Sign Virama
- "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha
- "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha
- "$PanV = \\u0A4D;" // Gurmukhi Sign Virama
- "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha
- "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha
- "$GujV = \\u0ACD;" // Gujarati Sign Virama
- "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha
- "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha
- "$OriV = \\u0B4D;" // Oriya Sign Virama
- "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha
- "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha
- "$TelV = \\u0C4D;" // Telugu Sign Virama
- "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha. NOTE(review): starts at 0C14 while every other script's "1" range starts at xx15/xx95 - confirm this is intended.
- "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha
- "$KanV = \\u0CCD;" // Kannada Sign Virama
- "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter Ka,...,Ha
- "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha
- "$MalV = \\u0D4D;" // Malayalam Sign Virama
- "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter Ka,...,Ha
- "!!chain;"
- "!!forward;"
- "$CR $LF;"
- "$L ($L | $V | $LV | $LVT);"
- "($LV | $V) ($V | $T);"
- "($LVT | $T) $T;"
- "[^$Control $CR $LF] $Extend;"
- "[^$Control $CR $LF] $SpacingMark;"
- "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward)
- "$Ben0 $BenV $Ben1;" // Bengali Virama (forward)
- "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward)
- "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward)
- "$Ori0 $OriV $Ori1;" // Oriya Virama (forward)
- "$Tel0 $TelV $Tel1;" // Telugu Virama (forward)
- "$Kan0 $KanV $Kan1;" // Kannada Virama (forward)
- "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward)
- "!!reverse;"
- "$LF $CR;"
- "($L | $V | $LV | $LVT) $L;"
- "($V | $T) ($LV | $V);"
- "$T ($LVT | $T);"
- "$Extend [^$Control $CR $LF];"
- "$SpacingMark [^$Control $CR $LF];"
- "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward)
- "$Ben1 $BenV $Ben0;" // Bengali Virama (backward)
- "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward)
- "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward)
- "$Ori1 $OriV $Ori0;" // Oriya Virama (backward)
- "$Tel1 $TelV $Tel0;" // Telugu Virama (backward)
- "$Kan1 $KanV $Kan0;" // Kannada Virama (backward)
- "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward)
- "!!safe_reverse;"
- "!!safe_forward;";
- static bool createdCursorMovementIterator = false; // Passed to setUpIteratorWithRules as its created-once flag.
- static TextBreakIterator* staticCursorMovementIterator;
- return setUpIteratorWithRules(createdCursorMovementIterator, staticCursorMovementIterator, kRules, string, length);
-#endif // BUILDING_ON_TIGER
-}
-
-}
diff --git a/WebCore/platform/text/TextBreakIteratorInternalICU.h b/WebCore/platform/text/TextBreakIteratorInternalICU.h
deleted file mode 100644
index 68b7003..0000000
--- a/WebCore/platform/text/TextBreakIteratorInternalICU.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2007 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef TextBreakIteratorInternalICU_h
-#define TextBreakIteratorInternalICU_h
-
-// FIXME: Now that this handles locales for ICU, not just for text breaking,
-// this file and the various implementation files should be renamed.
-
-namespace WebCore {
-
- const char* currentSearchLocaleID();
- const char* currentTextBreakLocaleID();
-
-}
-
-#endif
diff --git a/WebCore/platform/text/TextCodec.cpp b/WebCore/platform/text/TextCodec.cpp
deleted file mode 100644
index 4222ee1..0000000
--- a/WebCore/platform/text/TextCodec.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextCodec.h"
-
-#include "PlatformString.h"
-#include <wtf/StringExtras.h>
-
-namespace WebCore {
-
-TextCodec::~TextCodec() // Out-of-line definition of the destructor; the body is empty.
-{
-}
-
-// Writes a null-terminated stand-in for the unencodable |codePoint| into
-// |replacement| and returns its length (terminator excluded).
-// An unrecognised |handling| asserts, writes an empty string and returns 0.
-int TextCodec::getUnencodableReplacement(unsigned codePoint, UnencodableHandling handling, UnencodableReplacementArray replacement)
-{
-    if (handling == QuestionMarksForUnencodables) {
-        replacement[0] = '?';
-        replacement[1] = 0;
-        return 1;
-    }
-
-    if (handling == EntitiesForUnencodables || handling == URLEncodedEntitiesForUnencodables) {
-        // |replacement| is a pointer here, so the size comes from the array typedef.
-        const size_t capacity = sizeof(UnencodableReplacementArray);
-        if (handling == EntitiesForUnencodables)
-            snprintf(replacement, capacity, "&#%u;", codePoint);
-        else
-            snprintf(replacement, capacity, "%%26%%23%u%%3B", codePoint);
-        return static_cast<int>(strlen(replacement));
-    }
-
-    ASSERT_NOT_REACHED();
-    replacement[0] = 0;
-    return 0;
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodec.h b/WebCore/platform/text/TextCodec.h
deleted file mode 100644
index c6af38a..0000000
--- a/WebCore/platform/text/TextCodec.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodec_h
-#define TextCodec_h
-
-#include <memory>
-#include <wtf/Forward.h>
-#include <wtf/Noncopyable.h>
-#include <wtf/PassOwnPtr.h>
-#include <wtf/Vector.h>
-#include <wtf/unicode/Unicode.h>
-
-#include "PlatformString.h"
-
-namespace WebCore {
- class TextEncoding;
-
- // Specifies what will happen when a character is encountered that is
- // not encodable in the character set.
- enum UnencodableHandling {
- // Substitutes the replacement character "?".
- QuestionMarksForUnencodables,
-
- // Encodes the character as an XML entity. For example, U+06DE
- // would be "&#1758;" (0x6DE = 1758 in decimal).
- EntitiesForUnencodables,
-
- // Encodes the character as an entity as above, but with the
- // non-alphanumeric characters escaped. This is used in URLs.
- // For example, U+06DE would be "%26%231758%3B".
- URLEncodedEntitiesForUnencodables,
- };
-
- typedef char UnencodableReplacementArray[32];
-
- // Abstract codec interface: subclasses implement the pure virtual
- // decode() and encode() declared below.
- class TextCodec : public Noncopyable {
- public:
-     virtual ~TextCodec();
-
-     // Convenience overload: decodes with stopOnError set to false and
-     // discards the sawError result.
-     String decode(const char* str, size_t length, bool flush = false)
-     {
-         bool sawErrorIgnored;
-         return decode(str, length, flush, false, sawErrorIgnored);
-     }
-
-     virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) = 0;
-     virtual CString encode(const UChar*, size_t length, UnencodableHandling) = 0;
-
-     // Writes a null-terminated string standing in for the given
-     // unencodable character into the replacement buffer and returns
-     // the string's length, not counting the null.
-     static int getUnencodableReplacement(unsigned codePoint, UnencodableHandling, UnencodableReplacementArray);
- };
-
- typedef void (*EncodingNameRegistrar)(const char* alias, const char* name);
-
- typedef PassOwnPtr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData);
- typedef void (*TextCodecRegistrar)(const char* name, NewTextCodecFunction, const void* additionalData);
-
-} // namespace WebCore
-
-#endif // TextCodec_h
diff --git a/WebCore/platform/text/TextCodecICU.cpp b/WebCore/platform/text/TextCodecICU.cpp
deleted file mode 100644
index 6a579f9..0000000
--- a/WebCore/platform/text/TextCodecICU.cpp
+++ /dev/null
@@ -1,490 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextCodecICU.h"
-
-#include "CharacterNames.h"
-#include "PlatformString.h"
-#include "ThreadGlobalData.h"
-#include <unicode/ucnv.h>
-#include <unicode/ucnv_cb.h>
-#include <wtf/Assertions.h>
-#include <wtf/text/CString.h>
-#include <wtf/PassOwnPtr.h>
-#include <wtf/StringExtras.h>
-#include <wtf/Threading.h>
-
-using std::min;
-
-namespace WebCore {
-
-const size_t ConversionBufferSize = 16384;
-
-ICUConverterWrapper::~ICUConverterWrapper()
-{
- if (converter)
- ucnv_close(converter);
-}
-
-static UConverter*& cachedConverterICU()
-{
- return threadGlobalData().cachedConverterICU().converter;
-}
-
-static PassOwnPtr<TextCodec> newTextCodecICU(const TextEncoding& encoding, const void*)
-{
- return new TextCodecICU(encoding);
-}
-
-void TextCodecICU::registerBaseEncodingNames(EncodingNameRegistrar registrar)
-{
- registrar("UTF-8", "UTF-8");
-}
-
-void TextCodecICU::registerBaseCodecs(TextCodecRegistrar registrar)
-{
- registrar("UTF-8", newTextCodecICU, 0);
-}
-
-void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
-{
- // We register Hebrew with logical ordering using a separate name.
- // Otherwise, this would share the same canonical name as the
- // visual ordering case, and then TextEncoding could not tell them
- // apart; ICU treats these names as synonyms.
- registrar("ISO-8859-8-I", "ISO-8859-8-I");
-
- int32_t numEncodings = ucnv_countAvailable();
- for (int32_t i = 0; i < numEncodings; ++i) {
- const char* name = ucnv_getAvailableName(i);
- UErrorCode error = U_ZERO_ERROR;
- // Try MIME before trying IANA to pick up commonly used names like
- // 'EUC-JP' instead of horrendously long names like
- // 'Extended_UNIX_Code_Packed_Format_for_Japanese'.
- const char* standardName = ucnv_getStandardName(name, "MIME", &error);
- if (!U_SUCCESS(error) || !standardName) {
- error = U_ZERO_ERROR;
- // Try IANA to pick up 'windows-12xx' and other names
- // which are not preferred MIME names but are widely used.
- standardName = ucnv_getStandardName(name, "IANA", &error);
- if (!U_SUCCESS(error) || !standardName)
- continue;
- }
-
- // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
- // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
- // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
- if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312-80") == 0)
- standardName = "GBK";
- // Similarly, EUC-KR encodings all map to an extended version.
- else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, "EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0)
- standardName = "windows-949";
- // And so on.
- else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is returned in different case by ICU 3.2 and 3.6.
- standardName = "windows-1254";
- else if (strcmp(standardName, "TIS-620") == 0)
- standardName = "windows-874";
-
- registrar(standardName, standardName);
-
- uint16_t numAliases = ucnv_countAliases(name, &error);
- ASSERT(U_SUCCESS(error));
- if (U_SUCCESS(error))
- for (uint16_t j = 0; j < numAliases; ++j) {
- error = U_ZERO_ERROR;
- const char* alias = ucnv_getAlias(name, j, &error);
- ASSERT(U_SUCCESS(error));
- if (U_SUCCESS(error) && alias != standardName)
- registrar(alias, standardName);
- }
- }
-
- // Additional aliases.
- // These are present in modern versions of ICU, but not in ICU 3.2 (shipped with Mac OS X 10.4).
- registrar("macroman", "macintosh");
- registrar("maccyrillic", "x-mac-cyrillic");
-
- // Additional aliases that historically were present in the encoding
- // table in WebKit on Macintosh that don't seem to be present in ICU.
- // Perhaps we can prove these are not used on the web and remove them.
- // Or perhaps we can get them added to ICU.
- registrar("x-mac-roman", "macintosh");
- registrar("x-mac-ukrainian", "x-mac-cyrillic");
- registrar("cn-big5", "Big5");
- registrar("x-x-big5", "Big5");
- registrar("cn-gb", "GBK");
- registrar("csgb231280", "GBK");
- registrar("x-euc-cn", "GBK");
- registrar("x-gbk", "GBK");
- registrar("csISO88598I", "ISO-8859-8-I");
- registrar("koi", "KOI8-R");
- registrar("logical", "ISO-8859-8-I");
- registrar("unicode11utf8", "UTF-8");
- registrar("unicode20utf8", "UTF-8");
- registrar("x-unicode20utf8", "UTF-8");
- registrar("visual", "ISO-8859-8");
- registrar("winarabic", "windows-1256");
- registrar("winbaltic", "windows-1257");
- registrar("wincyrillic", "windows-1251");
- registrar("iso-8859-11", "windows-874");
- registrar("iso8859-11", "windows-874");
- registrar("dos-874", "windows-874");
- registrar("wingreek", "windows-1253");
- registrar("winhebrew", "windows-1255");
- registrar("winlatin2", "windows-1250");
- registrar("winturkish", "windows-1254");
- registrar("winvietnamese", "windows-1258");
- registrar("x-cp1250", "windows-1250");
- registrar("x-cp1251", "windows-1251");
- registrar("x-euc", "EUC-JP");
- registrar("x-windows-949", "windows-949");
- registrar("x-uhc", "windows-949");
- registrar("utf8", "UTF-8");
- registrar("shift-jis", "Shift_JIS");
-
- // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names.
- // They are not present in ICU 3.2.
- registrar("dos-720", "cp864");
- registrar("jis7", "ISO-2022-JP");
-
- // Alternative spelling of ISO encoding names.
- registrar("ISO8859-1", "ISO-8859-1");
- registrar("ISO8859-2", "ISO-8859-2");
- registrar("ISO8859-3", "ISO-8859-3");
- registrar("ISO8859-4", "ISO-8859-4");
- registrar("ISO8859-5", "ISO-8859-5");
- registrar("ISO8859-6", "ISO-8859-6");
- registrar("ISO8859-7", "ISO-8859-7");
- registrar("ISO8859-8", "ISO-8859-8");
- registrar("ISO8859-8-I", "ISO-8859-8-I");
- registrar("ISO8859-9", "ISO-8859-9");
- registrar("ISO8859-10", "ISO-8859-10");
- registrar("ISO8859-13", "ISO-8859-13");
- registrar("ISO8859-14", "ISO-8859-14");
- registrar("ISO8859-15", "ISO-8859-15");
- // Not registering ISO8859-16, because Firefox (as of version 3.6.6) doesn't know this particular alias,
- // and because older versions of ICU don't support ISO-8859-16 encoding at all.
-}
-
-void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar)
-{
- // See comment above in registerExtendedEncodingNames.
- registrar("ISO-8859-8-I", newTextCodecICU, 0);
-
- int32_t numEncodings = ucnv_countAvailable();
- for (int32_t i = 0; i < numEncodings; ++i) {
- const char* name = ucnv_getAvailableName(i);
- UErrorCode error = U_ZERO_ERROR;
- const char* standardName = ucnv_getStandardName(name, "MIME", &error);
- if (!U_SUCCESS(error) || !standardName) {
- error = U_ZERO_ERROR;
- standardName = ucnv_getStandardName(name, "IANA", &error);
- if (!U_SUCCESS(error) || !standardName)
- continue;
- }
- registrar(standardName, newTextCodecICU, 0);
- }
-}
-
-TextCodecICU::TextCodecICU(const TextEncoding& encoding)
- : m_encoding(encoding)
- , m_numBufferedBytes(0)
- , m_converterICU(0)
- , m_needsGBKFallbacks(false)
-{
-}
-
-TextCodecICU::~TextCodecICU()
-{
- releaseICUConverter();
-}
-
-void TextCodecICU::releaseICUConverter() const
-{
- if (m_converterICU) {
- UConverter*& cachedConverter = cachedConverterICU();
- if (cachedConverter)
- ucnv_close(cachedConverter);
- cachedConverter = m_converterICU;
- m_converterICU = 0;
- }
-}
-
-void TextCodecICU::createICUConverter() const
-{
- ASSERT(!m_converterICU);
-
- const char* name = m_encoding.name();
- m_needsGBKFallbacks = name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3];
-
- UErrorCode err;
-
- UConverter*& cachedConverter = cachedConverterICU();
- if (cachedConverter) {
- err = U_ZERO_ERROR;
- const char* cachedName = ucnv_getName(cachedConverter, &err);
- if (U_SUCCESS(err) && m_encoding == cachedName) {
- m_converterICU = cachedConverter;
- cachedConverter = 0;
- return;
- }
- }
-
- err = U_ZERO_ERROR;
- m_converterICU = ucnv_open(m_encoding.name(), &err);
-#if !LOG_DISABLED
- if (err == U_AMBIGUOUS_ALIAS_WARNING)
- LOG_ERROR("ICU ambiguous alias warning for encoding: %s", m_encoding.name());
-#endif
- if (m_converterICU)
- ucnv_setFallback(m_converterICU, TRUE);
-}
-
-int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err)
-{
- UChar* targetStart = target;
- err = U_ZERO_ERROR;
- ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);
- return target - targetStart;
-}
-
-class ErrorCallbackSetter {
-public:
- ErrorCallbackSetter(UConverter* converter, bool stopOnError)
- : m_converter(converter)
- , m_shouldStopOnEncodingErrors(stopOnError)
- {
- if (m_shouldStopOnEncodingErrors) {
- UErrorCode err = U_ZERO_ERROR;
- ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,
- UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,
- &m_savedContext, &err);
- ASSERT(err == U_ZERO_ERROR);
- }
- }
- ~ErrorCallbackSetter()
- {
- if (m_shouldStopOnEncodingErrors) {
- UErrorCode err = U_ZERO_ERROR;
- const void* oldContext;
- UConverterToUCallback oldAction;
- ucnv_setToUCallBack(m_converter, m_savedAction,
- m_savedContext, &oldAction,
- &oldContext, &err);
- ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE);
- ASSERT(!strcmp(static_cast<const char*>(oldContext), UCNV_SUB_STOP_ON_ILLEGAL));
- ASSERT(err == U_ZERO_ERROR);
- }
- }
-private:
- UConverter* m_converter;
- bool m_shouldStopOnEncodingErrors;
- const void* m_savedContext;
- UConverterToUCallback m_savedAction;
-};
-
-String TextCodecICU::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
-{
- // Get a converter for the passed-in encoding.
- if (!m_converterICU) {
- createICUConverter();
- ASSERT(m_converterICU);
- if (!m_converterICU) {
- LOG_ERROR("error creating ICU encoder even though encoding was in table");
- return String();
- }
- }
-
- ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError);
-
- Vector<UChar> result;
-
- UChar buffer[ConversionBufferSize];
- UChar* bufferLimit = buffer + ConversionBufferSize;
- const char* source = reinterpret_cast<const char*>(bytes);
- const char* sourceLimit = source + length;
- int32_t* offsets = NULL;
- UErrorCode err = U_ZERO_ERROR;
-
- do {
- int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, flush, err);
- result.append(buffer, ucharsDecoded);
- } while (err == U_BUFFER_OVERFLOW_ERROR);
-
- if (U_FAILURE(err)) {
- // flush the converter so it can be reused, and not be bothered by this error.
- do {
- decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, err);
- } while (source < sourceLimit);
- sawError = true;
- }
-
- String resultString = String::adopt(result);
-
- // <http://bugs.webkit.org/show_bug.cgi?id=17014>
- // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
- if (strcmp(m_encoding.name(), "GBK") == 0 || strcasecmp(m_encoding.name(), "gb18030") == 0)
- resultString.replace(0xE5E5, ideographicSpace);
-
- return resultString;
-}
-
-// We need to apply these fallbacks ourselves as they are not currently supported by ICU and
-// they were provided by the old TEC encoding path
-// Needed to fix <rdar://problem/4708689>
-static UChar getGbkEscape(UChar32 codePoint)
-{
- switch (codePoint) {
- case 0x01F9:
- return 0xE7C8;
- case 0x1E3F:
- return 0xE7C7;
- case 0x22EF:
- return 0x2026;
- case 0x301C:
- return 0xFF5E;
- default:
- return 0;
- }
-}
-
-// Invalid character handler when writing escaped entities for unrepresentable
-// characters. See the declaration of TextCodec::encode for more.
-static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
- UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
-{
- if (reason == UCNV_UNASSIGNED) {
- *err = U_ZERO_ERROR;
-
- UnencodableReplacementArray entity;
- int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncodedEntitiesForUnencodables, entity);
- ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err);
- } else
- UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
-}
-
-// Substitutes special GBK characters, escaping all other unassigned entities.
-static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
- UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
-{
- UChar outChar;
- if (reason == UCNV_UNASSIGNED && (outChar = getGbkEscape(codePoint))) {
- const UChar* source = &outChar;
- *err = U_ZERO_ERROR;
- ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
- return;
- }
- UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
-}
-
-// Combines both urlEscapedEntityCallback and GBK character substitution.
-static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
- UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
-{
- if (reason == UCNV_UNASSIGNED) {
- if (UChar outChar = getGbkEscape(codePoint)) {
- const UChar* source = &outChar;
- *err = U_ZERO_ERROR;
- ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
- return;
- }
- urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, reason, err);
- return;
- }
- UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
-}
-
-static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,
- UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
-{
- UChar outChar;
- if (reason == UCNV_UNASSIGNED && (outChar = getGbkEscape(codePoint))) {
- const UChar* source = &outChar;
- *err = U_ZERO_ERROR;
- ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
- return;
- }
- UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err);
-}
-
-CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)
-{
- if (!length)
- return "";
-
- if (!m_converterICU)
- createICUConverter();
- if (!m_converterICU)
- return CString();
-
- // FIXME: We should see if there is "force ASCII range" mode in ICU;
- // until then, we change the backslash into a yen sign.
- // Encoding will change the yen sign back into a backslash.
- String copy(characters, length);
- copy = m_encoding.displayString(copy.impl());
-
- const UChar* source = copy.characters();
- const UChar* sourceLimit = source + copy.length();
-
- UErrorCode err = U_ZERO_ERROR;
-
- switch (handling) {
- case QuestionMarksForUnencodables:
- ucnv_setSubstChars(m_converterICU, "?", 1, &err);
- ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
- break;
- case EntitiesForUnencodables:
- ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
- break;
- case URLEncodedEntitiesForUnencodables:
- ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
- break;
- }
-
- ASSERT(U_SUCCESS(err));
- if (U_FAILURE(err))
- return CString();
-
- Vector<char> result;
- size_t size = 0;
- do {
- char buffer[ConversionBufferSize];
- char* target = buffer;
- char* targetLimit = target + ConversionBufferSize;
- err = U_ZERO_ERROR;
- ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err);
- size_t count = target - buffer;
- result.grow(size + count);
- memcpy(result.data() + size, buffer, count);
- size += count;
- } while (err == U_BUFFER_OVERFLOW_ERROR);
-
- return CString(result.data(), size);
-}
-
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecICU.h b/WebCore/platform/text/TextCodecICU.h
deleted file mode 100644
index bf517f7..0000000
--- a/WebCore/platform/text/TextCodecICU.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodecICU_h
-#define TextCodecICU_h
-
-#include "TextCodec.h"
-#include "TextEncoding.h"
-
-#include <unicode/utypes.h>
-
-typedef struct UConverter UConverter;
-
-namespace WebCore {
-
- class TextCodecICU : public TextCodec {
- public:
- static void registerBaseEncodingNames(EncodingNameRegistrar);
- static void registerBaseCodecs(TextCodecRegistrar);
-
- static void registerExtendedEncodingNames(EncodingNameRegistrar);
- static void registerExtendedCodecs(TextCodecRegistrar);
-
- TextCodecICU(const TextEncoding&);
- virtual ~TextCodecICU();
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
-
- private:
- void createICUConverter() const;
- void releaseICUConverter() const;
- bool needsGBKFallbacks() const { return m_needsGBKFallbacks; }
- void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; }
-
- int decodeToBuffer(UChar* buffer, UChar* bufferLimit, const char*& source,
- const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err);
-
- TextEncoding m_encoding;
- unsigned m_numBufferedBytes;
- unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
- mutable UConverter* m_converterICU;
- mutable bool m_needsGBKFallbacks;
- };
-
- struct ICUConverterWrapper {
- ICUConverterWrapper()
- : converter(0)
- {
- }
- ~ICUConverterWrapper();
-
- UConverter* converter;
- };
-
-} // namespace WebCore
-
-#endif // TextCodecICU_h
diff --git a/WebCore/platform/text/TextCodecLatin1.cpp b/WebCore/platform/text/TextCodecLatin1.cpp
deleted file mode 100644
index 2a217c5..0000000
--- a/WebCore/platform/text/TextCodecLatin1.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextCodecLatin1.h"
-
-#include "PlatformString.h"
-#include <stdio.h>
-#include <wtf/text/CString.h>
-#include <wtf/text/StringBuffer.h>
-#include <wtf/PassOwnPtr.h>
-
-namespace WebCore {
-
-static const UChar table[256] = {
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00-07
- 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08-0F
- 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, // 10-17
- 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, // 18-1F
- 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, // 20-27
- 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, // 28-2F
- 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 30-37
- 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, // 38-3F
- 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, // 40-47
- 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, // 48-4F
- 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // 50-57
- 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, // 58-5F
- 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, // 60-67
- 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, // 68-6F
- 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, // 70-77
- 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, // 78-7F
- 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
- 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
- 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
- 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F
- 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, // A0-A7
- 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, // A8-AF
- 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, // B0-B7
- 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, // B8-BF
- 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, // C0-C7
- 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, // C8-CF
- 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, // D0-D7
- 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, // D8-DF
- 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, // E0-E7
- 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, // E8-EF
- 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, // F0-F7
- 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF // F8-FF
-};
-
-void TextCodecLatin1::registerEncodingNames(EncodingNameRegistrar registrar)
-{
- registrar("windows-1252", "windows-1252");
- registrar("ISO-8859-1", "ISO-8859-1");
- registrar("US-ASCII", "US-ASCII");
-
- registrar("WinLatin1", "windows-1252");
- registrar("ibm-1252", "windows-1252");
- registrar("ibm-1252_P100-2000", "windows-1252");
-
- registrar("CP819", "ISO-8859-1");
- registrar("IBM819", "ISO-8859-1");
- registrar("csISOLatin1", "ISO-8859-1");
- registrar("iso-ir-100", "ISO-8859-1");
- registrar("iso_8859-1:1987", "ISO-8859-1");
- registrar("l1", "ISO-8859-1");
- registrar("latin1", "ISO-8859-1");
-
- registrar("ANSI_X3.4-1968", "US-ASCII");
- registrar("ANSI_X3.4-1986", "US-ASCII");
- registrar("ASCII", "US-ASCII");
- registrar("IBM367", "US-ASCII");
- registrar("ISO646-US", "US-ASCII");
- registrar("ISO_646.irv:1991", "US-ASCII");
- registrar("cp367", "US-ASCII");
- registrar("csASCII", "US-ASCII");
- registrar("ibm-367_P100-1995", "US-ASCII");
- registrar("iso-ir-6", "US-ASCII");
- registrar("iso-ir-6-us", "US-ASCII");
- registrar("us", "US-ASCII");
- registrar("x-ansi", "US-ASCII");
-}
-
-static PassOwnPtr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*)
-{
- return new TextCodecLatin1;
-}
-
-void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar)
-{
- registrar("windows-1252", newStreamingTextDecoderWindowsLatin1, 0);
-
- // ASCII and Latin-1 both decode as Windows Latin-1 although they retain unique identities.
- registrar("ISO-8859-1", newStreamingTextDecoderWindowsLatin1, 0);
- registrar("US-ASCII", newStreamingTextDecoderWindowsLatin1, 0);
-}
-
-template<size_t size> struct NonASCIIMask;
-template<> struct NonASCIIMask<4> {
- static unsigned value() { return 0x80808080U; }
-};
-template<> struct NonASCIIMask<8> {
- static unsigned long long value() { return 0x8080808080808080ULL; }
-};
-
-template<size_t size> struct UCharByteFiller;
-template<> struct UCharByteFiller<4> {
- static void copy(UChar* dest, const unsigned char* src)
- {
- dest[0] = src[0];
- dest[1] = src[1];
- dest[2] = src[2];
- dest[3] = src[3];
- }
-};
-template<> struct UCharByteFiller<8> {
- static void copy(UChar* dest, const unsigned char* src)
- {
- dest[0] = src[0];
- dest[1] = src[1];
- dest[2] = src[2];
- dest[3] = src[3];
- dest[4] = src[4];
- dest[5] = src[5];
- dest[6] = src[6];
- dest[7] = src[7];
- }
-};
-
-String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
-{
- UChar* characters;
- String result = String::createUninitialized(length, characters);
-
- const unsigned char* src = reinterpret_cast<const unsigned char*>(bytes);
- const unsigned char* end = reinterpret_cast<const unsigned char*>(bytes + length);
- const unsigned char* alignedEnd = reinterpret_cast<const unsigned char*>(reinterpret_cast<ptrdiff_t>(end) & ~(sizeof(uintptr_t) - 1));
- UChar* dest = characters;
-
- while (src < end) {
- if (*src < 0x80) {
- // Fast path for values < 0x80 (most Latin-1 text will be ASCII)
- // Wait until we're at a properly aligned address, then read full CPU words.
- if (!(reinterpret_cast<ptrdiff_t>(src) & (sizeof(uintptr_t) - 1))) {
- while (src < alignedEnd) {
- uintptr_t chunk = *reinterpret_cast_ptr<const uintptr_t*>(src);
-
- if (chunk & NonASCIIMask<sizeof(uintptr_t)>::value())
- goto useLookupTable;
-
- UCharByteFiller<sizeof(uintptr_t)>::copy(dest, src);
-
- src += sizeof(uintptr_t);
- dest += sizeof(uintptr_t);
- }
-
- if (src == end)
- break;
- }
- *dest = *src;
- } else {
-useLookupTable:
- *dest = table[*src];
- }
-
- ++src;
- ++dest;
- }
-
- return result;
-}
-
-static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, UnencodableHandling handling)
-{
- Vector<char> result(length);
- char* bytes = result.data();
-
- size_t resultLength = 0;
- for (size_t i = 0; i < length; ) {
- UChar32 c;
- U16_NEXT(characters, i, length, c);
- unsigned char b = c;
- // Do an efficient check to detect characters other than 00-7F and A0-FF.
- if (b != c || (c & 0xE0) == 0x80) {
- // Look for a way to encode this with Windows Latin-1.
- for (b = 0x80; b < 0xA0; ++b)
- if (table[b] == c)
- goto gotByte;
- // No way to encode this character with Windows Latin-1.
- UnencodableReplacementArray replacement;
- int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement);
- result.grow(resultLength + replacementLength + length - i);
- bytes = result.data();
- memcpy(bytes + resultLength, replacement, replacementLength);
- resultLength += replacementLength;
- continue;
- }
- gotByte:
- bytes[resultLength++] = b;
- }
-
- return CString(bytes, resultLength);
-}
-
-CString TextCodecLatin1::encode(const UChar* characters, size_t length, UnencodableHandling handling)
-{
- {
- char* bytes;
- CString string = CString::newUninitialized(length, bytes);
-
- // Convert the string a fast way and simultaneously do an efficient check to see if it's all ASCII.
- UChar ored = 0;
- for (size_t i = 0; i < length; ++i) {
- UChar c = characters[i];
- bytes[i] = c;
- ored |= c;
- }
-
- if (!(ored & 0xFF80))
- return string;
- }
-
- // If it wasn't all ASCII, call the function that handles more-complex cases.
- return encodeComplexWindowsLatin1(characters, length, handling);
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecLatin1.h b/WebCore/platform/text/TextCodecLatin1.h
deleted file mode 100644
index f035d01..0000000
--- a/WebCore/platform/text/TextCodecLatin1.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodecLatin1_h
-#define TextCodecLatin1_h
-
-#include "TextCodec.h"
-
-namespace WebCore {
-
- class TextCodecLatin1 : public TextCodec {
- public:
- static void registerEncodingNames(EncodingNameRegistrar);
- static void registerCodecs(TextCodecRegistrar);
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
- };
-
-} // namespace WebCore
-
-#endif // TextCodecLatin1_h
diff --git a/WebCore/platform/text/TextCodecUTF16.cpp b/WebCore/platform/text/TextCodecUTF16.cpp
deleted file mode 100644
index e88e83b..0000000
--- a/WebCore/platform/text/TextCodecUTF16.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextCodecUTF16.h"
-
-#include "PlatformString.h"
-#include <wtf/text/CString.h>
-#include <wtf/text/StringBuffer.h>
-#include <wtf/PassOwnPtr.h>
-
-using namespace std;
-
-namespace WebCore {
-
-void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
-{
- registrar("UTF-16LE", "UTF-16LE");
- registrar("UTF-16BE", "UTF-16BE");
-
- registrar("ISO-10646-UCS-2", "UTF-16LE");
- registrar("UCS-2", "UTF-16LE");
- registrar("UTF-16", "UTF-16LE");
- registrar("Unicode", "UTF-16LE");
- registrar("csUnicode", "UTF-16LE");
- registrar("unicodeFEFF", "UTF-16LE");
-
- registrar("unicodeFFFE", "UTF-16BE");
-}
-
-static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*)
-{
- return new TextCodecUTF16(true);
-}
-
-static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*)
-{
- return new TextCodecUTF16(false);
-}
-
-void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar)
-{
- registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0);
- registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0);
-}
-
-String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool, bool&)
-{
- if (!length)
- return String();
-
- const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes);
- size_t numBytes = length + m_haveBufferedByte;
- size_t numChars = numBytes / 2;
-
- StringBuffer buffer(numChars);
- UChar* q = buffer.characters();
-
- if (m_haveBufferedByte) {
- UChar c;
- if (m_littleEndian)
- c = m_bufferedByte | (p[0] << 8);
- else
- c = (m_bufferedByte << 8) | p[0];
- *q++ = c;
- m_haveBufferedByte = false;
- p += 1;
- numChars -= 1;
- }
-
- if (m_littleEndian) {
- for (size_t i = 0; i < numChars; ++i) {
- UChar c = p[0] | (p[1] << 8);
- p += 2;
- *q++ = c;
- }
- } else {
- for (size_t i = 0; i < numChars; ++i) {
- UChar c = (p[0] << 8) | p[1];
- p += 2;
- *q++ = c;
- }
- }
-
- if (numBytes & 1) {
- ASSERT(!m_haveBufferedByte);
- m_haveBufferedByte = true;
- m_bufferedByte = p[0];
- }
-
- buffer.shrink(q - buffer.characters());
-
- return String::adopt(buffer);
-}
-
-CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling)
-{
- // We need to be sure we can double the length without overflowing.
- // Since the passed-in length is the length of an actual existing
- // character buffer, each character is two bytes, and we know
- // the buffer doesn't occupy the entire address space, we can
- // assert here that doubling the length does not overflow size_t
- // and there's no need for a runtime check.
- ASSERT(length <= numeric_limits<size_t>::max() / 2);
-
- char* bytes;
- CString string = CString::newUninitialized(length * 2, bytes);
-
- // FIXME: CString is not a reasonable data structure for encoded UTF-16, which will have
- // null characters inside it. Perhaps the result of encode should not be a CString.
- if (m_littleEndian) {
- for (size_t i = 0; i < length; ++i) {
- UChar c = characters[i];
- bytes[i * 2] = c;
- bytes[i * 2 + 1] = c >> 8;
- }
- } else {
- for (size_t i = 0; i < length; ++i) {
- UChar c = characters[i];
- bytes[i * 2] = c >> 8;
- bytes[i * 2 + 1] = c;
- }
- }
-
- return string;
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecUTF16.h b/WebCore/platform/text/TextCodecUTF16.h
deleted file mode 100644
index 8ce9476..0000000
--- a/WebCore/platform/text/TextCodecUTF16.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodecUTF16_h
-#define TextCodecUTF16_h
-
-#include "TextCodec.h"
-
-namespace WebCore {
-
- class TextCodecUTF16 : public TextCodec {
- public:
- static void registerEncodingNames(EncodingNameRegistrar);
- static void registerCodecs(TextCodecRegistrar);
-
- TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { }
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
-
- private:
- bool m_littleEndian;
- bool m_haveBufferedByte;
- unsigned char m_bufferedByte;
- };
-
-} // namespace WebCore
-
-#endif // TextCodecUTF16_h
diff --git a/WebCore/platform/text/TextCodecUserDefined.cpp b/WebCore/platform/text/TextCodecUserDefined.cpp
deleted file mode 100644
index 70d8673..0000000
--- a/WebCore/platform/text/TextCodecUserDefined.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2007, 2008 Apple, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextCodecUserDefined.h"
-
-#include "PlatformString.h"
-#include <stdio.h>
-#include <wtf/text/CString.h>
-#include <wtf/text/StringBuffer.h>
-#include <wtf/PassOwnPtr.h>
-
-namespace WebCore {
-
-void TextCodecUserDefined::registerEncodingNames(EncodingNameRegistrar registrar)
-{
- registrar("x-user-defined", "x-user-defined");
-}
-
-static PassOwnPtr<TextCodec> newStreamingTextDecoderUserDefined(const TextEncoding&, const void*)
-{
- return new TextCodecUserDefined;
-}
-
-void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar)
-{
- registrar("x-user-defined", newStreamingTextDecoderUserDefined, 0);
-}
-
-String TextCodecUserDefined::decode(const char* bytes, size_t length, bool, bool, bool&)
-{
- UChar* buffer;
- String result = String::createUninitialized(length, buffer);
-
- for (size_t i = 0; i < length; ++i) {
- signed char c = bytes[i];
- buffer[i] = c & 0xF7FF;
- }
-
- return result;
-}
-
-static CString encodeComplexUserDefined(const UChar* characters, size_t length, UnencodableHandling handling)
-{
- Vector<char> result(length);
- char* bytes = result.data();
-
- size_t resultLength = 0;
- for (size_t i = 0; i < length; ) {
- UChar32 c;
- U16_NEXT(characters, i, length, c);
- signed char signedByte = c;
- if ((signedByte & 0xF7FF) == c)
- bytes[resultLength++] = signedByte;
- else {
- // No way to encode this character with x-user-defined.
- UnencodableReplacementArray replacement;
- int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement);
- result.grow(resultLength + replacementLength + length - i);
- bytes = result.data();
- memcpy(bytes + resultLength, replacement, replacementLength);
- resultLength += replacementLength;
- }
- }
-
- return CString(bytes, resultLength);
-}
-
-CString TextCodecUserDefined::encode(const UChar* characters, size_t length, UnencodableHandling handling)
-{
- char* bytes;
- CString string = CString::newUninitialized(length, bytes);
-
- // Convert the string a fast way and simultaneously do an efficient check to see if it's all ASCII.
- UChar ored = 0;
- for (size_t i = 0; i < length; ++i) {
- UChar c = characters[i];
- bytes[i] = c;
- ored |= c;
- }
-
- if (!(ored & 0xFF80))
- return string;
-
- // If it wasn't all ASCII, call the function that handles more-complex cases.
- return encodeComplexUserDefined(characters, length, handling);
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/TextCodecUserDefined.h b/WebCore/platform/text/TextCodecUserDefined.h
deleted file mode 100644
index d1b3160..0000000
--- a/WebCore/platform/text/TextCodecUserDefined.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2007 Apple, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodecUserDefined_h
-#define TextCodecUserDefined_h
-
-#include "TextCodec.h"
-
-namespace WebCore {
-
- class TextCodecUserDefined : public TextCodec {
- public:
- static void registerEncodingNames(EncodingNameRegistrar);
- static void registerCodecs(TextCodecRegistrar);
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
- };
-
-} // namespace WebCore
-
-#endif // TextCodecUserDefined_h
diff --git a/WebCore/platform/text/TextDirection.h b/WebCore/platform/text/TextDirection.h
deleted file mode 100644
index 5be416e..0000000
--- a/WebCore/platform/text/TextDirection.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextDirection_h
-#define TextDirection_h
-
-namespace WebCore {
-
- enum TextDirection { RTL, LTR };
-
-}
-
-#endif
diff --git a/WebCore/platform/text/TextEncoding.cpp b/WebCore/platform/text/TextEncoding.cpp
deleted file mode 100644
index 33313a0..0000000
--- a/WebCore/platform/text/TextEncoding.cpp
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- * Copyright (C) 2007-2009 Torch Mobile, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextEncoding.h"
-
-#include "PlatformString.h"
-#include "TextCodec.h"
-#include "TextEncodingRegistry.h"
-#if USE(ICU_UNICODE)
-#include <unicode/unorm.h>
-#elif USE(QT4_UNICODE)
-#include <QString>
-#elif USE(GLIB_UNICODE)
-#include <glib.h>
-#include "GOwnPtr.h"
-#endif
-#include <wtf/text/CString.h>
-#include <wtf/OwnPtr.h>
-#include <wtf/StdLibExtras.h>
-
-namespace WebCore {
-
-static const TextEncoding& UTF7Encoding()
-{
- static TextEncoding globalUTF7Encoding("UTF-7");
- return globalUTF7Encoding;
-}
-
-TextEncoding::TextEncoding(const char* name)
- : m_name(atomicCanonicalTextEncodingName(name))
- , m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
-{
-}
-
-TextEncoding::TextEncoding(const String& name)
- : m_name(atomicCanonicalTextEncodingName(name.characters(), name.length()))
- , m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
-{
-}
-
-String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const
-{
- if (!m_name)
- return String();
-
- return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError);
-}
-
-CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const
-{
- if (!m_name)
- return CString();
-
- if (!length)
- return "";
-
-#if USE(ICU_UNICODE)
- // FIXME: What's the right place to do normalization?
- // It's a little strange to do it inside the encode function.
- // Perhaps normalization should be an explicit step done before calling encode.
-
- const UChar* source = characters;
- size_t sourceLength = length;
-
- Vector<UChar> normalizedCharacters;
-
- UErrorCode err = U_ZERO_ERROR;
- if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) {
- // First try using the length of the original string, since normalization to NFC rarely increases length.
- normalizedCharacters.grow(sourceLength);
- int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);
- if (err == U_BUFFER_OVERFLOW_ERROR) {
- err = U_ZERO_ERROR;
- normalizedCharacters.resize(normalizedLength);
- normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
- }
- ASSERT(U_SUCCESS(err));
-
- source = normalizedCharacters.data();
- sourceLength = normalizedLength;
- }
- return newTextCodec(*this)->encode(source, sourceLength, handling);
-#elif USE(QT4_UNICODE)
- QString str(reinterpret_cast<const QChar*>(characters), length);
- str = str.normalized(QString::NormalizationForm_C);
- return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), handling);
-#elif USE(GLIB_UNICODE)
- GOwnPtr<char> UTF8Source;
- UTF8Source.set(g_utf16_to_utf8(characters, length, 0, 0, 0));
- if (!UTF8Source) {
- // If conversion to UTF-8 failed, try with the string without normalization
- return newTextCodec(*this)->encode(characters, length, handling);
- }
-
- GOwnPtr<char> UTF8Normalized;
- UTF8Normalized.set(g_utf8_normalize(UTF8Source.get(), -1, G_NORMALIZE_NFC));
-
- long UTF16Length;
- GOwnPtr<UChar> UTF16Normalized;
- UTF16Normalized.set(g_utf8_to_utf16(UTF8Normalized.get(), -1, 0, &UTF16Length, 0));
-
- return newTextCodec(*this)->encode(UTF16Normalized.get(), UTF16Length, handling);
-#elif OS(WINCE)
- // normalization will be done by Windows CE API
- OwnPtr<TextCodec> textCodec = newTextCodec(*this);
- return textCodec.get() ? textCodec->encode(characters, length, handling) : CString();
-#elif USE(BREWMP_UNICODE)
- // FIXME: not sure if Brew MP normalizes the input string automatically
- OwnPtr<TextCodec> textCodec = newTextCodec(*this);
- return textCodec.get() ? textCodec->encode(characters, length, handling) : CString();
-#endif
-}
-
-const char* TextEncoding::domName() const
-{
- if (noExtendedTextEncodingNameUsed())
- return m_name;
-
- // We treat EUC-KR as windows-949 (its superset), but need to expose
- // the name 'EUC-KR' because the name 'windows-949' is not recognized by
- // most Korean web servers even though they do use the encoding
- // 'windows-949' with the name 'EUC-KR'.
- // FIXME: This is not thread-safe. At the moment, this function is
- // only accessed in a single thread, but eventually has to be made
- // thread-safe along with usesVisualOrdering().
- static const char* const a = atomicCanonicalTextEncodingName("windows-949");
- if (m_name == a)
- return "EUC-KR";
- return m_name;
-}
-
-bool TextEncoding::usesVisualOrdering() const
-{
- if (noExtendedTextEncodingNameUsed())
- return false;
-
- static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8");
- return m_name == a;
-}
-
-bool TextEncoding::isJapanese() const
-{
- return isJapaneseEncoding(m_name);
-}
-
-UChar TextEncoding::backslashAsCurrencySymbol() const
-{
- return shouldShowBackslashAsCurrencySymbolIn(m_name) ? 0x00A5 : '\\';
-}
-
-bool TextEncoding::isNonByteBasedEncoding() const
-{
- if (noExtendedTextEncodingNameUsed()) {
- return *this == UTF16LittleEndianEncoding()
- || *this == UTF16BigEndianEncoding();
- }
-
- return *this == UTF16LittleEndianEncoding()
- || *this == UTF16BigEndianEncoding()
- || *this == UTF32BigEndianEncoding()
- || *this == UTF32LittleEndianEncoding();
-}
-
-bool TextEncoding::isUTF7Encoding() const
-{
- if (noExtendedTextEncodingNameUsed())
- return false;
-
- return *this == UTF7Encoding();
-}
-
-const TextEncoding& TextEncoding::closestByteBasedEquivalent() const
-{
- if (isNonByteBasedEncoding())
- return UTF8Encoding();
- return *this;
-}
-
-// HTML5 specifies that UTF-8 be used in form submission when a form is
-// is a part of a document in UTF-16 probably because UTF-16 is not a
-// byte-based encoding and can contain 0x00. By extension, the same
-// should be done for UTF-32. In case of UTF-7, it is a byte-based encoding,
-// but it's fraught with problems and we'd rather steer clear of it.
-const TextEncoding& TextEncoding::encodingForFormSubmission() const
-{
- if (isNonByteBasedEncoding() || isUTF7Encoding())
- return UTF8Encoding();
- return *this;
-}
-
-const TextEncoding& ASCIIEncoding()
-{
- static TextEncoding globalASCIIEncoding("ASCII");
- return globalASCIIEncoding;
-}
-
-const TextEncoding& Latin1Encoding()
-{
- static TextEncoding globalLatin1Encoding("latin1");
- return globalLatin1Encoding;
-}
-
-const TextEncoding& UTF16BigEndianEncoding()
-{
- static TextEncoding globalUTF16BigEndianEncoding("UTF-16BE");
- return globalUTF16BigEndianEncoding;
-}
-
-const TextEncoding& UTF16LittleEndianEncoding()
-{
- static TextEncoding globalUTF16LittleEndianEncoding("UTF-16LE");
- return globalUTF16LittleEndianEncoding;
-}
-
-const TextEncoding& UTF32BigEndianEncoding()
-{
- static TextEncoding globalUTF32BigEndianEncoding("UTF-32BE");
- return globalUTF32BigEndianEncoding;
-}
-
-const TextEncoding& UTF32LittleEndianEncoding()
-{
- static TextEncoding globalUTF32LittleEndianEncoding("UTF-32LE");
- return globalUTF32LittleEndianEncoding;
-}
-
-const TextEncoding& UTF8Encoding()
-{
- static TextEncoding globalUTF8Encoding("UTF-8");
- ASSERT(globalUTF8Encoding.isValid());
- return globalUTF8Encoding;
-}
-
-const TextEncoding& WindowsLatin1Encoding()
-{
- static TextEncoding globalWindowsLatin1Encoding("WinLatin-1");
- return globalWindowsLatin1Encoding;
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/TextEncoding.h b/WebCore/platform/text/TextEncoding.h
deleted file mode 100644
index 675625b..0000000
--- a/WebCore/platform/text/TextEncoding.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextEncoding_h
-#define TextEncoding_h
-
-#include "TextCodec.h"
-#include <wtf/Forward.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
- class TextEncoding {
- public:
- TextEncoding() : m_name(0) { }
- TextEncoding(const char* name);
- TextEncoding(const String& name);
-
- bool isValid() const { return m_name; }
- const char* name() const { return m_name; }
- const char* domName() const; // name exposed via DOM
- bool usesVisualOrdering() const;
- bool isJapanese() const;
-
- PassRefPtr<StringImpl> displayString(PassRefPtr<StringImpl> str) const
- {
- if (m_backslashAsCurrencySymbol == '\\' || !str)
- return str;
- return str->replace('\\', m_backslashAsCurrencySymbol);
- }
- void displayBuffer(UChar* characters, unsigned len) const
- {
- if (m_backslashAsCurrencySymbol == '\\')
- return;
- for (unsigned i = 0; i < len; ++i) {
- if (characters[i] == '\\')
- characters[i] = m_backslashAsCurrencySymbol;
- }
- }
-
- const TextEncoding& closestByteBasedEquivalent() const;
- const TextEncoding& encodingForFormSubmission() const;
-
- String decode(const char* str, size_t length) const
- {
- bool ignored;
- return decode(str, length, false, ignored);
- }
- String decode(const char*, size_t length, bool stopOnError, bool& sawError) const;
- CString encode(const UChar*, size_t length, UnencodableHandling) const;
-
- UChar backslashAsCurrencySymbol() const;
-
- private:
- bool isNonByteBasedEncoding() const;
- bool isUTF7Encoding() const;
-
- const char* m_name;
- UChar m_backslashAsCurrencySymbol;
- };
-
- inline bool operator==(const TextEncoding& a, const TextEncoding& b) { return a.name() == b.name(); }
- inline bool operator!=(const TextEncoding& a, const TextEncoding& b) { return a.name() != b.name(); }
-
- const TextEncoding& ASCIIEncoding();
- const TextEncoding& Latin1Encoding();
- const TextEncoding& UTF16BigEndianEncoding();
- const TextEncoding& UTF16LittleEndianEncoding();
- const TextEncoding& UTF32BigEndianEncoding();
- const TextEncoding& UTF32LittleEndianEncoding();
- const TextEncoding& UTF8Encoding();
- const TextEncoding& WindowsLatin1Encoding();
-
-} // namespace WebCore
-
-#endif // TextEncoding_h
diff --git a/WebCore/platform/text/TextEncodingDetector.h b/WebCore/platform/text/TextEncodingDetector.h
deleted file mode 100644
index 9f16ab0..0000000
--- a/WebCore/platform/text/TextEncodingDetector.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2009 Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextEncodingDetector_h
-#define TextEncodingDetector_h
-
-namespace WebCore {
-
- class TextEncoding;
-
- // Given a sequence of bytes in |data| of length |len| and an optional
- // hintEncodingName, detect the most likely character encoding.
- // The way hintEncodingName is used is up to an implementation.
- // Currently, the only caller sets it to the parent frame encoding.
- bool detectTextEncoding(const char* data, size_t len,
- const char* hintEncodingName,
- TextEncoding* detectedEncoding);
-
-} // namespace WebCore
-
-#endif
diff --git a/WebCore/platform/text/TextEncodingDetectorICU.cpp b/WebCore/platform/text/TextEncodingDetectorICU.cpp
deleted file mode 100644
index c0d11de..0000000
--- a/WebCore/platform/text/TextEncodingDetectorICU.cpp
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (C) 2008, 2009 Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextEncodingDetector.h"
-
-#include "TextEncoding.h"
-#include <wtf/UnusedParam.h>
-
-#ifndef BUILDING_ON_TIGER
-#include "unicode/ucnv.h"
-#include "unicode/ucsdet.h"
-#endif
-
-namespace WebCore {
-
-bool detectTextEncoding(const char* data, size_t len,
- const char* hintEncodingName,
- TextEncoding* detectedEncoding)
-{
- *detectedEncoding = TextEncoding();
-#ifdef BUILDING_ON_TIGER
- // Tiger came with ICU 3.2 and does not have the encoding detector.
- UNUSED_PARAM(data);
- UNUSED_PARAM(len);
- UNUSED_PARAM(hintEncodingName);
- return false;
-#else
- int matchesCount = 0;
- UErrorCode status = U_ZERO_ERROR;
- UCharsetDetector* detector = ucsdet_open(&status);
- if (U_FAILURE(status))
- return false;
- ucsdet_enableInputFilter(detector, true);
- ucsdet_setText(detector, data, static_cast<int32_t>(len), &status);
- if (U_FAILURE(status))
- return false;
-
- // FIXME: A few things we can do other than improving
- // the ICU detector itself.
- // 1. Use ucsdet_detectAll and pick the most likely one given
- // "the context" (parent-encoding, referrer encoding, etc).
- // 2. 'Emulate' Firefox/IE's non-Universal detectors (e.g.
- // Chinese, Japanese, Russian, Korean and Hebrew) by picking the
- // encoding with a highest confidence among the detector-specific
- // limited set of candidate encodings.
- // Below is a partial implementation of the first part of what's outlined
- // above.
- const UCharsetMatch** matches = ucsdet_detectAll(detector, &matchesCount, &status);
- if (U_FAILURE(status)) {
- ucsdet_close(detector);
- return false;
- }
-
- const char* encoding = 0;
- if (hintEncodingName) {
- TextEncoding hintEncoding(hintEncodingName);
- // 10 is the minimum confidence value consistent with the codepoint
- // allocation in a given encoding. The size of a chunk passed to
- // us varies even for the same html file (apparently depending on
- // the network load). When we're given a rather short chunk, we
- // don't have a sufficiently reliable signal other than the fact that
- // the chunk is consistent with a set of encodings. So, instead of
- // setting an arbitrary threshold, we have to scan all the encodings
- // consistent with the data.
- const int32_t kThresold = 10;
- for (int i = 0; i < matchesCount; ++i) {
- int32_t confidence = ucsdet_getConfidence(matches[i], &status);
- if (U_FAILURE(status)) {
- status = U_ZERO_ERROR;
- continue;
- }
- if (confidence < kThresold)
- break;
- const char* matchEncoding = ucsdet_getName(matches[i], &status);
- if (U_FAILURE(status)) {
- status = U_ZERO_ERROR;
- continue;
- }
- if (TextEncoding(matchEncoding) == hintEncoding) {
- encoding = hintEncodingName;
- break;
- }
- }
- }
- // If no match is found so far, just pick the top match.
- // This can happen, say, when a parent frame in EUC-JP refers to
- // a child frame in Shift_JIS and both frames do NOT specify the encoding
- // making us resort to auto-detection (when it IS turned on).
- if (!encoding && matchesCount > 0)
- encoding = ucsdet_getName(matches[0], &status);
- if (U_SUCCESS(status)) {
- *detectedEncoding = TextEncoding(encoding);
- ucsdet_close(detector);
- return true;
- }
- ucsdet_close(detector);
- return false;
-#endif
-}
-
-}
diff --git a/WebCore/platform/text/TextEncodingDetectorNone.cpp b/WebCore/platform/text/TextEncodingDetectorNone.cpp
deleted file mode 100644
index 3b62bc5..0000000
--- a/WebCore/platform/text/TextEncodingDetectorNone.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2009 Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextEncodingDetector.h"
-
-#include "TextEncoding.h"
-
-namespace WebCore {
-
-bool detectTextEncoding(const char*, size_t, const char*, TextEncoding* detectedEncoding)
-{
- *detectedEncoding = TextEncoding();
- return false;
-}
-
-}
diff --git a/WebCore/platform/text/TextEncodingRegistry.cpp b/WebCore/platform/text/TextEncodingRegistry.cpp
deleted file mode 100644
index c0c0255..0000000
--- a/WebCore/platform/text/TextEncodingRegistry.cpp
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
- * Copyright (C) 2007-2009 Torch Mobile, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextEncodingRegistry.h"
-
-#include "PlatformString.h"
-#include "TextCodecLatin1.h"
-#include "TextCodecUserDefined.h"
-#include "TextCodecUTF16.h"
-#include "TextEncoding.h"
-#include <wtf/ASCIICType.h>
-#include <wtf/Assertions.h>
-#include <wtf/HashFunctions.h>
-#include <wtf/HashMap.h>
-#include <wtf/HashSet.h>
-#include <wtf/StdLibExtras.h>
-#include <wtf/StringExtras.h>
-#include <wtf/Threading.h>
-
-#if USE(ICU_UNICODE)
-#include "TextCodecICU.h"
-#endif
-#if PLATFORM(MAC)
-#include "TextCodecMac.h"
-#endif
-#if PLATFORM(QT)
-#include "qt/TextCodecQt.h"
-#endif
-#if USE(GLIB_UNICODE)
-#include "gtk/TextCodecGtk.h"
-#endif
-#if USE(BREWMP_UNICODE)
-#include "brew/TextCodecBrew.h"
-#endif
-#if OS(WINCE) && !PLATFORM(QT)
-#include "TextCodecWinCE.h"
-#endif
-
-using namespace WTF;
-
-namespace WebCore {
-
-const size_t maxEncodingNameLength = 63;
-
-// Hash for all-ASCII strings that does case folding.
-struct TextEncodingNameHash {
-
- static bool equal(const char* s1, const char* s2)
- {
- char c1;
- char c2;
- do {
- c1 = *s1++;
- c2 = *s2++;
- if (toASCIILower(c1) != toASCIILower(c2))
- return false;
- } while (c1 && c2);
- return !c1 && !c2;
- }
-
- // This algorithm is the one-at-a-time hash from:
- // http://burtleburtle.net/bob/hash/hashfaq.html
- // http://burtleburtle.net/bob/hash/doobs.html
- static unsigned hash(const char* s)
- {
- unsigned h = WTF::stringHashingStartValue;
- for (;;) {
- char c = *s++;
- if (!c) {
- h += (h << 3);
- h ^= (h >> 11);
- h += (h << 15);
- return h;
- }
- h += toASCIILower(c);
- h += (h << 10);
- h ^= (h >> 6);
- }
- }
-
- static const bool safeToCompareToEmptyOrDeleted = false;
-};
-
-struct TextCodecFactory {
- NewTextCodecFunction function;
- const void* additionalData;
- TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { }
-};
-
-typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
-typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
-
-static Mutex& encodingRegistryMutex()
-{
- // We don't have to use AtomicallyInitializedStatic here because
- // this function is called on the main thread for any page before
- // it is used in worker threads.
- DEFINE_STATIC_LOCAL(Mutex, mutex, ());
- return mutex;
-}
-
-static TextEncodingNameMap* textEncodingNameMap;
-static TextCodecMap* textCodecMap;
-static bool didExtendTextCodecMaps;
-static HashSet<const char*>* japaneseEncodings;
-static HashSet<const char*>* nonBackslashEncodings;
-
-static const char* const textEncodingNameBlacklist[] = {
- "UTF-7"
-};
-
-#if ERROR_DISABLED
-
-static inline void checkExistingName(const char*, const char*) { }
-
-#else
-
-static void checkExistingName(const char* alias, const char* atomicName)
-{
- const char* oldAtomicName = textEncodingNameMap->get(alias);
- if (!oldAtomicName)
- return;
- if (oldAtomicName == atomicName)
- return;
- // Keep the warning silent about one case where we know this will happen.
- if (strcmp(alias, "ISO-8859-8-I") == 0
- && strcmp(oldAtomicName, "ISO-8859-8-I") == 0
- && strcasecmp(atomicName, "iso-8859-8") == 0)
- return;
- LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", alias, oldAtomicName, atomicName);
-}
-
-#endif
-
-static bool isUndesiredAlias(const char* alias)
-{
- // Reject aliases with version numbers that are supported by some back-ends (such as "ISO_2022,locale=ja,version=0" in ICU).
- for (const char* p = alias; *p; ++p) {
- if (*p == ',')
- return true;
- }
- // 8859_1 is known to (at least) ICU, but other browsers don't support this name - and having it caused a compatibility
- // problem, see bug 43554.
- if (0 == strcmp(alias, "8859_1"))
- return true;
- return false;
-}
-
-static void addToTextEncodingNameMap(const char* alias, const char* name)
-{
- ASSERT(strlen(alias) <= maxEncodingNameLength);
- if (isUndesiredAlias(alias))
- return;
- const char* atomicName = textEncodingNameMap->get(name);
- ASSERT(strcmp(alias, name) == 0 || atomicName);
- if (!atomicName)
- atomicName = name;
- checkExistingName(alias, atomicName);
- textEncodingNameMap->add(alias, atomicName);
-}
-
-static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
-{
- const char* atomicName = textEncodingNameMap->get(name);
- ASSERT(atomicName);
- textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
-}
-
-static void pruneBlacklistedCodecs()
-{
- for (size_t i = 0; i < WTF_ARRAY_LENGTH(textEncodingNameBlacklist); ++i) {
- const char* atomicName = textEncodingNameMap->get(textEncodingNameBlacklist[i]);
- if (!atomicName)
- continue;
-
- Vector<const char*> names;
- TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
- TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
- for (; it != end; ++it) {
- if (it->second == atomicName)
- names.append(it->first);
- }
-
- size_t length = names.size();
- for (size_t j = 0; j < length; ++j)
- textEncodingNameMap->remove(names[j]);
-
- textCodecMap->remove(atomicName);
- }
-}
-
-static void buildBaseTextCodecMaps()
-{
- ASSERT(isMainThread());
- ASSERT(!textCodecMap);
- ASSERT(!textEncodingNameMap);
-
- textCodecMap = new TextCodecMap;
- textEncodingNameMap = new TextEncodingNameMap;
-
- TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
- TextCodecLatin1::registerCodecs(addToTextCodecMap);
-
- TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
- TextCodecUTF16::registerCodecs(addToTextCodecMap);
-
- TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
- TextCodecUserDefined::registerCodecs(addToTextCodecMap);
-
-#if USE(ICU_UNICODE)
- TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap);
- TextCodecICU::registerBaseCodecs(addToTextCodecMap);
-#endif
-
-#if USE(GLIB_UNICODE)
- TextCodecGtk::registerBaseEncodingNames(addToTextEncodingNameMap);
- TextCodecGtk::registerBaseCodecs(addToTextCodecMap);
-#endif
-
-#if USE(BREWMP_UNICODE)
- TextCodecBrew::registerBaseEncodingNames(addToTextEncodingNameMap);
- TextCodecBrew::registerBaseCodecs(addToTextCodecMap);
-#endif
-
-#if OS(WINCE) && !PLATFORM(QT)
- TextCodecWinCE::registerBaseEncodingNames(addToTextEncodingNameMap);
- TextCodecWinCE::registerBaseCodecs(addToTextCodecMap);
-#endif
-}
-
-static void addEncodingName(HashSet<const char*>* set, const char* name)
-{
- // We must not use atomicCanonicalTextEncodingName() because this function is called in it.
- const char* atomicName = textEncodingNameMap->get(name);
- if (atomicName)
- set->add(atomicName);
-}
-
-static void buildQuirksSets()
-{
- // FIXME: Having isJapaneseEncoding() and shouldShowBackslashAsCurrencySymbolIn()
- // and initializing the sets for them in TextEncodingRegistry.cpp look strange.
-
- ASSERT(!japaneseEncodings);
- ASSERT(!nonBackslashEncodings);
-
- japaneseEncodings = new HashSet<const char*>();
- addEncodingName(japaneseEncodings, "EUC-JP");
- addEncodingName(japaneseEncodings, "ISO-2022-JP");
- addEncodingName(japaneseEncodings, "ISO-2022-JP-1");
- addEncodingName(japaneseEncodings, "ISO-2022-JP-2");
- addEncodingName(japaneseEncodings, "ISO-2022-JP-3");
- addEncodingName(japaneseEncodings, "JIS_C6226-1978");
- addEncodingName(japaneseEncodings, "JIS_X0201");
- addEncodingName(japaneseEncodings, "JIS_X0208-1983");
- addEncodingName(japaneseEncodings, "JIS_X0208-1990");
- addEncodingName(japaneseEncodings, "JIS_X0212-1990");
- addEncodingName(japaneseEncodings, "Shift_JIS");
- addEncodingName(japaneseEncodings, "Shift_JIS_X0213-2000");
- addEncodingName(japaneseEncodings, "cp932");
- addEncodingName(japaneseEncodings, "x-mac-japanese");
-
- nonBackslashEncodings = new HashSet<const char*>();
- // The text encodings below treat backslash as a currency symbol for IE compatibility.
- // See http://blogs.msdn.com/michkap/archive/2005/09/17/469941.aspx for more information.
- addEncodingName(nonBackslashEncodings, "x-mac-japanese");
- addEncodingName(nonBackslashEncodings, "ISO-2022-JP");
- addEncodingName(nonBackslashEncodings, "EUC-JP");
- // Shift_JIS_X0213-2000 is not the same encoding as Shift_JIS on Mac. We need to register both of them.
- addEncodingName(nonBackslashEncodings, "Shift_JIS");
- addEncodingName(nonBackslashEncodings, "Shift_JIS_X0213-2000");
-}
-
-bool isJapaneseEncoding(const char* canonicalEncodingName)
-{
- return canonicalEncodingName && japaneseEncodings && japaneseEncodings->contains(canonicalEncodingName);
-}
-
-bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName)
-{
- return canonicalEncodingName && nonBackslashEncodings && nonBackslashEncodings->contains(canonicalEncodingName);
-}
-
-static void extendTextCodecMaps()
-{
-#if USE(ICU_UNICODE)
- TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap);
- TextCodecICU::registerExtendedCodecs(addToTextCodecMap);
-#endif
-
-#if USE(QT4_UNICODE)
- TextCodecQt::registerEncodingNames(addToTextEncodingNameMap);
- TextCodecQt::registerCodecs(addToTextCodecMap);
-#endif
-
-#if PLATFORM(MAC)
- TextCodecMac::registerEncodingNames(addToTextEncodingNameMap);
- TextCodecMac::registerCodecs(addToTextCodecMap);
-#endif
-
-#if USE(GLIB_UNICODE)
- TextCodecGtk::registerExtendedEncodingNames(addToTextEncodingNameMap);
- TextCodecGtk::registerExtendedCodecs(addToTextCodecMap);
-#endif
-
-#if OS(WINCE) && !PLATFORM(QT)
- TextCodecWinCE::registerExtendedEncodingNames(addToTextEncodingNameMap);
- TextCodecWinCE::registerExtendedCodecs(addToTextCodecMap);
-#endif
-
- pruneBlacklistedCodecs();
- buildQuirksSets();
-}
-
-PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
-{
- MutexLocker lock(encodingRegistryMutex());
-
- ASSERT(textCodecMap);
- TextCodecFactory factory = textCodecMap->get(encoding.name());
- ASSERT(factory.function);
- return factory.function(encoding, factory.additionalData);
-}
-
-const char* atomicCanonicalTextEncodingName(const char* name)
-{
- if (!name || !name[0])
- return 0;
- if (!textEncodingNameMap)
- buildBaseTextCodecMaps();
-
- MutexLocker lock(encodingRegistryMutex());
-
- if (const char* atomicName = textEncodingNameMap->get(name))
- return atomicName;
- if (didExtendTextCodecMaps)
- return 0;
- extendTextCodecMaps();
- didExtendTextCodecMaps = true;
- return textEncodingNameMap->get(name);
-}
-
-const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length)
-{
- char buffer[maxEncodingNameLength + 1];
- size_t j = 0;
- for (size_t i = 0; i < length; ++i) {
- UChar c = characters[i];
- if (j == maxEncodingNameLength)
- return 0;
- buffer[j++] = c;
- }
- buffer[j] = 0;
- return atomicCanonicalTextEncodingName(buffer);
-}
-
-bool noExtendedTextEncodingNameUsed()
-{
- // If the calling thread did not use extended encoding names, it is fine for it to use a stale false value.
- return !didExtendTextCodecMaps;
-}
-
-#ifndef NDEBUG
-void dumpTextEncodingNameMap()
-{
- unsigned size = textEncodingNameMap->size();
- fprintf(stderr, "Dumping %u entries in WebCore::textEncodingNameMap...\n", size);
-
- MutexLocker lock(encodingRegistryMutex());
-
- TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
- TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
- for (; it != end; ++it)
- fprintf(stderr, "'%s' => '%s'\n", it->first, it->second);
-}
-#endif
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/TextEncodingRegistry.h b/WebCore/platform/text/TextEncodingRegistry.h
deleted file mode 100644
index 16844c6..0000000
--- a/WebCore/platform/text/TextEncodingRegistry.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextEncodingRegistry_h
-#define TextEncodingRegistry_h
-
-#include <memory>
-#include <wtf/PassOwnPtr.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
- class TextCodec;
- class TextEncoding;
-
- // Use TextResourceDecoder::decode to decode resources, since it handles BOMs.
- // Use TextEncoding::encode to encode, since it takes care of normalization.
- PassOwnPtr<TextCodec> newTextCodec(const TextEncoding&);
-
- // Only TextEncoding should use the following functions directly.
- const char* atomicCanonicalTextEncodingName(const char* alias);
- const char* atomicCanonicalTextEncodingName(const UChar* aliasCharacters, size_t aliasLength);
- bool noExtendedTextEncodingNameUsed();
- bool isJapaneseEncoding(const char* canonicalEncodingName);
- bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName);
-
-#ifndef NDEBUG
- void dumpTextEncodingNameMap();
-#endif
-}
-
-#endif // TextEncodingRegistry_h
diff --git a/WebCore/platform/text/TextStream.cpp b/WebCore/platform/text/TextStream.cpp
deleted file mode 100644
index 1094fa4..0000000
--- a/WebCore/platform/text/TextStream.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (C) 2004, 2008, 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextStream.h"
-
-#include "PlatformString.h"
-#include <wtf/StringExtras.h>
-
-using namespace std;
-
-namespace WebCore {
-
-static const size_t printBufferSize = 100; // large enough for any integer or floating point value in string format, including trailing null character
-
-TextStream& TextStream::operator<<(bool b)
-{
- return *this << (b ? "1" : "0");
-}
-
-TextStream& TextStream::operator<<(int i)
-{
- char buffer[printBufferSize];
- snprintf(buffer, sizeof(buffer) - 1, "%d", i);
- return *this << buffer;
-}
-
-TextStream& TextStream::operator<<(unsigned i)
-{
- char buffer[printBufferSize];
- snprintf(buffer, sizeof(buffer) - 1, "%u", i);
- return *this << buffer;
-}
-
-TextStream& TextStream::operator<<(long i)
-{
- char buffer[printBufferSize];
- snprintf(buffer, sizeof(buffer) - 1, "%ld", i);
- return *this << buffer;
-}
-
-TextStream& TextStream::operator<<(unsigned long i)
-{
- char buffer[printBufferSize];
- snprintf(buffer, sizeof(buffer) - 1, "%lu", i);
- return *this << buffer;
-}
-
-TextStream& TextStream::operator<<(float f)
-{
- char buffer[printBufferSize];
- snprintf(buffer, sizeof(buffer) - 1, "%.2f", f);
- return *this << buffer;
-}
-
-TextStream& TextStream::operator<<(double d)
-{
- char buffer[printBufferSize];
- snprintf(buffer, sizeof(buffer) - 1, "%.2f", d);
- return *this << buffer;
-}
-
-TextStream& TextStream::operator<<(const char* string)
-{
- size_t stringLength = strlen(string);
- size_t textLength = m_text.size();
- if (stringLength > numeric_limits<size_t>::max() - textLength)
- CRASH();
- m_text.grow(textLength + stringLength);
- for (size_t i = 0; i < stringLength; ++i)
- m_text[textLength + i] = string[i];
- return *this;
-}
-
-TextStream& TextStream::operator<<(const void* p)
-{
- char buffer[printBufferSize];
- snprintf(buffer, sizeof(buffer) - 1, "%p", p);
- return *this << buffer;
-}
-
-TextStream& TextStream::operator<<(const String& string)
-{
- append(m_text, string);
- return *this;
-}
-
-String TextStream::release()
-{
- return String::adopt(m_text);
-}
-
-#if OS(WINDOWS) && CPU(X86_64)
-TextStream& TextStream::operator<<(__int64 i)
-{
- char buffer[printBufferSize];
- snprintf(buffer, sizeof(buffer) - 1, "%I64i", i);
- return *this << buffer;
-}
-TextStream& TextStream::operator<<(unsigned __int64 i)
-{
- char buffer[printBufferSize];
- snprintf(buffer, sizeof(buffer) - 1, "%I64u", i);
- return *this << buffer;
-}
-#endif
-
-}
diff --git a/WebCore/platform/text/TextStream.h b/WebCore/platform/text/TextStream.h
deleted file mode 100644
index e7e4cc0..0000000
--- a/WebCore/platform/text/TextStream.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2004, 2008 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextStream_h
-#define TextStream_h
-
-#include <wtf/Forward.h>
-#include <wtf/Vector.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
-class TextStream {
-public:
- TextStream& operator<<(bool);
- TextStream& operator<<(int);
- TextStream& operator<<(unsigned);
- TextStream& operator<<(long);
- TextStream& operator<<(unsigned long);
- TextStream& operator<<(float);
- TextStream& operator<<(double);
- TextStream& operator<<(const char*);
- TextStream& operator<<(const void*);
- TextStream& operator<<(const String&);
-#if OS(WINDOWS) && CPU(X86_64)
- TextStream& operator<<(unsigned __int64);
- TextStream& operator<<(__int64);
-#endif
-
- String release();
-
-private:
- Vector<UChar> m_text;
-};
-
-}
-
-#endif
diff --git a/WebCore/platform/text/UnicodeRange.cpp b/WebCore/platform/text/UnicodeRange.cpp
deleted file mode 100644
index 0373441..0000000
--- a/WebCore/platform/text/UnicodeRange.cpp
+++ /dev/null
@@ -1,462 +0,0 @@
-/*
- * Copyright (C) 2007 Apple Computer, Inc.
- *
- * Portions are Copyright (C) 1998 Netscape Communications Corporation.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Alternatively, the contents of this file may be used under the terms
- * of either the Mozilla Public License Version 1.1, found at
- * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
- * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
- * (the "GPL"), in which case the provisions of the MPL or the GPL are
- * applicable instead of those above. If you wish to allow use of your
- * version of this file only under the terms of one of those two
- * licenses (the MPL or the GPL) and not to allow others to use your
- * version of this file under the LGPL, indicate your decision by
- * deletingthe provisions above and replace them with the notice and
- * other provisions required by the MPL or the GPL, as the case may be.
- * If you do not delete the provisions above, a recipient may use your
- * version of this file under any of the LGPL, the MPL or the GPL.
- */
-
-#include "config.h"
-#include "UnicodeRange.h"
-
-namespace WebCore {
-
-// This table depends on unicode range definitions.
-// Each item's index must correspond to a unicode range value
-// eg. x-cyrillic = LangGroupTable[cRangeCyrillic]
-static const char* gUnicodeRangeToLangGroupTable[] =
-{
- "x-cyrillic",
- "el",
- "tr",
- "he",
- "ar",
- "x-baltic",
- "th",
- "ko",
- "ja",
- "zh-CN",
- "zh-TW",
- "x-devanagari",
- "x-tamil",
- "x-armn",
- "x-beng",
- "x-cans",
- "x-ethi",
- "x-geor",
- "x-gujr",
- "x-guru",
- "x-khmr",
- "x-mlym"
-};
-
-/**********************************************************************
- * Unicode subranges as defined in unicode 3.0
- * x-western, x-central-euro, tr, x-baltic -> latin
- * 0000 - 036f
- * 1e00 - 1eff
- * 2000 - 206f (general punctuation)
- * 20a0 - 20cf (currency symbols)
- * 2100 - 214f (letterlike symbols)
- * 2150 - 218f (Number Forms)
- * el -> greek
- * 0370 - 03ff
- * 1f00 - 1fff
- * x-cyrillic -> cyrillic
- * 0400 - 04ff
- * he -> hebrew
- * 0590 - 05ff
- * ar -> arabic
- * 0600 - 06ff
- * fb50 - fdff (arabic presentation forms)
- * fe70 - feff (arabic presentation forms b)
- * th - thai
- * 0e00 - 0e7f
- * ko -> korean
- * ac00 - d7af (hangul Syllables)
- * 1100 - 11ff (jamo)
- * 3130 - 318f (hangul compatibility jamo)
- * ja
- * 3040 - 309f (hiragana)
- * 30a0 - 30ff (katakana)
- * zh-CN
- * zh-TW
- *
- * CJK
- * 3100 - 312f (bopomofo)
- * 31a0 - 31bf (bopomofo extended)
- * 3000 - 303f (CJK Symbols and Punctuation)
- * 2e80 - 2eff (CJK radicals supplement)
- * 2f00 - 2fdf (Kangxi Radicals)
- * 2ff0 - 2fff (Ideographic Description Characters)
- * 3190 - 319f (kanbun)
- * 3200 - 32ff (Enclosed CJK letters and Months)
- * 3300 - 33ff (CJK compatibility)
- * 3400 - 4dbf (CJK Unified Ideographs Extension A)
- * 4e00 - 9faf (CJK Unified Ideographs)
- * f900 - fa5f (CJK Compatibility Ideographs)
- * fe30 - fe4f (CJK compatibility Forms)
- * ff00 - ffef (halfwidth and fullwidth forms)
- *
- * Armenian
- * 0530 - 058f
- * Sriac
- * 0700 - 074f
- * Thaana
- * 0780 - 07bf
- * Devanagari
- * 0900 - 097f
- * Bengali
- * 0980 - 09ff
- * Gurmukhi
- * 0a00 - 0a7f
- * Gujarati
- * 0a80 - 0aff
- * Oriya
- * 0b00 - 0b7f
- * Tamil
- * 0b80 - 0bff
- * Telugu
- * 0c00 - 0c7f
- * Kannada
- * 0c80 - 0cff
- * Malayalam
- * 0d00 - 0d7f
- * Sinhala
- * 0d80 - 0def
- * Lao
- * 0e80 - 0eff
- * Tibetan
- * 0f00 - 0fbf
- * Myanmar
- * 1000 - 109f
- * Georgian
- * 10a0 - 10ff
- * Ethiopic
- * 1200 - 137f
- * Cherokee
- * 13a0 - 13ff
- * Canadian Aboriginal Syllabics
- * 1400 - 167f
- * Ogham
- * 1680 - 169f
- * Runic
- * 16a0 - 16ff
- * Khmer
- * 1780 - 17ff
- * Mongolian
- * 1800 - 18af
- * Misc - superscripts and subscripts
- * 2070 - 209f
- * Misc - Combining Diacritical Marks for Symbols
- * 20d0 - 20ff
- * Misc - Arrows
- * 2190 - 21ff
- * Misc - Mathematical Operators
- * 2200 - 22ff
- * Misc - Miscellaneous Technical
- * 2300 - 23ff
- * Misc - Control picture
- * 2400 - 243f
- * Misc - Optical character recognition
- * 2440 - 2450
- * Misc - Enclose Alphanumerics
- * 2460 - 24ff
- * Misc - Box Drawing
- * 2500 - 257f
- * Misc - Block Elements
- * 2580 - 259f
- * Misc - Geometric Shapes
- * 25a0 - 25ff
- * Misc - Miscellaneous Symbols
- * 2600 - 267f
- * Misc - Dingbats
- * 2700 - 27bf
- * Misc - Braille Patterns
- * 2800 - 28ff
- * Yi Syllables
- * a000 - a48f
- * Yi radicals
- * a490 - a4cf
- * Alphabetic Presentation Forms
- * fb00 - fb4f
- * Misc - Combining half Marks
- * fe20 - fe2f
- * Misc - small form variants
- * fe50 - fe6f
- * Misc - Specials
- * fff0 - ffff
- *********************************************************************/
-
-static const unsigned cNumSubTables = 9;
-static const unsigned cSubTableSize = 16;
-
-static const unsigned char gUnicodeSubrangeTable[cNumSubTables][cSubTableSize] =
-{
- { // table for X---
- cRangeTableBase+1, //u0xxx
- cRangeTableBase+2, //u1xxx
- cRangeTableBase+3, //u2xxx
- cRangeSetCJK, //u3xxx
- cRangeSetCJK, //u4xxx
- cRangeSetCJK, //u5xxx
- cRangeSetCJK, //u6xxx
- cRangeSetCJK, //u7xxx
- cRangeSetCJK, //u8xxx
- cRangeSetCJK, //u9xxx
- cRangeTableBase+4, //uaxxx
- cRangeKorean, //ubxxx
- cRangeKorean, //ucxxx
- cRangeTableBase+5, //udxxx
- cRangePrivate, //uexxx
- cRangeTableBase+6 //ufxxx
- },
- { //table for 0X--
- cRangeSetLatin, //u00xx
- cRangeSetLatin, //u01xx
- cRangeSetLatin, //u02xx
- cRangeGreek, //u03xx XXX 0300-036f is in fact cRangeCombiningDiacriticalMarks
- cRangeCyrillic, //u04xx
- cRangeTableBase+7, //u05xx, includes Cyrillic supplement, Hebrew, and Armenian
- cRangeArabic, //u06xx
- cRangeTertiaryTable, //u07xx
- cRangeUnassigned, //u08xx
- cRangeTertiaryTable, //u09xx
- cRangeTertiaryTable, //u0axx
- cRangeTertiaryTable, //u0bxx
- cRangeTertiaryTable, //u0cxx
- cRangeTertiaryTable, //u0dxx
- cRangeTertiaryTable, //u0exx
- cRangeTibetan, //u0fxx
- },
- { //table for 1x--
- cRangeTertiaryTable, //u10xx
- cRangeKorean, //u11xx
- cRangeEthiopic, //u12xx
- cRangeTertiaryTable, //u13xx
- cRangeCanadian, //u14xx
- cRangeCanadian, //u15xx
- cRangeTertiaryTable, //u16xx
- cRangeKhmer, //u17xx
- cRangeMongolian, //u18xx
- cRangeUnassigned, //u19xx
- cRangeUnassigned, //u1axx
- cRangeUnassigned, //u1bxx
- cRangeUnassigned, //u1cxx
- cRangeUnassigned, //u1dxx
- cRangeSetLatin, //u1exx
- cRangeGreek, //u1fxx
- },
- { //table for 2x--
- cRangeSetLatin, //u20xx
- cRangeSetLatin, //u21xx
- cRangeMathOperators, //u22xx
- cRangeMiscTechnical, //u23xx
- cRangeControlOpticalEnclose, //u24xx
- cRangeBoxBlockGeometrics, //u25xx
- cRangeMiscSymbols, //u26xx
- cRangeDingbats, //u27xx
- cRangeBraillePattern, //u28xx
- cRangeUnassigned, //u29xx
- cRangeUnassigned, //u2axx
- cRangeUnassigned, //u2bxx
- cRangeUnassigned, //u2cxx
- cRangeUnassigned, //u2dxx
- cRangeSetCJK, //u2exx
- cRangeSetCJK, //u2fxx
- },
- { //table for ax--
- cRangeYi, //ua0xx
- cRangeYi, //ua1xx
- cRangeYi, //ua2xx
- cRangeYi, //ua3xx
- cRangeYi, //ua4xx
- cRangeUnassigned, //ua5xx
- cRangeUnassigned, //ua6xx
- cRangeUnassigned, //ua7xx
- cRangeUnassigned, //ua8xx
- cRangeUnassigned, //ua9xx
- cRangeUnassigned, //uaaxx
- cRangeUnassigned, //uabxx
- cRangeKorean, //uacxx
- cRangeKorean, //uadxx
- cRangeKorean, //uaexx
- cRangeKorean, //uafxx
- },
- { //table for dx--
- cRangeKorean, //ud0xx
- cRangeKorean, //ud1xx
- cRangeKorean, //ud2xx
- cRangeKorean, //ud3xx
- cRangeKorean, //ud4xx
- cRangeKorean, //ud5xx
- cRangeKorean, //ud6xx
- cRangeKorean, //ud7xx
- cRangeSurrogate, //ud8xx
- cRangeSurrogate, //ud9xx
- cRangeSurrogate, //udaxx
- cRangeSurrogate, //udbxx
- cRangeSurrogate, //udcxx
- cRangeSurrogate, //uddxx
- cRangeSurrogate, //udexx
- cRangeSurrogate, //udfxx
- },
- { // table for fx--
- cRangePrivate, //uf0xx
- cRangePrivate, //uf1xx
- cRangePrivate, //uf2xx
- cRangePrivate, //uf3xx
- cRangePrivate, //uf4xx
- cRangePrivate, //uf5xx
- cRangePrivate, //uf6xx
- cRangePrivate, //uf7xx
- cRangePrivate, //uf8xx
- cRangeSetCJK, //uf9xx
- cRangeSetCJK, //ufaxx
- cRangeArabic, //ufbxx, includes alphabic presentation form
- cRangeArabic, //ufcxx
- cRangeArabic, //ufdxx
- cRangeArabic, //ufexx, includes Combining half marks,
- // CJK compatibility forms,
- // CJK compatibility forms,
- // small form variants
- cRangeTableBase+8, //uffxx, halfwidth and fullwidth forms, includes Specials
- },
- { //table for 0x0500 - 0x05ff
- cRangeCyrillic, //u050x
- cRangeCyrillic, //u051x
- cRangeCyrillic, //u052x
- cRangeArmenian, //u053x
- cRangeArmenian, //u054x
- cRangeArmenian, //u055x
- cRangeArmenian, //u056x
- cRangeArmenian, //u057x
- cRangeArmenian, //u058x
- cRangeHebrew, //u059x
- cRangeHebrew, //u05ax
- cRangeHebrew, //u05bx
- cRangeHebrew, //u05cx
- cRangeHebrew, //u05dx
- cRangeHebrew, //u05ex
- cRangeHebrew, //u05fx
- },
- { //table for 0xff00 - 0xffff
- cRangeSetCJK, //uff0x, fullwidth latin
- cRangeSetCJK, //uff1x, fullwidth latin
- cRangeSetCJK, //uff2x, fullwidth latin
- cRangeSetCJK, //uff3x, fullwidth latin
- cRangeSetCJK, //uff4x, fullwidth latin
- cRangeSetCJK, //uff5x, fullwidth latin
- cRangeSetCJK, //uff6x, halfwidth katakana
- cRangeSetCJK, //uff7x, halfwidth katakana
- cRangeSetCJK, //uff8x, halfwidth katakana
- cRangeSetCJK, //uff9x, halfwidth katakana
- cRangeSetCJK, //uffax, halfwidth hangul jamo
- cRangeSetCJK, //uffbx, halfwidth hangul jamo
- cRangeSetCJK, //uffcx, halfwidth hangul jamo
- cRangeSetCJK, //uffdx, halfwidth hangul jamo
- cRangeSetCJK, //uffex, fullwidth symbols
- cRangeSpecials, //ufffx, Specials
- },
-};
-
-// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
-// code points so that the number of entries in the tertiary range
-// table for that range is obtained by dividing (0x1700 - 0x0700) by 128.
-// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal
-// syllabaries take multiple chunks and Ogham and Runic share a single chunk.
-static const unsigned cTertiaryTableSize = ((0x1700 - 0x0700) / 0x80);
-
-static const unsigned char gUnicodeTertiaryRangeTable[cTertiaryTableSize] =
-{ //table for 0x0700 - 0x1600
- cRangeSyriac, //u070x
- cRangeThaana, //u078x
- cRangeUnassigned, //u080x place holder(resolved in the 2ndary tab.)
- cRangeUnassigned, //u088x place holder(resolved in the 2ndary tab.)
- cRangeDevanagari, //u090x
- cRangeBengali, //u098x
- cRangeGurmukhi, //u0a0x
- cRangeGujarati, //u0a8x
- cRangeOriya, //u0b0x
- cRangeTamil, //u0b8x
- cRangeTelugu, //u0c0x
- cRangeKannada, //u0c8x
- cRangeMalayalam, //u0d0x
- cRangeSinhala, //u0d8x
- cRangeThai, //u0e0x
- cRangeLao, //u0e8x
- cRangeTibetan, //u0f0x place holder(resolved in the 2ndary tab.)
- cRangeTibetan, //u0f8x place holder(resolved in the 2ndary tab.)
- cRangeMyanmar, //u100x
- cRangeGeorgian, //u108x
- cRangeKorean, //u110x place holder(resolved in the 2ndary tab.)
- cRangeKorean, //u118x place holder(resolved in the 2ndary tab.)
- cRangeEthiopic, //u120x place holder(resolved in the 2ndary tab.)
- cRangeEthiopic, //u128x place holder(resolved in the 2ndary tab.)
- cRangeEthiopic, //u130x
- cRangeCherokee, //u138x
- cRangeCanadian, //u140x place holder(resolved in the 2ndary tab.)
- cRangeCanadian, //u148x place holder(resolved in the 2ndary tab.)
- cRangeCanadian, //u150x place holder(resolved in the 2ndary tab.)
- cRangeCanadian, //u158x place holder(resolved in the 2ndary tab.)
- cRangeCanadian, //u160x
- cRangeOghamRunic, //u168x this contains two scripts, Ogham & Runic
-};
-
-// A two level index is almost enough for locating a range, with the
-// exception of u03xx and u05xx. Since we don't really care about range for
-// combining diacritical marks in our font application, they are
-// not discriminated further. Future adoption of this method for other use
-// should be aware of this limitation. The implementation can be extended if
-// there is such a need.
-// For Indic, Southeast Asian scripts and some other scripts between
-// U+0700 and U+16FF, it's extended to the third level.
-unsigned int findCharUnicodeRange(UChar32 ch)
-{
- if (ch >= 0xFFFF)
- return 0;
-
- unsigned int range;
-
- //search the first table
- range = gUnicodeSubrangeTable[0][ch >> 12];
-
- if (range < cRangeTableBase)
- // we try to get a specific range
- return range;
-
- // otherwise, we have one more table to look at
- range = gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x0f00) >> 8];
- if (range < cRangeTableBase)
- return range;
- if (range < cRangeTertiaryTable)
- return gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x00f0) >> 4];
-
- // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks
- return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
-}
-
-const char* langGroupFromUnicodeRange(unsigned char unicodeRange)
-{
- if (cRangeSpecificItemNum > unicodeRange)
- return gUnicodeRangeToLangGroupTable[unicodeRange];
- return 0;
-}
-
-}
diff --git a/WebCore/platform/text/UnicodeRange.h b/WebCore/platform/text/UnicodeRange.h
deleted file mode 100644
index 2278a0e..0000000
--- a/WebCore/platform/text/UnicodeRange.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2007 Apple Computer, Inc.
- *
- * Portions are Copyright (C) 1998 Netscape Communications Corporation.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Alternatively, the contents of this file may be used under the terms
- * of either the Mozilla Public License Version 1.1, found at
- * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public
- * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html
- * (the "GPL"), in which case the provisions of the MPL or the GPL are
- * applicable instead of those above. If you wish to allow use of your
- * version of this file only under the terms of one of those two
- * licenses (the MPL or the GPL) and not to allow others to use your
- * version of this file under the LGPL, indicate your decision by
- * deletingthe provisions above and replace them with the notice and
- * other provisions required by the MPL or the GPL, as the case may be.
- * If you do not delete the provisions above, a recipient may use your
- * version of this file under any of the LGPL, the MPL or the GPL.
- */
-
-#ifndef UnicodeRange_H
-#define UnicodeRange_H
-
-#if PLATFORM(HAIKU)
-#include "stdint.h"
-#endif
-
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
-// The following constants define unicode subranges
-// values below cRangeNum must be continuous so that we can map to
-// a lang group directly.
-// All ranges we care about should fit within 32 bits.
-
-// Frequently used range definitions
-const unsigned char cRangeCyrillic = 0;
-const unsigned char cRangeGreek = 1;
-const unsigned char cRangeTurkish = 2;
-const unsigned char cRangeHebrew = 3;
-const unsigned char cRangeArabic = 4;
-const unsigned char cRangeBaltic = 5;
-const unsigned char cRangeThai = 6;
-const unsigned char cRangeKorean = 7;
-const unsigned char cRangeJapanese = 8;
-const unsigned char cRangeSChinese = 9;
-const unsigned char cRangeTChinese = 10;
-const unsigned char cRangeDevanagari = 11;
-const unsigned char cRangeTamil = 12;
-const unsigned char cRangeArmenian = 13;
-const unsigned char cRangeBengali = 14;
-const unsigned char cRangeCanadian = 15;
-const unsigned char cRangeEthiopic = 16;
-const unsigned char cRangeGeorgian = 17;
-const unsigned char cRangeGujarati = 18;
-const unsigned char cRangeGurmukhi = 19;
-const unsigned char cRangeKhmer = 20;
-const unsigned char cRangeMalayalam = 21;
-
-const unsigned char cRangeSpecificItemNum = 22;
-
-//range/rangeSet grow to this place 22-29
-
-const unsigned char cRangeSetStart = 30; // range set definition starts from here
-const unsigned char cRangeSetLatin = 30;
-const unsigned char cRangeSetCJK = 31;
-const unsigned char cRangeSetEnd = 31; // range set definition ends here
-
-// less frequently used range definition
-const unsigned char cRangeSurrogate = 32;
-const unsigned char cRangePrivate = 33;
-const unsigned char cRangeMisc = 34;
-const unsigned char cRangeUnassigned = 35;
-const unsigned char cRangeSyriac = 36;
-const unsigned char cRangeThaana = 37;
-const unsigned char cRangeOriya = 38;
-const unsigned char cRangeTelugu = 39;
-const unsigned char cRangeKannada = 40;
-const unsigned char cRangeSinhala = 41;
-const unsigned char cRangeLao = 42;
-const unsigned char cRangeTibetan = 43;
-const unsigned char cRangeMyanmar = 44;
-const unsigned char cRangeCherokee = 45;
-const unsigned char cRangeOghamRunic = 46;
-const unsigned char cRangeMongolian = 47;
-const unsigned char cRangeMathOperators = 48;
-const unsigned char cRangeMiscTechnical = 49;
-const unsigned char cRangeControlOpticalEnclose = 50;
-const unsigned char cRangeBoxBlockGeometrics = 51;
-const unsigned char cRangeMiscSymbols = 52;
-const unsigned char cRangeDingbats = 53;
-const unsigned char cRangeBraillePattern = 54;
-const unsigned char cRangeYi = 55;
-const unsigned char cRangeCombiningDiacriticalMarks = 56;
-const unsigned char cRangeSpecials = 57;
-
-const unsigned char cRangeTableBase = 128; //values over 127 are reserved for internal use only
-const unsigned char cRangeTertiaryTable = 145; // leave room for 16 subtable
- // indices (cRangeTableBase + 1 ..
- // cRangeTableBase + 16)
-
-
-
-unsigned int findCharUnicodeRange(UChar32 ch);
-const char* langGroupFromUnicodeRange(unsigned char unicodeRange);
-
-}
-
-#endif // UnicodeRange_H
diff --git a/WebCore/platform/text/android/HyphenationAndroid.cpp b/WebCore/platform/text/android/HyphenationAndroid.cpp
deleted file mode 100644
index d1bd839..0000000
--- a/WebCore/platform/text/android/HyphenationAndroid.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright 2010, The Android Open Source Project
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "Hyphenation.h"
-
-// For external hyphenation library.
-#include "hyphen.h"
-#include <utils/AssetManager.h>
-#include <wtf/text/CString.h>
-#include <wtf/text/WTFString.h>
-
-extern android::AssetManager* globalAssetManager();
-
-using namespace WTF;
-
-namespace WebCore {
-
-static HyphenDict* loadHyphenationDictionary()
-{
- android::AssetManager* am = globalAssetManager();
- // Only support English for now.
- android::Asset* a = am->open("webkit/hyph_en_US.dic",
- android::Asset::ACCESS_BUFFER);
- if (!a) {
- // Asset webkit/hyph_en_US.dic not found!
- return 0;
- }
- const CString dictContents = String(static_cast<const char*>(a->getBuffer(false)),
- a->getLength()).utf8();
- HyphenDict* dict = hnj_hyphen_load_from_buffer(dictContents.data(),
- dictContents.length());
- delete a;
-
- return dict;
-}
-
-bool canHyphenate(const AtomicString& /* localeIdentifier */)
-{
- // FIXME: Check that the locale identifier matches the available dictionary.
- return true;
-}
-
-size_t lastHyphenLocation(const UChar* characters, size_t length, size_t beforeIndex, const AtomicString& /* localeIdentifier */)
-{
- static const size_t minWordLen = 5;
- static const size_t maxWordLen = 100;
- if (beforeIndex <= 0 || length < minWordLen || length > maxWordLen)
- return 0;
-
- static HyphenDict* dict = loadHyphenationDictionary();
- if (!dict)
- return 0;
-
- char word[maxWordLen];
- size_t wordLength = 0;
- for (size_t i = 0; i < length; ++i) {
- const UChar ch = characters[i];
- // Only English for now.
- // To really make it language aware, we need something like language
- // detection or rely on the langAttr in the html element. Though
- // seems right now the langAttr is not used or quite implemented in
- // webkit.
- if (!isASCIIAlpha(ch)) {
- // Bypass leading spaces.
- if (isASCIISpace(ch) && !wordLength)
- continue;
- return 0;
- }
- word[wordLength++] = ch;
- }
- if (wordLength < minWordLen)
- return 0;
-
- static const int extraBuffer = 5;
- const int leadingSpacesCount = length - wordLength;
- char hyphens[maxWordLen + extraBuffer];
- if (!hnj_hyphen_hyphenate(dict, word, wordLength, hyphens)) {
- for (size_t i = beforeIndex - 2 - leadingSpacesCount; i > 0; --i) {
- if (hyphens[i] & 1)
- return i + 1 + leadingSpacesCount;
- }
- }
-
- return 0;
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp b/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp
deleted file mode 100644
index 9732e92..0000000
--- a/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2007, The Android Open Source Project
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextBreakIteratorInternalICU.h"
-
-namespace WebCore {
-
-const char* currentSearchLocaleID()
-{
- // FIXME: Should use system locale.
- return "";
-}
-
-const char* currentTextBreakLocaleID()
-{
- // FIXME: Should use system locale.
- return "en_us";
-}
-
-}
diff --git a/WebCore/platform/text/brew/TextBoundariesBrew.cpp b/WebCore/platform/text/brew/TextBoundariesBrew.cpp
deleted file mode 100644
index 506bdcf..0000000
--- a/WebCore/platform/text/brew/TextBoundariesBrew.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2006 Zack Rusin <zack@kde.org>
- * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextBoundaries.h"
-
-#include "NotImplemented.h"
-#include "PlatformString.h"
-
-using namespace WTF::Unicode;
-
-namespace WebCore {
-
-int findNextWordFromIndex(const UChar* buffer, int len, int position, bool forward)
-{
- notImplemented();
- return 0;
-}
-
-void findWordBoundary(const UChar* buffer, int len, int position, int* start, int* end)
-{
- if (position > len) {
- *start = 0;
- *end = 0;
- return;
- }
-
- String str(buffer, len);
-
- int currentPosition = position - 1;
- String foundWord;
- while (currentPosition >= 0 && isLetter(str[currentPosition])) {
- UChar c = str[currentPosition];
- foundWord.insert(&c, 1, 0);
- --currentPosition;
- }
-
- // currentPosition == 0 means the first char is not letter
- // currentPosition == -1 means we reached the beginning
- int startPos = (currentPosition < 0) ? 0 : ++currentPosition;
- currentPosition = position;
- while (isLetter(str[currentPosition])) {
- foundWord.append(str[currentPosition]);
- ++currentPosition;
- }
-
- *start = startPos;
- *end = currentPosition;
-}
-
-}
diff --git a/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp b/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp
deleted file mode 100644
index 7f46e4f..0000000
--- a/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- * Copyright (C) 2007-2009 Torch Mobile, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- *
- */
-
-#include "config.h"
-#include "TextBreakIterator.h"
-
-#include "PlatformString.h"
-#include <wtf/StdLibExtras.h>
-#include <wtf/unicode/Unicode.h>
-
-using namespace WTF::Unicode;
-
-namespace WebCore {
-
-// Hack, not entirely correct
-static inline bool isCharStop(UChar c)
-{
- CharCategory charCategory = category(c);
- return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00));
-}
-
-static inline bool isLineStop(UChar c)
-{
- return category(c) != Separator_Line;
-}
-
-static inline bool isSentenceStop(UChar c)
-{
- return isPunct(c);
-}
-
-class TextBreakIterator {
-public:
- void reset(const UChar* str, int len)
- {
- string = str;
- length = len;
- currentPos = 0;
- }
- virtual int first() = 0;
- virtual int next() = 0;
- virtual int previous() = 0;
- int following(int position)
- {
- currentPos = position;
- return next();
- }
- int preceding(int position)
- {
- currentPos = position;
- return previous();
- }
-
- int currentPos;
- const UChar* string;
- int length;
-};
-
-struct WordBreakIterator: TextBreakIterator {
- virtual int first();
- virtual int next();
- virtual int previous();
-};
-
-struct CharBreakIterator: TextBreakIterator {
- virtual int first();
- virtual int next();
- virtual int previous();
-};
-
-struct LineBreakIterator: TextBreakIterator {
- virtual int first();
- virtual int next();
- virtual int previous();
-};
-
-struct SentenceBreakIterator : TextBreakIterator {
- virtual int first();
- virtual int next();
- virtual int previous();
-};
-
-int WordBreakIterator::first()
-{
- currentPos = 0;
- return currentPos;
-}
-
-int WordBreakIterator::next()
-{
- if (currentPos == length) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos < length) {
- if (haveSpace && !isSpace(string[currentPos]))
- break;
- if (isSpace(string[currentPos]))
- haveSpace = true;
- ++currentPos;
- }
- return currentPos;
-}
-
-int WordBreakIterator::previous()
-{
- if (!currentPos) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos > 0) {
- if (haveSpace && !isSpace(string[currentPos]))
- break;
- if (isSpace(string[currentPos]))
- haveSpace = true;
- --currentPos;
- }
- return currentPos;
-}
-
-int CharBreakIterator::first()
-{
- currentPos = 0;
- return currentPos;
-}
-
-int CharBreakIterator::next()
-{
- if (currentPos >= length)
- return -1;
- ++currentPos;
- while (currentPos < length && !isCharStop(string[currentPos]))
- ++currentPos;
- return currentPos;
-}
-
-int CharBreakIterator::previous()
-{
- if (currentPos <= 0)
- return -1;
- if (currentPos > length)
- currentPos = length;
- --currentPos;
- while (currentPos > 0 && !isCharStop(string[currentPos]))
- --currentPos;
- return currentPos;
-}
-
-int LineBreakIterator::first()
-{
- currentPos = 0;
- return currentPos;
-}
-
-int LineBreakIterator::next()
-{
- if (currentPos == length) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos < length) {
- if (haveSpace && !isLineStop(string[currentPos]))
- break;
- if (isLineStop(string[currentPos]))
- haveSpace = true;
- ++currentPos;
- }
- return currentPos;
-}
-
-int LineBreakIterator::previous()
-{
- if (!currentPos) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos > 0) {
- if (haveSpace && !isLineStop(string[currentPos]))
- break;
- if (isLineStop(string[currentPos]))
- haveSpace = true;
- --currentPos;
- }
- return currentPos;
-}
-
-int SentenceBreakIterator::first()
-{
- currentPos = 0;
- return currentPos;
-}
-
-int SentenceBreakIterator::next()
-{
- if (currentPos == length) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos < length) {
- if (haveSpace && !isSentenceStop(string[currentPos]))
- break;
- if (isSentenceStop(string[currentPos]))
- haveSpace = true;
- ++currentPos;
- }
- return currentPos;
-}
-
-int SentenceBreakIterator::previous()
-{
- if (!currentPos) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos > 0) {
- if (haveSpace && !isSentenceStop(string[currentPos]))
- break;
- if (isSentenceStop(string[currentPos]))
- haveSpace = true;
- --currentPos;
- }
- return currentPos;
-}
-
-TextBreakIterator* wordBreakIterator(const UChar* string, int length)
-{
- DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ());
- iterator.reset(string, length);
- return &iterator;
-}
-
-TextBreakIterator* characterBreakIterator(const UChar* string, int length)
-{
- DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ());
- iterator.reset(string, length);
- return &iterator;
-}
-
-TextBreakIterator* lineBreakIterator(const UChar* string, int length)
-{
- DEFINE_STATIC_LOCAL(LineBreakIterator , iterator, ());
- iterator.reset(string, length);
- return &iterator;
-}
-
-TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
-{
- DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ());
- iterator.reset(string, length);
- return &iterator;
-}
-
-int textBreakFirst(TextBreakIterator* breakIterator)
-{
- return breakIterator->first();
-}
-
-int textBreakNext(TextBreakIterator* breakIterator)
-{
- return breakIterator->next();
-}
-
-int textBreakPreceding(TextBreakIterator* breakIterator, int position)
-{
- return breakIterator->preceding(position);
-}
-
-int textBreakFollowing(TextBreakIterator* breakIterator, int position)
-{
- return breakIterator->following(position);
-}
-
-int textBreakCurrent(TextBreakIterator* breakIterator)
-{
- return breakIterator->currentPos;
-}
-
-bool isTextBreak(TextBreakIterator*, int)
-{
- return true;
-}
-
-TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
-{
- return characterBreakIterator(string, length);
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/brew/TextCodecBrew.cpp b/WebCore/platform/text/brew/TextCodecBrew.cpp
deleted file mode 100644
index 1f32298..0000000
--- a/WebCore/platform/text/brew/TextCodecBrew.cpp
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (C) 2010 Company 100, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextCodecBrew.h"
-
-#include "AEEAppGen.h"
-#include "AEEICharsetConv.h"
-#include "NotImplemented.h"
-#include "PlatformString.h"
-#include <wtf/Assertions.h>
-#include <wtf/text/CString.h>
-
-namespace WebCore {
-
-// FIXME: Not sure if there are Brew MP devices which use big endian.
-const char* WebCore::TextCodecBrew::m_internalEncodingName = "UTF-16LE";
-
-static PassOwnPtr<TextCodec> newTextCodecBrew(const TextEncoding& encoding, const void*)
-{
- return new TextCodecBrew(encoding);
-}
-
-void TextCodecBrew::registerBaseEncodingNames(EncodingNameRegistrar registrar)
-{
- registrar("UTF-8", "UTF-8");
-}
-
-void TextCodecBrew::registerBaseCodecs(TextCodecRegistrar registrar)
-{
- registrar("UTF-8", newTextCodecBrew, 0);
-}
-
-void TextCodecBrew::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
-{
- // FIXME: Not sure how to enumerate all available encodings.
- notImplemented();
-}
-
-void TextCodecBrew::registerExtendedCodecs(TextCodecRegistrar registrar)
-{
- notImplemented();
-}
-
-TextCodecBrew::TextCodecBrew(const TextEncoding& encoding)
- : m_charsetConverter(0)
- , m_encoding(encoding)
- , m_numBufferedBytes(0)
-{
- String format = String::format("%s>%s", encoding.name(), m_internalEncodingName);
-
- IShell* shell = reinterpret_cast<AEEApplet*>(GETAPPINSTANCE())->m_pIShell;
- AEECLSID classID = ISHELL_GetHandler(shell, AEEIID_ICharsetConv, format.latin1().data());
- ISHELL_CreateInstance(shell, classID, reinterpret_cast<void**>(&m_charsetConverter));
-
- ASSERT(m_charsetConverter);
-}
-
-TextCodecBrew::~TextCodecBrew()
-{
- if (m_charsetConverter)
- ICharsetConv_Release(m_charsetConverter);
-}
-
-String TextCodecBrew::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
-{
- int code = ICharsetConv_Initialize(m_charsetConverter, m_encoding.name(), m_internalEncodingName, 0);
- ASSERT(code == AEE_SUCCESS);
-
- Vector<UChar> result;
- Vector<unsigned char> prefixedBytes(length);
-
- int srcSize;
- unsigned char* srcBegin;
-
- if (m_numBufferedBytes) {
- srcSize = length + m_numBufferedBytes;
- prefixedBytes.grow(srcSize);
- memcpy(prefixedBytes.data(), m_bufferedBytes, m_numBufferedBytes);
- memcpy(prefixedBytes.data() + m_numBufferedBytes, bytes, length);
-
- srcBegin = prefixedBytes.data();
-
- // all buffered bytes are consumed now
- m_numBufferedBytes = 0;
- } else {
- srcSize = length;
- srcBegin = const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(bytes));
- }
-
- unsigned char* src = srcBegin;
- unsigned char* srcEnd = srcBegin + srcSize;
-
- Vector<UChar> dstBuffer(srcSize);
-
- while (src < srcEnd) {
- int numCharsConverted;
- unsigned char* dstBegin = reinterpret_cast<unsigned char*>(dstBuffer.data());
- unsigned char* dst = dstBegin;
- int dstSize = dstBuffer.size() * sizeof(UChar);
-
- code = ICharsetConv_CharsetConvert(m_charsetConverter, &src, &srcSize, &dst, &dstSize, &numCharsConverted);
- ASSERT(code != AEE_ENOSUCH);
-
- if (code == AEE_EBUFFERTOOSMALL) {
- // Increase the buffer and try it again.
- dstBuffer.grow(dstBuffer.size() * 2);
- continue;
- }
-
- if (code == AEE_EBADITEM) {
- sawError = true;
- if (stopOnError) {
- result.append(L'?');
- break;
- }
-
- src++;
- }
-
- if (code == AEE_EINCOMPLETEITEM) {
- if (flush) {
- LOG_ERROR("Partial bytes at end of input while flush requested.");
- sawError = true;
- return String();
- }
-
- m_numBufferedBytes = srcEnd - src;
- memcpy(m_bufferedBytes, src, m_numBufferedBytes);
- break;
- }
-
- int numChars = (dst - dstBegin) / sizeof(UChar);
- if (numChars > 0)
- result.append(dstBuffer.data(), numChars);
- }
-
- return String::adopt(result);
-}
-
-CString TextCodecBrew::encode(const UChar* characters, size_t length, UnencodableHandling handling)
-{
- if (!length)
- return "";
-
- unsigned int replacementCharacter = '?';
-
- // FIXME: Impossible to handle EntitiesForUnencodables or URLEncodedEntitiesForUnencodables with ICharsetConv.
- int code = ICharsetConv_Initialize(m_charsetConverter, m_internalEncodingName, m_encoding.name(), replacementCharacter);
- ASSERT(code == AEE_SUCCESS);
-
- Vector<char> result;
-
- int srcSize = length * sizeof(UChar);
- unsigned char* srcBegin = const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(characters));
- unsigned char* src = srcBegin;
- unsigned char* srcEnd = srcBegin + srcSize;
-
- Vector<unsigned char> dstBuffer(length * sizeof(UChar));
-
- while (src < srcEnd) {
- int numCharsConverted;
- unsigned char* dstBegin = dstBuffer.data();
- unsigned char* dst = dstBegin;
- int dstSize = dstBuffer.size();
-
- code = ICharsetConv_CharsetConvert(m_charsetConverter, &src, &srcSize, &dst, &dstSize, &numCharsConverted);
- ASSERT(code != AEE_EINCOMPLETEITEM);
-
- if (code == AEE_ENOSUCH) {
- LOG_ERROR("Conversion error, Code=%d", code);
- return CString();
- }
-
- if (code == AEE_EBUFFERTOOSMALL) {
- // Increase the buffer and try it again.
- dstBuffer.grow(dstBuffer.size() * 2);
- continue;
- }
-
- if (code == AEE_EBADITEM)
- src += sizeof(UChar); // Skip the invalid character
-
- int numBytes = dst - dstBegin;
- if (numBytes > 0)
- result.append(dstBuffer.data(), numBytes);
- }
-
- return CString(result.data(), result.size());
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/brew/TextCodecBrew.h b/WebCore/platform/text/brew/TextCodecBrew.h
deleted file mode 100644
index 97e2c87..0000000
--- a/WebCore/platform/text/brew/TextCodecBrew.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2010 Company 100, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodecBrew_h
-#define TextCodecBrew_h
-
-#include "TextCodec.h"
-#include "TextEncoding.h"
-
-typedef struct ICharsetConv ICharsetConv;
-
-namespace WebCore {
-
-class TextCodecBrew : public TextCodec {
-public:
- static void registerBaseEncodingNames(EncodingNameRegistrar);
- static void registerBaseCodecs(TextCodecRegistrar);
-
- static void registerExtendedEncodingNames(EncodingNameRegistrar);
- static void registerExtendedCodecs(TextCodecRegistrar);
-
- TextCodecBrew(const TextEncoding&);
- virtual ~TextCodecBrew();
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
-
-private:
- TextEncoding m_encoding;
- size_t m_numBufferedBytes;
- unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
- ICharsetConv* m_charsetConverter;
-
- static const char* m_internalEncodingName;
-};
-
-} // namespace WebCore
-
-#endif // TextCodecBrew_h
diff --git a/WebCore/platform/text/cf/HyphenationCF.cpp b/WebCore/platform/text/cf/HyphenationCF.cpp
deleted file mode 100644
index 3adacad..0000000
--- a/WebCore/platform/text/cf/HyphenationCF.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "Hyphenation.h"
-
-#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD)
-
-#include "AtomicStringKeyedMRUCache.h"
-#include "TextBreakIteratorInternalICU.h"
-#include <wtf/ListHashSet.h>
-#include <wtf/RetainPtr.h>
-
-namespace WebCore {
-
-#if !PLATFORM(WIN) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
-
-template<>
-RetainPtr<CFLocaleRef> AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >::createValueForNullKey()
-{
- RetainPtr<CFLocaleRef> locale(AdoptCF, CFLocaleCopyCurrent());
-
- return CFStringIsHyphenationAvailableForLocale(locale.get()) ? locale : 0;
-}
-
-template<>
-RetainPtr<CFLocaleRef> AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >::createValueForKey(const AtomicString& localeIdentifier)
-{
- RetainPtr<CFStringRef> cfLocaleIdentifier(AdoptCF, localeIdentifier.createCFString());
- RetainPtr<CFLocaleRef> locale(AdoptCF, CFLocaleCreate(kCFAllocatorDefault, cfLocaleIdentifier.get()));
-
- return CFStringIsHyphenationAvailableForLocale(locale.get()) ? locale : 0;
-}
-
-static AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >& cfLocaleCache()
-{
- DEFINE_STATIC_LOCAL(AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >, cache, ());
- return cache;
-}
-
-bool canHyphenate(const AtomicString& localeIdentifier)
-{
- return cfLocaleCache().get(localeIdentifier);
-}
-
-size_t lastHyphenLocation(const UChar* characters, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier)
-{
- RetainPtr<CFStringRef> string(AdoptCF, CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, reinterpret_cast<const UniChar*>(characters), length, kCFAllocatorNull));
-
- RetainPtr<CFLocaleRef> locale = cfLocaleCache().get(localeIdentifier);
- ASSERT(locale);
-
- CFIndex result = CFStringGetHyphenationLocationBeforeIndex(string.get(), beforeIndex, CFRangeMake(0, length), 0, locale.get(), 0);
- return result == kCFNotFound ? 0 : result;
-}
-
-#else
-
-bool canHyphenate(const AtomicString&)
-{
- return false;
-}
-
-size_t lastHyphenLocation(const UChar*, size_t, size_t, const AtomicString&)
-{
- ASSERT_NOT_REACHED();
- return 0;
-}
-
-#endif // PLATFORM(WIN) && (!defined(MAC_OS_X_VERSION_10_7) || MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7)
-
-} // namespace WebCore
-
-#endif // !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD)
diff --git a/WebCore/platform/text/cf/StringCF.cpp b/WebCore/platform/text/cf/StringCF.cpp
deleted file mode 100644
index dcaf8fb..0000000
--- a/WebCore/platform/text/cf/StringCF.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Copyright (C) 2006 Apple Computer, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include "PlatformString.h"
-
-#if PLATFORM(CF)
-
-#include <CoreFoundation/CoreFoundation.h>
-
-namespace WTF {
-
-String::String(CFStringRef str)
-{
- if (!str)
- return;
-
- CFIndex size = CFStringGetLength(str);
- if (size == 0)
- m_impl = StringImpl::empty();
- else {
- Vector<UChar, 1024> buffer(size);
- CFStringGetCharacters(str, CFRangeMake(0, size), (UniChar*)buffer.data());
- m_impl = StringImpl::create(buffer.data(), size);
- }
-}
-
-CFStringRef String::createCFString() const
-{
- if (!m_impl)
- return static_cast<CFStringRef>(CFRetain(CFSTR("")));
-
- return m_impl->createCFString();
-}
-
-}
-
-#endif // PLATFORM(CF)
diff --git a/WebCore/platform/text/cf/StringImplCF.cpp b/WebCore/platform/text/cf/StringImplCF.cpp
deleted file mode 100644
index 0157918..0000000
--- a/WebCore/platform/text/cf/StringImplCF.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (C) 2006, 2009 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include <wtf/text/StringImpl.h>
-
-#if PLATFORM(CF)
-
-#include <CoreFoundation/CoreFoundation.h>
-#include <wtf/MainThread.h>
-#include <wtf/PassRefPtr.h>
-#include <wtf/Threading.h>
-
-#if PLATFORM(MAC) && !defined(BUILDING_ON_TIGER)
-#include <objc/objc-auto.h>
-#endif
-
-namespace WTF {
-
-namespace StringWrapperCFAllocator {
-
- static StringImpl* currentString;
-
- static const void* retain(const void* info)
- {
- return info;
- }
-
- static void release(const void*)
- {
- ASSERT_NOT_REACHED();
- }
-
- static CFStringRef copyDescription(const void*)
- {
- return CFSTR("WTF::String-based allocator");
- }
-
- static void* allocate(CFIndex size, CFOptionFlags, void*)
- {
- StringImpl* underlyingString = 0;
- if (isMainThread()) {
- underlyingString = currentString;
- if (underlyingString) {
- currentString = 0;
- underlyingString->ref(); // Balanced by call to deref in deallocate below.
- }
- }
- StringImpl** header = static_cast<StringImpl**>(fastMalloc(sizeof(StringImpl*) + size));
- *header = underlyingString;
- return header + 1;
- }
-
- static void* reallocate(void* pointer, CFIndex newSize, CFOptionFlags, void*)
- {
- size_t newAllocationSize = sizeof(StringImpl*) + newSize;
- StringImpl** header = static_cast<StringImpl**>(pointer) - 1;
- ASSERT(!*header);
- header = static_cast<StringImpl**>(fastRealloc(header, newAllocationSize));
- return header + 1;
- }
-
- static void deallocateOnMainThread(void* headerPointer)
- {
- StringImpl** header = static_cast<StringImpl**>(headerPointer);
- StringImpl* underlyingString = *header;
- ASSERT(underlyingString);
- underlyingString->deref(); // Balanced by call to ref in allocate above.
- fastFree(header);
- }
-
- static void deallocate(void* pointer, void*)
- {
- StringImpl** header = static_cast<StringImpl**>(pointer) - 1;
- StringImpl* underlyingString = *header;
- if (!underlyingString)
- fastFree(header);
- else {
- if (!isMainThread())
- callOnMainThread(deallocateOnMainThread, header);
- else {
- underlyingString->deref(); // Balanced by call to ref in allocate above.
- fastFree(header);
- }
- }
- }
-
- static CFIndex preferredSize(CFIndex size, CFOptionFlags, void*)
- {
- // FIXME: If FastMalloc provided a "good size" callback, we'd want to use it here.
- // Note that this optimization would help performance for strings created with the
- // allocator that are mutable, and those typically are only created by callers who
- // make a new string using the old string's allocator, such as some of the call
- // sites in CFURL.
- return size;
- }
-
- static CFAllocatorRef create()
- {
-#if PLATFORM(MAC) && !defined(BUILDING_ON_TIGER)
- // Since garbage collection isn't compatible with custom allocators, don't use this at all when garbage collection is active.
- if (objc_collectingEnabled())
- return 0;
-#endif
- CFAllocatorContext context = { 0, 0, retain, release, copyDescription, allocate, reallocate, deallocate, preferredSize };
- return CFAllocatorCreate(0, &context);
- }
-
- static CFAllocatorRef allocator()
- {
- static CFAllocatorRef allocator = create();
- return allocator;
- }
-
-}
-
-CFStringRef StringImpl::createCFString()
-{
- CFAllocatorRef allocator = (m_length && isMainThread()) ? StringWrapperCFAllocator::allocator() : 0;
- if (!allocator)
- return CFStringCreateWithCharacters(0, reinterpret_cast<const UniChar*>(m_data), m_length);
-
- // Put pointer to the StringImpl in a global so the allocator can store it with the CFString.
- ASSERT(!StringWrapperCFAllocator::currentString);
- StringWrapperCFAllocator::currentString = this;
-
- CFStringRef string = CFStringCreateWithCharactersNoCopy(allocator, reinterpret_cast<const UniChar*>(m_data), m_length, kCFAllocatorNull);
-
- // The allocator cleared the global when it read it, but also clear it here just in case.
- ASSERT(!StringWrapperCFAllocator::currentString);
- StringWrapperCFAllocator::currentString = 0;
-
- return string;
-}
-
-// On StringImpl creation we could check if the allocator is the StringWrapperCFAllocator.
-// If it is, then we could find the original StringImpl and just return that. But to
-// do that we'd have to compute the offset from CFStringRef to the allocated block;
-// the CFStringRef is *not* at the start of an allocated block. Testing shows 1000x
-// more calls to createCFString than calls to the create functions with the appropriate
-// allocator, so it's probably not urgent optimize that case.
-
-}
-
-#endif // PLATFORM(CF)
diff --git a/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp b/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp
deleted file mode 100644
index e390a65..0000000
--- a/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2007 Apple Inc. All rights reserved.
- * Copyright (C) 2008, 2009 Google Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- *
- */
-
-#include "config.h"
-#include "TextBreakIteratorInternalICU.h"
-
-#include "Language.h"
-#include "PlatformString.h"
-#include <wtf/StdLibExtras.h>
-#include <wtf/text/CString.h>
-
-namespace WebCore {
-
-static const char* UILanguage()
-{
- // Chrome's UI language can be different from the OS UI language on Windows.
- // We want to return Chrome's UI language here.
- DEFINE_STATIC_LOCAL(CString, locale, (defaultLanguage().latin1()));
- return locale.data();
-}
-
-const char* currentSearchLocaleID()
-{
- return UILanguage();
-}
-
-const char* currentTextBreakLocaleID()
-{
- return UILanguage();
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp b/WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp
deleted file mode 100644
index 0056869..0000000
--- a/WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2007 Alp Toker <alp@atoker.com>
- * Copyright (C) 2009-2010 ProFUSION embedded systems
- * Copyright (C) 2009-2010 Samsung Electronics
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "config.h"
-#include "TextBreakIteratorInternalICU.h"
-
-namespace WebCore {
-
-const char* currentSearchLocaleID()
-{
- // FIXME: Should use system locale.
- return "";
-}
-
-const char* currentTextBreakLocaleID()
-{
- return "en_us";
-}
-
-}
diff --git a/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp b/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp
deleted file mode 100644
index 990e331..0000000
--- a/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- * Copyright (C) 2007 Apple Inc. All rights reserved.
- * Copyright (C) 2008 JĂĽrg Billeter <j@bitron.ch>
- * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
- * Copyright (C) 2010 Igalia S.L.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-
-#include "TextBreakIterator.h"
-
-#include "GOwnPtr.h"
-#include <pango/pango.h>
-using namespace std;
-
-#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF)
-
-namespace WebCore {
-
-class CharacterIterator {
-public:
- bool setText(const UChar* string, int length);
- const gchar* getText() { return m_utf8.get(); }
- int getLength() { return m_length; }
- glong getSize() { return m_size; }
- void setIndex(int index);
- int getIndex() { return m_index; }
- void setUTF16Index(int index);
- int getUTF16Index() { return m_utf16Index; }
- int getUTF16Length() { return m_utf16Length; }
- int first();
- int last();
- int next();
- int previous();
-private:
- int characterSize(int index);
-
- GOwnPtr<char> m_utf8;
- int m_length;
- long m_size;
- int m_index;
- int m_utf16Index;
- int m_utf16Length;
-};
-
-int CharacterIterator::characterSize(int index)
-{
- if (index == m_length || index < 0)
- return 0;
- if (m_length == m_utf16Length)
- return 1;
-
- gchar* indexPtr = g_utf8_offset_to_pointer(m_utf8.get(), index);
- gunichar character = g_utf8_get_char(indexPtr);
- return UTF8_IS_SURROGATE(character) ? 2 : 1;
-}
-
-bool CharacterIterator::setText(const UChar* string, int length)
-{
- long utf8Size = 0;
- m_utf8.set(g_utf16_to_utf8(string, length, 0, &utf8Size, 0));
- if (!utf8Size)
- return false;
-
- m_utf16Length = length;
- m_length = g_utf8_strlen(m_utf8.get(), utf8Size);
- m_size = utf8Size;
- m_index = 0;
- m_utf16Index = 0;
-
- return true;
-}
-
-void CharacterIterator::setIndex(int index)
-{
- if (index == m_index)
- return;
- if (index <= 0)
- m_index = m_utf16Index = 0;
- else if (index >= m_length) {
- m_index = m_length;
- m_utf16Index = m_utf16Length;
- } else if (m_length == m_utf16Length)
- m_index = m_utf16Index = index;
- else {
- m_index = index;
- int utf16Index = 0;
- int utf8Index = 0;
- while (utf8Index < index) {
- utf16Index += characterSize(utf8Index);
- utf8Index++;
- }
- m_utf16Index = utf16Index;
- }
-}
-
-void CharacterIterator::setUTF16Index(int index)
-{
- if (index == m_utf16Index)
- return;
- if (index <= 0)
- m_utf16Index = m_index = 0;
- else if (index >= m_utf16Length) {
- m_utf16Index = m_utf16Length;
- m_index = m_length;
- } else if (m_length == m_utf16Length)
- m_utf16Index = m_index = index;
- else {
- m_utf16Index = index;
- int utf16Index = 0;
- int utf8Index = 0;
- while (utf16Index < index) {
- utf16Index += characterSize(utf8Index);
- utf8Index++;
- }
- m_index = utf8Index;
- }
-}
-
-int CharacterIterator::first()
-{
- m_index = m_utf16Index = 0;
- return m_index;
-}
-
-int CharacterIterator::last()
-{
- m_index = m_length;
- m_utf16Index = m_utf16Length;
- return m_index;
-}
-
-int CharacterIterator::next()
-{
- int next = m_index + 1;
-
- if (next <= m_length) {
- m_utf16Index = min(m_utf16Index + characterSize(m_index), m_utf16Length);
- m_index = next;
- } else {
- m_index = TextBreakDone;
- m_utf16Index = TextBreakDone;
- }
-
- return m_index;
-}
-
-int CharacterIterator::previous()
-{
- int previous = m_index - 1;
-
- if (previous >= 0) {
- m_utf16Index = max(m_utf16Index - characterSize(previous), 0);
- m_index = previous;
- } else {
- m_index = TextBreakDone;
- m_utf16Index = TextBreakDone;
- }
-
- return m_index;
-}
-
-enum UBreakIteratorType {
- UBRK_CHARACTER,
- UBRK_WORD,
- UBRK_LINE,
- UBRK_SENTENCE
-};
-
-class TextBreakIterator {
-public:
- UBreakIteratorType m_type;
- PangoLogAttr* m_logAttrs;
- CharacterIterator m_charIterator;
-};
-
-static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator,
- UBreakIteratorType type, const UChar* string, int length)
-{
- if (!string)
- return 0;
-
- if (!createdIterator) {
- iterator = new TextBreakIterator();
- createdIterator = true;
- }
- if (!iterator)
- return 0;
-
- if (!iterator->m_charIterator.setText(string, length))
- return 0;
-
- int charLength = iterator->m_charIterator.getLength();
-
- iterator->m_type = type;
- if (createdIterator)
- g_free(iterator->m_logAttrs);
- iterator->m_logAttrs = g_new0(PangoLogAttr, charLength + 1);
- pango_get_log_attrs(iterator->m_charIterator.getText(), iterator->m_charIterator.getSize(),
- -1, 0, iterator->m_logAttrs, charLength + 1);
-
- return iterator;
-}
-
-TextBreakIterator* characterBreakIterator(const UChar* string, int length)
-{
- static bool createdCharacterBreakIterator = false;
- static TextBreakIterator* staticCharacterBreakIterator;
- return setUpIterator(createdCharacterBreakIterator, staticCharacterBreakIterator, UBRK_CHARACTER, string, length);
-}
-
-TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
-{
- // FIXME: This needs closer inspection to achieve behaviour identical to the ICU version.
- return characterBreakIterator(string, length);
-}
-
-TextBreakIterator* wordBreakIterator(const UChar* string, int length)
-{
- static bool createdWordBreakIterator = false;
- static TextBreakIterator* staticWordBreakIterator;
- return setUpIterator(createdWordBreakIterator, staticWordBreakIterator, UBRK_WORD, string, length);
-}
-
-TextBreakIterator* lineBreakIterator(const UChar* string, int length)
-{
- static bool createdLineBreakIterator = false;
- static TextBreakIterator* staticLineBreakIterator;
- return setUpIterator(createdLineBreakIterator, staticLineBreakIterator, UBRK_LINE, string, length);
-}
-
-TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
-{
- static bool createdSentenceBreakIterator = false;
- static TextBreakIterator* staticSentenceBreakIterator;
- return setUpIterator(createdSentenceBreakIterator, staticSentenceBreakIterator, UBRK_SENTENCE, string, length);
-}
-
-int textBreakFirst(TextBreakIterator* iterator)
-{
- iterator->m_charIterator.first();
- return iterator->m_charIterator.getUTF16Index();
-}
-
-int textBreakLast(TextBreakIterator* iterator)
-{
- // TextBreakLast is not meant to find just any break according to bi->m_type
- // but really the one near the last character.
- // (cmp ICU documentation for ubrk_first and ubrk_last)
- // From ICU docs for ubrk_last:
- // "Determine the index immediately beyond the last character in the text being scanned."
-
- // So we should advance or traverse back based on bi->m_logAttrs cursor positions.
- // If last character position in the original string is a whitespace,
- // traverse to the left until the first non-white character position is found
- // and return the position of the first white-space char after this one.
- // Otherwise return m_length, as "the first character beyond the last" is outside our string.
-
- bool whiteSpaceAtTheEnd = true;
- int nextWhiteSpacePos = iterator->m_charIterator.getLength();
-
- int pos = iterator->m_charIterator.last();
- while (pos >= 0 && whiteSpaceAtTheEnd) {
- if (iterator->m_logAttrs[pos].is_cursor_position) {
- if (whiteSpaceAtTheEnd = iterator->m_logAttrs[pos].is_white)
- nextWhiteSpacePos = pos;
- }
- pos = iterator->m_charIterator.previous();
- }
- iterator->m_charIterator.setIndex(nextWhiteSpacePos);
- return iterator->m_charIterator.getUTF16Index();
-}
-
-int textBreakNext(TextBreakIterator* iterator)
-{
- while (iterator->m_charIterator.next() != TextBreakDone) {
- int index = iterator->m_charIterator.getIndex();
-
- // FIXME: UBRK_WORD case: Single multibyte characters (i.e. white space around them), such as the euro symbol €,
- // are not marked as word_start & word_end as opposed to the way ICU does it.
- // This leads to - for example - different word selection behaviour when right clicking.
-
- if ((iterator->m_type == UBRK_LINE && iterator->m_logAttrs[index].is_line_break)
- || (iterator->m_type == UBRK_WORD && (iterator->m_logAttrs[index].is_word_start || iterator->m_logAttrs[index].is_word_end))
- || (iterator->m_type == UBRK_CHARACTER && iterator->m_logAttrs[index].is_cursor_position)
- || (iterator->m_type == UBRK_SENTENCE && iterator->m_logAttrs[index].is_sentence_boundary)) {
- break;
- }
- }
- return iterator->m_charIterator.getUTF16Index();
-}
-
-int textBreakPrevious(TextBreakIterator* iterator)
-{
- while (iterator->m_charIterator.previous() != TextBreakDone) {
- int index = iterator->m_charIterator.getIndex();
-
- if ((iterator->m_type == UBRK_LINE && iterator->m_logAttrs[index].is_line_break)
- || (iterator->m_type == UBRK_WORD && (iterator->m_logAttrs[index].is_word_start || iterator->m_logAttrs[index].is_word_end))
- || (iterator->m_type == UBRK_CHARACTER && iterator->m_logAttrs[index].is_cursor_position)
- || (iterator->m_type == UBRK_SENTENCE && iterator->m_logAttrs[index].is_sentence_boundary)) {
- break;
- }
- }
- return iterator->m_charIterator.getUTF16Index();
-}
-
-int textBreakPreceding(TextBreakIterator* iterator, int offset)
-{
- if (offset > iterator->m_charIterator.getUTF16Length())
- return TextBreakDone;
- if (offset < 0)
- return 0;
- iterator->m_charIterator.setUTF16Index(offset);
- return textBreakPrevious(iterator);
-}
-
-int textBreakFollowing(TextBreakIterator* iterator, int offset)
-{
- if (offset > iterator->m_charIterator.getUTF16Length())
- return TextBreakDone;
- if (offset < 0)
- return 0;
- iterator->m_charIterator.setUTF16Index(offset);
- return textBreakNext(iterator);
-}
-
-int textBreakCurrent(TextBreakIterator* iterator)
-{
- return iterator->m_charIterator.getUTF16Index();
-}
-
-bool isTextBreak(TextBreakIterator* iterator, int offset)
-{
- if (!offset)
- return true;
- if (offset > iterator->m_charIterator.getUTF16Length())
- return false;
-
- iterator->m_charIterator.setUTF16Index(offset);
-
- int index = iterator->m_charIterator.getIndex();
- iterator->m_charIterator.previous();
- textBreakNext(iterator);
- return iterator->m_charIterator.getIndex() == index;
-}
-
-}
diff --git a/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp b/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp
deleted file mode 100644
index 35e5a05..0000000
--- a/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2007 Alp Toker <alp@atoker.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "config.h"
-#include "TextBreakIteratorInternalICU.h"
-
-namespace WebCore {
-
-const char* currentSearchLocaleID()
-{
- // FIXME: Should use system locale.
- return "";
-}
-
-const char* currentTextBreakLocaleID()
-{
- // FIXME: Should use system locale.
- return "en_us";
-}
-
-}
diff --git a/WebCore/platform/text/gtk/TextCodecGtk.cpp b/WebCore/platform/text/gtk/TextCodecGtk.cpp
deleted file mode 100644
index 9308b33..0000000
--- a/WebCore/platform/text/gtk/TextCodecGtk.cpp
+++ /dev/null
@@ -1,578 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
- * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextCodecGtk.h"
-
-#include <gio/gio.h>
-#include "GOwnPtr.h"
-#include "Logging.h"
-#include "PlatformString.h"
-#include <wtf/Assertions.h>
-#include <wtf/HashMap.h>
-#include <wtf/text/CString.h>
-
-using std::min;
-
-namespace WebCore {
-
-// TextCodec's appendOmittingBOM() is gone (http://trac.webkit.org/changeset/33380).
-// That's why we need to avoid generating extra BOM's for the conversion result.
-// This can be achieved by specifying the UTF-16 codecs' endianness explicitly when initializing GLib.
-
-#if (G_BYTE_ORDER == G_BIG_ENDIAN)
-static const gchar* internalEncodingName = "UTF-16BE";
-#else
-static const gchar* internalEncodingName = "UTF-16LE";
-#endif
-
-
-const size_t ConversionBufferSize = 16384;
-
-
-static PassOwnPtr<TextCodec> newTextCodecGtk(const TextEncoding& encoding, const void*)
-{
- return new TextCodecGtk(encoding);
-}
-
-static bool isEncodingAvailable(const gchar* encodingName)
-{
- GIConv tester;
- // test decoding
- tester = g_iconv_open(internalEncodingName, encodingName);
- if (tester == reinterpret_cast<GIConv>(-1)) {
- return false;
- } else {
- g_iconv_close(tester);
- // test encoding
- tester = g_iconv_open(encodingName, internalEncodingName);
- if (tester == reinterpret_cast<GIConv>(-1)) {
- return false;
- } else {
- g_iconv_close(tester);
- return true;
- }
- }
-}
-
-static bool registerEncodingNameIfAvailable(EncodingNameRegistrar registrar, const char* canonicalName)
-{
- if (isEncodingAvailable(canonicalName)) {
- registrar(canonicalName, canonicalName);
- return true;
- }
-
- return false;
-}
-
-static void registerEncodingAliasIfAvailable(EncodingNameRegistrar registrar, const char* canonicalName, const char* aliasName)
-{
- if (isEncodingAvailable(aliasName))
- registrar(aliasName, canonicalName);
-}
-
-static void registerCodecIfAvailable(TextCodecRegistrar registrar, const char* codecName)
-{
- if (isEncodingAvailable(codecName))
- registrar(codecName, newTextCodecGtk, 0);
-}
-
-void TextCodecGtk::registerBaseEncodingNames(EncodingNameRegistrar registrar)
-{
- // Unicode
- registerEncodingNameIfAvailable(registrar, "UTF-8");
- registerEncodingNameIfAvailable(registrar, "UTF-32");
- registerEncodingNameIfAvailable(registrar, "UTF-32BE");
- registerEncodingNameIfAvailable(registrar, "UTF-32LE");
-
- // Western
- if (registerEncodingNameIfAvailable(registrar, "ISO-8859-1")) {
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "CP819");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "IBM819");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "ISO-IR-100");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "ISO8859-1");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "ISO_8859-1");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "ISO_8859-1:1987");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "L1");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "LATIN1");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "CSISOLATIN1");
- }
-}
-
-void TextCodecGtk::registerBaseCodecs(TextCodecRegistrar registrar)
-{
- // Unicode
- registerCodecIfAvailable(registrar, "UTF-8");
- registerCodecIfAvailable(registrar, "UTF-32");
- registerCodecIfAvailable(registrar, "UTF-32BE");
- registerCodecIfAvailable(registrar, "UTF-32LE");
-
- // Western
- registerCodecIfAvailable(registrar, "ISO-8859-1");
-}
-
-void TextCodecGtk::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
-{
- // Western
- if (registerEncodingNameIfAvailable(registrar, "MACROMAN")) {
- registerEncodingAliasIfAvailable(registrar, "MACROMAN", "MAC");
- registerEncodingAliasIfAvailable(registrar, "MACROMAN", "MACINTOSH");
- registerEncodingAliasIfAvailable(registrar, "MACROMAN", "CSMACINTOSH");
- }
-
- // Japanese
- if (registerEncodingNameIfAvailable(registrar, "Shift_JIS")) {
- registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "MS_KANJI");
- registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "SHIFT-JIS");
- registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "SJIS");
- registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "CSSHIFTJIS");
- }
- if (registerEncodingNameIfAvailable(registrar, "EUC-JP")) {
- registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EUC_JP");
- registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EUCJP");
- registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE");
- registerEncodingAliasIfAvailable(registrar, "EUC-JP", "CSEUCPKDFMTJAPANESE");
- }
- registerEncodingNameIfAvailable(registrar, "ISO-2022-JP");
-
- // Traditional Chinese
- if (registerEncodingNameIfAvailable(registrar, "BIG5")) {
- registerEncodingAliasIfAvailable(registrar, "BIG5", "BIG-5");
- registerEncodingAliasIfAvailable(registrar, "BIG5", "BIG-FIVE");
- registerEncodingAliasIfAvailable(registrar, "BIG5", "BIGFIVE");
- registerEncodingAliasIfAvailable(registrar, "BIG5", "CN-BIG5");
- registerEncodingAliasIfAvailable(registrar, "BIG5", "CSBIG5");
- }
- if (registerEncodingNameIfAvailable(registrar, "BIG5-HKSCS")) {
- registerEncodingAliasIfAvailable(registrar, "BIG5-HKSCS", "BIG5-HKSCS:2004");
- registerEncodingAliasIfAvailable(registrar, "BIG5-HKSCS", "BIG5HKSCS");
- }
- registerEncodingNameIfAvailable(registrar, "CP950");
-
- // Korean
- if (registerEncodingNameIfAvailable(registrar, "ISO-2022-KR"))
- registerEncodingAliasIfAvailable(registrar, "ISO-2022-KR", "CSISO2022KR");
- if (registerEncodingNameIfAvailable(registrar, "CP949"))
- registerEncodingAliasIfAvailable(registrar, "CP949", "UHC");
- if (registerEncodingNameIfAvailable(registrar, "EUC-KR"))
- registerEncodingAliasIfAvailable(registrar, "EUC-KR", "CSEUCKR");
-
- // Arabic
- if (registerEncodingNameIfAvailable(registrar, "ISO-8859-6")) {
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ARABIC");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ASMO-708");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ECMA-114");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO-IR-127");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO8859-6");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO_8859-6");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO_8859-6:1987");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "CSISOLATINARABIC");
- }
- // rearranged, windows-1256 now declared the canonical name and put to lowercase to fix /fast/encoding/ahram-org-eg.html test case
- if (registerEncodingNameIfAvailable(registrar, "windows-1256")) {
- registerEncodingAliasIfAvailable(registrar, "windows-1256", "CP1256");
- registerEncodingAliasIfAvailable(registrar, "windows-1256", "MS-ARAB");
- }
-
- // Hebrew
- if (registerEncodingNameIfAvailable(registrar, "ISO-8859-8")) {
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "HEBREW");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO-8859-8");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO-IR-138");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO8859-8");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO_8859-8");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO_8859-8:1988");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "CSISOLATINHEBREW");
- }
- // rearranged, moved windows-1255 as canonical and lowercased, fixing /fast/encoding/meta-charset.html
- if (registerEncodingNameIfAvailable(registrar, "windows-1255")) {
- registerEncodingAliasIfAvailable(registrar, "windows-1255", "CP1255");
- registerEncodingAliasIfAvailable(registrar, "windows-1255", "MS-HEBR");
- }
-
- // Greek
- if (registerEncodingNameIfAvailable(registrar, "ISO-8859-7")) {
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ECMA-118");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ELOT_928");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "GREEK");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "GREEK8");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO-IR-126");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO8859-7");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7:1987");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7:2003");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "CSI");
- }
- if (registerEncodingNameIfAvailable(registrar, "CP869")) {
- registerEncodingAliasIfAvailable(registrar, "CP869", "869");
- registerEncodingAliasIfAvailable(registrar, "CP869", "CP-GR");
- registerEncodingAliasIfAvailable(registrar, "CP869", "IBM869");
- registerEncodingAliasIfAvailable(registrar, "CP869", "CSIBM869");
- }
- registerEncodingNameIfAvailable(registrar, "WINDOWS-1253");
-
- // Cyrillic
- if (registerEncodingNameIfAvailable(registrar, "ISO-8859-5")) {
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "CYRILLIC");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO-IR-144");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO8859-5");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO_8859-5");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO_8859-5:1988");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "CSISOLATINCYRILLIC");
- }
- if (registerEncodingNameIfAvailable(registrar, "KOI8-R"))
- registerEncodingAliasIfAvailable(registrar, "KOI8-R", "CSKOI8R");
- if (registerEncodingNameIfAvailable(registrar, "CP866")) {
- registerEncodingAliasIfAvailable(registrar, "CP866", "866");
- registerEncodingAliasIfAvailable(registrar, "CP866", "IBM866");
- registerEncodingAliasIfAvailable(registrar, "CP866", "CSIBM866");
- }
- registerEncodingNameIfAvailable(registrar, "KOI8-U");
- // CP1251 added to pass /fast/encoding/charset-cp1251.html
- if (registerEncodingNameIfAvailable(registrar, "windows-1251"))
- registerEncodingAliasIfAvailable(registrar, "windows-1251", "CP1251");
- if (registerEncodingNameIfAvailable(registrar, "mac-cyrillic")) {
- registerEncodingAliasIfAvailable(registrar, "mac-cyrillic", "MACCYRILLIC");
- registerEncodingAliasIfAvailable(registrar, "mac-cyrillic", "x-mac-cyrillic");
- }
-
- // Thai
- if (registerEncodingNameIfAvailable(registrar, "CP874"))
- registerEncodingAliasIfAvailable(registrar, "CP874", "WINDOWS-874");
- registerEncodingNameIfAvailable(registrar, "TIS-620");
-
- // Simplified Chinese
- registerEncodingNameIfAvailable(registrar, "GBK");
- if (registerEncodingNameIfAvailable(registrar, "HZ"))
- registerEncodingAliasIfAvailable(registrar, "HZ", "HZ-GB-2312");
- registerEncodingNameIfAvailable(registrar, "GB18030");
- if (registerEncodingNameIfAvailable(registrar, "EUC-CN")) {
- registerEncodingAliasIfAvailable(registrar, "EUC-CN", "EUCCN");
- registerEncodingAliasIfAvailable(registrar, "EUC-CN", "GB2312");
- registerEncodingAliasIfAvailable(registrar, "EUC-CN", "CN-GB");
- registerEncodingAliasIfAvailable(registrar, "EUC-CN", "CSGB2312");
- registerEncodingAliasIfAvailable(registrar, "EUC-CN", "EUC_CN");
- }
- if (registerEncodingNameIfAvailable(registrar, "GB_2312-80")) {
- registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "CHINESE");
- registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "csISO58GB231280");
- registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "GB2312.1980-0");
- registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "ISO-IR-58");
- }
-
- // Central European
- if (registerEncodingNameIfAvailable(registrar, "ISO-8859-2")) {
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO-IR-101");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO8859-2");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO_8859-2");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO_8859-2:1987");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "L2");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "LATIN2");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "CSISOLATIN2");
- }
- if (registerEncodingNameIfAvailable(registrar, "CP1250")) {
- registerEncodingAliasIfAvailable(registrar, "CP1250", "MS-EE");
- registerEncodingAliasIfAvailable(registrar, "CP1250", "WINDOWS-1250");
- }
- registerEncodingNameIfAvailable(registrar, "MAC-CENTRALEUROPE");
-
- // Vietnamese
- if (registerEncodingNameIfAvailable(registrar, "CP1258"))
- registerEncodingAliasIfAvailable(registrar, "CP1258", "WINDOWS-1258");
-
- // Turkish
- if (registerEncodingNameIfAvailable(registrar, "CP1254")) {
- registerEncodingAliasIfAvailable(registrar, "CP1254", "MS-TURK");
- registerEncodingAliasIfAvailable(registrar, "CP1254", "WINDOWS-1254");
- }
- if (registerEncodingNameIfAvailable(registrar, "ISO-8859-9")) {
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO-IR-148");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO8859-9");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO_8859-9");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO_8859-9:1989");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "L5");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "LATIN5");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "CSISOLATIN5");
- }
-
- // Baltic
- if (registerEncodingNameIfAvailable(registrar, "CP1257")) {
- registerEncodingAliasIfAvailable(registrar, "CP1257", "WINBALTRIM");
- registerEncodingAliasIfAvailable(registrar, "CP1257", "WINDOWS-1257");
- }
- if (registerEncodingNameIfAvailable(registrar, "ISO-8859-4")) {
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO-IR-110");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO8859-4");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO_8859-4");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO_8859-4:1988");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "L4");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "LATIN4");
- registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "CSISOLATIN4");
- }
-}
-
-void TextCodecGtk::registerExtendedCodecs(TextCodecRegistrar registrar)
-{
- // Western
- registerCodecIfAvailable(registrar, "MACROMAN");
-
- // Japanese
- registerCodecIfAvailable(registrar, "Shift_JIS");
- registerCodecIfAvailable(registrar, "EUC-JP");
- registerCodecIfAvailable(registrar, "ISO-2022-JP");
-
- // Traditional Chinese
- registerCodecIfAvailable(registrar, "BIG5");
- registerCodecIfAvailable(registrar, "BIG5-HKSCS");
- registerCodecIfAvailable(registrar, "CP950");
-
- // Korean
- registerCodecIfAvailable(registrar, "ISO-2022-KR");
- registerCodecIfAvailable(registrar, "CP949");
- registerCodecIfAvailable(registrar, "EUC-KR");
-
- // Arabic
- registerCodecIfAvailable(registrar, "ISO-8859-6");
- // rearranged, windows-1256 now declared the canonical name and put to lowercase to fix /fast/encoding/ahram-org-eg.html test case
- registerCodecIfAvailable(registrar, "windows-1256");
-
- // Hebrew
- registerCodecIfAvailable(registrar, "ISO-8859-8");
- // rearranged, moved windows-1255 as canonical and lowercased, fixing /fast/encoding/meta-charset.html
- registerCodecIfAvailable(registrar, "windows-1255");
-
- // Greek
- registerCodecIfAvailable(registrar, "ISO-8859-7");
- registerCodecIfAvailable(registrar, "CP869");
- registerCodecIfAvailable(registrar, "WINDOWS-1253");
-
- // Cyrillic
- registerCodecIfAvailable(registrar, "ISO-8859-5");
- registerCodecIfAvailable(registrar, "KOI8-R");
- registerCodecIfAvailable(registrar, "CP866");
- registerCodecIfAvailable(registrar, "KOI8-U");
- // CP1251 added to pass /fast/encoding/charset-cp1251.html
- registerCodecIfAvailable(registrar, "windows-1251");
- registerCodecIfAvailable(registrar, "mac-cyrillic");
-
- // Thai
- registerCodecIfAvailable(registrar, "CP874");
- registerCodecIfAvailable(registrar, "TIS-620");
-
- // Simplified Chinese
- registerCodecIfAvailable(registrar, "GBK");
- registerCodecIfAvailable(registrar, "HZ");
- registerCodecIfAvailable(registrar, "GB18030");
- registerCodecIfAvailable(registrar, "EUC-CN");
- registerCodecIfAvailable(registrar, "GB_2312-80");
-
- // Central European
- registerCodecIfAvailable(registrar, "ISO-8859-2");
- registerCodecIfAvailable(registrar, "CP1250");
- registerCodecIfAvailable(registrar, "MAC-CENTRALEUROPE");
-
- // Vietnamese
- registerCodecIfAvailable(registrar, "CP1258");
-
- // Turkish
- registerCodecIfAvailable(registrar, "CP1254");
- registerCodecIfAvailable(registrar, "ISO-8859-9");
-
- // Baltic
- registerCodecIfAvailable(registrar, "CP1257");
- registerCodecIfAvailable(registrar, "ISO-8859-4");
-}
-
-TextCodecGtk::TextCodecGtk(const TextEncoding& encoding)
- : m_encoding(encoding)
- , m_numBufferedBytes(0)
-{
-}
-
-TextCodecGtk::~TextCodecGtk()
-{
-}
-
-void TextCodecGtk::createIConvDecoder() const
-{
- ASSERT(!m_iconvDecoder);
-
- m_iconvDecoder = adoptPlatformRef(g_charset_converter_new(internalEncodingName, m_encoding.name(), 0));
-}
-
-void TextCodecGtk::createIConvEncoder() const
-{
- ASSERT(!m_iconvEncoder);
-
- m_iconvEncoder = adoptPlatformRef(g_charset_converter_new(m_encoding.name(), internalEncodingName, 0));
-}
-
-String TextCodecGtk::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
-{
- // Get a converter for the passed-in encoding.
- if (!m_iconvDecoder)
- createIConvDecoder();
- if (!m_iconvDecoder) {
- LOG_ERROR("Error creating IConv encoder even though encoding was in table.");
- return String();
- }
-
- Vector<UChar> result;
-
- gsize bytesRead = 0;
- gsize bytesWritten = 0;
- const gchar* input = bytes;
- gsize inputLength = length;
- gchar buffer[ConversionBufferSize];
- int flags = !length ? G_CONVERTER_INPUT_AT_END : G_CONVERTER_NO_FLAGS;
- if (flush)
- flags |= G_CONVERTER_FLUSH;
-
- bool bufferWasFull = false;
- char* prefixedBytes = 0;
-
- if (m_numBufferedBytes) {
- inputLength = length + m_numBufferedBytes;
- prefixedBytes = static_cast<char*>(fastMalloc(inputLength));
- memcpy(prefixedBytes, m_bufferedBytes, m_numBufferedBytes);
- memcpy(prefixedBytes + m_numBufferedBytes, bytes, length);
-
- input = prefixedBytes;
-
- // all buffered bytes are consumed now
- m_numBufferedBytes = 0;
- }
-
- do {
- GOwnPtr<GError> error;
- GConverterResult res = g_converter_convert(G_CONVERTER(m_iconvDecoder.get()),
- input, inputLength,
- buffer, sizeof(buffer),
- static_cast<GConverterFlags>(flags),
- &bytesRead, &bytesWritten,
- &error.outPtr());
- input += bytesRead;
- inputLength -= bytesRead;
-
- if (res == G_CONVERTER_ERROR) {
- if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT)) {
- // There is not enough input to fully determine what the conversion should produce,
- // save it to a buffer to prepend it to the next input.
- memcpy(m_bufferedBytes, input, inputLength);
- m_numBufferedBytes = inputLength;
- inputLength = 0;
- } else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_NO_SPACE))
- bufferWasFull = true;
- else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
- if (stopOnError)
- sawError = true;
- if (inputLength) {
- // Ignore invalid character.
- input += 1;
- inputLength -= 1;
- }
- } else {
- sawError = true;
- LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
- m_numBufferedBytes = 0; // Reset state for subsequent calls to decode.
- fastFree(prefixedBytes);
- return String();
- }
- }
-
- result.append(reinterpret_cast<UChar*>(buffer), bytesWritten / sizeof(UChar));
- } while ((inputLength || bufferWasFull) && !sawError);
-
- fastFree(prefixedBytes);
-
- return String::adopt(result);
-}
-
-CString TextCodecGtk::encode(const UChar* characters, size_t length, UnencodableHandling handling)
-{
- if (!length)
- return "";
-
- if (!m_iconvEncoder)
- createIConvEncoder();
- if (!m_iconvEncoder) {
- LOG_ERROR("Error creating IConv encoder even though encoding was in table.");
- return CString();
- }
-
- gsize bytesRead = 0;
- gsize bytesWritten = 0;
- const gchar* input = reinterpret_cast<const char*>(characters);
- gsize inputLength = length * sizeof(UChar);
- gchar buffer[ConversionBufferSize];
- Vector<char> result;
- GOwnPtr<GError> error;
-
- size_t size = 0;
- do {
- g_converter_convert(G_CONVERTER(m_iconvEncoder.get()),
- input, inputLength,
- buffer, sizeof(buffer),
- G_CONVERTER_INPUT_AT_END,
- &bytesRead, &bytesWritten,
- &error.outPtr());
- input += bytesRead;
- inputLength -= bytesRead;
- if (bytesWritten > 0) {
- result.grow(size + bytesWritten);
- memcpy(result.data() + size, buffer, bytesWritten);
- size += bytesWritten;
- }
-
- if (error && g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
- UChar codePoint = reinterpret_cast<const UChar*>(input)[0];
- UnencodableReplacementArray replacement;
- int replacementLength = TextCodec::getUnencodableReplacement(codePoint, handling, replacement);
-
- // Consume the invalid character.
- input += sizeof(UChar);
- inputLength -= sizeof(UChar);
-
- // Append replacement string to result buffer.
- result.grow(size + replacementLength);
- memcpy(result.data() + size, replacement, replacementLength);
- size += replacementLength;
-
- error.clear();
- }
- } while (inputLength && !error.get());
-
- if (error) {
- LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message);
- return CString();
- }
-
- return CString(result.data(), size);
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/gtk/TextCodecGtk.h b/WebCore/platform/text/gtk/TextCodecGtk.h
deleted file mode 100644
index 1fb8df9..0000000
--- a/WebCore/platform/text/gtk/TextCodecGtk.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
- * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodecGTK_h
-#define TextCodecGTK_h
-
-#include "GRefPtr.h"
-#include <glib.h>
-#include "TextCodec.h"
-#include "TextEncoding.h"
-
-namespace WebCore {
-
- class TextCodecGtk : public TextCodec {
- public:
- static void registerBaseEncodingNames(EncodingNameRegistrar);
- static void registerBaseCodecs(TextCodecRegistrar);
-
- static void registerExtendedEncodingNames(EncodingNameRegistrar);
- static void registerExtendedCodecs(TextCodecRegistrar);
-
- TextCodecGtk(const TextEncoding&);
- virtual ~TextCodecGtk();
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
-
- private:
- void createIConvDecoder() const;
- void createIConvEncoder() const;
-
- TextEncoding m_encoding;
- size_t m_numBufferedBytes;
- unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
- mutable PlatformRefPtr<GCharsetConverter> m_iconvDecoder;
- mutable PlatformRefPtr<GCharsetConverter> m_iconvEncoder;
- };
-
-} // namespace WebCore
-
-#endif // TextCodecGTK_h
diff --git a/WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp b/WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp
deleted file mode 100644
index 8bb8c70..0000000
--- a/WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2007 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- *
- */
-
-#include "config.h"
-#include "TextBreakIteratorInternalICU.h"
-
-#include "NotImplemented.h"
-
-
-namespace WebCore {
-
-const char* currentSearchLocaleID()
-{
- notImplemented();
- return "";
-}
-
-const char* currentTextBreakLocaleID()
-{
- notImplemented();
- return "en_us";
-}
-
-} // namespace WebCore
-
diff --git a/WebCore/platform/text/mac/CharsetData.h b/WebCore/platform/text/mac/CharsetData.h
deleted file mode 100644
index 458cecb..0000000
--- a/WebCore/platform/text/mac/CharsetData.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-namespace WebCore {
-
- #define kTextEncodingISOLatinThai kCFStringEncodingISOLatinThai
-
- struct CharsetEntry {
- const char* name;
- ::TextEncoding encoding;
- };
-
- extern const CharsetEntry CharsetTable[];
-
-}
diff --git a/WebCore/platform/text/mac/HyphenationMac.mm b/WebCore/platform/text/mac/HyphenationMac.mm
deleted file mode 100644
index d5c9283..0000000
--- a/WebCore/platform/text/mac/HyphenationMac.mm
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2010 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#import "config.h"
-#import "Hyphenation.h"
-
-#if defined(BUILDING_ON_TIGER) || defined(BUILDING_ON_LEOPARD) || defined(BUILDING_ON_SNOW_LEOPARD)
-
-#import "AtomicStringKeyedMRUCache.h"
-#import "TextBreakIteratorInternalICU.h"
-#import "WebCoreSystemInterface.h"
-#import <wtf/RetainPtr.h>
-
-namespace WebCore {
-
-template<>
-bool AtomicStringKeyedMRUCache<bool>::createValueForNullKey()
-{
- return !strcmp(currentSearchLocaleID(), "en");
-}
-
-template<>
-bool AtomicStringKeyedMRUCache<bool>::createValueForKey(const AtomicString& localeIdentifier)
-{
- RetainPtr<CFStringRef> cfLocaleIdentifier(AdoptCF, localeIdentifier.createCFString());
- RetainPtr<CFDictionaryRef> components(AdoptCF, CFLocaleCreateComponentsFromLocaleIdentifier(kCFAllocatorDefault, cfLocaleIdentifier.get()));
- CFStringRef language = reinterpret_cast<CFStringRef>(CFDictionaryGetValue(components.get(), kCFLocaleLanguageCode));
- static CFStringRef englishLanguage = CFSTR("en");
- return language && CFEqual(language, englishLanguage);
-}
-
-bool canHyphenate(const AtomicString& localeIdentifier)
-{
- DEFINE_STATIC_LOCAL(AtomicStringKeyedMRUCache<bool>, isEnglishCache, ());
- return isEnglishCache.get(localeIdentifier);
-}
-
-size_t lastHyphenLocation(const UChar* characters, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier)
-{
- ASSERT_UNUSED(localeIdentifier, canHyphenate(localeIdentifier));
-
- RetainPtr<CFStringRef> string(AdoptCF, CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, characters, length, kCFAllocatorNull));
- return wkGetHyphenationLocationBeforeIndex(string.get(), beforeIndex);
-}
-
-} // namespace WebCore
-
-#endif // defined(BUILDING_ON_TIGER) || defined(BUILDING_ON_LEOPARD) || defined(BUILDING_ON_SNOW_LEOPARD)
diff --git a/WebCore/platform/text/mac/ShapeArabic.c b/WebCore/platform/text/mac/ShapeArabic.c
deleted file mode 100644
index dd61ce5..0000000
--- a/WebCore/platform/text/mac/ShapeArabic.c
+++ /dev/null
@@ -1,556 +0,0 @@
-/*
-******************************************************************************
-*
-* Copyright (C) 2000-2004, International Business Machines
-* Corporation and others. All Rights Reserved.
-* Copyright (C) 2007 Apple Inc. All rights reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy of this
-* software and associated documentation files (the "Software"), to deal in the Software
-* without restriction, including without limitation the rights to use, copy, modify,
-* merge, publish, distribute, and/or sell copies of the Software, and to permit persons
-* to whom the Software is furnished to do so, provided that the above copyright notice(s)
-* and this permission notice appear in all copies of the Software and that both the above
-* copyright notice(s) and this permission notice appear in supporting documentation.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
-* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
-* PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER
-* OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
-* CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
-* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-*
-* Except as contained in this notice, the name of a copyright holder shall not be used in
-* advertising or otherwise to promote the sale, use or other dealings in this Software
-* without prior written authorization of the copyright holder.
-*
-******************************************************************************
-*
-* Arabic letter shaping implemented by Ayman Roshdy
-*/
-
-#include "config.h"
-
-#if USE(ATSUI)
-
-#include "ShapeArabic.h"
-
-#include <stdbool.h>
-#include <string.h>
-#include <unicode/utypes.h>
-#include <unicode/uchar.h>
-#include <unicode/ustring.h>
-#include <unicode/ushape.h>
-#include <wtf/Assertions.h>
-
-/*
- * ### TODO in general for letter shaping:
- * - the letter shaping code is UTF-16-unaware; needs update
- * + especially invertBuffer()?!
- * - needs to handle the "Arabic Tail" that is used in some legacy codepages
- * as a glyph fragment of wide-glyph letters
- * + IBM Unicode conversion tables map it to U+200B (ZWSP)
- * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
- */
-
-/* definitions for Arabic letter shaping ------------------------------------ */
-
-#define IRRELEVANT 4
-#define LAMTYPE 16
-#define ALEFTYPE 32
-#define LINKR 1
-#define LINKL 2
-
-static const UChar IrrelevantPos[] = {
- 0x0, 0x2, 0x4, 0x6,
- 0x8, 0xA, 0xC, 0xE,
-};
-
-static const UChar araLink[178]=
-{
- 1 + 32 + 256 * 0x11,/*0x0622*/
- 1 + 32 + 256 * 0x13,/*0x0623*/
- 1 + 256 * 0x15,/*0x0624*/
- 1 + 32 + 256 * 0x17,/*0x0625*/
- 1 + 2 + 256 * 0x19,/*0x0626*/
- 1 + 32 + 256 * 0x1D,/*0x0627*/
- 1 + 2 + 256 * 0x1F,/*0x0628*/
- 1 + 256 * 0x23,/*0x0629*/
- 1 + 2 + 256 * 0x25,/*0x062A*/
- 1 + 2 + 256 * 0x29,/*0x062B*/
- 1 + 2 + 256 * 0x2D,/*0x062C*/
- 1 + 2 + 256 * 0x31,/*0x062D*/
- 1 + 2 + 256 * 0x35,/*0x062E*/
- 1 + 256 * 0x39,/*0x062F*/
- 1 + 256 * 0x3B,/*0x0630*/
- 1 + 256 * 0x3D,/*0x0631*/
- 1 + 256 * 0x3F,/*0x0632*/
- 1 + 2 + 256 * 0x41,/*0x0633*/
- 1 + 2 + 256 * 0x45,/*0x0634*/
- 1 + 2 + 256 * 0x49,/*0x0635*/
- 1 + 2 + 256 * 0x4D,/*0x0636*/
- 1 + 2 + 256 * 0x51,/*0x0637*/
- 1 + 2 + 256 * 0x55,/*0x0638*/
- 1 + 2 + 256 * 0x59,/*0x0639*/
- 1 + 2 + 256 * 0x5D,/*0x063A*/
- 0, 0, 0, 0, 0, /*0x063B-0x063F*/
- 1 + 2, /*0x0640*/
- 1 + 2 + 256 * 0x61,/*0x0641*/
- 1 + 2 + 256 * 0x65,/*0x0642*/
- 1 + 2 + 256 * 0x69,/*0x0643*/
- 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/
- 1 + 2 + 256 * 0x71,/*0x0645*/
- 1 + 2 + 256 * 0x75,/*0x0646*/
- 1 + 2 + 256 * 0x79,/*0x0647*/
- 1 + 256 * 0x7D,/*0x0648*/
- 1 + 256 * 0x7F,/*0x0649*/
- 1 + 2 + 256 * 0x81,/*0x064A*/
- 4, 4, 4, 4, /*0x064B-0x064E*/
- 4, 4, 4, 4, /*0x064F-0x0652*/
- 4, 4, 4, 0, 0, /*0x0653-0x0657*/
- 0, 0, 0, 0, /*0x0658-0x065B*/
- 1 + 256 * 0x85,/*0x065C*/
- 1 + 256 * 0x87,/*0x065D*/
- 1 + 256 * 0x89,/*0x065E*/
- 1 + 256 * 0x8B,/*0x065F*/
- 0, 0, 0, 0, 0, /*0x0660-0x0664*/
- 0, 0, 0, 0, 0, /*0x0665-0x0669*/
- 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/
- 4, /*0x0670*/
- 0, /*0x0671*/
- 1 + 32, /*0x0672*/
- 1 + 32, /*0x0673*/
- 0, /*0x0674*/
- 1 + 32, /*0x0675*/
- 1, 1, /*0x0676-0x0677*/
- 1+2, /*0x0678*/
- 1+2 + 256 * 0x16,/*0x0679*/
- 1+2 + 256 * 0x0E,/*0x067A*/
- 1+2 + 256 * 0x02,/*0x067B*/
- 1+2, 1+2, /*0x067C-0x067D*/
- 1+2 + 256 * 0x06,/*0x067E*/
- 1+2 + 256 * 0x12,/*0x067F*/
- 1+2 + 256 * 0x0A,/*0x0680*/
- 1+2, 1+2, /*0x0681-0x0682*/
- 1+2 + 256 * 0x26,/*0x0683*/
- 1+2 + 256 * 0x22,/*0x0684*/
- 1+2, /*0x0685*/
- 1+2 + 256 * 0x2A,/*0x0686*/
- 1+2 + 256 * 0x2E,/*0x0687*/
- 1 + 256 * 0x38,/*0x0688*/
- 1, 1, 1, /*0x0689-0x068B*/
- 1 + 256 * 0x34,/*0x068C*/
- 1 + 256 * 0x32,/*0x068D*/
- 1 + 256 * 0x36,/*0x068E*/
- 1, 1, /*0x068F-0x0690*/
- 1 + 256 * 0x3C,/*0x0691*/
- 1, 1, 1, 1, 1, 1, /*0x0692-0x0697*/
- 1 + 256 * 0x3A,/*0x0698*/
- 1, /*0x0699*/
- 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x069F*/
- 1+2, 1+2, 1+2, 1+2, /*0x06A0-0x06A3*/
- 1+2 + 256 * 0x2E,/*0x06A4*/
- 1+2, /*0x06A5*/
- 1+2 + 256 * 0x1E,/*0x06A6*/
- 1+2, 1+2, /*0x06A7-0x06A8*/
- 1+2 + 256 * 0x3E,/*0x06A9*/
- 1+2, 1+2, 1+2, /*0x06AA-0x06AC*/
- 1+2 + 256 * 0x83,/*0x06AD*/
- 1+2, /*0x06AE*/
- 1+2 + 256 * 0x42,/*0x06AF*/
- 1+2, /*0x06B0*/
- 1+2 + 256 * 0x4A,/*0x06B1*/
- 1+2, /*0x06B2*/
- 1+2 + 256 * 0x46,/*0x06B3*/
- 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B4-0x06B9*/
- 1+2, /*0x06BA*/ // FIXME: Seems to have a final form
- 1+2 + 256 * 0x50,/*0x06BB*/
- 1+2, 1+2, /*0x06BC-0x06BD*/
- 1+2 + 256 * 0x5A,/*0x06BE*/
- 1+2, /*0x06BF*/
- 1, /*0x06C0*/
- 1+2 + 256 * 0x56,/*0x06C1*/
- 1+2, /*0x06C2*/
- 1, 1, /*0x06C3-0x06C4*/
- 1 + 256 * 0x90,/*0x06C5*/
- 1 + 256 * 0x89,/*0x06C6*/
- 1 + 256 * 0x87,/*0x06C7*/
- 1 + 256 * 0x8B,/*0x06C8*/
- 1 + 256 * 0x92,/*0x06C9*/
- 1, /*0x06CA*/
- 1 + 256 * 0x8E,/*0x06CB*/
- 1+2 + 256 * 0xAC,/*0x06CC*/
- 1, /*0x06CD*/
- 1+2, /*0x06CE*/
- 1, /*0x06CF*/
- 1+2 + 256 * 0x94,/*0x06D0*/
- 1+2, /*0x06D1*/
- 1 + 256 * 0x5E,/*0x06D2*/
- 1 + 256 * 0x60 /*0x06D3*/
-};
-
-static const UChar presLink[141]=
-{
- 1 + 2, /*0xFE70*/
- 1 + 2, /*0xFE71*/
- 1 + 2, 0, 1+ 2, 0, 1+ 2, /*0xFE72-0xFE76*/
- 1 + 2, /*0xFE77*/
- 1+ 2, 1 + 2, 1+2, 1 + 2, /*0xFE78-0xFE81*/
- 1+ 2, 1 + 2, 1+2, 1 + 2, /*0xFE82-0xFE85*/
- 0, 0 + 32, 1 + 32, 0 + 32, /*0xFE86-0xFE89*/
- 1 + 32, 0, 1, 0 + 32, /*0xFE8A-0xFE8D*/
- 1 + 32, 0, 2, 1 + 2, /*0xFE8E-0xFE91*/
- 1, 0 + 32, 1 + 32, 0, /*0xFE92-0xFE95*/
- 2, 1 + 2, 1, 0, /*0xFE96-0xFE99*/
- 1, 0, 2, 1 + 2, /*0xFE9A-0xFE9D*/
- 1, 0, 2, 1 + 2, /*0xFE9E-0xFEA1*/
- 1, 0, 2, 1 + 2, /*0xFEA2-0xFEA5*/
- 1, 0, 2, 1 + 2, /*0xFEA6-0xFEA9*/
- 1, 0, 2, 1 + 2, /*0xFEAA-0xFEAD*/
- 1, 0, 1, 0, /*0xFEAE-0xFEB1*/
- 1, 0, 1, 0, /*0xFEB2-0xFEB5*/
- 1, 0, 2, 1+2, /*0xFEB6-0xFEB9*/
- 1, 0, 2, 1+2, /*0xFEBA-0xFEBD*/
- 1, 0, 2, 1+2, /*0xFEBE-0xFEC1*/
- 1, 0, 2, 1+2, /*0xFEC2-0xFEC5*/
- 1, 0, 2, 1+2, /*0xFEC6-0xFEC9*/
- 1, 0, 2, 1+2, /*0xFECA-0xFECD*/
- 1, 0, 2, 1+2, /*0xFECE-0xFED1*/
- 1, 0, 2, 1+2, /*0xFED2-0xFED5*/
- 1, 0, 2, 1+2, /*0xFED6-0xFED9*/
- 1, 0, 2, 1+2, /*0xFEDA-0xFEDD*/
- 1, 0, 2, 1+2, /*0xFEDE-0xFEE1*/
- 1, 0 + 16, 2 + 16, 1 + 2 +16, /*0xFEE2-0xFEE5*/
- 1 + 16, 0, 2, 1+2, /*0xFEE6-0xFEE9*/
- 1, 0, 2, 1+2, /*0xFEEA-0xFEED*/
- 1, 0, 2, 1+2, /*0xFEEE-0xFEF1*/
- 1, 0, 1, 0, /*0xFEF2-0xFEF5*/
- 1, 0, 2, 1+2, /*0xFEF6-0xFEF9*/
- 1, 0, 1, 0, /*0xFEFA-0xFEFD*/
- 1, 0, 1, 0,
- 1
-};
-
-static const UChar convertFEto06[] =
-{
-/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
-/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
-/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
-/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
-/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
-/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
-/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
-/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
-/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
-/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
-};
-
-static const UChar shapeTable[4][4][4]=
-{
- { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
- { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
- { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
- { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
-};
-
-/*
- *Name : changeLamAlef
- *Function : Converts the Alef characters into an equivalent
- * LamAlef location in the 0x06xx Range, this is an
- * intermediate stage in the operation of the program
- * later it'll be converted into the 0xFExx LamAlefs
- * in the shaping function.
- */
-static UChar
-changeLamAlef(UChar ch) {
-
- switch(ch) {
- case 0x0622 :
- return(0x065C);
- break;
- case 0x0623 :
- return(0x065D);
- break;
- case 0x0625 :
- return(0x065E);
- break;
- case 0x0627 :
- return(0x065F);
- break;
- default :
- return(0);
- break;
- }
-}
-
-/*
- *Name : specialChar
- *Function : Special Arabic characters need special handling in the shapeUnicode
- * function, this function returns 1 or 2 for these special characters
- */
-static int32_t
-specialChar(UChar ch) {
-
- if( (ch>0x0621 && ch<0x0626)||(ch==0x0627)||(ch>0x062e && ch<0x0633)||
- (ch>0x0647 && ch<0x064a)||(ch==0x0629) ) {
- return (1);
- }
- else
- if( ch>=0x064B && ch<= 0x0652 )
- return (2);
- else
- if( (ch>=0x0653 && ch<= 0x0655) || ch == 0x0670 ||
- (ch>=0xFE70 && ch<= 0xFE7F) )
- return (3);
- else
- return (0);
-}
-
-/*
- *Name : getLink
- *Function : Resolves the link between the characters as
- * Arabic characters have four forms :
- * Isolated, Initial, Middle and Final Form
- */
-static UChar
-getLink(UChar ch) {
-
- if(ch >= 0x0622 && ch <= 0x06D3) {
- return(araLink[ch-0x0622]);
- } else if(ch == 0x200D) {
- return(3);
- } else if(ch >= 0x206D && ch <= 0x206F) {
- return(4);
- } else if(ch >= 0xFE70 && ch <= 0xFEFC) {
- return(presLink[ch-0xFE70]);
- } else {
- return(0);
- }
-}
-
-/*
- *Name : isTashkeelChar
- *Function : Returns 1 for Tashkeel characters else return 0
- */
-static int32_t
-isTashkeelChar(UChar ch) {
-
- if( ch>=0x064B && ch<= 0x0652 )
- return (1);
- else
- return (0);
-}
-
-/*
- *Name : shapeUnicode
- *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
- * arabic Unicode buffer in FExx Range
- */
-static int32_t
-shapeUnicode(UChar *dest, int32_t sourceLength,
- int32_t destSize,
- int tashkeelFlag) {
-
- int32_t i, iend;
- int32_t prevPos, lastPos,Nx, Nw;
- unsigned int Shape;
- int32_t flag;
- int32_t lamalef_found = 0;
- UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0;
- UChar wLamalef;
-
- /*
- * Converts the input buffer from FExx Range into 06xx Range
- * to make sure that all characters are in the 06xx range
- * even the lamalef is converted to the special region in
- * the 06xx range
- */
- for (i = 0; i < sourceLength; i++) {
- UChar inputChar = dest[i];
- if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
- dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
- }
- }
-
- /* sets the index to the end of the buffer, together with the step point to -1 */
- i = 0;
- iend = sourceLength;
-
- /*
- * This function resolves the link between the characters .
- * Arabic characters have four forms :
- * Isolated Form, Initial Form, Middle Form and Final Form
- */
- currLink = getLink(dest[i]);
-
- prevPos = i;
- lastPos = i;
- Nx = sourceLength + 2, Nw = 0;
-
- while (i != iend) {
- /* If high byte of currLink > 0 then more than one shape */
- if ((currLink & 0xFF00) > 0 || isTashkeelChar(dest[i])) {
- Nw = i + 1;
- while (Nx >= sourceLength) { /* we need to know about next char */
- if(Nw == iend) {
- nextLink = 0;
- Nx = -1;
- } else {
- nextLink = getLink(dest[Nw]);
- if((nextLink & IRRELEVANT) == 0) {
- Nx = Nw;
- } else {
- Nw = Nw + 1;
- }
- }
- }
-
- if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) {
- lamalef_found = 1;
- wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
- if ( wLamalef != 0) {
- dest[i] = ' '; /* The default case is to drop the Alef and replace */
- dest[lastPos] =wLamalef; /* it by a space. */
- i=lastPos;
- }
- lastLink = prevLink;
- currLink = getLink(wLamalef);
- }
- /*
- * get the proper shape according to link ability of neighbors
- * and of character; depends on the order of the shapes
- * (isolated, initial, middle, final) in the compatibility area
- */
- flag = specialChar(dest[i]);
-
- Shape = shapeTable[nextLink & (LINKR + LINKL)]
- [lastLink & (LINKR + LINKL)]
- [currLink & (LINKR + LINKL)];
-
- if (flag == 1) {
- Shape = (Shape == 1 || Shape == 3) ? 1 : 0;
- }
- else
- if(flag == 2) {
- if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
- dest[i] != 0x064C && dest[i] != 0x064D ) {
- Shape = 1;
- if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE )
- Shape = 0;
- }
- else {
- Shape = 0;
- }
- }
-
- if(flag == 2) {
- dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape;
- }
- else
- dest[i] = (UChar)((dest[i] < 0x0670 ? 0xFE70 : 0xFB50) + (currLink >> 8) + Shape);
- }
-
- /* move one notch forward */
- if ((currLink & IRRELEVANT) == 0) {
- prevLink = lastLink;
- lastLink = currLink;
- prevPos = lastPos;
- lastPos = i;
- }
-
- i++;
- if (i == Nx) {
- currLink = nextLink;
- Nx = sourceLength + 2;
- }
- else if(i != iend) {
- currLink = getLink(dest[i]);
- }
- }
-
- destSize = sourceLength;
-
- return destSize;
-}
-
-int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode) {
- int32_t destLength;
-
- /* usual error checking */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
- /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
- if( source==NULL || sourceLength<-1 ||
- (dest==NULL && destCapacity!=0) || destCapacity<0 ||
- options>=U_SHAPE_DIGIT_TYPE_RESERVED ||
- (options&U_SHAPE_DIGITS_MASK)>=U_SHAPE_DIGITS_RESERVED
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- /* determine the source length */
- if(sourceLength==-1) {
- sourceLength=u_strlen(source);
- }
- if(sourceLength==0) {
- return 0;
- }
-
- /* check that source and destination do not overlap */
- if( dest!=NULL &&
- ((source<=dest && dest<source+sourceLength) ||
- (dest<=source && source<dest+destCapacity))
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
- int32_t outputSize = sourceLength;
-
- /* calculate destination size */
- /* TODO: do we ever need to do this pure preflighting? */
- ASSERT((options&U_SHAPE_LENGTH_MASK) != U_SHAPE_LENGTH_GROW_SHRINK);
-
- if(outputSize>destCapacity) {
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- return outputSize;
- }
-
- /* Start of Arabic letter shaping part */
- memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR);
-
- ASSERT((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL);
-
- switch(options&U_SHAPE_LETTERS_MASK) {
- case U_SHAPE_LETTERS_SHAPE :
- /* Call the shaping function with tashkeel flag == 1 */
- destLength = shapeUnicode(dest,sourceLength,destCapacity,1);
- break;
- case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
- /* Call the shaping function with tashkeel flag == 0 */
- destLength = shapeUnicode(dest,sourceLength,destCapacity,0);
- break;
- case U_SHAPE_LETTERS_UNSHAPE :
- ASSERT_NOT_REACHED();
- break;
- default :
- /* will never occur because of validity checks above */
- destLength = 0;
- break;
- }
-
- /* End of Arabic letter shaping part */
- } else
- ASSERT_NOT_REACHED();
-
- ASSERT((options & U_SHAPE_DIGITS_MASK) == U_SHAPE_DIGITS_NOOP);
-
- return sourceLength;
-}
-
-#endif // USE(ATSUI)
diff --git a/WebCore/platform/text/mac/ShapeArabic.h b/WebCore/platform/text/mac/ShapeArabic.h
deleted file mode 100644
index 8aa577d..0000000
--- a/WebCore/platform/text/mac/ShapeArabic.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2007 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ShapeArabic_h
-#define ShapeArabic_h
-
-#if USE(ATSUI)
-
-#include <unicode/ushape.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // USE(ATSUI)
-#endif // ShapeArabic_h
diff --git a/WebCore/platform/text/mac/StringImplMac.mm b/WebCore/platform/text/mac/StringImplMac.mm
deleted file mode 100644
index 6f5e953..0000000
--- a/WebCore/platform/text/mac/StringImplMac.mm
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 2006, 2009 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include <wtf/text/StringImpl.h>
-
-#include "FoundationExtras.h"
-
-namespace WTF {
-
-StringImpl::operator NSString *()
-{
- return HardAutorelease(createCFString());
-}
-
-}
diff --git a/WebCore/platform/text/mac/StringMac.mm b/WebCore/platform/text/mac/StringMac.mm
deleted file mode 100644
index 7e98b2b..0000000
--- a/WebCore/platform/text/mac/StringMac.mm
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Copyright (C) 2006 Apple Computer, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include "PlatformString.h"
-#include <CoreFoundation/CFString.h>
-
-namespace WTF {
-
-String::String(NSString* str)
-{
- if (!str)
- return;
-
- CFIndex size = CFStringGetLength(reinterpret_cast<CFStringRef>(str));
- if (size == 0)
- m_impl = StringImpl::empty();
- else {
- Vector<UChar, 1024> buffer(size);
- CFStringGetCharacters(reinterpret_cast<CFStringRef>(str), CFRangeMake(0, size), buffer.data());
- m_impl = StringImpl::create(buffer.data(), size);
- }
-}
-
-}
diff --git a/WebCore/platform/text/mac/TextBoundaries.mm b/WebCore/platform/text/mac/TextBoundaries.mm
deleted file mode 100644
index bd7ddf8..0000000
--- a/WebCore/platform/text/mac/TextBoundaries.mm
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#import "config.h"
-#import "TextBoundaries.h"
-
-using namespace WTF::Unicode;
-
-namespace WebCore {
-
-void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
-{
- NSString* string = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars)
- length:len freeWhenDone:NO];
- NSAttributedString* attr = [[NSAttributedString alloc] initWithString:string];
- NSRange range = [attr doubleClickAtIndex:(position >= len) ? len - 1 : position];
- [attr release];
- [string release];
- *start = range.location;
- *end = range.location + range.length;
-}
-
-int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
-{
- NSString* string = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars)
- length:len freeWhenDone:NO];
- NSAttributedString* attr = [[NSAttributedString alloc] initWithString:string];
- int result = [attr nextWordFromIndex:position forward:forward];
- [attr release];
- [string release];
- return result;
-}
-
-}
diff --git a/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm b/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
deleted file mode 100644
index 6af5616..0000000
--- a/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 2007, 2009 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include "TextBreakIteratorInternalICU.h"
-
-#include <wtf/RetainPtr.h>
-
-namespace WebCore {
-
-static const int maxLocaleStringLength = 32;
-
-static inline RetainPtr<CFStringRef> textBreakLocalePreference()
-{
- RetainPtr<CFPropertyListRef> locale(AdoptCF, CFPreferencesCopyValue(CFSTR("AppleTextBreakLocale"),
- kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost));
- if (!locale || CFGetTypeID(locale.get()) != CFStringGetTypeID())
- return 0;
- return static_cast<CFStringRef>(locale.get());
-}
-
-static RetainPtr<CFStringRef> topLanguagePreference()
-{
- NSArray *languagesArray = [[NSUserDefaults standardUserDefaults] arrayForKey:@"AppleLanguages"];
- if (!languagesArray)
- return 0;
- if ([languagesArray count] < 1)
- return 0;
- NSString *value = [languagesArray objectAtIndex:0];
- if (![value isKindOfClass:[NSString class]])
- return 0;
- return reinterpret_cast<CFStringRef>(value);
-}
-
-static RetainPtr<CFStringRef> canonicalLanguageIdentifier(CFStringRef locale)
-{
- if (!locale)
- return 0;
- RetainPtr<CFStringRef> canonicalLocale(AdoptCF,
- CFLocaleCreateCanonicalLanguageIdentifierFromString(kCFAllocatorDefault, locale));
- if (!canonicalLocale)
- return locale;
- return canonicalLocale;
-}
-
-static void getLocale(CFStringRef locale, char localeStringBuffer[maxLocaleStringLength])
-{
- // Empty string means "root locale", and that is what we use if we can't get a preference.
- localeStringBuffer[0] = 0;
- if (!locale)
- return;
- CFStringGetCString(locale, localeStringBuffer, maxLocaleStringLength, kCFStringEncodingASCII);
-}
-
-static void getSearchLocale(char localeStringBuffer[maxLocaleStringLength])
-{
- getLocale(canonicalLanguageIdentifier(topLanguagePreference().get()).get(), localeStringBuffer);
-}
-
-const char* currentSearchLocaleID()
-{
- static char localeStringBuffer[maxLocaleStringLength];
- static bool gotSearchLocale = false;
- if (!gotSearchLocale) {
- getSearchLocale(localeStringBuffer);
- gotSearchLocale = true;
- }
- return localeStringBuffer;
-}
-
-static void getTextBreakLocale(char localeStringBuffer[maxLocaleStringLength])
-{
- // If there is no text break locale, use the top language preference.
- RetainPtr<CFStringRef> locale = textBreakLocalePreference();
- if (!locale)
- locale = topLanguagePreference();
- getLocale(canonicalLanguageIdentifier(locale.get()).get(), localeStringBuffer);
-}
-
-const char* currentTextBreakLocaleID()
-{
- static char localeStringBuffer[maxLocaleStringLength];
- static bool gotTextBreakLocale = false;
- if (!gotTextBreakLocale) {
- getTextBreakLocale(localeStringBuffer);
- gotTextBreakLocale = true;
- }
- return localeStringBuffer;
-}
-
-}
diff --git a/WebCore/platform/text/mac/TextCodecMac.cpp b/WebCore/platform/text/mac/TextCodecMac.cpp
deleted file mode 100644
index b743f3d..0000000
--- a/WebCore/platform/text/mac/TextCodecMac.cpp
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextCodecMac.h"
-
-#include "CharacterNames.h"
-#include "CharsetData.h"
-#include "PlatformString.h"
-#include "ThreadGlobalData.h"
-#include <wtf/Assertions.h>
-#include <wtf/text/CString.h>
-#include <wtf/PassOwnPtr.h>
-#include <wtf/RetainPtr.h>
-#include <wtf/Threading.h>
-
-using namespace std;
-
-namespace WebCore {
-
-// We need to keep this because ICU doesn't support some of the encodings that we need:
-// <http://bugs.webkit.org/show_bug.cgi?id=4195>.
-
-const size_t ConversionBufferSize = 16384;
-
-static TECConverterWrapper& cachedConverterTEC() // Shorthand accessor for the converter cache held by threadGlobalData().
-{
- return threadGlobalData().cachedConverterTEC(); // Returned by reference so callers can update the cache in place.
-}
-
-// Registers every name in CharsetTable (the table ends at the first row with a
-// null name). Consecutive rows that share an encoding are registered as aliases
-// of the first name in that run.
-void TextCodecMac::registerEncodingNames(EncodingNameRegistrar registrar)
-{
-    const char* canonicalName = 0;
-    TECTextEncodingID currentEncoding = invalidEncoding;
-
-    size_t row = 0;
-    while (CharsetTable[row].name) {
-        const bool startsNewEncoding = CharsetTable[row].encoding != currentEncoding;
-        if (startsNewEncoding) {
-            canonicalName = CharsetTable[row].name;
-            currentEncoding = CharsetTable[row].encoding;
-        }
-        registrar(CharsetTable[row].name, canonicalName);
-        ++row;
-    }
-}
-
-// Codec factory passed to the registrar by registerCodecs(). |additionalData|
-// must point at the TECTextEncodingID the new codec should use.
-static PassOwnPtr<TextCodec> newTextCodecMac(const TextEncoding&, const void* additionalData)
-{
-    const TECTextEncodingID* encoding = static_cast<const TECTextEncodingID*>(additionalData);
-    return new TextCodecMac(*encoding);
-}
-
-// Registers one codec factory per run of consecutive CharsetTable rows that
-// share an encoding, under the first name of the run. The registrar receives a
-// pointer to the table's own encoding field as the factory's additional data.
-void TextCodecMac::registerCodecs(TextCodecRegistrar registrar)
-{
-    TECTextEncodingID previousEncoding = invalidEncoding;
-
-    for (size_t row = 0; CharsetTable[row].name; ++row) {
-        if (CharsetTable[row].encoding == previousEncoding)
-            continue;
-        registrar(CharsetTable[row].name, newTextCodecMac, &CharsetTable[row].encoding);
-        previousEncoding = CharsetTable[row].encoding;
-    }
-}
-
-TextCodecMac::TextCodecMac(TECTextEncodingID encoding) // Creates a codec for |encoding|; no TEC converter is acquired here.
- : m_encoding(encoding)
- , m_numBufferedBytes(0) // No partial character is pending yet.
- , m_converterTEC(0) // Stays null until createTECConverter() assigns a converter.
-{ // NOTE(review): m_backslashAsCurrencySymbol is not initialized here, yet encode() reads it - confirm where it is set.
-}
-
-TextCodecMac::~TextCodecMac() // Gives up this codec's converter, if it has one.
-{
- releaseTECConverter(); // Moves the converter into the converter cache instead of disposing of it directly.
-}
-
-// Hands this codec's converter over to the converter cache, disposing of
-// whatever converter the cache held before. Does nothing when the codec has
-// no converter.
-void TextCodecMac::releaseTECConverter() const
-{
-    if (!m_converterTEC)
-        return;
-
-    TECConverterWrapper& cache = cachedConverterTEC();
-    if (cache.converter)
-        TECDisposeConverter(cache.converter);
-
-    // Record which encoding the cached converter belongs to so that
-    // createTECConverter() can tell whether it is reusable.
-    cache.encoding = m_encoding;
-    cache.converter = m_converterTEC;
-    m_converterTEC = 0;
-}
-
-// Gives this codec a converter for m_encoding: the cached converter is taken
-// over (and its context cleared) when it was created for the same encoding,
-// otherwise a new converter to 16-bit Unicode is created.
-// Returns noErr on success, or the TECCreateConverter() status on failure.
-OSStatus TextCodecMac::createTECConverter() const
-{
-    TECConverterWrapper& cache = cachedConverterTEC();
-
-    // Decide on reuse before the cached encoding is invalidated below.
-    const bool canReuseCached = cache.encoding == m_encoding && cache.converter;
-    cache.encoding = invalidEncoding;
-
-    if (canReuseCached) {
-        m_converterTEC = cache.converter;
-        cache.converter = 0;
-        TECClearConverterContextInfo(m_converterTEC);
-        return noErr;
-    }
-
-    const TextEncoding unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat);
-    OSStatus creationStatus = TECCreateConverter(&m_converterTEC, m_encoding, unicodeEncoding);
-    if (creationStatus)
-        return creationStatus;
-
-    TECSetBasicOptions(m_converterTEC, kUnicodeForceASCIIRangeMask);
-    return noErr;
-}
-
-OSStatus TextCodecMac::decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength, // Runs one TECConvertText pass; inputLength/outputLength receive the bytes consumed from inputBuffer and written to outputBuffer.
- void *outputBuffer, int outputBufferLength, int& outputLength)
-{
- OSStatus status;
- unsigned long bytesRead = 0;
- unsigned long bytesWritten = 0;
-
- if (m_numBufferedBytes != 0) { // Bytes of a partial character are pending from an earlier call.
- // Finish converting a partial character that's in our buffer.
-
- // First, fill the partial character buffer with as many bytes as are available.
- ASSERT(m_numBufferedBytes < sizeof(m_bufferedBytes));
- const int spaceInBuffer = sizeof(m_bufferedBytes) - m_numBufferedBytes;
- const int bytesToPutInBuffer = min(spaceInBuffer, inputBufferLength);
- ASSERT(bytesToPutInBuffer != 0);
- memcpy(m_bufferedBytes + m_numBufferedBytes, inputBuffer, bytesToPutInBuffer);
-
- // Now, do a conversion on the buffer.
- status = TECConvertText(m_converterTEC, m_bufferedBytes, m_numBufferedBytes + bytesToPutInBuffer, &bytesRead,
- reinterpret_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
- ASSERT(bytesRead <= m_numBufferedBytes + bytesToPutInBuffer);
-
- if (status == kTECPartialCharErr && bytesRead == 0) {
- // Handle the case where the partial character was not converted.
- if (bytesToPutInBuffer >= spaceInBuffer) { // The small buffer is completely full and still holds no whole character.
- LOG_ERROR("TECConvertText gave a kTECPartialCharErr but read none of the %zu bytes in the buffer", sizeof(m_bufferedBytes));
- m_numBufferedBytes = 0;
- status = kTECUnmappableElementErr; // should never happen, but use this error code
- } else {
- // Tell the caller we read all the source bytes and keep them in the buffer.
- m_numBufferedBytes += bytesToPutInBuffer;
- bytesRead = bytesToPutInBuffer;
- status = noErr;
- }
- } else {
- // We are done with the partial character buffer.
- // Also, we have read some of the bytes from the main buffer.
- if (bytesRead > m_numBufferedBytes) {
- bytesRead -= m_numBufferedBytes; // Report only the bytes consumed from inputBuffer, not the previously buffered ones.
- } else {
- LOG_ERROR("TECConvertText accepted some bytes it previously rejected with kTECPartialCharErr");
- bytesRead = 0;
- }
- m_numBufferedBytes = 0;
- if (status == kTECPartialCharErr) {
- // While there may be a partial character problem in the small buffer,
- // we have to try again and not get confused and think there is a partial
- // character problem in the large buffer.
- status = noErr;
- }
- }
- } else { // Nothing pending: convert straight from the caller's input buffer.
- status = TECConvertText(m_converterTEC, inputBuffer, inputBufferLength, &bytesRead,
- static_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
- ASSERT(static_cast<int>(bytesRead) <= inputBufferLength);
- }
-
- // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus.
- if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0)
- status = kTECOutputBufferFullStatus;
-
- inputLength = bytesRead; // Narrowing from unsigned long to the caller's int out-parameters.
- outputLength = bytesWritten;
- return status;
-}
-
-// Decodes |length| bytes starting at |bytes| into a String using the TEC converter
-// for m_encoding, creating the converter first if this codec does not have one yet.
-//
-// bytes, length: the encoded input for this call.
-// flush:         when true, TECFlushText() is called once the input has been consumed
-//                and whatever it emits is appended to the result.
-// stopOnError:   when true, the first malformed or undefined sequence sets |sawError|
-//                and ends the conversion loop; when false, one further source byte is
-//                skipped and decoding continues.
-// sawError:      in/out flag. It is never cleared here, and the conversion loop is
-//                skipped entirely if it is already set on entry.
-//
-// An incomplete character at the end of the input is stashed in m_bufferedBytes so
-// that the next call can complete it.
-//
-// Returns the decoded text, or String() when no converter could be created or the
-// converter reported a status that is not handled below.
-String TextCodecMac::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
-{
-    // Get a converter for the passed-in encoding.
-    if (!m_converterTEC && createTECConverter() != noErr)
-        return String();
-
-    Vector<UChar> result;
-
-    const unsigned char* sourcePointer = reinterpret_cast<const unsigned char*>(bytes);
-    int sourceLength = length;
-    bool bufferWasFull = false;
-    UniChar buffer[ConversionBufferSize];
-
-    // Keep going while input remains, or while the previous pass stopped only because
-    // the output buffer filled up (the converter may still have output to deliver).
-    while ((sourceLength || bufferWasFull) && !sawError) {
-        int bytesRead = 0;
-        int bytesWritten = 0;
-        OSStatus status = decode(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten);
-        ASSERT(bytesRead <= sourceLength);
-        sourcePointer += bytesRead;
-        sourceLength -= bytesRead;
-
-        switch (status) {
-        case noErr:
-        case kTECOutputBufferFullStatus:
-            break;
-        case kTextMalformedInputErr:
-        case kTextUndefinedElementErr:
-            // FIXME: Put FFFD character into the output string in this case?
-            TECClearConverterContextInfo(m_converterTEC);
-            if (stopOnError) {
-                sawError = true;
-                break;
-            }
-            // Skip one byte past the point where the converter stopped and carry on.
-            if (sourceLength) {
-                sourcePointer += 1;
-                sourceLength -= 1;
-            }
-            break;
-        case kTECPartialCharErr: {
-            // Put the partial character into the buffer.
-            ASSERT(m_numBufferedBytes == 0);
-            // The capacity that matters is that of the byte buffer itself, not of the
-            // counter that tracks how many of its bytes are in use.
-            const int bufferSize = sizeof(m_bufferedBytes);
-            if (sourceLength < bufferSize) {
-                memcpy(m_bufferedBytes, sourcePointer, sourceLength);
-                m_numBufferedBytes = sourceLength;
-            } else {
-                LOG_ERROR("TECConvertText gave a kTECPartialCharErr, but left %d bytes in the buffer", sourceLength);
-            }
-            // Either way the remaining input has been dealt with for this call.
-            sourceLength = 0;
-            break;
-        }
-        default:
-            sawError = true;
-            return String();
-        }
-
-        ASSERT(!(bytesWritten % sizeof(UChar)));
-        result.append(buffer, bytesWritten / sizeof(UChar));
-
-        bufferWasFull = status == kTECOutputBufferFullStatus;
-    }
-
-    if (flush) {
-        unsigned long bytesWritten = 0;
-        TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten);
-        ASSERT(!(bytesWritten % sizeof(UChar)));
-        result.append(buffer, bytesWritten / sizeof(UChar));
-    }
-
-    String resultString = String::adopt(result);
-
-    // <rdar://problem/3225472>
-    // Simplified Chinese pages use the code A3A0 to mean "full-width space".
-    // But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice.
-    // To work around, just change all occurrences of U+E5E5 to U+3000 (ideographic space).
-    if (m_encoding == kCFStringEncodingGB_18030_2000)
-        resultString.replace(0xE5E5, ideographicSpace);
-
-    return resultString;
-}
-
-CString TextCodecMac::encode(const UChar* characters, size_t length, UnencodableHandling handling) // Encodes |length| UTF-16 code units into m_encoding via CFString; unencodable characters are replaced according to |handling|.
-{
- // FIXME: We should really use TEC here instead of CFString for consistency with the other direction.
-
- // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
- // Encoding will change the yen sign back into a backslash.
- String copy(characters, length);
- copy.replace('\\', m_backslashAsCurrencySymbol); // NOTE(review): the visible constructor never initializes m_backslashAsCurrencySymbol - confirm.
- RetainPtr<CFStringRef> cfs(AdoptCF, copy.createCFString());
-
- CFIndex startPos = 0;
- CFIndex charactersLeft = CFStringGetLength(cfs.get());
- Vector<char> result;
- size_t size = 0; // Number of bytes of |result| that hold encoded output so far.
- UInt8 lossByte = handling == QuestionMarksForUnencodables ? '?' : 0; // '?' is substituted by CFStringGetBytes itself; 0 presumably makes it stop at the first unencodable character - see the CFStringGetBytes documentation.
- while (charactersLeft > 0) {
- CFRange range = CFRangeMake(startPos, charactersLeft);
- CFIndex bufferLength;
- CFStringGetBytes(cfs.get(), range, m_encoding, lossByte, false, NULL, 0x7FFFFFFF, &bufferLength); // First pass with a NULL buffer: only bufferLength (the byte count needed) is wanted.
-
- result.grow(size + bufferLength);
- unsigned char* buffer = reinterpret_cast<unsigned char*>(result.data() + size);
- CFIndex charactersConverted = CFStringGetBytes(cfs.get(), range, m_encoding, lossByte, false, buffer, bufferLength, &bufferLength); // Second pass writes the bytes after the output produced so far.
- size += bufferLength;
-
- if (charactersConverted != charactersLeft) { // Conversion stopped early: the character at startPos + charactersConverted was not encoded.
- unsigned badChar = CFStringGetCharacterAtIndex(cfs.get(), startPos + charactersConverted);
- ++charactersConverted; // Count the offending code unit as handled.
- if ((badChar & 0xFC00) == 0xD800 && charactersConverted != charactersLeft) { // is high surrogate
- UniChar low = CFStringGetCharacterAtIndex(cfs.get(), startPos + charactersConverted);
- if ((low & 0xFC00) == 0xDC00) { // is low surrogate
- badChar <<= 10; // These three steps combine the surrogate pair into a single code point.
- badChar += low;
- badChar += 0x10000 - (0xD800 << 10) - 0xDC00;
- ++charactersConverted;
- }
- }
- UnencodableReplacementArray entity;
- int entityLength = getUnencodableReplacement(badChar, handling, entity);
- result.grow(size + entityLength);
- memcpy(result.data() + size, entity, entityLength); // Append the replacement text for the unencodable character.
- size += entityLength;
- }
-
- startPos += charactersConverted; // Continue with whatever follows the part handled in this pass.
- charactersLeft -= charactersConverted;
- }
- return CString(result.data(), size);
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/mac/TextCodecMac.h b/WebCore/platform/text/mac/TextCodecMac.h
deleted file mode 100644
index 3e7a237..0000000
--- a/WebCore/platform/text/mac/TextCodecMac.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodecMac_h
-#define TextCodecMac_h
-
-#include "TextCodec.h"
-#include <CoreServices/CoreServices.h>
-
-namespace WebCore {
-
- typedef ::TextEncoding TECTextEncodingID;
- const TECTextEncodingID invalidEncoding = kCFStringEncodingInvalidId;
-
- class TextCodecMac : public TextCodec { // TextCodec implementation that decodes with a TEC converter and encodes through CFString.
- public:
- static void registerEncodingNames(EncodingNameRegistrar); // Registers every CharsetTable name along with the first name of its encoding.
- static void registerCodecs(TextCodecRegistrar); // Registers one codec factory per distinct run of encodings in CharsetTable.
-
- explicit TextCodecMac(TECTextEncodingID);
- virtual ~TextCodecMac();
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
-
- private:
- OSStatus decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength, // Single conversion pass used by the public decode(); the int& parameters receive bytes consumed and bytes written.
- void* outputBuffer, int outputBufferLength, int& outputLength);
-
- OSStatus createTECConverter() const; // const, yet assigns the mutable m_converterTEC.
- void releaseTECConverter() const; // const, yet clears the mutable m_converterTEC.
-
- TECTextEncodingID m_encoding; // Encoding this codec converts from and to.
- UChar m_backslashAsCurrencySymbol; // Substituted for backslash in encode(). NOTE(review): not set by the constructor in TextCodecMac.cpp - confirm.
- unsigned m_numBufferedBytes; // Count of valid bytes in m_bufferedBytes.
- unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character
- mutable TECObjectRef m_converterTEC; // Null while the codec has no converter.
- };
-
- struct TECConverterWrapper { // Holds at most one TEC converter together with the encoding it was created for.
- TECConverterWrapper() : converter(0), encoding(invalidEncoding) { } // Starts out empty.
- ~TECConverterWrapper() { if (converter) TECDisposeConverter(converter); } // Disposes of the held converter, if any.
-
- TECObjectRef converter; // Null when nothing is held.
- TECTextEncodingID encoding; // invalidEncoding when no reusable converter is recorded.
- };
-
-} // namespace WebCore
-
-#endif // TextCodecMac_h
diff --git a/WebCore/platform/text/mac/character-sets.txt b/WebCore/platform/text/mac/character-sets.txt
deleted file mode 100644
index 475e78e..0000000
--- a/WebCore/platform/text/mac/character-sets.txt
+++ /dev/null
@@ -1,1868 +0,0 @@
-
-===================================================================
-CHARACTER SETS
-
-(last updated 28 January 2005)
-
-These are the official names for character sets that may be used in
-the Internet and may be referred to in Internet documentation. These
-names are expressed in ANSI_X3.4-1968 which is commonly called
-US-ASCII or simply ASCII. The character set most commonly used in the
-Internet and used especially in protocol standards is US-ASCII; this
-is strongly encouraged. The use of the name US-ASCII is also
-encouraged.
-
-The character set names may be up to 40 characters taken from the
-printable characters of US-ASCII. However, no distinction is made
-between use of upper and lower case letters.
-
-The MIBenum value is a unique value for use in MIBs to identify coded
-character sets.
-
-The value space for MIBenum values has been divided into three
-regions. The first region (3-999) consists of coded character sets
-that have been standardized by some standard setting organization.
-This region is intended for standards that do not have subset
-implementations. The second region (1000-1999) is for the Unicode and
-ISO/IEC 10646 coded character sets together with a specification of a
-(set of) sub-repertoires that may occur. The third region (>1999) is
-intended for vendor specific coded character sets.
-
- Assigned MIB enum Numbers
- -------------------------
- 0-2 Reserved
- 3-999 Set By Standards Organizations
- 1000-1999 Unicode / 10646
- 2000-2999 Vendor
-
-The aliases that start with "cs" have been added for use with the
-IANA-CHARSET-MIB as originally defined in RFC3808, and as currently
-maintained by IANA at http://www.iana.org/assignments/ianacharset-mib.
-Note that the ianacharset-mib needs to be kept in sync with this
-registry. These aliases that start with "cs" contain the standard
-numbers along with suggestive names in order to facilitate applications
-that want to display the names in user interfaces. The "cs" stands
-for character set and is provided for applications that need a lower
-case first letter but want to use mixed case thereafter that cannot
-contain any special characters, such as underbar ("_") and dash ("-").
-
-If the character set is from an ISO standard, its cs alias is the ISO
-standard number or name. If the character set is not from an ISO
-standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO
-Registration Authority), the ISO Registry number is specified as
-ISOnnn followed by letters suggestive of the name or standards number
-of the code set. When a national or international standard is
-revised, the year of revision is added to the cs alias of the new
-character set entry in the IANA Registry in order to distinguish the
-revised character set from the original character set.
-
-
-Character Set Reference
-------------- ---------
-
-Name: ANSI_X3.4-1968 [RFC1345,KXS2]
-MIBenum: 3
-Source: ECMA registry
-Alias: iso-ir-6
-Alias: ANSI_X3.4-1986
-Alias: ISO_646.irv:1991
-Alias: ASCII
-Alias: ISO646-US
-Alias: US-ASCII (preferred MIME name)
-Alias: us
-Alias: IBM367
-Alias: cp367
-Alias: csASCII
-
-Name: ISO-10646-UTF-1
-MIBenum: 27
-Source: Universal Transfer Format (1), this is the multibyte
- encoding, that subsets ASCII-7. It does not have byte
- ordering issues.
-Alias: csISO10646UTF1
-
-Name: ISO_646.basic:1983 [RFC1345,KXS2]
-MIBenum: 28
-Source: ECMA registry
-Alias: ref
-Alias: csISO646basic1983
-
-Name: INVARIANT [RFC1345,KXS2]
-MIBenum: 29
-Alias: csINVARIANT
-
-Name: ISO_646.irv:1983 [RFC1345,KXS2]
-MIBenum: 30
-Source: ECMA registry
-Alias: iso-ir-2
-Alias: irv
-Alias: csISO2IntlRefVersion
-
-Name: BS_4730 [RFC1345,KXS2]
-MIBenum: 20
-Source: ECMA registry
-Alias: iso-ir-4
-Alias: ISO646-GB
-Alias: gb
-Alias: uk
-Alias: csISO4UnitedKingdom
-
-Name: NATS-SEFI [RFC1345,KXS2]
-MIBenum: 31
-Source: ECMA registry
-Alias: iso-ir-8-1
-Alias: csNATSSEFI
-
-Name: NATS-SEFI-ADD [RFC1345,KXS2]
-MIBenum: 32
-Source: ECMA registry
-Alias: iso-ir-8-2
-Alias: csNATSSEFIADD
-
-Name: NATS-DANO [RFC1345,KXS2]
-MIBenum: 33
-Source: ECMA registry
-Alias: iso-ir-9-1
-Alias: csNATSDANO
-
-Name: NATS-DANO-ADD [RFC1345,KXS2]
-MIBenum: 34
-Source: ECMA registry
-Alias: iso-ir-9-2
-Alias: csNATSDANOADD
-
-Name: SEN_850200_B [RFC1345,KXS2]
-MIBenum: 35
-Source: ECMA registry
-Alias: iso-ir-10
-Alias: FI
-Alias: ISO646-FI
-Alias: ISO646-SE
-Alias: se
-Alias: csISO10Swedish
-
-Name: SEN_850200_C [RFC1345,KXS2]
-MIBenum: 21
-Source: ECMA registry
-Alias: iso-ir-11
-Alias: ISO646-SE2
-Alias: se2
-Alias: csISO11SwedishForNames
-
-Name: KS_C_5601-1987 [RFC1345,KXS2]
-MIBenum: 36
-Source: ECMA registry
-Alias: iso-ir-149
-Alias: KS_C_5601-1989
-Alias: KSC_5601
-Alias: korean
-Alias: csKSC56011987
-
-Name: ISO-2022-KR (preferred MIME name) [RFC1557,Choi]
-MIBenum: 37
-Source: RFC-1557 (see also KS_C_5601-1987)
-Alias: csISO2022KR
-
-Name: EUC-KR (preferred MIME name) [RFC1557,Choi]
-MIBenum: 38
-Source: RFC-1557 (see also KS_C_5861-1992)
-Alias: csEUCKR
-
-Name: ISO-2022-JP (preferred MIME name) [RFC1468,Murai]
-MIBenum: 39
-Source: RFC-1468 (see also RFC-2237)
-Alias: csISO2022JP
-
-Name: ISO-2022-JP-2 (preferred MIME name) [RFC1554,Ohta]
-MIBenum: 40
-Source: RFC-1554
-Alias: csISO2022JP2
-
-Name: ISO-2022-CN [RFC1922]
-MIBenum: 104
-Source: RFC-1922
-
-Name: ISO-2022-CN-EXT [RFC1922]
-MIBenum: 105
-Source: RFC-1922
-
-Name: JIS_C6220-1969-jp [RFC1345,KXS2]
-MIBenum: 41
-Source: ECMA registry
-Alias: JIS_C6220-1969
-Alias: iso-ir-13
-Alias: katakana
-Alias: x0201-7
-Alias: csISO13JISC6220jp
-
-Name: JIS_C6220-1969-ro [RFC1345,KXS2]
-MIBenum: 42
-Source: ECMA registry
-Alias: iso-ir-14
-Alias: jp
-Alias: ISO646-JP
-Alias: csISO14JISC6220ro
-
-Name: IT [RFC1345,KXS2]
-MIBenum: 22
-Source: ECMA registry
-Alias: iso-ir-15
-Alias: ISO646-IT
-Alias: csISO15Italian
-
-Name: PT [RFC1345,KXS2]
-MIBenum: 43
-Source: ECMA registry
-Alias: iso-ir-16
-Alias: ISO646-PT
-Alias: csISO16Portuguese
-
-Name: ES [RFC1345,KXS2]
-MIBenum: 23
-Source: ECMA registry
-Alias: iso-ir-17
-Alias: ISO646-ES
-Alias: csISO17Spanish
-
-Name: greek7-old [RFC1345,KXS2]
-MIBenum: 44
-Source: ECMA registry
-Alias: iso-ir-18
-Alias: csISO18Greek7Old
-
-Name: latin-greek [RFC1345,KXS2]
-MIBenum: 45
-Source: ECMA registry
-Alias: iso-ir-19
-Alias: csISO19LatinGreek
-
-Name: DIN_66003 [RFC1345,KXS2]
-MIBenum: 24
-Source: ECMA registry
-Alias: iso-ir-21
-Alias: de
-Alias: ISO646-DE
-Alias: csISO21German
-
-Name: NF_Z_62-010_(1973) [RFC1345,KXS2]
-MIBenum: 46
-Source: ECMA registry
-Alias: iso-ir-25
-Alias: ISO646-FR1
-Alias: csISO25French
-
-Name: Latin-greek-1 [RFC1345,KXS2]
-MIBenum: 47
-Source: ECMA registry
-Alias: iso-ir-27
-Alias: csISO27LatinGreek1
-
-Name: ISO_5427 [RFC1345,KXS2]
-MIBenum: 48
-Source: ECMA registry
-Alias: iso-ir-37
-Alias: csISO5427Cyrillic
-
-Name: JIS_C6226-1978 [RFC1345,KXS2]
-MIBenum: 49
-Source: ECMA registry
-Alias: iso-ir-42
-Alias: csISO42JISC62261978
-
-Name: BS_viewdata [RFC1345,KXS2]
-MIBenum: 50
-Source: ECMA registry
-Alias: iso-ir-47
-Alias: csISO47BSViewdata
-
-Name: INIS [RFC1345,KXS2]
-MIBenum: 51
-Source: ECMA registry
-Alias: iso-ir-49
-Alias: csISO49INIS
-
-Name: INIS-8 [RFC1345,KXS2]
-MIBenum: 52
-Source: ECMA registry
-Alias: iso-ir-50
-Alias: csISO50INIS8
-
-Name: INIS-cyrillic [RFC1345,KXS2]
-MIBenum: 53
-Source: ECMA registry
-Alias: iso-ir-51
-Alias: csISO51INISCyrillic
-
-Name: ISO_5427:1981 [RFC1345,KXS2]
-MIBenum: 54
-Source: ECMA registry
-Alias: iso-ir-54
-Alias: ISO5427Cyrillic1981
-
-Name: ISO_5428:1980 [RFC1345,KXS2]
-MIBenum: 55
-Source: ECMA registry
-Alias: iso-ir-55
-Alias: csISO5428Greek
-
-Name: GB_1988-80 [RFC1345,KXS2]
-MIBenum: 56
-Source: ECMA registry
-Alias: iso-ir-57
-Alias: cn
-Alias: ISO646-CN
-Alias: csISO57GB1988
-
-Name: GB_2312-80 [RFC1345,KXS2]
-MIBenum: 57
-Source: ECMA registry
-Alias: iso-ir-58
-Alias: chinese
-Alias: csISO58GB231280
-
-Name: NS_4551-1 [RFC1345,KXS2]
-MIBenum: 25
-Source: ECMA registry
-Alias: iso-ir-60
-Alias: ISO646-NO
-Alias: no
-Alias: csISO60DanishNorwegian
-Alias: csISO60Norwegian1
-
-Name: NS_4551-2 [RFC1345,KXS2]
-MIBenum: 58
-Source: ECMA registry
-Alias: ISO646-NO2
-Alias: iso-ir-61
-Alias: no2
-Alias: csISO61Norwegian2
-
-Name: NF_Z_62-010 [RFC1345,KXS2]
-MIBenum: 26
-Source: ECMA registry
-Alias: iso-ir-69
-Alias: ISO646-FR
-Alias: fr
-Alias: csISO69French
-
-Name: videotex-suppl [RFC1345,KXS2]
-MIBenum: 59
-Source: ECMA registry
-Alias: iso-ir-70
-Alias: csISO70VideotexSupp1
-
-Name: PT2 [RFC1345,KXS2]
-MIBenum: 60
-Source: ECMA registry
-Alias: iso-ir-84
-Alias: ISO646-PT2
-Alias: csISO84Portuguese2
-
-Name: ES2 [RFC1345,KXS2]
-MIBenum: 61
-Source: ECMA registry
-Alias: iso-ir-85
-Alias: ISO646-ES2
-Alias: csISO85Spanish2
-
-Name: MSZ_7795.3 [RFC1345,KXS2]
-MIBenum: 62
-Source: ECMA registry
-Alias: iso-ir-86
-Alias: ISO646-HU
-Alias: hu
-Alias: csISO86Hungarian
-
-Name: JIS_C6226-1983 [RFC1345,KXS2]
-MIBenum: 63
-Source: ECMA registry
-Alias: iso-ir-87
-Alias: x0208
-Alias: JIS_X0208-1983
-Alias: csISO87JISX0208
-
-Name: greek7 [RFC1345,KXS2]
-MIBenum: 64
-Source: ECMA registry
-Alias: iso-ir-88
-Alias: csISO88Greek7
-
-Name: ASMO_449 [RFC1345,KXS2]
-MIBenum: 65
-Source: ECMA registry
-Alias: ISO_9036
-Alias: arabic7
-Alias: iso-ir-89
-Alias: csISO89ASMO449
-
-Name: iso-ir-90 [RFC1345,KXS2]
-MIBenum: 66
-Source: ECMA registry
-Alias: csISO90
-
-Name: JIS_C6229-1984-a [RFC1345,KXS2]
-MIBenum: 67
-Source: ECMA registry
-Alias: iso-ir-91
-Alias: jp-ocr-a
-Alias: csISO91JISC62291984a
-
-Name: JIS_C6229-1984-b [RFC1345,KXS2]
-MIBenum: 68
-Source: ECMA registry
-Alias: iso-ir-92
-Alias: ISO646-JP-OCR-B
-Alias: jp-ocr-b
-Alias: csISO92JISC62991984b
-
-Name: JIS_C6229-1984-b-add [RFC1345,KXS2]
-MIBenum: 69
-Source: ECMA registry
-Alias: iso-ir-93
-Alias: jp-ocr-b-add
-Alias: csISO93JIS62291984badd
-
-Name: JIS_C6229-1984-hand [RFC1345,KXS2]
-MIBenum: 70
-Source: ECMA registry
-Alias: iso-ir-94
-Alias: jp-ocr-hand
-Alias: csISO94JIS62291984hand
-
-Name: JIS_C6229-1984-hand-add [RFC1345,KXS2]
-MIBenum: 71
-Source: ECMA registry
-Alias: iso-ir-95
-Alias: jp-ocr-hand-add
-Alias: csISO95JIS62291984handadd
-
-Name: JIS_C6229-1984-kana [RFC1345,KXS2]
-MIBenum: 72
-Source: ECMA registry
-Alias: iso-ir-96
-Alias: csISO96JISC62291984kana
-
-Name: ISO_2033-1983 [RFC1345,KXS2]
-MIBenum: 73
-Source: ECMA registry
-Alias: iso-ir-98
-Alias: e13b
-Alias: csISO2033
-
-Name: ANSI_X3.110-1983 [RFC1345,KXS2]
-MIBenum: 74
-Source: ECMA registry
-Alias: iso-ir-99
-Alias: CSA_T500-1983
-Alias: NAPLPS
-Alias: csISO99NAPLPS
-
-Name: ISO_8859-1:1987 [RFC1345,KXS2]
-MIBenum: 4
-Source: ECMA registry
-Alias: iso-ir-100
-Alias: ISO_8859-1
-Alias: ISO-8859-1 (preferred MIME name)
-Alias: latin1
-Alias: l1
-Alias: IBM819
-Alias: CP819
-Alias: csISOLatin1
-
-Name: ISO_8859-2:1987 [RFC1345,KXS2]
-MIBenum: 5
-Source: ECMA registry
-Alias: iso-ir-101
-Alias: ISO_8859-2
-Alias: ISO-8859-2 (preferred MIME name)
-Alias: latin2
-Alias: l2
-Alias: csISOLatin2
-
-Name: T.61-7bit [RFC1345,KXS2]
-MIBenum: 75
-Source: ECMA registry
-Alias: iso-ir-102
-Alias: csISO102T617bit
-
-Name: T.61-8bit [RFC1345,KXS2]
-MIBenum: 76
-Alias: T.61
-Source: ECMA registry
-Alias: iso-ir-103
-Alias: csISO103T618bit
-
-Name: ISO_8859-3:1988 [RFC1345,KXS2]
-MIBenum: 6
-Source: ECMA registry
-Alias: iso-ir-109
-Alias: ISO_8859-3
-Alias: ISO-8859-3 (preferred MIME name)
-Alias: latin3
-Alias: l3
-Alias: csISOLatin3
-
-Name: ISO_8859-4:1988 [RFC1345,KXS2]
-MIBenum: 7
-Source: ECMA registry
-Alias: iso-ir-110
-Alias: ISO_8859-4
-Alias: ISO-8859-4 (preferred MIME name)
-Alias: latin4
-Alias: l4
-Alias: csISOLatin4
-
-Name: ECMA-cyrillic
-MIBenum: 77
-Source: ISO registry (formerly ECMA registry)
- http://www.itscj.ipsj.jp/ISO-IR/111.pdf
-Alias: iso-ir-111
-Alias: KOI8-E
-Alias: csISO111ECMACyrillic
-
-Name: CSA_Z243.4-1985-1 [RFC1345,KXS2]
-MIBenum: 78
-Source: ECMA registry
-Alias: iso-ir-121
-Alias: ISO646-CA
-Alias: csa7-1
-Alias: ca
-Alias: csISO121Canadian1
-
-Name: CSA_Z243.4-1985-2 [RFC1345,KXS2]
-MIBenum: 79
-Source: ECMA registry
-Alias: iso-ir-122
-Alias: ISO646-CA2
-Alias: csa7-2
-Alias: csISO122Canadian2
-
-Name: CSA_Z243.4-1985-gr [RFC1345,KXS2]
-MIBenum: 80
-Source: ECMA registry
-Alias: iso-ir-123
-Alias: csISO123CSAZ24341985gr
-
-Name: ISO_8859-6:1987 [RFC1345,KXS2]
-MIBenum: 9
-Source: ECMA registry
-Alias: iso-ir-127
-Alias: ISO_8859-6
-Alias: ISO-8859-6 (preferred MIME name)
-Alias: ECMA-114
-Alias: ASMO-708
-Alias: arabic
-Alias: csISOLatinArabic
-
-Name: ISO_8859-6-E [RFC1556,IANA]
-MIBenum: 81
-Source: RFC1556
-Alias: csISO88596E
-Alias: ISO-8859-6-E (preferred MIME name)
-
-Name: ISO_8859-6-I [RFC1556,IANA]
-MIBenum: 82
-Source: RFC1556
-Alias: csISO88596I
-Alias: ISO-8859-6-I (preferred MIME name)
-
-Name: ISO_8859-7:1987 [RFC1947,RFC1345,KXS2]
-MIBenum: 10
-Source: ECMA registry
-Alias: iso-ir-126
-Alias: ISO_8859-7
-Alias: ISO-8859-7 (preferred MIME name)
-Alias: ELOT_928
-Alias: ECMA-118
-Alias: greek
-Alias: greek8
-Alias: csISOLatinGreek
-
-Name: T.101-G2 [RFC1345,KXS2]
-MIBenum: 83
-Source: ECMA registry
-Alias: iso-ir-128
-Alias: csISO128T101G2
-
-Name: ISO_8859-8:1988 [RFC1345,KXS2]
-MIBenum: 11
-Source: ECMA registry
-Alias: iso-ir-138
-Alias: ISO_8859-8
-Alias: ISO-8859-8 (preferred MIME name)
-Alias: hebrew
-Alias: csISOLatinHebrew
-
-Name: ISO_8859-8-E [RFC1556,Nussbacher]
-MIBenum: 84
-Source: RFC1556
-Alias: csISO88598E
-Alias: ISO-8859-8-E (preferred MIME name)
-
-Name: ISO_8859-8-I [RFC1556,Nussbacher]
-MIBenum: 85
-Source: RFC1556
-Alias: csISO88598I
-Alias: ISO-8859-8-I (preferred MIME name)
-
-Name: CSN_369103 [RFC1345,KXS2]
-MIBenum: 86
-Source: ECMA registry
-Alias: iso-ir-139
-Alias: csISO139CSN369103
-
-Name: JUS_I.B1.002 [RFC1345,KXS2]
-MIBenum: 87
-Source: ECMA registry
-Alias: iso-ir-141
-Alias: ISO646-YU
-Alias: js
-Alias: yu
-Alias: csISO141JUSIB1002
-
-Name: ISO_6937-2-add [RFC1345,KXS2]
-MIBenum: 14
-Source: ECMA registry and ISO 6937-2:1983
-Alias: iso-ir-142
-Alias: csISOTextComm
-
-Name: IEC_P27-1 [RFC1345,KXS2]
-MIBenum: 88
-Source: ECMA registry
-Alias: iso-ir-143
-Alias: csISO143IECP271
-
-Name: ISO_8859-5:1988 [RFC1345,KXS2]
-MIBenum: 8
-Source: ECMA registry
-Alias: iso-ir-144
-Alias: ISO_8859-5
-Alias: ISO-8859-5 (preferred MIME name)
-Alias: cyrillic
-Alias: csISOLatinCyrillic
-
-Name: JUS_I.B1.003-serb [RFC1345,KXS2]
-MIBenum: 89
-Source: ECMA registry
-Alias: iso-ir-146
-Alias: serbian
-Alias: csISO146Serbian
-
-Name: JUS_I.B1.003-mac [RFC1345,KXS2]
-MIBenum: 90
-Source: ECMA registry
-Alias: macedonian
-Alias: iso-ir-147
-Alias: csISO147Macedonian
-
-Name: ISO_8859-9:1989 [RFC1345,KXS2]
-MIBenum: 12
-Source: ECMA registry
-Alias: iso-ir-148
-Alias: ISO_8859-9
-Alias: ISO-8859-9 (preferred MIME name)
-Alias: latin5
-Alias: l5
-Alias: csISOLatin5
-
-Name: greek-ccitt [RFC1345,KXS2]
-MIBenum: 91
-Source: ECMA registry
-Alias: iso-ir-150
-Alias: csISO150
-Alias: csISO150GreekCCITT
-
-Name: NC_NC00-10:81 [RFC1345,KXS2]
-MIBenum: 92
-Source: ECMA registry
-Alias: cuba
-Alias: iso-ir-151
-Alias: ISO646-CU
-Alias: csISO151Cuba
-
-Name: ISO_6937-2-25 [RFC1345,KXS2]
-MIBenum: 93
-Source: ECMA registry
-Alias: iso-ir-152
-Alias: csISO6937Add
-
-Name: GOST_19768-74 [RFC1345,KXS2]
-MIBenum: 94
-Source: ECMA registry
-Alias: ST_SEV_358-88
-Alias: iso-ir-153
-Alias: csISO153GOST1976874
-
-Name: ISO_8859-supp [RFC1345,KXS2]
-MIBenum: 95
-Source: ECMA registry
-Alias: iso-ir-154
-Alias: latin1-2-5
-Alias: csISO8859Supp
-
-Name: ISO_10367-box [RFC1345,KXS2]
-MIBenum: 96
-Source: ECMA registry
-Alias: iso-ir-155
-Alias: csISO10367Box
-
-Name: ISO-8859-10 (preferred MIME name) [RFC1345,KXS2]
-MIBenum: 13
-Source: ECMA registry
-Alias: iso-ir-157
-Alias: l6
-Alias: ISO_8859-10:1992
-Alias: csISOLatin6
-Alias: latin6
-
-Name: latin-lap [RFC1345,KXS2]
-MIBenum: 97
-Source: ECMA registry
-Alias: lap
-Alias: iso-ir-158
-Alias: csISO158Lap
-
-Name: JIS_X0212-1990 [RFC1345,KXS2]
-MIBenum: 98
-Source: ECMA registry
-Alias: x0212
-Alias: iso-ir-159
-Alias: csISO159JISX02121990
-
-Name: DS_2089 [RFC1345,KXS2]
-MIBenum: 99
-Source: Danish Standard, DS 2089, February 1974
-Alias: DS2089
-Alias: ISO646-DK
-Alias: dk
-Alias: csISO646Danish
-
-Name: us-dk [RFC1345,KXS2]
-MIBenum: 100
-Alias: csUSDK
-
-Name: dk-us [RFC1345,KXS2]
-MIBenum: 101
-Alias: csDKUS
-
-Name: JIS_X0201 [RFC1345,KXS2]
-MIBenum: 15
-Source: JIS X 0201-1976. One byte only, this is equivalent to
- JIS/Roman (similar to ASCII) plus eight-bit half-width
- Katakana
-Alias: X0201
-Alias: csHalfWidthKatakana
-
-Name: KSC5636 [RFC1345,KXS2]
-MIBenum: 102
-Alias: ISO646-KR
-Alias: csKSC5636
-
-Name: ISO-10646-UCS-2
-MIBenum: 1000
-Source: the 2-octet Basic Multilingual Plane, aka Unicode
- this needs to specify network byte order: the standard
- does not specify (it is a 16-bit integer space)
-Alias: csUnicode
-
-Name: ISO-10646-UCS-4
-MIBenum: 1001
-Source: the full code space. (same comment about byte order,
- these are 31-bit numbers.)
-Alias: csUCS4
-
-Name: DEC-MCS [RFC1345,KXS2]
-MIBenum: 2008
-Source: VAX/VMS User's Manual,
- Order Number: AI-Y517A-TE, April 1986.
-Alias: dec
-Alias: csDECMCS
-
-Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
-MIBenum: 2004
-Source: LaserJet IIP Printer User's Manual,
- HP part no 33471-90901, Hewlet-Packard, June 1989.
-Alias: roman8
-Alias: r8
-Alias: csHPRoman8
-
-Name: macintosh [RFC1345,KXS2]
-MIBenum: 2027
-Source: The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991
-Alias: mac
-Alias: csMacintosh
-
-Name: IBM037 [RFC1345,KXS2]
-MIBenum: 2028
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp037
-Alias: ebcdic-cp-us
-Alias: ebcdic-cp-ca
-Alias: ebcdic-cp-wt
-Alias: ebcdic-cp-nl
-Alias: csIBM037
-
-Name: IBM038 [RFC1345,KXS2]
-MIBenum: 2029
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: EBCDIC-INT
-Alias: cp038
-Alias: csIBM038
-
-Name: IBM273 [RFC1345,KXS2]
-MIBenum: 2030
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP273
-Alias: csIBM273
-
-Name: IBM274 [RFC1345,KXS2]
-MIBenum: 2031
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: EBCDIC-BE
-Alias: CP274
-Alias: csIBM274
-
-Name: IBM275 [RFC1345,KXS2]
-MIBenum: 2032
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: EBCDIC-BR
-Alias: cp275
-Alias: csIBM275
-
-Name: IBM277 [RFC1345,KXS2]
-MIBenum: 2033
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: EBCDIC-CP-DK
-Alias: EBCDIC-CP-NO
-Alias: csIBM277
-
-Name: IBM278 [RFC1345,KXS2]
-MIBenum: 2034
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP278
-Alias: ebcdic-cp-fi
-Alias: ebcdic-cp-se
-Alias: csIBM278
-
-Name: IBM280 [RFC1345,KXS2]
-MIBenum: 2035
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP280
-Alias: ebcdic-cp-it
-Alias: csIBM280
-
-Name: IBM281 [RFC1345,KXS2]
-MIBenum: 2036
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: EBCDIC-JP-E
-Alias: cp281
-Alias: csIBM281
-
-Name: IBM284 [RFC1345,KXS2]
-MIBenum: 2037
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP284
-Alias: ebcdic-cp-es
-Alias: csIBM284
-
-Name: IBM285 [RFC1345,KXS2]
-MIBenum: 2038
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP285
-Alias: ebcdic-cp-gb
-Alias: csIBM285
-
-Name: IBM290 [RFC1345,KXS2]
-MIBenum: 2039
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: cp290
-Alias: EBCDIC-JP-kana
-Alias: csIBM290
-
-Name: IBM297 [RFC1345,KXS2]
-MIBenum: 2040
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp297
-Alias: ebcdic-cp-fr
-Alias: csIBM297
-
-Name: IBM420 [RFC1345,KXS2]
-MIBenum: 2041
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990,
- IBM NLS RM p 11-11
-Alias: cp420
-Alias: ebcdic-cp-ar1
-Alias: csIBM420
-
-Name: IBM423 [RFC1345,KXS2]
-MIBenum: 2042
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp423
-Alias: ebcdic-cp-gr
-Alias: csIBM423
-
-Name: IBM424 [RFC1345,KXS2]
-MIBenum: 2043
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp424
-Alias: ebcdic-cp-he
-Alias: csIBM424
-
-Name: IBM437 [RFC1345,KXS2]
-MIBenum: 2011
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp437
-Alias: 437
-Alias: csPC8CodePage437
-
-Name: IBM500 [RFC1345,KXS2]
-MIBenum: 2044
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP500
-Alias: ebcdic-cp-be
-Alias: ebcdic-cp-ch
-Alias: csIBM500
-
-Name: IBM775 [HP-PCL5]
-MIBenum: 2087
-Source: HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996
-Alias: cp775
-Alias: csPC775Baltic
-
-Name: IBM850 [RFC1345,KXS2]
-MIBenum: 2009
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp850
-Alias: 850
-Alias: csPC850Multilingual
-
-Name: IBM851 [RFC1345,KXS2]
-MIBenum: 2045
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp851
-Alias: 851
-Alias: csIBM851
-
-Name: IBM852 [RFC1345,KXS2]
-MIBenum: 2010
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp852
-Alias: 852
-Alias: csPCp852
-
-Name: IBM855 [RFC1345,KXS2]
-MIBenum: 2046
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp855
-Alias: 855
-Alias: csIBM855
-
-Name: IBM857 [RFC1345,KXS2]
-MIBenum: 2047
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp857
-Alias: 857
-Alias: csIBM857
-
-Name: IBM860 [RFC1345,KXS2]
-MIBenum: 2048
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp860
-Alias: 860
-Alias: csIBM860
-
-Name: IBM861 [RFC1345,KXS2]
-MIBenum: 2049
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp861
-Alias: 861
-Alias: cp-is
-Alias: csIBM861
-
-Name: IBM862 [RFC1345,KXS2]
-MIBenum: 2013
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp862
-Alias: 862
-Alias: csPC862LatinHebrew
-
-Name: IBM863 [RFC1345,KXS2]
-MIBenum: 2050
-Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
-Alias: cp863
-Alias: 863
-Alias: csIBM863
-
-Name: IBM864 [RFC1345,KXS2]
-MIBenum: 2051
-Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
-Alias: cp864
-Alias: csIBM864
-
-Name: IBM865 [RFC1345,KXS2]
-MIBenum: 2052
-Source: IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987)
-Alias: cp865
-Alias: 865
-Alias: csIBM865
-
-Name: IBM866 [Pond]
-MIBenum: 2086
-Source: IBM NLDG Volume 2 (SE09-8002-03) August 1994
-Alias: cp866
-Alias: 866
-Alias: csIBM866
-
-Name: IBM868 [RFC1345,KXS2]
-MIBenum: 2053
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP868
-Alias: cp-ar
-Alias: csIBM868
-
-Name: IBM869 [RFC1345,KXS2]
-MIBenum: 2054
-Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
-Alias: cp869
-Alias: 869
-Alias: cp-gr
-Alias: csIBM869
-
-Name: IBM870 [RFC1345,KXS2]
-MIBenum: 2055
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP870
-Alias: ebcdic-cp-roece
-Alias: ebcdic-cp-yu
-Alias: csIBM870
-
-Name: IBM871 [RFC1345,KXS2]
-MIBenum: 2056
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP871
-Alias: ebcdic-cp-is
-Alias: csIBM871
-
-Name: IBM880 [RFC1345,KXS2]
-MIBenum: 2057
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp880
-Alias: EBCDIC-Cyrillic
-Alias: csIBM880
-
-Name: IBM891 [RFC1345,KXS2]
-MIBenum: 2058
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp891
-Alias: csIBM891
-
-Name: IBM903 [RFC1345,KXS2]
-MIBenum: 2059
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp903
-Alias: csIBM903
-
-Name: IBM904 [RFC1345,KXS2]
-MIBenum: 2060
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: cp904
-Alias: 904
-Alias: csIBBM904
-
-Name: IBM905 [RFC1345,KXS2]
-MIBenum: 2061
-Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
-Alias: CP905
-Alias: ebcdic-cp-tr
-Alias: csIBM905
-
-Name: IBM918 [RFC1345,KXS2]
-MIBenum: 2062
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP918
-Alias: ebcdic-cp-ar2
-Alias: csIBM918
-
-Name: IBM1026 [RFC1345,KXS2]
-MIBenum: 2063
-Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
-Alias: CP1026
-Alias: csIBM1026
-
-Name: EBCDIC-AT-DE [RFC1345,KXS2]
-MIBenum: 2064
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csIBMEBCDICATDE
-
-Name: EBCDIC-AT-DE-A [RFC1345,KXS2]
-MIBenum: 2065
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICATDEA
-
-Name: EBCDIC-CA-FR [RFC1345,KXS2]
-MIBenum: 2066
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICCAFR
-
-Name: EBCDIC-DK-NO [RFC1345,KXS2]
-MIBenum: 2067
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICDKNO
-
-Name: EBCDIC-DK-NO-A [RFC1345,KXS2]
-MIBenum: 2068
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICDKNOA
-
-Name: EBCDIC-FI-SE [RFC1345,KXS2]
-MIBenum: 2069
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICFISE
-
-Name: EBCDIC-FI-SE-A [RFC1345,KXS2]
-MIBenum: 2070
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICFISEA
-
-Name: EBCDIC-FR [RFC1345,KXS2]
-MIBenum: 2071
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICFR
-
-Name: EBCDIC-IT [RFC1345,KXS2]
-MIBenum: 2072
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICIT
-
-Name: EBCDIC-PT [RFC1345,KXS2]
-MIBenum: 2073
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICPT
-
-Name: EBCDIC-ES [RFC1345,KXS2]
-MIBenum: 2074
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICES
-
-Name: EBCDIC-ES-A [RFC1345,KXS2]
-MIBenum: 2075
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICESA
-
-Name: EBCDIC-ES-S [RFC1345,KXS2]
-MIBenum: 2076
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICESS
-
-Name: EBCDIC-UK [RFC1345,KXS2]
-MIBenum: 2077
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICUK
-
-Name: EBCDIC-US [RFC1345,KXS2]
-MIBenum: 2078
-Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
-Alias: csEBCDICUS
-
-Name: UNKNOWN-8BIT [RFC1428]
-MIBenum: 2079
-Alias: csUnknown8BiT
-
-Name: MNEMONIC [RFC1345,KXS2]
-MIBenum: 2080
-Source: RFC 1345, also known as "mnemonic+ascii+38"
-Alias: csMnemonic
-
-Name: MNEM [RFC1345,KXS2]
-MIBenum: 2081
-Source: RFC 1345, also known as "mnemonic+ascii+8200"
-Alias: csMnem
-
-Name: VISCII [RFC1456]
-MIBenum: 2082
-Source: RFC 1456
-Alias: csVISCII
-
-Name: VIQR [RFC1456]
-MIBenum: 2083
-Source: RFC 1456
-Alias: csVIQR
-
-Name: KOI8-R (preferred MIME name) [RFC1489]
-MIBenum: 2084
-Source: RFC 1489, based on GOST-19768-74, ISO-6937/8,
- INIS-Cyrillic, ISO-5427.
-Alias: csKOI8R
-
-Name: KOI8-U [RFC2319]
-MIBenum: 2088
-Source: RFC 2319
-
-Name: IBM00858
-MIBenum: 2089
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00858) [Mahdi]
-Alias: CCSID00858
-Alias: CP00858
-Alias: PC-Multilingual-850+euro
-
-Name: IBM00924
-MIBenum: 2090
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00924) [Mahdi]
-Alias: CCSID00924
-Alias: CP00924
-Alias: ebcdic-Latin9--euro
-
-Name: IBM01140
-MIBenum: 2091
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01140) [Mahdi]
-Alias: CCSID01140
-Alias: CP01140
-Alias: ebcdic-us-37+euro
-
-Name: IBM01141
-MIBenum: 2092
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01141) [Mahdi]
-Alias: CCSID01141
-Alias: CP01141
-Alias: ebcdic-de-273+euro
-
-Name: IBM01142
-MIBenum: 2093
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01142) [Mahdi]
-Alias: CCSID01142
-Alias: CP01142
-Alias: ebcdic-dk-277+euro
-Alias: ebcdic-no-277+euro
-
-Name: IBM01143
-MIBenum: 2094
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01143) [Mahdi]
-Alias: CCSID01143
-Alias: CP01143
-Alias: ebcdic-fi-278+euro
-Alias: ebcdic-se-278+euro
-
-Name: IBM01144
-MIBenum: 2095
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01144) [Mahdi]
-Alias: CCSID01144
-Alias: CP01144
-Alias: ebcdic-it-280+euro
-
-Name: IBM01145
-MIBenum: 2096
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01145) [Mahdi]
-Alias: CCSID01145
-Alias: CP01145
-Alias: ebcdic-es-284+euro
-
-Name: IBM01146
-MIBenum: 2097
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01146) [Mahdi]
-Alias: CCSID01146
-Alias: CP01146
-Alias: ebcdic-gb-285+euro
-
-Name: IBM01147
-MIBenum: 2098
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01147) [Mahdi]
-Alias: CCSID01147
-Alias: CP01147
-Alias: ebcdic-fr-297+euro
-
-Name: IBM01148
-MIBenum: 2099
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01148) [Mahdi]
-Alias: CCSID01148
-Alias: CP01148
-Alias: ebcdic-international-500+euro
-
-Name: IBM01149
-MIBenum: 2100
-Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01149) [Mahdi]
-Alias: CCSID01149
-Alias: CP01149
-Alias: ebcdic-is-871+euro
-
-Name: Big5-HKSCS [Yick]
-MIBenum: 2101
-Source: See (http://www.iana.org/assignments/charset-reg/Big5-HKSCS)
-Alias: None
-
-Name: IBM1047 [Robrigado]
-MIBenum: 2102
-Source: IBM1047 (EBCDIC Latin 1/Open Systems)
-http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
-Alias: IBM-1047
-
-Name: PTCP154 [Uskov]
-MIBenum: 2103
-Source: See (http://www.iana.org/assignments/charset-reg/PTCP154)
-Alias: csPTCP154
-Alias: PT154
-Alias: CP154
-Alias: Cyrillic-Asian
-
-Name: Amiga-1251
-MIBenum: 2104
-Source: See (http://www.amiga.ultranet.ru/Amiga-1251.html)
-Alias: Ami1251
-Alias: Amiga1251
-Alias: Ami-1251
-(Aliases are provided for historical reasons and should not be used)
- [Malyshev]
-
-Name: KOI7-switched
-MIBenum: 2105
-Source: See <http://www.iana.org/assignments/charset-reg/KOI7-switched>
-Aliases: None
-
-Name: UNICODE-1-1 [RFC1641]
-MIBenum: 1010
-Source: RFC 1641
-Alias: csUnicode11
-
-Name: SCSU
-MIBenum: 1011
-Source: SCSU See (http://www.iana.org/assignments/charset-reg/SCSU) [Scherer]
-Alias: None
-
-Name: UTF-7 [RFC2152]
-MIBenum: 1012
-Source: RFC 2152
-Alias: None
-
-Name: UTF-16BE [RFC2781]
-MIBenum: 1013
-Source: RFC 2781
-Alias: None
-
-Name: UTF-16LE [RFC2781]
-MIBenum: 1014
-Source: RFC 2781
-Alias: None
-
-Name: UTF-16 [RFC2781]
-MIBenum: 1015
-Source: RFC 2781
-Alias: None
-
-Name: CESU-8 [Phipps]
-MIBenum: 1016
-Source: <http://www.unicode.org/unicode/reports/tr26>
-Alias: csCESU-8
-
-Name: UTF-32 [Davis]
-MIBenum: 1017
-Source: <http://www.unicode.org/unicode/reports/tr19/>
-Alias: None
-
-Name: UTF-32BE [Davis]
-MIBenum: 1018
-Source: <http://www.unicode.org/unicode/reports/tr19/>
-Alias: None
-
-Name: UTF-32LE [Davis]
-MIBenum: 1019
-Source: <http://www.unicode.org/unicode/reports/tr19/>
-Alias: None
-
-Name: BOCU-1 [Scherer]
-MIBenum: 1020
-Source: http://www.unicode.org/notes/tn6/
-Alias: csBOCU-1
-
-Name: UNICODE-1-1-UTF-7 [RFC1642]
-MIBenum: 103
-Source: RFC 1642
-Alias: csUnicode11UTF7
-
-Name: UTF-8 [RFC3629]
-MIBenum: 106
-Source: RFC 3629
-Alias: None
-
-Name: ISO-8859-13
-MIBenum: 109
-Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-13)[Tumasonis]
-Alias: None
-
-Name: ISO-8859-14
-MIBenum: 110
-Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-14) [Simonsen]
-Alias: iso-ir-199
-Alias: ISO_8859-14:1998
-Alias: ISO_8859-14
-Alias: latin8
-Alias: iso-celtic
-Alias: l8
-
-Name: ISO-8859-15
-MIBenum: 111
-Source: ISO
- Please see: <http://www.iana.org/assignments/charset-reg/ISO-8859-15>
-Alias: ISO_8859-15
-Alias: Latin-9
-
-Name: ISO-8859-16
-MIBenum: 112
-Source: ISO
-Alias: iso-ir-226
-Alias: ISO_8859-16:2001
-Alias: ISO_8859-16
-Alias: latin10
-Alias: l10
-
-Name: GBK
-MIBenum: 113
-Source: Chinese IT Standardization Technical Committee
- Please see: <http://www.iana.org/assignments/charset-reg/GBK>
-Alias: CP936
-Alias: MS936
-Alias: windows-936
-
-Name: GB18030
-MIBenum: 114
-Source: Chinese IT Standardization Technical Committee
- Please see: <http://www.iana.org/assignments/charset-reg/GB18030>
-Alias: None
-
-Name: OSD_EBCDIC_DF04_15
-MIBenum: 115
-Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
- Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15>
-Alias: None
-
-Name: OSD_EBCDIC_DF03_IRV
-MIBenum: 116
-Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
- Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV>
-Alias: None
-
-Name: OSD_EBCDIC_DF04_1
-MIBenum: 117
-Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
- Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1>
-Alias: None
-
-Name: JIS_Encoding
-MIBenum: 16
-Source: JIS X 0202-1991. Uses ISO 2022 escape sequences to
- shift code sets as documented in JIS X 0202-1991.
-Alias: csJISEncoding
-
-Name: Shift_JIS (preferred MIME name)
-MIBenum: 17
-Source: This charset is an extension of csHalfWidthKatakana by
- adding graphic characters in JIS X 0208. The CCS's are
- JIS X0201:1997 and JIS X0208:1997. The
- complete definition is shown in Appendix 1 of JIS
- X0208:1997.
- This charset can be used for the top-level media type "text".
-Alias: MS_Kanji
-Alias: csShiftJIS
-
-Name: Extended_UNIX_Code_Packed_Format_for_Japanese
-MIBenum: 18
-Source: Standardized by OSF, UNIX International, and UNIX Systems
- Laboratories Pacific. Uses ISO 2022 rules to select
- code set 0: US-ASCII (a single 7-bit byte set)
- code set 1: JIS X0208-1990 (a double 8-bit byte set)
- restricted to A0-FF in both bytes
- code set 2: Half Width Katakana (a single 7-bit byte set)
- requiring SS2 as the character prefix
- code set 3: JIS X0212-1990 (a double 7-bit byte set)
- restricted to A0-FF in both bytes
- requiring SS3 as the character prefix
-Alias: csEUCPkdFmtJapanese
-Alias: EUC-JP (preferred MIME name)
-
-Name: Extended_UNIX_Code_Fixed_Width_for_Japanese
-MIBenum: 19
-Source: Used in Japan. Each character is 2 octets.
- code set 0: US-ASCII (a single 7-bit byte set)
- 1st byte = 00
- 2nd byte = 20-7E
- code set 1: JIS X0208-1990 (a double 7-bit byte set)
- restricted to A0-FF in both bytes
- code set 2: Half Width Katakana (a single 7-bit byte set)
- 1st byte = 00
- 2nd byte = A0-FF
- code set 3: JIS X0212-1990 (a double 7-bit byte set)
- restricted to A0-FF in
- the first byte
- and 21-7E in the second byte
-Alias: csEUCFixWidJapanese
-
-Name: ISO-10646-UCS-Basic
-MIBenum: 1002
-Source: ASCII subset of Unicode. Basic Latin = collection 1
- See ISO 10646, Appendix A
-Alias: csUnicodeASCII
-
-Name: ISO-10646-Unicode-Latin1
-MIBenum: 1003
-Source: ISO Latin-1 subset of Unicode. Basic Latin and Latin-1
- Supplement = collections 1 and 2. See ISO 10646,
- Appendix A. See RFC 1815.
-Alias: csUnicodeLatin1
-Alias: ISO-10646
-
-Name: ISO-10646-J-1
-Source: ISO 10646 Japanese, see RFC 1815.
-
-Name: ISO-Unicode-IBM-1261
-MIBenum: 1005
-Source: IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261
-Alias: csUnicodeIBM1261
-
-Name: ISO-Unicode-IBM-1268
-MIBenum: 1006
-Source: IBM Latin-4 Extended Presentation Set, GCSGID: 1268
-Alias: csUnicodeIBM1268
-
-Name: ISO-Unicode-IBM-1276
-MIBenum: 1007
-Source: IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276
-Alias: csUnicodeIBM1276
-
-Name: ISO-Unicode-IBM-1264
-MIBenum: 1008
-Source: IBM Arabic Presentation Set, GCSGID: 1264
-Alias: csUnicodeIBM1264
-
-Name: ISO-Unicode-IBM-1265
-MIBenum: 1009
-Source: IBM Hebrew Presentation Set, GCSGID: 1265
-Alias: csUnicodeIBM1265
-
-Name: ISO-8859-1-Windows-3.0-Latin-1 [HP-PCL5]
-MIBenum: 2000
-Source: Extended ISO 8859-1 Latin-1 for Windows 3.0.
- PCL Symbol Set id: 9U
-Alias: csWindows30Latin1
-
-Name: ISO-8859-1-Windows-3.1-Latin-1 [HP-PCL5]
-MIBenum: 2001
-Source: Extended ISO 8859-1 Latin-1 for Windows 3.1.
- PCL Symbol Set id: 19U
-Alias: csWindows31Latin1
-
-Name: ISO-8859-2-Windows-Latin-2 [HP-PCL5]
-MIBenum: 2002
-Source: Extended ISO 8859-2. Latin-2 for Windows 3.1.
- PCL Symbol Set id: 9E
-Alias: csWindows31Latin2
-
-Name: ISO-8859-9-Windows-Latin-5 [HP-PCL5]
-MIBenum: 2003
-Source: Extended ISO 8859-9. Latin-5 for Windows 3.1
- PCL Symbol Set id: 5T
-Alias: csWindows31Latin5
-
-Name: Adobe-Standard-Encoding [Adobe]
-MIBenum: 2005
-Source: PostScript Language Reference Manual
- PCL Symbol Set id: 10J
-Alias: csAdobeStandardEncoding
-
-Name: Ventura-US [HP-PCL5]
-MIBenum: 2006
-Source: Ventura US. ASCII plus characters typically used in
- publishing, like pilcrow, copyright, registered, trade mark,
- section, dagger, and double dagger in the range A0 (hex)
- to FF (hex).
- PCL Symbol Set id: 14J
-Alias: csVenturaUS
-
-Name: Ventura-International [HP-PCL5]
-MIBenum: 2007
-Source: Ventura International. ASCII plus coded characters similar
- to Roman8.
- PCL Symbol Set id: 13J
-Alias: csVenturaInternational
-
-Name: PC8-Danish-Norwegian [HP-PCL5]
-MIBenum: 2012
-Source: PC Danish Norwegian
- 8-bit PC set for Danish Norwegian
- PCL Symbol Set id: 11U
-Alias: csPC8DanishNorwegian
-
-Name: PC8-Turkish [HP-PCL5]
-MIBenum: 2014
-Source: PC Latin Turkish. PCL Symbol Set id: 9T
-Alias: csPC8Turkish
-
-Name: IBM-Symbols [IBM-CIDT]
-MIBenum: 2015
-Source: Presentation Set, CPGID: 259
-Alias: csIBMSymbols
-
-Name: IBM-Thai [IBM-CIDT]
-MIBenum: 2016
-Source: Presentation Set, CPGID: 838
-Alias: csIBMThai
-
-Name: HP-Legal [HP-PCL5]
-MIBenum: 2017
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
- HP part number 5961-0510, October 1992
- PCL Symbol Set id: 1U
-Alias: csHPLegal
-
-Name: HP-Pi-font [HP-PCL5]
-MIBenum: 2018
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
- HP part number 5961-0510, October 1992
- PCL Symbol Set id: 15U
-Alias: csHPPiFont
-
-Name: HP-Math8 [HP-PCL5]
-MIBenum: 2019
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
- HP part number 5961-0510, October 1992
- PCL Symbol Set id: 8M
-Alias: csHPMath8
-
-Name: Adobe-Symbol-Encoding [Adobe]
-MIBenum: 2020
-Source: PostScript Language Reference Manual
- PCL Symbol Set id: 5M
-Alias: csHPPSMath
-
-Name: HP-DeskTop [HP-PCL5]
-MIBenum: 2021
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
- HP part number 5961-0510, October 1992
- PCL Symbol Set id: 7J
-Alias: csHPDesktop
-
-Name: Ventura-Math [HP-PCL5]
-MIBenum: 2022
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
- HP part number 5961-0510, October 1992
- PCL Symbol Set id: 6M
-Alias: csVenturaMath
-
-Name: Microsoft-Publishing [HP-PCL5]
-MIBenum: 2023
-Source: PCL 5 Comparison Guide, Hewlett-Packard,
- HP part number 5961-0510, October 1992
- PCL Symbol Set id: 6J
-Alias: csMicrosoftPublishing
-
-Name: Windows-31J
-MIBenum: 2024
-Source: Windows Japanese. A further extension of Shift_JIS
- to include NEC special characters (Row 13), NEC
- selection of IBM extensions (Rows 89 to 92), and IBM
- extensions (Rows 115 to 119). The CCS's are
- JIS X0201:1997, JIS X0208:1997, and these extensions.
- This charset can be used for the top-level media type "text",
- but it is of limited or specialized use (see RFC2278).
- PCL Symbol Set id: 19K
-Alias: csWindows31J
-
-Name: GB2312 (preferred MIME name)
-MIBenum: 2025
-Source: Chinese for People's Republic of China (PRC) mixed one byte,
- two byte set:
- 20-7E = one byte ASCII
- A1-FE = two byte PRC Kanji
- See GB 2312-80
- PCL Symbol Set Id: 18C
-Alias: csGB2312
-
-Name: Big5 (preferred MIME name)
-MIBenum: 2026
-Source: Chinese for Taiwan Multi-byte set.
- PCL Symbol Set Id: 18T
-Alias: csBig5
-
-Name: windows-1250
-MIBenum: 2250
-Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1250) [Lazhintseva]
-Alias: None
-
-Name: windows-1251
-MIBenum: 2251
-Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1251) [Lazhintseva]
-Alias: None
-
-Name: windows-1252
-MIBenum: 2252
-Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1252) [Wendt]
-Alias: None
-
-Name: windows-1253
-MIBenum: 2253
-Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1253) [Lazhintseva]
-Alias: None
-
-Name: windows-1254
-MIBenum: 2254
-Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1254) [Lazhintseva]
-Alias: None
-
-Name: windows-1255
-MIBenum: 2255
-Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1255) [Lazhintseva]
-Alias: None
-
-Name: windows-1256
-MIBenum: 2256
-Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1256) [Lazhintseva]
-Alias: None
-
-Name: windows-1257
-MIBenum: 2257
-Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1257) [Lazhintseva]
-Alias: None
-
-Name: windows-1258
-MIBenum: 2258
-Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1258) [Lazhintseva]
-Alias: None
-
-Name: TIS-620
-MIBenum: 2259
-Source: Thai Industrial Standards Institute (TISI) [Tantsetthi]
-
-Name: HZ-GB-2312
-MIBenum: 2085
-Source: RFC 1842, RFC 1843 [RFC1842, RFC1843]
-
-
-REFERENCES
-----------
-
-[RFC1345] Simonsen, K., "Character Mnemonics & Character Sets",
- RFC 1345, Rationel Almen Planlaegning, Rationel Almen
- Planlaegning, June 1992.
-
-[RFC1428] Vaudreuil, G., "Transition of Internet Mail from
- Just-Send-8 to 8bit-SMTP/MIME", RFC1428, CNRI, February
- 1993.
-
-[RFC1456] Vietnamese Standardization Working Group, "Conventions for
- Encoding the Vietnamese Language VISCII: VIetnamese
- Standard Code for Information Interchange VIQR: VIetnamese
- Quoted-Readable Specification Revision 1.1", RFC 1456, May
- 1993.
-
-[RFC1468] Murai, J., Crispin, M., and E. van der Poel, "Japanese
- Character Encoding for Internet Messages", RFC 1468,
- Keio University, Panda Programming, June 1993.
-
-[RFC1489] Chernov, A., "Registration of a Cyrillic Character Set",
- RFC1489, RELCOM Development Team, July 1993.
-
-[RFC1554] Ohta, M., and K. Handa, "ISO-2022-JP-2: Multilingual
- Extension of ISO-2022-JP", RFC1554, Tokyo Institute of
- Technology, ETL, December 1993.
-
-[RFC1556] Nussbacher, H., "Handling of Bi-directional Texts in MIME",
- RFC1556, Israeli Inter-University, December 1993.
-
-[RFC1557] Choi, U., Chon, K., and H. Park, "Korean Character Encoding
- for Internet Messages", KAIST, Solvit Chosun Media,
- December 1993.
-
-[RFC1641] Goldsmith, D., and M. Davis, "Using Unicode with MIME",
- RFC1641, Taligent, Inc., July 1994.
-
-[RFC1642] Goldsmith, D., and M. Davis, "UTF-7", RFC1642, Taligent,
- Inc., July 1994.
-
-[RFC1815] Ohta, M., "Character Sets ISO-10646 and ISO-10646-J-1",
- RFC 1815, Tokyo Institute of Technology, July 1995.
-
-
-[Adobe] Adobe Systems Incorporated, PostScript Language Reference
- Manual, second edition, Addison-Wesley Publishing Company,
- Inc., 1990.
-
-[ECMA Registry] ISO-IR: International Register of Escape Sequences
- http://www.itscj.ipsj.or.jp/ISO-IE/ Note: The current
- registration authority is IPSJ/ITSCJ, Japan.
-
-[HP-PCL5] Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
- (P/N 5021-0329) pp B-13, 1996.
-
-[IBM-CIDT] IBM Corporation, "ABOUT TYPE: IBM's Technical Reference
- for Core Interchange Digitized Type", Publication number
- S544-3708-01
-
-[RFC1842] Wei, Y., J. Li, and Y. Jiang, "ASCII Printable
- Characters-Based Chinese Character Encoding for Internet
- Messages", RFC 1842, Harvard University, Rice University,
- University of Maryland, August 1995.
-
-[RFC1843] Lee, F., "HZ - A Data Format for Exchanging Files of
- Arbitrarily Mixed Chinese and ASCII Characters", RFC 1843,
- Stanford University, August 1995.
-
-[RFC2152] Goldsmith, D., M. Davis, "UTF-7: A Mail-Safe Transformation
- Format of Unicode", RFC 2152, Apple Computer, Inc.,
- Taligent Inc., May 1997.
-
-[RFC2279] Yergeau, F., "UTF-8, A Transformation Format of ISO 10646",
- RFC 2279, Alis Technologies, January, 1998.
-
-[RFC2781] Hoffman, P., Yergeau, F., "UTF-16, an encoding of ISO 10646",
- RFC 2781, February 2000.
-
-[RFC3629] Yergeau, F., "UTF-8, a transformation format of ISO 10646",
- RFC3629, November 2003.
-
-PEOPLE
-------
-
-[KXS2] Keld Simonsen <Keld.Simonsen@dkuug.dk>
-
-[Choi] Woohyong Choi <whchoi@cosmos.kaist.ac.kr>
-
-[Davis] Mark Davis, <mark@unicode.org>, April 2002.
-
-[Lazhintseva] Katya Lazhintseva, <katyal@MICROSOFT.com>, May 1996.
-
-[Mahdi] Tamer Mahdi, <tamer@ca.ibm.com>, August 2000.
-
-[Malyshev] Michael Malyshev, <michael_malyshev@mail.ru>, January 2004
-
-[Murai] Jun Murai <jun@wide.ad.jp>
-
-[Nussbacher] Hank Nussbacher, <hank@vm.tau.ac.il>
-
-[Ohta] Masataka Ohta, <mohta@cc.titech.ac.jp>, July 1995.
-
-[Phipps] Toby Phipps, <tphipps@peoplesoft.com>, March 2002.
-
-[Pond] Rick Pond, <rickpond@vnet.ibm.com>, March 1997.
-
-[Robrigado] Reuel Robrigado, <reuelr@ca.ibm.com>, September 2002.
-
-[Scherer] Markus Scherer, <markus.scherer@jtcsv.com>, August 2000,
- September 2002.
-
-[Simonsen] Keld Simonsen, <Keld.Simonsen@rap.dk>, August 2000.
-
-[Tantsetthi] Trin Tantsetthi, <trin@mozart.inet.co.th>, September 1998.
-
-[Tumasonis] Vladas Tumasonis, <vladas.tumasonis@maf.vu.lt>, August 2000.
-
-[Uskov] Alexander Uskov, <auskov@idc.kz>, September 2002.
-
-[Wendt] Chris Wendt, <christw@microsoft.com>, December 1999.
-
-[Yick] Nicky Yick, <cliac@itsd.gcn.gov.hk>, October 2000.
-
-[]
-
-
-
-
-
-
-
diff --git a/WebCore/platform/text/mac/mac-encodings.txt b/WebCore/platform/text/mac/mac-encodings.txt
deleted file mode 100644
index bb45e22..0000000
--- a/WebCore/platform/text/mac/mac-encodings.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-# We'd like to eliminate this file.
-# It would be nice to get rid of dependence on the TextEncodingConvert entirely.
-# Perhaps we can prove these are not used on the web and remove them.
-# Or perhaps we can get them added to ICU.
-
-# The items on the left are names of TEC TextEncoding values (without the leading kTextEncoding).
-# The items on the right are IANA character set names. Names listed in character-sets.txt are not
-# repeated here; mentioning any one character set from a group in there pulls in all the aliases in
-# that group.
-
-DOSChineseTrad: cp950
-DOSGreek: cp737, ibm737
-EUC_TW: EUC-TW
-ISOLatin10: ISO-8859-16
-ISOLatin6: ISO-8859-10
-ISOLatin8: ISO-8859-14
-ISOLatinThai: ISO-8859-11
-ISO_2022_JP_3: ISO-2022-JP-3
-JIS_C6226_78: JIS_C6226-1978
-JIS_X0208_83: JIS_X0208-1983
-JIS_X0208_90: JIS_X0208-1990
-JIS_X0212_90: JIS_X0212-1990
-KOI8_U: KOI8-U
-MacArabic: x-mac-arabic
-MacChineseSimp: x-mac-chinesesimp, xmacsimpchinese
-MacChineseTrad: x-mac-chinesetrad, xmactradchinese
-MacCroatian: x-mac-croatian
-MacDevanagari: x-mac-devanagari
-MacDingbats: x-mac-dingbats
-MacFarsi: x-mac-farsi
-MacGujarati: x-mac-gujarati
-MacGurmukhi: x-mac-gurmukhi
-MacHebrew: x-mac-hebrew
-MacIcelandic: x-mac-icelandic
-MacJapanese: x-mac-japanese
-MacKorean: x-mac-korean
-MacRomanLatin1: x-mac-roman-latin1
-MacRomanian: x-mac-romanian
-MacSymbol: x-mac-symbol
-MacThai: x-mac-thai
-MacTibetan: x-mac-tibetan
-MacVT100: x-mac-vt100
-NextStepLatin: x-nextstep
-ShiftJIS_X0213_00: Shift_JIS_X0213-2000
-WindowsKoreanJohab: johab
diff --git a/WebCore/platform/text/mac/make-charset-table.pl b/WebCore/platform/text/mac/make-charset-table.pl
deleted file mode 100755
index 16fd25a..0000000
--- a/WebCore/platform/text/mac/make-charset-table.pl
+++ /dev/null
@@ -1,225 +0,0 @@
-#!/usr/bin/perl -w
-
-# Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
-# its contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-use strict;
-
-my %aliasesFromCharsetsFile;
-my %namesWritten;
-
-my $output = "";
-
-my $error = 0;
-
-sub error ($)
-{
- print STDERR @_, "\n";
- $error = 1;
-}
-
-# Appends one row of the form { "name", prefix+encoding }, to the global $output.
-# If $name was already emitted, an error is reported through error(), which sets
-# the global $error flag rather than aborting; the row is still appended.
-# $flags is unpacked from the arguments but is not used in the generated row.
-sub emit_line
-{
- my ($name, $prefix, $encoding, $flags) = @_;
- 
- error "$name shows up twice in output" if $namesWritten{$name};
- $namesWritten{$name} = 1;
- 
- $output .= " { \"$name\", $prefix$encoding },\n";
-}
-
-sub process_platform_encodings
-{
- my ($filename, $PlatformPrefix) = @_;
- my $baseFilename = $filename;
- $baseFilename =~ s|.*/||;
-
- my %seenPlatformNames;
- my %seenIANANames;
-
- open PLATFORM_ENCODINGS, $filename or die;
-
- while (<PLATFORM_ENCODINGS>) {
- chomp;
- s/\#.*$//;
- s/\s+$//;
- if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
- my %aliases;
-
- my $PlatformNameWithFlags = $PlatformName;
- if ($flags) {
- $PlatformNameWithFlags .= ", " . $flags;
- } else {
- $flags = "NoEncodingFlags";
- }
- error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
- $seenPlatformNames{$PlatformNameWithFlags} = 1;
-
- # Build the aliases list.
- # Also check that no two names are part of the same entry in the charsets file.
- my @IANANames = split ", ", $IANANames;
- my $firstName = "";
- my $canonicalFirstName = "";
- my $prevName = "";
- for my $name (@IANANames) {
- if ($firstName eq "") {
- if ($name !~ /^[-A-Za-z0-9_]+$/) {
- error "$name, in $baseFilename, has illegal characters in it";
- next;
- }
- $firstName = $name;
- } else {
- if ($name !~ /^[a-z0-9]+$/) {
- error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
- next;
- }
- if ($name le $prevName) {
- error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
- }
- $prevName = $name;
- }
-
- my $canonicalName = lc $name;
- $canonicalName =~ tr/-_//d;
-
- $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
-
- error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
- $seenIANANames{$canonicalName} = 1;
-
- $aliases{$canonicalName} = 1;
- next if !$aliasesFromCharsetsFile{$canonicalName};
- for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) {
- $aliases{$alias} = 1;
- }
- for my $otherName (@IANANames) {
- next if $canonicalName eq $otherName;
- if ($aliasesFromCharsetsFile{$otherName}
- && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$otherName}
- && $canonicalName le $otherName) {
- error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
- }
- }
- }
-
- # write out
- emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
- for my $alias (sort keys %aliases) {
- emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
- }
- } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) {
- my $PlatformName = $1;
-
- error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
- $seenPlatformNames{$PlatformName} = 1;
- } elsif (/./) {
- error "syntax error in $baseFilename, line $.";
- }
- }
-
- close PLATFORM_ENCODINGS;
-}
-
-sub process_iana_charset
-{
- my ($canonical_name, @aliases) = @_;
-
- return if !$canonical_name;
-
- my @names = sort $canonical_name, @aliases;
-
- for my $name (@names) {
- $aliasesFromCharsetsFile{$name} = \@names;
- }
-}
-
-sub process_iana_charsets
-{
- my ($filename) = @_;
-
- open CHARSETS, $filename or die;
-
- my %seen;
-
- my $canonical_name;
- my @aliases;
-
- my %exceptions = ( isoir91 => 1, isoir92 => 1 );
-
- while (<CHARSETS>) {
- chomp;
- if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) {
- $new_canonical_name = lc $new_canonical_name;
- $new_canonical_name =~ tr/a-z0-9//cd;
-
- error "saw $new_canonical_name twice in character-sets.txt", if $seen{$new_canonical_name};
- $seen{$new_canonical_name} = $new_canonical_name;
-
- process_iana_charset $canonical_name, @aliases;
-
- $canonical_name = $new_canonical_name;
- @aliases = ();
- } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) {
- $new_alias = lc $new_alias;
- $new_alias =~ tr/a-z0-9//cd;
-
- # do this after normalizing the alias, sometimes character-sets.txt
- # has weird escape characters, e.g. \b after None
- next if $new_alias eq "none";
-
- error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name", if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias};
- push @aliases, $new_alias if !$seen{$new_alias};
- $seen{$new_alias} = $canonical_name;
- }
- }
-
- process_iana_charset $canonical_name, @aliases;
-
- close CHARSETS;
-}
-
-# Program body
-
-process_iana_charsets($ARGV[0]);
-process_platform_encodings($ARGV[1], $ARGV[2]);
-
-exit 1 if $error;
-
-print <<EOF
-// File generated by make-charset-table.pl. Do not edit!
-
-#include "config.h"
-#include "CharsetData.h"
-
-namespace WebCore {
-
- const CharsetEntry CharsetTable[] = {
-$output
- { 0, 0 }
- };
-
-}
-EOF
diff --git a/WebCore/platform/text/qt/TextBoundariesQt.cpp b/WebCore/platform/text/qt/TextBoundariesQt.cpp
deleted file mode 100644
index a354ca6..0000000
--- a/WebCore/platform/text/qt/TextBoundariesQt.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2006 Zack Rusin <zack@kde.org>
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include "TextBoundaries.h"
-#include "NotImplemented.h"
-
-#include <QString>
-#include <QChar>
-
-#include <QDebug>
-#include <stdio.h>
-
-#include <qtextboundaryfinder.h>
-
-namespace WebCore {
-
-// Walks word boundaries of |buffer| (|len| UTF-16 code units) starting near |position|.
-//
-// Forward: returns the first boundary whose preceding code unit is a letter or digit,
-// or |len| when no such boundary is found.
-// Backward: returns the first boundary whose code unit is a letter or digit, or 0 when
-// no such boundary is found.
-// A |position| at or past |len| is moved to |len| - 1 before the search begins.
-int findNextWordFromIndex(UChar const* buffer, int len, int position, bool forward)
-{
-    const QString text(reinterpret_cast<QChar const*>(buffer), len);
-    QTextBoundaryFinder finder(QTextBoundaryFinder::Word, text);
-
-    int startPosition = position;
-    if (position >= len)
-        startPosition = len - 1;
-    finder.setPosition(startPosition);
-
-    if (!forward) {
-        for (int boundary = finder.toPreviousBoundary(); boundary > 0; boundary = finder.toPreviousBoundary()) {
-            if (QChar(buffer[boundary]).isLetterOrNumber())
-                return boundary;
-        }
-        return 0;
-    }
-
-    for (int boundary = finder.toNextBoundary(); boundary > 0; boundary = finder.toNextBoundary()) {
-        if (QChar(buffer[boundary - 1]).isLetterOrNumber())
-            return boundary;
-    }
-    return len;
-}
-
-void findWordBoundary(UChar const* buffer, int len, int position, int* start, int* end)
-{
- QString str(reinterpret_cast<QChar const*>(buffer), len);
- QTextBoundaryFinder iterator(QTextBoundaryFinder::Word, str);
- iterator.setPosition(position);
- *start = position > 0 ? iterator.toPreviousBoundary() : 0;
- *end = position == len ? len : iterator.toNextBoundary();
-}
-
-}
-
diff --git a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
deleted file mode 100644
index b9f5a9e..0000000
--- a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include "TextBreakIterator.h"
-
-#include <QtCore/qtextboundaryfinder.h>
-#include <qdebug.h>
-
-// #define DEBUG_TEXT_ITERATORS
-#ifdef DEBUG_TEXT_ITERATORS
-#define DEBUG qDebug
-#else
-#define DEBUG if (1) {} else qDebug
-#endif
-
-namespace WebCore {
-
-#if USE(QT_ICU_TEXT_BREAKING)
-// Returns the system locale name as a NUL-terminated Latin-1 string.
-//
-// The bytes live in a function-local static so the returned pointer stays valid after
-// the call. Returning the result of toLatin1() directly would hand out a pointer into
-// a temporary QByteArray that is destroyed at the end of the return statement.
-// The locale name is captured on first use and not refreshed afterwards.
-const char* currentTextBreakLocaleID()
-{
-    static const QByteArray localeName = QLocale::system().name().toLatin1();
-    return localeName.constData();
-}
-#else
- static unsigned char buffer[1024];
-
- class TextBreakIterator : public QTextBoundaryFinder {
- public:
- TextBreakIterator(QTextBoundaryFinder::BoundaryType type, const UChar* string, int length)
- : QTextBoundaryFinder(type, (const QChar*)string, length, buffer, sizeof(buffer))
- , length(length)
- , string(string) {}
- TextBreakIterator()
- : QTextBoundaryFinder()
- , length(0)
- , string(0) {}
-
- int length;
- const UChar* string;
- };
-
-    // Prepares |iterator| for |string| (|length| UTF-16 code units) with the given boundary type.
-    //
-    // Returns 0 when |string| is null or |length| is zero. If the iterator is already valid
-    // for the same type and identical text it is rewound and reused; otherwise it is replaced
-    // by a freshly constructed iterator. The returned pointer is always &iterator.
-    //
-    // NOTE(review): the reuse check reads through iterator.string, a raw pointer saved from
-    // an earlier call; confirm callers keep that earlier buffer alive.
-    TextBreakIterator* setUpIterator(TextBreakIterator& iterator, QTextBoundaryFinder::BoundaryType type, const UChar* string, int length)
-    {
-        if (!string || !length)
-            return 0;
-
-        // memcmp() takes a byte count, while |length| counts UChar units. Without the
-        // sizeof(UChar) factor only the first half of the text would be compared.
-        if (iterator.isValid() && type == iterator.type() && length == iterator.length
-            && memcmp(string, iterator.string, length * sizeof(UChar)) == 0) {
-            iterator.toStart();
-            return &iterator;
-        }
-
-        iterator = TextBreakIterator(type, string, length);
-
-        return &iterator;
-    }
-
- TextBreakIterator* wordBreakIterator(const UChar* string, int length)
- {
- static TextBreakIterator staticWordBreakIterator;
- return setUpIterator(staticWordBreakIterator, QTextBoundaryFinder::Word, string, length);
- }
-
- TextBreakIterator* characterBreakIterator(const UChar* string, int length)
- {
- static TextBreakIterator staticCharacterBreakIterator;
- return setUpIterator(staticCharacterBreakIterator, QTextBoundaryFinder::Grapheme, string, length);
- }
-
- TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
- {
- return characterBreakIterator(string, length);
- }
-
- TextBreakIterator* lineBreakIterator(const UChar* string, int length)
- {
- static TextBreakIterator staticLineBreakIterator;
- return setUpIterator(staticLineBreakIterator, QTextBoundaryFinder::Line, string, length);
- }
-
- TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
- {
- static TextBreakIterator staticSentenceBreakIterator;
- return setUpIterator(staticSentenceBreakIterator, QTextBoundaryFinder::Sentence, string, length);
-
- }
-
- int textBreakFirst(TextBreakIterator* bi)
- {
- bi->toStart();
- DEBUG() << "textBreakFirst" << bi->position();
- return bi->position();
- }
-
- int textBreakNext(TextBreakIterator* bi)
- {
- int pos = bi->toNextBoundary();
- DEBUG() << "textBreakNext" << pos;
- return pos;
- }
-
- int textBreakPreceding(TextBreakIterator* bi, int pos)
- {
- bi->setPosition(pos);
- int newpos = bi->toPreviousBoundary();
- DEBUG() << "textBreakPreceding" << pos << newpos;
- return newpos;
- }
-
- int textBreakFollowing(TextBreakIterator* bi, int pos)
- {
- bi->setPosition(pos);
- int newpos = bi->toNextBoundary();
- DEBUG() << "textBreakFollowing" << pos << newpos;
- return newpos;
- }
-
- int textBreakCurrent(TextBreakIterator* bi)
- {
- return bi->position();
- }
-
- bool isTextBreak(TextBreakIterator*, int)
- {
- return true;
- }
-#endif
-
-}
diff --git a/WebCore/platform/text/qt/TextCodecQt.cpp b/WebCore/platform/text/qt/TextCodecQt.cpp
deleted file mode 100644
index 1e95d87..0000000
--- a/WebCore/platform/text/qt/TextCodecQt.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- * Copyright (C) 2008 Holger Hans Peter Freyther
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextCodecQt.h"
-#include "PlatformString.h"
-#include <wtf/text/CString.h>
-#include <qset.h>
-
-namespace WebCore {
-
-static QSet<QByteArray> *unique_names = 0;
-
-static const char *getAtomicName(const QByteArray &name)
-{
- if (!unique_names)
- unique_names = new QSet<QByteArray>;
-
- unique_names->insert(name);
- return unique_names->find(name)->constData();
-}
-
-// Walks every MIB reported by QTextCodec::availableMibs() and, for each codec, calls
-// |registrar| once with the codec's own name as both arguments and once per alias with
-// (alias, codec name). All names passed on are pointers obtained from getAtomicName().
-void TextCodecQt::registerEncodingNames(EncodingNameRegistrar registrar)
-{
-    QList<int> mibs = QTextCodec::availableMibs();
-
-    for (int i = 0; i < mibs.size(); ++i) {
-        QTextCodec* c = QTextCodec::codecForMib(mibs.at(i));
-        const char* name = getAtomicName(c->name());
-        registrar(name, name);
-
-        // Separate index name: the alias loop must not shadow the outer MIB index.
-        QList<QByteArray> aliases = c->aliases();
-        for (int j = 0; j < aliases.size(); ++j) {
-            const char* a = getAtomicName(aliases.at(j));
-            registrar(a, name);
-        }
-    }
-}
-
-static PassOwnPtr<TextCodec> newTextCodecQt(const TextEncoding& encoding, const void*)
-{
- return new TextCodecQt(encoding);
-}
-
-void TextCodecQt::registerCodecs(TextCodecRegistrar registrar)
-{
- QList<int> mibs = QTextCodec::availableMibs();
-// qDebug() << ">>>>>>>>> registerCodecs";
-
- for (int i = 0; i < mibs.size(); ++i) {
- QTextCodec *c = QTextCodec::codecForMib(mibs.at(i));
- const char *name = getAtomicName(c->name());
-// qDebug() << " " << name;
- registrar(name, newTextCodecQt, 0);
- }
-}
-
-TextCodecQt::TextCodecQt(const TextEncoding& encoding)
- : m_encoding(encoding)
-{
- m_codec = QTextCodec::codecForName(m_encoding.name());
-}
-
-TextCodecQt::~TextCodecQt()
-{
-}
-
-
-// Decodes |length| bytes starting at |bytes| with m_codec and returns the result as a
-// non-null String.
-//
-// m_state is passed to every toUnicode() call and is only reset when |flush| is true.
-// |sawError| is set to whether m_state reports any invalid characters after decoding.
-// The stopOnError argument is ignored.
-String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*stopOnError*/, bool& sawError)
-{
-    // We chop input buffer to smaller buffers to avoid excessive memory consumption
-    // when the input buffer is big. This helps reduce peak memory consumption in
-    // mobile devices where system RAM is limited.
-#if OS(SYMBIAN)
-    static const int MaxInputChunkSize = 32 * 1024;
-#else
-    static const int MaxInputChunkSize = 1024 * 1024;
-#endif
-    const char* buf = bytes;
-    const char* end = buf + length;
-    String unicode(""); // a non-null string is expected
-
-    while (buf < end) {
-        // Clamp while still in size_t, then narrow: converting the full remaining byte
-        // count to int first would overflow for inputs larger than INT_MAX.
-        const size_t remaining = static_cast<size_t>(end - buf);
-        const int size = remaining < static_cast<size_t>(MaxInputChunkSize) ? static_cast<int>(remaining) : MaxInputChunkSize;
-        QString decoded = m_codec->toUnicode(buf, size, &m_state);
-        unicode.append(reinterpret_cast_ptr<const UChar*>(decoded.unicode()), decoded.length());
-        buf += size;
-    }
-
-    sawError = m_state.invalidChars != 0;
-
-    if (flush) {
-        m_state.flags = QTextCodec::DefaultConversion;
-        m_state.remainingChars = 0;
-        m_state.invalidChars = 0;
-    }
-
-    return unicode;
-}
-
-// Encodes |length| UTF-16 code units from |characters| with m_codec and returns the bytes
-// as a CString. An empty input returns "".
-//
-// The whole input is encoded in one call first. Only if that pass reports invalid
-// characters is the input re-encoded chunk by chunk so that unencodable chunks can be
-// substituted according to |handling|.
-CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling handling)
-{
- QTextCodec::ConverterState state;
- // NOTE(review): ConvertInvalidToNull presumably makes the codec emit a NUL byte for each
- // unencodable character, which the replace('\0', ...) below relies on -- confirm in Qt docs.
- state.flags = QTextCodec::ConversionFlags(QTextCodec::ConvertInvalidToNull | QTextCodec::IgnoreHeader);
-
- if (!length)
- return "";
-
- QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, &state);
-
- // If some characters are unencodable, escape them as specified by handling.
- // We append one valid encoded chunk to a QByteArray at a time. When we encounter an unencodable chunk we
- // escape it with getUnencodableReplacement, append it, then move to the next chunk.
- if (state.invalidChars) {
- // Discard the single-pass result and rebuild the output incrementally.
- state.invalidChars = 0;
- state.remainingChars = 0;
- int len = 0;
- ba.clear();
- for (size_t pos = 0; pos < length; ++pos) {
- // Grow the current chunk by one code unit and try to encode it from its start.
- QByteArray tba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), ++len, &state);
- // The codec reports pending input: keep extending the same chunk.
- if (state.remainingChars)
- continue;
- if (state.invalidChars) {
- // The replacement is derived from the first code unit of the chunk and
- // substituted for every NUL byte in the chunk's encoded output.
- UnencodableReplacementArray replacement;
- getUnencodableReplacement(characters[0], handling, replacement);
- tba.replace('\0', replacement);
- state.invalidChars = 0;
- }
- ba.append(tba);
- // Start the next chunk right after the code units just consumed.
- characters += len;
- len = 0;
- state.remainingChars = 0;
- }
- }
-
- return CString(ba.constData(), ba.length());
-}
-
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/qt/TextCodecQt.h b/WebCore/platform/text/qt/TextCodecQt.h
deleted file mode 100644
index f28f0bb..0000000
--- a/WebCore/platform/text/qt/TextCodecQt.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodecQt_h
-#define TextCodecQt_h
-
-#include "TextCodec.h"
-#include "TextEncoding.h"
-#include <QTextCodec>
-
-namespace WebCore {
-
- class TextCodecQt : public TextCodec {
- public:
- static void registerEncodingNames(EncodingNameRegistrar);
- static void registerCodecs(TextCodecRegistrar);
-
- TextCodecQt(const TextEncoding&);
- virtual ~TextCodecQt();
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
-
- private:
- TextEncoding m_encoding;
- QTextCodec *m_codec;
- QTextCodec::ConverterState m_state;
- };
-
-} // namespace WebCore
-
-#endif // TextCodecQt_h
diff --git a/WebCore/platform/text/transcoder/FontTranscoder.cpp b/WebCore/platform/text/transcoder/FontTranscoder.cpp
deleted file mode 100644
index 68601f9..0000000
--- a/WebCore/platform/text/transcoder/FontTranscoder.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2010, Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "FontTranscoder.h"
-
-#include "CharacterNames.h"
-#include "FontDescription.h"
-#include "TextEncoding.h"
-
-namespace WebCore {
-
-FontTranscoder::FontTranscoder()
-{
- m_converterTypes.add("MS PGothic", BackslashToYenSign);
- UChar unicodeNameMSPGothic[] = {0xFF2D, 0xFF33, 0x0020, 0xFF30, 0x30B4, 0x30B7, 0x30C3, 0x30AF};
- m_converterTypes.add(AtomicString(unicodeNameMSPGothic, WTF_ARRAY_LENGTH(unicodeNameMSPGothic)), BackslashToYenSign);
-
- m_converterTypes.add("MS PMincho", BackslashToYenSign);
- UChar unicodeNameMSPMincho[] = {0xFF2D, 0xFF33, 0x0020, 0xFF30, 0x660E, 0x671D};
- m_converterTypes.add(AtomicString(unicodeNameMSPMincho, WTF_ARRAY_LENGTH(unicodeNameMSPMincho)), BackslashToYenSign);
-
- m_converterTypes.add("MS Gothic", BackslashToYenSign);
- UChar unicodeNameMSGothic[] = {0xFF2D, 0xFF33, 0x0020, 0x30B4, 0x30B7, 0x30C3, 0x30AF};
- m_converterTypes.add(AtomicString(unicodeNameMSGothic, WTF_ARRAY_LENGTH(unicodeNameMSGothic)), BackslashToYenSign);
-
- m_converterTypes.add("MS Mincho", BackslashToYenSign);
- UChar unicodeNameMSMincho[] = {0xFF2D, 0xFF33, 0x0020, 0x660E, 0x671D};
- m_converterTypes.add(AtomicString(unicodeNameMSMincho, WTF_ARRAY_LENGTH(unicodeNameMSMincho)), BackslashToYenSign);
-
- m_converterTypes.add("Meiryo", BackslashToYenSign);
- UChar unicodeNameMeiryo[] = {0x30E1, 0x30A4, 0x30EA, 0x30AA};
- m_converterTypes.add(AtomicString(unicodeNameMeiryo, WTF_ARRAY_LENGTH(unicodeNameMeiryo)), BackslashToYenSign);
-}
-
-FontTranscoder::ConverterType FontTranscoder::converterType(const FontDescription& fontDescription, const TextEncoding* encoding) const
-{
- const AtomicString& fontFamily = fontDescription.family().family().string();
- if (!fontFamily.isNull()) {
- HashMap<AtomicString, ConverterType>::const_iterator found = m_converterTypes.find(fontFamily);
- if (found != m_converterTypes.end())
- return found->second;
- }
-
- // IE's default fonts for Japanese encodings change backslashes into yen signs.
- // We emulate this behavior only when no font is explicitly specified.
- if (encoding && encoding->backslashAsCurrencySymbol() != '\\' && !fontDescription.isSpecifiedFont())
- return BackslashToYenSign;
-
- return NoConversion;
-}
-
-// Rewrites |text| in place according to the converter type that converterType() selects
-// for |fontDescription| and |encoding|.
-//
-// Only BackslashToYenSign performs a conversion: every backslash is replaced by yenSign.
-// NoConversion and any other value reach ASSERT_NOT_REACHED(), so callers are presumably
-// expected to check needsTranscoding() first -- TODO confirm at the call sites.
-void FontTranscoder::convert(String& text, const FontDescription& fontDescription, const TextEncoding* encoding) const
-{
- switch (converterType(fontDescription, encoding)) {
- case BackslashToYenSign: {
- // FIXME: TextEncoding.h has similar code. We need to factor them out.
- text.replace('\\', yenSign);
- break;
- }
- case NoConversion:
- default:
- ASSERT_NOT_REACHED();
- }
-}
-
-bool FontTranscoder::needsTranscoding(const FontDescription& fontDescription, const TextEncoding* encoding) const
-{
- ConverterType type = converterType(fontDescription, encoding);
- return type != NoConversion;
-}
-
-FontTranscoder& fontTranscoder()
-{
- static FontTranscoder* transcoder = new FontTranscoder;
- return *transcoder;
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/transcoder/FontTranscoder.h b/WebCore/platform/text/transcoder/FontTranscoder.h
deleted file mode 100644
index 67db977..0000000
--- a/WebCore/platform/text/transcoder/FontTranscoder.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2010, Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FontTranscoder_h
-#define FontTranscoder_h
-
-#include <wtf/HashMap.h>
-#include <wtf/Noncopyable.h>
-#include <wtf/text/AtomicStringHash.h>
-
-namespace WebCore {
-
-class FontDescription;
-class TextEncoding;
-
-class FontTranscoder : public Noncopyable {
-public:
- void convert(String& text, const FontDescription&, const TextEncoding* = 0) const;
- bool needsTranscoding(const FontDescription&, const TextEncoding* = 0) const;
-
-private:
- FontTranscoder();
- ~FontTranscoder(); // Not implemented to make sure nobody accidentally calls delete -- WebCore does not delete singletons.
-
- enum ConverterType {
- NoConversion, BackslashToYenSign,
- };
-
- ConverterType converterType(const FontDescription&, const TextEncoding*) const;
-
- HashMap<AtomicString, ConverterType> m_converterTypes;
-
- friend FontTranscoder& fontTranscoder();
-};
-
-FontTranscoder& fontTranscoder();
-
-} // namespace WebCore
-
-#endif // FontTranscoder_h
diff --git a/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp b/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp
deleted file mode 100644
index e417e17..0000000
--- a/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2007 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#include "config.h"
-#include "TextBreakIteratorInternalICU.h"
-
-namespace WebCore {
-
-const char* currentSearchLocaleID()
-{
- // FIXME: Should use system locale.
- return "";
-}
-
-const char* currentTextBreakLocaleID()
-{
- // Using en_US_POSIX now so word selection in address field works as expected as before (double-clicking
- // in a URL selects a word delimited by periods rather than selecting the entire URL).
- // However, this is not entirely correct - we should honor the system locale in the normal case.
- // FIXME: <rdar://problem/6786703> Should use system locale for text breaking
- return "en_US_POSIX";
-}
-
-}
diff --git a/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp b/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp
deleted file mode 100644
index 96488c0..0000000
--- a/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- * Copyright (C) 2007-2009 Torch Mobile, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- *
- */
-
-#include "config.h"
-#include "TextBreakIterator.h"
-
-#include "PlatformString.h"
-#include <wtf/StdLibExtras.h>
-#include <wtf/unicode/Unicode.h>
-
-using namespace WTF::Unicode;
-
-namespace WebCore {
-
-// Hack, not entirely correct
-static inline bool isCharStop(UChar c)
-{
- CharCategory charCategory = category(c);
- return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00));
-}
-
-static inline bool isLineStop(UChar c)
-{
- return category(c) != Separator_Line;
-}
-
-static inline bool isSentenceStop(UChar c)
-{
- return isPunct(c);
-}
-
-class TextBreakIterator {
-public:
- void reset(const UChar* str, int len)
- {
- string = str;
- length = len;
- currentPos = 0;
- }
- int first()
- {
- currentPos = 0;
- return currentPos;
- }
- int last()
- {
- currentPos = length;
- return currentPos;
- }
- virtual int next() = 0;
- virtual int previous() = 0;
- int following(int position)
- {
- currentPos = position;
- return next();
- }
- int preceding(int position)
- {
- currentPos = position;
- return previous();
- }
-
- int currentPos;
- const UChar* string;
- int length;
-};
-
-struct WordBreakIterator: TextBreakIterator {
- virtual int next();
- virtual int previous();
-};
-
-struct CharBreakIterator: TextBreakIterator {
- virtual int next();
- virtual int previous();
-};
-
-struct LineBreakIterator: TextBreakIterator {
- virtual int next();
- virtual int previous();
-};
-
-struct SentenceBreakIterator : TextBreakIterator {
- virtual int next();
- virtual int previous();
-};
-
-int WordBreakIterator::next()
-{
- if (currentPos == length) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos < length) {
- if (haveSpace && !isSpace(string[currentPos]))
- break;
- if (isSpace(string[currentPos]))
- haveSpace = true;
- ++currentPos;
- }
- return currentPos;
-}
-
-int WordBreakIterator::previous()
-{
- if (!currentPos) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos > 0) {
- if (haveSpace && !isSpace(string[currentPos]))
- break;
- if (isSpace(string[currentPos]))
- haveSpace = true;
- --currentPos;
- }
- return currentPos;
-}
-
-int CharBreakIterator::next()
-{
- if (currentPos >= length)
- return -1;
- ++currentPos;
- while (currentPos < length && !isCharStop(string[currentPos]))
- ++currentPos;
- return currentPos;
-}
-
-int CharBreakIterator::previous()
-{
- if (currentPos <= 0)
- return -1;
- if (currentPos > length)
- currentPos = length;
- --currentPos;
- while (currentPos > 0 && !isCharStop(string[currentPos]))
- --currentPos;
- return currentPos;
-}
-
-int LineBreakIterator::next()
-{
- if (currentPos == length) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos < length) {
- if (haveSpace && !isLineStop(string[currentPos]))
- break;
- if (isLineStop(string[currentPos]))
- haveSpace = true;
- ++currentPos;
- }
- return currentPos;
-}
-
-int LineBreakIterator::previous()
-{
- if (!currentPos) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos > 0) {
- if (haveSpace && !isLineStop(string[currentPos]))
- break;
- if (isLineStop(string[currentPos]))
- haveSpace = true;
- --currentPos;
- }
- return currentPos;
-}
-
-int SentenceBreakIterator::next()
-{
- if (currentPos == length) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos < length) {
- if (haveSpace && !isSentenceStop(string[currentPos]))
- break;
- if (isSentenceStop(string[currentPos]))
- haveSpace = true;
- ++currentPos;
- }
- return currentPos;
-}
-
-int SentenceBreakIterator::previous()
-{
- if (!currentPos) {
- currentPos = -1;
- return currentPos;
- }
- bool haveSpace = false;
- while (currentPos > 0) {
- if (haveSpace && !isSentenceStop(string[currentPos]))
- break;
- if (isSentenceStop(string[currentPos]))
- haveSpace = true;
- --currentPos;
- }
- return currentPos;
-}
-
-TextBreakIterator* wordBreakIterator(const UChar* string, int length)
-{
- DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ());
- iterator.reset(string, length);
- return &iterator;
-}
-
-TextBreakIterator* characterBreakIterator(const UChar* string, int length)
-{
- DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ());
- iterator.reset(string, length);
- return &iterator;
-}
-
-TextBreakIterator* lineBreakIterator(const UChar* string, int length)
-{
- DEFINE_STATIC_LOCAL(LineBreakIterator , iterator, ());
- iterator.reset(string, length);
- return &iterator;
-}
-
-TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
-{
- DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ());
- iterator.reset(string, length);
- return &iterator;
-}
-
-int textBreakFirst(TextBreakIterator* breakIterator)
-{
- return breakIterator->first();
-}
-
-int textBreakLast(TextBreakIterator* breakIterator)
-{
- return breakIterator->last();
-}
-
-int textBreakNext(TextBreakIterator* breakIterator)
-{
- return breakIterator->next();
-}
-
-int textBreakPrevious(TextBreakIterator* breakIterator)
-{
- return breakIterator->previous();
-}
-
-int textBreakPreceding(TextBreakIterator* breakIterator, int position)
-{
- return breakIterator->preceding(position);
-}
-
-int textBreakFollowing(TextBreakIterator* breakIterator, int position)
-{
- return breakIterator->following(position);
-}
-
-int textBreakCurrent(TextBreakIterator* breakIterator)
-{
- return breakIterator->currentPos;
-}
-
-bool isTextBreak(TextBreakIterator*, int)
-{
- return true;
-}
-
-TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
-{
- return characterBreakIterator(string, length);
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/wince/TextCodecWinCE.cpp b/WebCore/platform/text/wince/TextCodecWinCE.cpp
deleted file mode 100644
index 3532e74..0000000
--- a/WebCore/platform/text/wince/TextCodecWinCE.cpp
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
- * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * This library is distributed in the hope that i will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "config.h"
-#include "TextCodecWinCE.h"
-
-#include "FontCache.h"
-#include "PlatformString.h"
-#include <mlang.h>
-#include <winbase.h>
-#include <winnls.h>
-#include <wtf/HashMap.h>
-#include <wtf/HashSet.h>
-#include <wtf/text/CString.h>
-#include <wtf/text/StringConcatenate.h>
-#include <wtf/text/StringHash.h>
-#include <wtf/unicode/UTF8.h>
-
-namespace WebCore {
-
-struct CharsetInfo {
- CString m_name;
- String m_friendlyName;
- UINT m_codePage;
- Vector<CString> m_aliases;
-};
-
-class LanguageManager {
-private:
- LanguageManager();
-
- friend LanguageManager& languageManager();
-};
-
-// Usage: a lookup table used to get CharsetInfo with code page ID.
-// Key: code page ID. Value: charset information.
-static HashMap<UINT, CString>& codePageCharsets()
-{
- static HashMap<UINT, CString> cc;
- return cc;
-}
-
-static HashMap<String, CharsetInfo>& knownCharsets()
-{
- static HashMap<String, CharsetInfo> kc;
- return kc;
-}
-
-// Usage: a map that stores charsets that are supported by system. Sorted by name.
-// Key: charset. Value: code page ID.
-typedef HashSet<String> CharsetSet;
-static CharsetSet& supportedCharsets()
-{
- static CharsetSet sl;
- return sl;
-}
-
-static LanguageManager& languageManager()
-{
- static LanguageManager lm;
- return lm;
-}
-
-LanguageManager::LanguageManager()
-{
- IEnumCodePage* enumInterface;
- IMultiLanguage* mli = FontCache::getMultiLanguageInterface();
- if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) {
- MIMECPINFO cpInfo;
- ULONG ccpInfo;
- while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) {
- if (!IsValidCodePage(cpInfo.uiCodePage))
- continue;
-
- HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage);
-
- CString name(String(cpInfo.wszWebCharset).latin1());
- if (i == codePageCharsets().end()) {
- CharsetInfo info;
- info.m_codePage = cpInfo.uiCodePage;
- knownCharsets().set(name.data(), info);
- i = codePageCharsets().set(cpInfo.uiCodePage, name).first;
- }
- if (i != codePageCharsets().end()) {
- HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length()));
- ASSERT(j != knownCharsets().end());
- CharsetInfo& info = j->second;
- info.m_name = i->second.data();
- info.m_friendlyName = cpInfo.wszDescription;
- info.m_aliases.append(name);
- info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1());
- info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1());
- String cpName = makeString("cp", String::number(cpInfo.uiCodePage));
- info.m_aliases.append(cpName.latin1());
- supportedCharsets().add(i->second.data());
- }
- }
- enumInterface->Release();
- }
-}
-
-static UINT getCodePage(const char* name)
-{
- if (!strcmp(name, "UTF-8"))
- return CP_UTF8;
-
- // Explicitly use a "const" reference to fix the silly VS build error
- // saying "==" is not found for const_iterator and iterator
- const HashMap<String, CharsetInfo>& charsets = knownCharsets();
- HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name);
- return i == charsets.end() ? CP_ACP : i->second.m_codePage;
-}
-
-static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*)
-{
- return new TextCodecWinCE(getCodePage(encoding.name()));
-}
-
-TextCodecWinCE::TextCodecWinCE(UINT codePage)
- : m_codePage(codePage)
-{
-}
-
-TextCodecWinCE::~TextCodecWinCE()
-{
-}
-
-void TextCodecWinCE::registerBaseEncodingNames(EncodingNameRegistrar registrar)
-{
- registrar("UTF-8", "UTF-8");
-}
-
-void TextCodecWinCE::registerBaseCodecs(TextCodecRegistrar registrar)
-{
- registrar("UTF-8", newTextCodecWinCE, 0);
-}
-
-void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
-{
- languageManager();
- for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
- HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
- if (j != knownCharsets().end()) {
- registrar(j->second.m_name.data(), j->second.m_name.data());
- for (Vector<CString>::const_iterator alias = j->second.m_aliases.begin(); alias != j->second.m_aliases.end(); ++alias)
- registrar(alias->data(), j->second.m_name.data());
- }
- }
-}
-
-void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar)
-{
- languageManager();
- for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
- HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
- if (j != knownCharsets().end())
- registrar(j->second.m_name.data(), newTextCodecWinCE, 0);
- }
-}
-
-static DWORD getCodePageFlags(UINT codePage)
-{
- if (codePage == CP_UTF8)
- return MB_ERR_INVALID_CHARS;
-
- if (codePage == 42) // Symbol
- return 0;
-
- // Microsoft says the flag must be 0 for the following code pages
- if (codePage > 50000) {
- if ((codePage >= 50220 && codePage <= 50222)
- || codePage == 50225
- || codePage == 50227
- || codePage == 50229
- || codePage == 52936
- || codePage == 54936
- || (codePage >= 57002 && codePage <= 57001)
- || codePage == 65000 // UTF-7
- )
- return 0;
- }
-
- return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS;
-}
-
-static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length)
-{
- for (const char* bytesEnd = bytes + length; bytes < bytesEnd; ++bytes) {
- if (*bytes & 0x80)
- break;
- }
- return bytes;
-}
-
-static void decode(Vector<UChar, 8192>& result, UINT codePage, const char* bytes, size_t length, size_t* left, bool canBeFirstTime, bool& sawInvalidChar)
-{
- *left = length;
- if (!bytes || !length)
- return;
-
- DWORD flags = getCodePageFlags(codePage);
-
- if (codePage == CP_UTF8) {
- if (canBeFirstTime) {
- // Handle BOM.
- if (length > 3) {
- if (bytes[0] == (char)0xEF && bytes[1] == (char)0xBB && bytes[2] == (char)0xBF) {
- // BOM found!
- length -= 3;
- bytes += 3;
- *left = length;
- }
- } else if (bytes[0] == 0xEF && (length < 2 || bytes[1] == (char)0xBB) && (length < 3 || bytes[2] == (char)0xBF)) {
- if (length == 3)
- *left = 0;
- return;
- }
- }
-
- // Process ASCII characters at beginning.
- const char* firstNonAsciiChar = findFirstNonAsciiCharacter(bytes, length);
- int numAsciiCharacters = firstNonAsciiChar - bytes;
- if (numAsciiCharacters) {
- result.append(bytes, numAsciiCharacters);
- length -= numAsciiCharacters;
- if (!length) {
- *left = 0;
- return;
- }
- bytes = firstNonAsciiChar;
- }
-
- int oldSize = result.size();
- result.resize(oldSize + length);
- UChar* resultStart = result.data() + oldSize;
- const char* sourceStart = bytes;
- const char* const sourceEnd = bytes + length;
- for (;;) {
- using namespace WTF::Unicode;
- ConversionResult convRes = convertUTF8ToUTF16(&sourceStart
- , sourceEnd
- , &resultStart
- , result.data() + result.size()
- , true);
-
- // FIXME: is it possible?
- if (convRes == targetExhausted && sourceStart < sourceEnd) {
- oldSize = result.size();
- result.resize(oldSize + 256);
- resultStart = result.data() + oldSize;
- continue;
- }
-
- if (convRes != conversionOK)
- sawInvalidChar = true;
-
- break;
- }
-
- *left = sourceEnd - sourceStart;
- result.resize(resultStart - result.data());
- } else {
- int testLength = length;
- int untestedLength = length;
- for (;;) {
- int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0);
-
- if (resultLength > 0) {
- int oldSize = result.size();
- result.resize(oldSize + resultLength);
-
- MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength);
-
- if (testLength == untestedLength) {
- *left = length - testLength;
- break;
- }
- untestedLength -= testLength;
- length -= testLength;
- bytes += testLength;
- } else {
- untestedLength = testLength - 1;
- if (!untestedLength) {
- *left = length;
- break;
- }
- }
- testLength = (untestedLength + 1) / 2;
- }
- }
-}
-
-String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
-{
- if (!m_decodeBuffer.isEmpty()) {
- m_decodeBuffer.append(bytes, length);
- bytes = m_decodeBuffer.data();
- length = m_decodeBuffer.size();
- }
-
- size_t left;
- Vector<UChar, 8192> result;
- for (;;) {
- bool sawInvalidChar = false;
- WebCore::decode(result, m_codePage, bytes, length, &left, m_decodeBuffer.isEmpty(), sawInvalidChar);
- if (!left)
- break;
-
- if (!sawInvalidChar && !flush && left < 16)
- break;
-
- result.append(L'?');
- sawError = true;
- if (stopOnError)
- return String::adopt(result);
-
- if (left == 1)
- break;
-
- bytes += length - left + 1;
- length = left - 1;
- }
- if (left && !flush) {
- if (m_decodeBuffer.isEmpty())
- m_decodeBuffer.append(bytes + length - left, left);
- else {
- memmove(m_decodeBuffer.data(), bytes + length - left, left);
- m_decodeBuffer.resize(left);
- }
- } else
- m_decodeBuffer.clear();
-
- return String::adopt(result);
-}
-
-CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling)
-{
- if (!characters || !length)
- return CString();
-
- DWORD flags = m_codePage == CP_UTF8 ? 0 : WC_COMPOSITECHECK;
-
- int resultLength = WideCharToMultiByte(m_codePage, flags, characters, length, 0, 0, 0, 0);
-
- // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables.
-
- if (resultLength <= 0)
- return "?";
-
- char* characterBuffer;
- CString result = CString::newUninitialized(resultLength, characterBuffer);
-
- WideCharToMultiByte(m_codePage, flags, characters, length, characterBuffer, resultLength, 0, 0);
-
- return result;
-}
-
-void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver)
-{
- languageManager();
- for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
- HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
- if (j != knownCharsets().end() && !receiver.receive(j->second.m_name.data(), j->second.m_friendlyName.charactersWithNullTermination(), j->second.m_codePage))
- break;
- }
-}
-
-} // namespace WebCore
diff --git a/WebCore/platform/text/wince/TextCodecWinCE.h b/WebCore/platform/text/wince/TextCodecWinCE.h
deleted file mode 100644
index 8d332a6..0000000
--- a/WebCore/platform/text/wince/TextCodecWinCE.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- * Copyright (C) 2007-2009 Torch Mobile, Inc.
- * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef TextCodecWinCE_h
-#define TextCodecWinCE_h
-
-#include "PlatformString.h"
-#include "TextCodec.h"
-#include "TextEncoding.h"
-#include <wtf/Vector.h>
-#include <windows.h>
-
-namespace WebCore {
-
-class TextCodecWinCE : public TextCodec {
-public:
- static void registerBaseEncodingNames(EncodingNameRegistrar);
- static void registerBaseCodecs(TextCodecRegistrar);
-
- static void registerExtendedEncodingNames(EncodingNameRegistrar);
- static void registerExtendedCodecs(TextCodecRegistrar);
-
- TextCodecWinCE(UINT codePage);
- virtual ~TextCodecWinCE();
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
-
- struct EncodingInfo {
- String m_encoding;
- String m_friendlyName;
- };
-
- struct EncodingReceiver {
- // Return false to stop enumerating.
- virtual bool receive(const char* encoding, const wchar_t* friendlyName, unsigned int codePage) = 0;
- };
-
- static void enumerateSupportedEncodings(EncodingReceiver& receiver);
-
-private:
- UINT m_codePage;
- Vector<char> m_decodeBuffer;
-};
-
-} // namespace WebCore
-
-#endif // TextCodecWinCE_h