diff options
author | Steve Block <steveblock@google.com> | 2011-05-06 11:45:16 +0100 |
---|---|---|
committer | Steve Block <steveblock@google.com> | 2011-05-12 13:44:10 +0100 |
commit | cad810f21b803229eb11403f9209855525a25d57 (patch) | |
tree | 29a6fd0279be608e0fe9ffe9841f722f0f4e4269 /WebCore/platform/text | |
parent | 121b0cf4517156d0ac5111caf9830c51b69bae8f (diff) | |
download | external_webkit-cad810f21b803229eb11403f9209855525a25d57.zip external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.gz external_webkit-cad810f21b803229eb11403f9209855525a25d57.tar.bz2 |
Merge WebKit at r75315: Initial merge by git.
Change-Id: I570314b346ce101c935ed22a626b48c2af266b84
Diffstat (limited to 'WebCore/platform/text')
85 files changed, 0 insertions, 13826 deletions
diff --git a/WebCore/platform/text/AtomicStringKeyedMRUCache.h b/WebCore/platform/text/AtomicStringKeyedMRUCache.h deleted file mode 100644 index b3004f7..0000000 --- a/WebCore/platform/text/AtomicStringKeyedMRUCache.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2010 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef AtomicStringKeyedMRUCache_h -#define AtomicStringKeyedMRUCache_h - -#include <wtf/text/AtomicString.h> - -namespace WebCore { - -template<typename T, size_t capacity = 4> -class AtomicStringKeyedMRUCache { -public: - T get(const AtomicString& key) - { - if (key.isNull()) { - DEFINE_STATIC_LOCAL(T, valueForNull, (createValueForNullKey())); - return valueForNull; - } - - for (size_t i = 0; i < m_cache.size(); ++i) { - if (m_cache[i].first == key) { - size_t foundIndex = i; - if (foundIndex + 1 < m_cache.size()) { - Entry entry = m_cache[foundIndex]; - m_cache.remove(foundIndex); - foundIndex = m_cache.size(); - m_cache.append(entry); - } - return m_cache[foundIndex].second; - } - } - if (m_cache.size() == capacity) - m_cache.remove(0); - - m_cache.append(std::make_pair(key, createValueForKey(key))); - return m_cache.last().second; - } - -private: - T createValueForNullKey(); - T createValueForKey(const AtomicString&); - - typedef pair<AtomicString, T> Entry; - typedef Vector<Entry, capacity> Cache; - Cache m_cache; -}; - -} - -#endif // AtomicStringKeyedMRUCache_h diff --git a/WebCore/platform/text/Base64.cpp b/WebCore/platform/text/Base64.cpp deleted file mode 100644 index 98b537a..0000000 --- a/WebCore/platform/text/Base64.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* - Copyright (C) 2000-2001 Dawit Alemayehu <adawit@kde.org> - Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org> - Copyright (C) 2007, 2008 Apple Inc. All rights reserved. - Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License (LGPL) - version 2 as published by the Free Software Foundation. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU Library General Public - License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - This code is based on the java implementation in HTTPClient - package by Ronald Tschalär Copyright (C) 1996-1999. -*/ - -#include "config.h" -#include "Base64.h" - -#include <limits.h> -#include <wtf/StringExtras.h> -#include <wtf/text/WTFString.h> - -namespace WebCore { - -static const char base64EncMap[64] = { - 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, - 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, - 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, - 0x59, 0x5A, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, - 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, - 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, - 0x77, 0x78, 0x79, 0x7A, 0x30, 0x31, 0x32, 0x33, - 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x2B, 0x2F -}; - -static const char base64DecMap[128] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3F, - 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, - 0x3C, 0x3D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, - 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, - 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, - 0x17, 0x18, 0x19, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, - 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, - 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, - 0x31, 0x32, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -void base64Encode(const Vector<char>& in, Vector<char>& out, bool insertLFs) -{ - base64Encode(in.data(), in.size(), out, insertLFs); -} - -void base64Encode(const char* data, unsigned len, Vector<char>& out, 
bool insertLFs) -{ - out.clear(); - if (!len) - return; - - // If the input string is pathologically large, just return nothing. - // Note: Keep this in sync with the "outLength" computation below. - // Rather than being perfectly precise, this is a bit conservative. - const unsigned maxInputBufferSize = UINT_MAX / 77 * 76 / 4 * 3 - 2; - if (len > maxInputBufferSize) - return; - - unsigned sidx = 0; - unsigned didx = 0; - - unsigned outLength = ((len + 2) / 3) * 4; - - // Deal with the 76 character per line limit specified in RFC 2045. - insertLFs = (insertLFs && outLength > 76); - if (insertLFs) - outLength += ((outLength - 1) / 76); - - int count = 0; - out.grow(outLength); - - // 3-byte to 4-byte conversion + 0-63 to ascii printable conversion - if (len > 1) { - while (sidx < len - 2) { - if (insertLFs) { - if (count && !(count % 76)) - out[didx++] = '\n'; - count += 4; - } - out[didx++] = base64EncMap[(data[sidx] >> 2) & 077]; - out[didx++] = base64EncMap[((data[sidx + 1] >> 4) & 017) | ((data[sidx] << 4) & 077)]; - out[didx++] = base64EncMap[((data[sidx + 2] >> 6) & 003) | ((data[sidx + 1] << 2) & 077)]; - out[didx++] = base64EncMap[data[sidx + 2] & 077]; - sidx += 3; - } - } - - if (sidx < len) { - if (insertLFs && (count > 0) && !(count % 76)) - out[didx++] = '\n'; - - out[didx++] = base64EncMap[(data[sidx] >> 2) & 077]; - if (sidx < len - 1) { - out[didx++] = base64EncMap[((data[sidx + 1] >> 4) & 017) | ((data[sidx] << 4) & 077)]; - out[didx++] = base64EncMap[(data[sidx + 1] << 2) & 077]; - } else - out[didx++] = base64EncMap[(data[sidx] << 4) & 077]; - } - - // Add padding - while (didx < out.size()) { - out[didx] = '='; - didx++; - } -} - -bool base64Decode(const Vector<char>& in, Vector<char>& out, Base64DecodePolicy policy) -{ - out.clear(); - - // If the input string is pathologically large, just return nothing. 
- if (in.size() > UINT_MAX) - return false; - - return base64Decode(in.data(), in.size(), out, policy); -} - -template<typename T> -static inline bool base64DecodeInternal(const T* data, unsigned len, Vector<char>& out, Base64DecodePolicy policy) -{ - out.clear(); - if (!len) - return true; - - out.grow(len); - - bool sawEqualsSign = false; - unsigned outLength = 0; - for (unsigned idx = 0; idx < len; idx++) { - unsigned ch = data[idx]; - if (ch == '=') - sawEqualsSign = true; - else if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ch == '+' || ch == '/') { - if (sawEqualsSign) - return false; - out[outLength] = base64DecMap[ch]; - outLength++; - } else if (policy == FailOnInvalidCharacter || (policy == IgnoreWhitespace && !isSpaceOrNewline(ch))) - return false; - } - - if (!outLength) - return !sawEqualsSign; - - // Valid data is (n * 4 + [0,2,3]) characters long. - if ((outLength % 4) == 1) - return false; - - // 4-byte to 3-byte conversion - outLength -= (outLength + 3) / 4; - if (!outLength) - return false; - - unsigned sidx = 0; - unsigned didx = 0; - if (outLength > 1) { - while (didx < outLength - 2) { - out[didx] = (((out[sidx] << 2) & 255) | ((out[sidx + 1] >> 4) & 003)); - out[didx + 1] = (((out[sidx + 1] << 4) & 255) | ((out[sidx + 2] >> 2) & 017)); - out[didx + 2] = (((out[sidx + 2] << 6) & 255) | (out[sidx + 3] & 077)); - sidx += 4; - didx += 3; - } - } - - if (didx < outLength) - out[didx] = (((out[sidx] << 2) & 255) | ((out[sidx + 1] >> 4) & 003)); - - if (++didx < outLength) - out[didx] = (((out[sidx + 1] << 4) & 255) | ((out[sidx + 2] >> 2) & 017)); - - if (outLength < out.size()) - out.shrink(outLength); - - return true; -} - -bool base64Decode(const char* data, unsigned len, Vector<char>& out, Base64DecodePolicy policy) -{ - return base64DecodeInternal<char>(data, len, out, policy); -} - -bool base64Decode(const String& in, Vector<char>& out, Base64DecodePolicy policy) -{ - return 
base64DecodeInternal<UChar>(in.characters(), in.length(), out, policy); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/Base64.h b/WebCore/platform/text/Base64.h deleted file mode 100644 index 211bd3c..0000000 --- a/WebCore/platform/text/Base64.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org> - * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef Base64_h -#define Base64_h - -#include <wtf/Forward.h> -#include <wtf/Vector.h> - -namespace WebCore { - -enum Base64DecodePolicy { FailOnInvalidCharacter, IgnoreWhitespace, IgnoreInvalidCharacters }; - -void base64Encode(const Vector<char>&, Vector<char>&, bool insertLFs = false); -void base64Encode(const char*, unsigned, Vector<char>&, bool insertLFs = false); - -bool base64Decode(const String&, Vector<char>&, Base64DecodePolicy = FailOnInvalidCharacter); -bool base64Decode(const Vector<char>&, Vector<char>&, Base64DecodePolicy = FailOnInvalidCharacter); -bool base64Decode(const char*, unsigned, Vector<char>&, Base64DecodePolicy = FailOnInvalidCharacter); - -} - -#endif // Base64_h diff --git a/WebCore/platform/text/BidiContext.cpp b/WebCore/platform/text/BidiContext.cpp deleted file mode 100644 index fb6b8cf..0000000 --- a/WebCore/platform/text/BidiContext.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2000 Lars Knoll (knoll@kde.org) - * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - -#include "config.h" -#include "BidiContext.h" - -namespace WebCore { - -using namespace WTF::Unicode; - -inline PassRefPtr<BidiContext> BidiContext::createUncached(unsigned char level, Direction direction, bool override, BidiContext* parent) -{ - return adoptRef(new BidiContext(level, direction, override, parent)); -} - -PassRefPtr<BidiContext> BidiContext::create(unsigned char level, Direction direction, bool override, BidiContext* parent) -{ - ASSERT(direction == (level % 2 ? RightToLeft : LeftToRight)); - - if (parent) - return createUncached(level, direction, override, parent); - - ASSERT(level <= 1); - if (!level) { - if (!override) { - static BidiContext* ltrContext = createUncached(0, LeftToRight, false, 0).releaseRef(); - return ltrContext; - } - - static BidiContext* ltrOverrideContext = createUncached(0, LeftToRight, true, 0).releaseRef(); - return ltrOverrideContext; - } - - if (!override) { - static BidiContext* rtlContext = createUncached(1, RightToLeft, false, 0).releaseRef(); - return rtlContext; - } - - static BidiContext* rtlOverrideContext = createUncached(1, RightToLeft, true, 0).releaseRef(); - return rtlOverrideContext; -} - -bool operator==(const BidiContext& c1, const BidiContext& c2) -{ - if (&c1 == &c2) - return true; - if (c1.level() != c2.level() || c1.override() != c2.override() || c1.dir() != c2.dir()) - return false; - if (!c1.parent()) - return !c2.parent(); - return c2.parent() && *c1.parent() == *c2.parent(); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/BidiContext.h b/WebCore/platform/text/BidiContext.h deleted file mode 100644 index b52815f..0000000 --- a/WebCore/platform/text/BidiContext.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (C) 2000 Lars Knoll (knoll@kde.org) - * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved. 
- * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef BidiContext_h -#define BidiContext_h - -#include <wtf/Assertions.h> -#include <wtf/PassRefPtr.h> -#include <wtf/RefCounted.h> -#include <wtf/RefPtr.h> -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - -// Used to keep track of explicit embeddings. 
-class BidiContext : public RefCounted<BidiContext> { -public: - static PassRefPtr<BidiContext> create(unsigned char level, WTF::Unicode::Direction direction, bool override = false, BidiContext* parent = 0); - - BidiContext* parent() const { return m_parent.get(); } - unsigned char level() const { return m_level; } - WTF::Unicode::Direction dir() const { return static_cast<WTF::Unicode::Direction>(m_direction); } - bool override() const { return m_override; } - -private: - BidiContext(unsigned char level, WTF::Unicode::Direction direction, bool override, BidiContext* parent) - : m_level(level) - , m_direction(direction) - , m_override(override) - , m_parent(parent) - { - } - - static PassRefPtr<BidiContext> createUncached(unsigned char level, WTF::Unicode::Direction, bool override, BidiContext* parent); - - unsigned char m_level; - unsigned m_direction : 5; // Direction - bool m_override : 1; - RefPtr<BidiContext> m_parent; -}; - -bool operator==(const BidiContext&, const BidiContext&); - -} // namespace WebCore - -#endif // BidiContext_h diff --git a/WebCore/platform/text/BidiResolver.h b/WebCore/platform/text/BidiResolver.h deleted file mode 100644 index 1f87115..0000000 --- a/WebCore/platform/text/BidiResolver.h +++ /dev/null @@ -1,968 +0,0 @@ -/* - * Copyright (C) 2000 Lars Knoll (knoll@kde.org) - * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. 
- * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef BidiResolver_h -#define BidiResolver_h - -#include "BidiContext.h" -#include <wtf/Noncopyable.h> -#include <wtf/PassRefPtr.h> -#include <wtf/Vector.h> - -namespace WebCore { - -template <class Iterator> struct MidpointState { - MidpointState() - { - reset(); - } - - void reset() - { - numMidpoints = 0; - currentMidpoint = 0; - betweenMidpoints = false; - } - - // The goal is to reuse the line state across multiple - // lines so we just keep an array around for midpoints and never clear it across multiple - // lines. We track the number of items and position using the two other variables. - Vector<Iterator> midpoints; - unsigned numMidpoints; - unsigned currentMidpoint; - bool betweenMidpoints; -}; - -// The BidiStatus at a given position (typically the end of a line) can -// be cached and then used to restart bidi resolution at that position. 
-struct BidiStatus { - BidiStatus() - : eor(WTF::Unicode::OtherNeutral) - , lastStrong(WTF::Unicode::OtherNeutral) - , last(WTF::Unicode::OtherNeutral) - { - } - - BidiStatus(WTF::Unicode::Direction eorDir, WTF::Unicode::Direction lastStrongDir, WTF::Unicode::Direction lastDir, PassRefPtr<BidiContext> bidiContext) - : eor(eorDir) - , lastStrong(lastStrongDir) - , last(lastDir) - , context(bidiContext) - { - } - - WTF::Unicode::Direction eor; - WTF::Unicode::Direction lastStrong; - WTF::Unicode::Direction last; - RefPtr<BidiContext> context; -}; - -inline bool operator==(const BidiStatus& status1, const BidiStatus& status2) -{ - return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context); -} - -inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2) -{ - return !(status1 == status2); -} - -struct BidiCharacterRun { - BidiCharacterRun(int start, int stop, BidiContext* context, WTF::Unicode::Direction dir) - : m_start(start) - , m_stop(stop) - , m_override(context->override()) - , m_next(0) - { - if (dir == WTF::Unicode::OtherNeutral) - dir = context->dir(); - - m_level = context->level(); - - // add level of run (cases I1 & I2) - if (m_level % 2) { - if (dir == WTF::Unicode::LeftToRight || dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber) - m_level++; - } else { - if (dir == WTF::Unicode::RightToLeft) - m_level++; - else if (dir == WTF::Unicode::ArabicNumber || dir == WTF::Unicode::EuropeanNumber) - m_level += 2; - } - } - - void destroy() { delete this; } - - int start() const { return m_start; } - int stop() const { return m_stop; } - unsigned char level() const { return m_level; } - bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; } - bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; } - - BidiCharacterRun* next() const { return m_next; } - - unsigned char m_level; - int 
m_start; - int m_stop; - bool m_override; - BidiCharacterRun* m_next; -}; - -template <class Iterator, class Run> class BidiResolver : public Noncopyable { -public : - BidiResolver() - : m_direction(WTF::Unicode::OtherNeutral) - , reachedEndOfLine(false) - , emptyRun(true) - , m_firstRun(0) - , m_lastRun(0) - , m_logicallyLastRun(0) - , m_runCount(0) - { - } - - const Iterator& position() const { return current; } - void setPosition(const Iterator& position) { current = position; } - - void increment() { current.increment(); } - - BidiContext* context() const { return m_status.context.get(); } - void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; } - - void setLastDir(WTF::Unicode::Direction lastDir) { m_status.last = lastDir; } - void setLastStrongDir(WTF::Unicode::Direction lastStrongDir) { m_status.lastStrong = lastStrongDir; } - void setEorDir(WTF::Unicode::Direction eorDir) { m_status.eor = eorDir; } - - WTF::Unicode::Direction dir() const { return m_direction; } - void setDir(WTF::Unicode::Direction d) { m_direction = d; } - - const BidiStatus& status() const { return m_status; } - void setStatus(const BidiStatus s) { m_status = s; } - - MidpointState<Iterator>& midpointState() { return m_midpointState; } - - void embed(WTF::Unicode::Direction); - void commitExplicitEmbedding(); - - void createBidiRunsForLine(const Iterator& end, bool visualOrder = false, bool hardLineBreak = false); - - Run* firstRun() const { return m_firstRun; } - Run* lastRun() const { return m_lastRun; } - Run* logicallyLastRun() const { return m_logicallyLastRun; } - unsigned runCount() const { return m_runCount; } - - void addRun(Run*); - void prependRun(Run*); - - void moveRunToEnd(Run*); - void moveRunToBeginning(Run*); - - void deleteRuns(); - -protected: - void appendRun(); - void reverseRuns(unsigned start, unsigned end); - - Iterator current; - Iterator sor; - Iterator eor; - Iterator last; - BidiStatus m_status; - WTF::Unicode::Direction m_direction; - Iterator 
endOfLine; - bool reachedEndOfLine; - Iterator lastBeforeET; - bool emptyRun; - - Run* m_firstRun; - Run* m_lastRun; - Run* m_logicallyLastRun; - unsigned m_runCount; - MidpointState<Iterator> m_midpointState; - -private: - void raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to); - void lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from); - void checkDirectionInLowerRaiseEmbeddingLevel(); - - Vector<WTF::Unicode::Direction, 8> m_currentExplicitEmbeddingSequence; -}; - -template <class Iterator, class Run> -inline void BidiResolver<Iterator, Run>::addRun(Run* run) -{ - if (!m_firstRun) - m_firstRun = run; - else - m_lastRun->m_next = run; - m_lastRun = run; - m_runCount++; -} - -template <class Iterator, class Run> -inline void BidiResolver<Iterator, Run>::prependRun(Run* run) -{ - ASSERT(!run->m_next); - - if (!m_lastRun) - m_lastRun = run; - else - run->m_next = m_firstRun; - m_firstRun = run; - m_runCount++; -} - -template <class Iterator, class Run> -inline void BidiResolver<Iterator, Run>::moveRunToEnd(Run* run) -{ - ASSERT(m_firstRun); - ASSERT(m_lastRun); - ASSERT(run->m_next); - - Run* current = 0; - Run* next = m_firstRun; - while (next != run) { - current = next; - next = current->next(); - } - - if (!current) - m_firstRun = run->next(); - else - current->m_next = run->m_next; - - run->m_next = 0; - m_lastRun->m_next = run; - m_lastRun = run; -} - -template <class Iterator, class Run> -inline void BidiResolver<Iterator, Run>::moveRunToBeginning(Run* run) -{ - ASSERT(m_firstRun); - ASSERT(m_lastRun); - ASSERT(run != m_firstRun); - - Run* current = m_firstRun; - Run* next = current->next(); - while (next != run) { - current = next; - next = current->next(); - } - - current->m_next = run->m_next; - if (run == m_lastRun) - m_lastRun = current; - - run->m_next = m_firstRun; - m_firstRun = run; -} - -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::appendRun() -{ - if (!emptyRun && !eor.atEnd()) { - 
unsigned startOffset = sor.offset(); - unsigned endOffset = eor.offset(); - - if (!endOfLine.atEnd() && endOffset >= endOfLine.offset()) { - reachedEndOfLine = true; - endOffset = endOfLine.offset(); - } - - if (endOffset >= startOffset) - addRun(new Run(startOffset, endOffset + 1, context(), m_direction)); - - eor.increment(); - sor = eor; - } - - m_direction = WTF::Unicode::OtherNeutral; - m_status.eor = WTF::Unicode::OtherNeutral; -} - -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::embed(WTF::Unicode::Direction d) -{ - using namespace WTF::Unicode; - - ASSERT(d == PopDirectionalFormat || d == LeftToRightEmbedding || d == LeftToRightOverride || d == RightToLeftEmbedding || d == RightToLeftOverride); - m_currentExplicitEmbeddingSequence.append(d); -} - -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel() -{ - using namespace WTF::Unicode; - - ASSERT(m_status.eor != OtherNeutral || eor.atEnd()); - // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last - // Bidi control characters are included into BidiRun, so last direction - // could be one of the bidi embeddings when there are nested embeddings. - // For example: "‪‫....." - ASSERT(m_status.last == EuropeanNumberSeparator - || m_status.last == EuropeanNumberTerminator - || m_status.last == CommonNumberSeparator - || m_status.last == BoundaryNeutral - || m_status.last == BlockSeparator - || m_status.last == SegmentSeparator - || m_status.last == WhiteSpaceNeutral - || m_status.last == OtherNeutral - || m_status.last == RightToLeftEmbedding - || m_status.last == LeftToRightEmbedding - || m_status.last == RightToLeftOverride - || m_status.last == LeftToRightOverride - || m_status.last == PopDirectionalFormat); - if (m_direction == OtherNeutral) - m_direction = m_status.lastStrong == LeftToRight ? 
LeftToRight : RightToLeft; -} - -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(WTF::Unicode::Direction from) -{ - using namespace WTF::Unicode; - - if (!emptyRun && eor != last) { - checkDirectionInLowerRaiseEmbeddingLevel(); - if (from == LeftToRight) { - // bidi.sor ... bidi.eor ... bidi.last L - if (m_status.eor == EuropeanNumber) { - if (m_status.lastStrong != LeftToRight) { - m_direction = EuropeanNumber; - appendRun(); - } - } else if (m_status.eor == ArabicNumber) { - m_direction = ArabicNumber; - appendRun(); - } else if (m_status.lastStrong != LeftToRight) { - appendRun(); - m_direction = LeftToRight; - } - } else if (m_status.eor == EuropeanNumber || m_status.eor == ArabicNumber || m_status.lastStrong == LeftToRight) { - appendRun(); - m_direction = RightToLeft; - } - eor = last; - } - appendRun(); - emptyRun = true; - // sor for the new run is determined by the higher level (rule X10) - setLastDir(from); - setLastStrongDir(from); - eor = Iterator(); -} - -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(WTF::Unicode::Direction from, WTF::Unicode::Direction to) -{ - using namespace WTF::Unicode; - - if (!emptyRun && eor != last) { - checkDirectionInLowerRaiseEmbeddingLevel(); - if (to == LeftToRight) { - // bidi.sor ... bidi.eor ... 
bidi.last L - if (m_status.eor == EuropeanNumber) { - if (m_status.lastStrong != LeftToRight) { - m_direction = EuropeanNumber; - appendRun(); - } - } else if (m_status.eor == ArabicNumber) { - m_direction = ArabicNumber; - appendRun(); - } else if (m_status.lastStrong != LeftToRight && from == LeftToRight) { - appendRun(); - m_direction = LeftToRight; - } - } else if (m_status.eor == ArabicNumber - || (m_status.eor == EuropeanNumber && (m_status.lastStrong != LeftToRight || from == RightToLeft)) - || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && from == RightToLeft)) { - appendRun(); - m_direction = RightToLeft; - } - eor = last; - } - appendRun(); - emptyRun = true; - setLastDir(to); - setLastStrongDir(to); - eor = Iterator(); -} - -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::commitExplicitEmbedding() -{ - using namespace WTF::Unicode; - - unsigned char fromLevel = context()->level(); - RefPtr<BidiContext> toContext = context(); - - for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) { - Direction embedding = m_currentExplicitEmbeddingSequence[i]; - if (embedding == PopDirectionalFormat) { - if (BidiContext* parentContext = toContext->parent()) - toContext = parentContext; - } else { - Direction direction = (embedding == RightToLeftEmbedding || embedding == RightToLeftOverride) ? RightToLeft : LeftToRight; - bool override = embedding == LeftToRightOverride || embedding == RightToLeftOverride; - unsigned char level = toContext->level(); - if (direction == RightToLeft) { - // Go to the least greater odd integer - level += 1; - level |= 1; - } else { - // Go to the least greater even integer - level += 2; - level &= ~1; - } - if (level < 61) - toContext = BidiContext::create(level, direction, override, toContext.get()); - } - } - - unsigned char toLevel = toContext->level(); - - if (toLevel > fromLevel) - raiseExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight, toLevel % 2 ? 
RightToLeft : LeftToRight); - else if (toLevel < fromLevel) - lowerExplicitEmbeddingLevel(fromLevel % 2 ? RightToLeft : LeftToRight); - - setContext(toContext); - - m_currentExplicitEmbeddingSequence.clear(); -} - -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::deleteRuns() -{ - emptyRun = true; - if (!m_firstRun) - return; - - Run* curr = m_firstRun; - while (curr) { - Run* s = curr->next(); - curr->destroy(); - curr = s; - } - - m_firstRun = 0; - m_lastRun = 0; - m_runCount = 0; -} - -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::reverseRuns(unsigned start, unsigned end) -{ - if (start >= end) - return; - - ASSERT(end < m_runCount); - - // Get the item before the start of the runs to reverse and put it in - // |beforeStart|. |curr| should point to the first run to reverse. - Run* curr = m_firstRun; - Run* beforeStart = 0; - unsigned i = 0; - while (i < start) { - i++; - beforeStart = curr; - curr = curr->next(); - } - - Run* startRun = curr; - while (i < end) { - i++; - curr = curr->next(); - } - Run* endRun = curr; - Run* afterEnd = curr->next(); - - i = start; - curr = startRun; - Run* newNext = afterEnd; - while (i <= end) { - // Do the reversal. - Run* next = curr->next(); - curr->m_next = newNext; - newNext = curr; - curr = next; - i++; - } - - // Now hook up beforeStart and afterEnd to the startRun and endRun. 
- if (beforeStart) - beforeStart->m_next = endRun; - else - m_firstRun = endRun; - - startRun->m_next = afterEnd; - if (!afterEnd) - m_lastRun = startRun; -} - -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, bool visualOrder, bool hardLineBreak) -{ - using namespace WTF::Unicode; - - ASSERT(m_direction == OtherNeutral); - - emptyRun = true; - - eor = Iterator(); - - last = current; - bool pastEnd = false; - BidiResolver<Iterator, Run> stateAtEnd; - - while (true) { - Direction dirCurrent; - if (pastEnd && (hardLineBreak || current.atEnd())) { - BidiContext* c = context(); - while (c->parent()) - c = c->parent(); - dirCurrent = c->dir(); - if (hardLineBreak) { - // A deviation from the Unicode Bidi Algorithm in order to match - // Mac OS X text and WinIE: a hard line break resets bidi state. - stateAtEnd.setContext(c); - stateAtEnd.setEorDir(dirCurrent); - stateAtEnd.setLastDir(dirCurrent); - stateAtEnd.setLastStrongDir(dirCurrent); - } - } else { - dirCurrent = current.direction(); - if (context()->override() - && dirCurrent != RightToLeftEmbedding - && dirCurrent != LeftToRightEmbedding - && dirCurrent != RightToLeftOverride - && dirCurrent != LeftToRightOverride - && dirCurrent != PopDirectionalFormat) - dirCurrent = context()->dir(); - else if (dirCurrent == NonSpacingMark) - dirCurrent = m_status.last; - } - - ASSERT(m_status.eor != OtherNeutral || eor.atEnd()); - switch (dirCurrent) { - - // embedding and overrides (X1-X9 in the Bidi specs) - case RightToLeftEmbedding: - case LeftToRightEmbedding: - case RightToLeftOverride: - case LeftToRightOverride: - case PopDirectionalFormat: - embed(dirCurrent); - commitExplicitEmbedding(); - break; - - // strong types - case LeftToRight: - switch(m_status.last) { - case RightToLeft: - case RightToLeftArabic: - case EuropeanNumber: - case ArabicNumber: - if (m_status.last != EuropeanNumber || m_status.lastStrong != LeftToRight) - appendRun(); - break; - 
case LeftToRight: - break; - case EuropeanNumberSeparator: - case EuropeanNumberTerminator: - case CommonNumberSeparator: - case BoundaryNeutral: - case BlockSeparator: - case SegmentSeparator: - case WhiteSpaceNeutral: - case OtherNeutral: - if (m_status.eor == EuropeanNumber) { - if (m_status.lastStrong != LeftToRight) { - // the numbers need to be on a higher embedding level, so let's close that run - m_direction = EuropeanNumber; - appendRun(); - if (context()->dir() != LeftToRight) { - // the neutrals take the embedding direction, which is R - eor = last; - m_direction = RightToLeft; - appendRun(); - } - } - } else if (m_status.eor == ArabicNumber) { - // Arabic numbers are always on a higher embedding level, so let's close that run - m_direction = ArabicNumber; - appendRun(); - if (context()->dir() != LeftToRight) { - // the neutrals take the embedding direction, which is R - eor = last; - m_direction = RightToLeft; - appendRun(); - } - } else if (m_status.lastStrong != LeftToRight) { - //last stuff takes embedding dir - if (context()->dir() == RightToLeft) { - eor = last; - m_direction = RightToLeft; - } - appendRun(); - } - default: - break; - } - eor = current; - m_status.eor = LeftToRight; - m_status.lastStrong = LeftToRight; - m_direction = LeftToRight; - break; - case RightToLeftArabic: - case RightToLeft: - switch (m_status.last) { - case LeftToRight: - case EuropeanNumber: - case ArabicNumber: - appendRun(); - case RightToLeft: - case RightToLeftArabic: - break; - case EuropeanNumberSeparator: - case EuropeanNumberTerminator: - case CommonNumberSeparator: - case BoundaryNeutral: - case BlockSeparator: - case SegmentSeparator: - case WhiteSpaceNeutral: - case OtherNeutral: - if (m_status.eor == EuropeanNumber) { - if (m_status.lastStrong == LeftToRight && context()->dir() == LeftToRight) - eor = last; - appendRun(); - } else if (m_status.eor == ArabicNumber) - appendRun(); - else if (m_status.lastStrong == LeftToRight) { - if (context()->dir() == 
LeftToRight) - eor = last; - appendRun(); - } - default: - break; - } - eor = current; - m_status.eor = RightToLeft; - m_status.lastStrong = dirCurrent; - m_direction = RightToLeft; - break; - - // weak types: - - case EuropeanNumber: - if (m_status.lastStrong != RightToLeftArabic) { - // if last strong was AL change EN to AN - switch (m_status.last) { - case EuropeanNumber: - case LeftToRight: - break; - case RightToLeft: - case RightToLeftArabic: - case ArabicNumber: - eor = last; - appendRun(); - m_direction = EuropeanNumber; - break; - case EuropeanNumberSeparator: - case CommonNumberSeparator: - if (m_status.eor == EuropeanNumber) - break; - case EuropeanNumberTerminator: - case BoundaryNeutral: - case BlockSeparator: - case SegmentSeparator: - case WhiteSpaceNeutral: - case OtherNeutral: - if (m_status.eor == EuropeanNumber) { - if (m_status.lastStrong == RightToLeft) { - // ENs on both sides behave like Rs, so the neutrals should be R. - // Terminate the EN run. - appendRun(); - // Make an R run. - eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last; - m_direction = RightToLeft; - appendRun(); - // Begin a new EN run. - m_direction = EuropeanNumber; - } - } else if (m_status.eor == ArabicNumber) { - // Terminate the AN run. - appendRun(); - if (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft) { - // Make an R run. - eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last; - m_direction = RightToLeft; - appendRun(); - // Begin a new EN run. - m_direction = EuropeanNumber; - } - } else if (m_status.lastStrong == RightToLeft) { - // Extend the R run to include the neutrals. - eor = m_status.last == EuropeanNumberTerminator ? lastBeforeET : last; - m_direction = RightToLeft; - appendRun(); - // Begin a new EN run. 
- m_direction = EuropeanNumber; - } - default: - break; - } - eor = current; - m_status.eor = EuropeanNumber; - if (m_direction == OtherNeutral) - m_direction = LeftToRight; - break; - } - case ArabicNumber: - dirCurrent = ArabicNumber; - switch (m_status.last) { - case LeftToRight: - if (context()->dir() == LeftToRight) - appendRun(); - break; - case ArabicNumber: - break; - case RightToLeft: - case RightToLeftArabic: - case EuropeanNumber: - eor = last; - appendRun(); - break; - case CommonNumberSeparator: - if (m_status.eor == ArabicNumber) - break; - case EuropeanNumberSeparator: - case EuropeanNumberTerminator: - case BoundaryNeutral: - case BlockSeparator: - case SegmentSeparator: - case WhiteSpaceNeutral: - case OtherNeutral: - if (m_status.eor == ArabicNumber - || (m_status.eor == EuropeanNumber && (m_status.lastStrong == RightToLeft || context()->dir() == RightToLeft)) - || (m_status.eor != EuropeanNumber && m_status.lastStrong == LeftToRight && context()->dir() == RightToLeft)) { - // Terminate the run before the neutrals. - appendRun(); - // Begin an R run for the neutrals. - m_direction = RightToLeft; - } else if (m_direction == OtherNeutral) - m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : RightToLeft; - eor = last; - appendRun(); - default: - break; - } - eor = current; - m_status.eor = ArabicNumber; - if (m_direction == OtherNeutral) - m_direction = ArabicNumber; - break; - case EuropeanNumberSeparator: - case CommonNumberSeparator: - break; - case EuropeanNumberTerminator: - if (m_status.last == EuropeanNumber) { - dirCurrent = EuropeanNumber; - eor = current; - m_status.eor = dirCurrent; - } else if (m_status.last != EuropeanNumberTerminator) - lastBeforeET = emptyRun ? eor : last; - break; - - // boundary neutrals should be ignored - case BoundaryNeutral: - if (eor == last) - eor = current; - break; - // neutrals - case BlockSeparator: - // ### what do we do with newline and paragraph seperators that come to here? 
- break; - case SegmentSeparator: - // ### implement rule L1 - break; - case WhiteSpaceNeutral: - break; - case OtherNeutral: - break; - default: - break; - } - - if (pastEnd && eor == current) { - if (!reachedEndOfLine) { - eor = endOfLine; - switch (m_status.eor) { - case LeftToRight: - case RightToLeft: - case ArabicNumber: - m_direction = m_status.eor; - break; - case EuropeanNumber: - m_direction = m_status.lastStrong == LeftToRight ? LeftToRight : EuropeanNumber; - break; - default: - ASSERT(false); - } - appendRun(); - } - current = end; - m_status = stateAtEnd.m_status; - sor = stateAtEnd.sor; - eor = stateAtEnd.eor; - last = stateAtEnd.last; - reachedEndOfLine = stateAtEnd.reachedEndOfLine; - lastBeforeET = stateAtEnd.lastBeforeET; - emptyRun = stateAtEnd.emptyRun; - m_direction = OtherNeutral; - break; - } - - // set m_status.last as needed. - switch (dirCurrent) { - case EuropeanNumberTerminator: - if (m_status.last != EuropeanNumber) - m_status.last = EuropeanNumberTerminator; - break; - case EuropeanNumberSeparator: - case CommonNumberSeparator: - case SegmentSeparator: - case WhiteSpaceNeutral: - case OtherNeutral: - switch(m_status.last) { - case LeftToRight: - case RightToLeft: - case RightToLeftArabic: - case EuropeanNumber: - case ArabicNumber: - m_status.last = dirCurrent; - break; - default: - m_status.last = OtherNeutral; - } - break; - case NonSpacingMark: - case BoundaryNeutral: - // ignore these - break; - case EuropeanNumber: - // fall through - default: - m_status.last = dirCurrent; - } - - last = current; - - if (emptyRun) { - sor = current; - emptyRun = false; - } - - increment(); - if (!m_currentExplicitEmbeddingSequence.isEmpty()) { - commitExplicitEmbedding(); - if (pastEnd) { - current = end; - m_status = stateAtEnd.m_status; - sor = stateAtEnd.sor; - eor = stateAtEnd.eor; - last = stateAtEnd.last; - reachedEndOfLine = stateAtEnd.reachedEndOfLine; - lastBeforeET = stateAtEnd.lastBeforeET; - emptyRun = stateAtEnd.emptyRun; - 
m_direction = OtherNeutral; - break; - } - } - - if (!pastEnd && (current == end || current.atEnd())) { - if (emptyRun) - break; - stateAtEnd.m_status = m_status; - stateAtEnd.sor = sor; - stateAtEnd.eor = eor; - stateAtEnd.last = last; - stateAtEnd.reachedEndOfLine = reachedEndOfLine; - stateAtEnd.lastBeforeET = lastBeforeET; - stateAtEnd.emptyRun = emptyRun; - endOfLine = last; - pastEnd = true; - } - } - - m_logicallyLastRun = m_lastRun; - - // reorder line according to run structure... - // do not reverse for visually ordered web sites - if (!visualOrder) { - - // first find highest and lowest levels - unsigned char levelLow = 128; - unsigned char levelHigh = 0; - Run* r = firstRun(); - while (r) { - if (r->m_level > levelHigh) - levelHigh = r->m_level; - if (r->m_level < levelLow) - levelLow = r->m_level; - r = r->next(); - } - - // implements reordering of the line (L2 according to Bidi spec): - // L2. From the highest level found in the text to the lowest odd level on each line, - // reverse any contiguous sequence of characters that are at that level or higher. - - // reversing is only done up to the lowest odd level - if (!(levelLow % 2)) - levelLow++; - - unsigned count = runCount() - 1; - - while (levelHigh >= levelLow) { - unsigned i = 0; - Run* currRun = firstRun(); - while (i < count) { - while (i < count && currRun && currRun->m_level < levelHigh) { - i++; - currRun = currRun->next(); - } - unsigned start = i; - while (i <= count && currRun && currRun->m_level >= levelHigh) { - i++; - currRun = currRun->next(); - } - unsigned end = i - 1; - reverseRuns(start, end); - } - levelHigh--; - } - } - endOfLine = Iterator(); -} - -} // namespace WebCore - -#endif // BidiResolver_h diff --git a/WebCore/platform/text/CharacterNames.h b/WebCore/platform/text/CharacterNames.h deleted file mode 100644 index c4b496e..0000000 --- a/WebCore/platform/text/CharacterNames.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (C) 2007, 2009, 2010 Apple Inc. 
All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef CharacterNames_h -#define CharacterNames_h - -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - -// Names here are taken from the Unicode standard. - -// Most of these are UChar constants, not UChar32, which makes them -// more convenient for WebCore code that mostly uses UTF-16. 
- -const UChar32 aegeanWordSeparatorLine = 0x10100; -const UChar32 aegeanWordSeparatorDot = 0x10101; -const UChar blackCircle = 0x25CF; -const UChar blackSquare = 0x25A0; -const UChar blackUpPointingTriangle = 0x25B2; -const UChar bullet = 0x2022; -const UChar bullseye = 0x25CE; -const UChar carriageReturn = 0x000D; -const UChar ethiopicPrefaceColon = 0x1366; -const UChar ethiopicWordspace = 0x1361; -const UChar fisheye = 0x25C9; -const UChar hebrewPunctuationGeresh = 0x05F3; -const UChar hebrewPunctuationGershayim = 0x05F4; -const UChar horizontalEllipsis = 0x2026; -const UChar hyphen = 0x2010; -const UChar hyphenMinus = 0x002D; -const UChar ideographicComma = 0x3001; -const UChar ideographicFullStop = 0x3002; -const UChar ideographicSpace = 0x3000; -const UChar leftDoubleQuotationMark = 0x201C; -const UChar leftSingleQuotationMark = 0x2018; -const UChar leftToRightEmbed = 0x202A; -const UChar leftToRightMark = 0x200E; -const UChar leftToRightOverride = 0x202D; -const UChar minusSign = 0x2212; -const UChar newlineCharacter = 0x000A; -const UChar noBreakSpace = 0x00A0; -const UChar objectReplacementCharacter = 0xFFFC; -const UChar popDirectionalFormatting = 0x202C; -const UChar replacementCharacter = 0xFFFD; -const UChar rightDoubleQuotationMark = 0x201D; -const UChar rightSingleQuotationMark = 0x2019; -const UChar rightToLeftEmbed = 0x202B; -const UChar rightToLeftMark = 0x200F; -const UChar rightToLeftOverride = 0x202E; -const UChar sesameDot = 0xFE45; -const UChar softHyphen = 0x00AD; -const UChar space = 0x0020; -const UChar tibetanMarkIntersyllabicTsheg = 0x0F0B; -const UChar tibetanMarkDelimiterTshegBstar = 0x0F0C; -const UChar32 ugariticWordDivider = 0x1039F; -const UChar whiteBullet = 0x25E6; -const UChar whiteCircle = 0x25CB; -const UChar whiteSesameDot = 0xFE46; -const UChar whiteUpPointingTriangle = 0x25B3; -const UChar yenSign = 0x00A5; -const UChar zeroWidthJoiner = 0x200D; -const UChar zeroWidthNonJoiner = 0x200C; -const UChar zeroWidthSpace = 0x200B; 
- -} - -#endif // CharacterNames_h diff --git a/WebCore/platform/text/Hyphenation.cpp b/WebCore/platform/text/Hyphenation.cpp deleted file mode 100644 index 89f6438..0000000 --- a/WebCore/platform/text/Hyphenation.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2010 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "Hyphenation.h" - -#include "NotImplemented.h" - -namespace WebCore { - -bool canHyphenate(const AtomicString& /* localeIdentifier */) -{ - return false; -} - -size_t lastHyphenLocation(const UChar* /* characters */, size_t /* length */, size_t /* beforeIndex */, const AtomicString& /* localeIdentifier */) -{ - ASSERT_NOT_REACHED(); - return 0; -} - -} // namespace WebCore diff --git a/WebCore/platform/text/Hyphenation.h b/WebCore/platform/text/Hyphenation.h deleted file mode 100644 index a99bff0..0000000 --- a/WebCore/platform/text/Hyphenation.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2010 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef Hyphenation_h -#define Hyphenation_h - -#include <wtf/Forward.h> -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - -bool canHyphenate(const AtomicString& localeIdentifier); -size_t lastHyphenLocation(const UChar*, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier); - -} // namespace WebCore - -#endif // Hyphenation_h diff --git a/WebCore/platform/text/LineEnding.cpp b/WebCore/platform/text/LineEnding.cpp deleted file mode 100644 index 00a90eb..0000000 --- a/WebCore/platform/text/LineEnding.cpp +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved. - * Copyright (C) 2010 Google Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "LineEnding.h" - -#include "PlatformString.h" -#include <wtf/text/CString.h> - -namespace { - -class OutputBuffer { -public: - virtual char* allocate(size_t size) = 0; - virtual void copy(const CString&) = 0; - virtual ~OutputBuffer() { } -}; - -class CStringBuffer : public OutputBuffer { -public: - CStringBuffer(CString& buffer) - : m_buffer(buffer) - { - } - virtual ~CStringBuffer() { } - - virtual char* allocate(size_t size) - { - char* ptr; - m_buffer = CString::newUninitialized(size, ptr); - return ptr; - } - - virtual void copy(const CString& source) - { - m_buffer = source; - } - - const CString& buffer() const { return m_buffer; } - -private: - CString m_buffer; -}; - -class VectorCharAppendBuffer : public OutputBuffer { -public: - VectorCharAppendBuffer(Vector<char>& buffer) - : m_buffer(buffer) - { - } - virtual ~VectorCharAppendBuffer() { } - - virtual char* allocate(size_t size) - { - size_t oldSize = m_buffer.size(); - m_buffer.grow(oldSize + size); - return m_buffer.data() + oldSize; - } - - virtual void copy(const CString& source) - { - m_buffer.append(source.data(), source.length()); - } - -private: - Vector<char>& m_buffer; -}; - -void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffer) -{ - // Compute the new length. 
- size_t newLen = 0; - const char* p = from.data(); - while (char c = *p++) { - if (c == '\r') { - // Safe to look ahead because of trailing '\0'. - if (*p != '\n') { - // Turn CR into CRLF. - newLen += 2; - } - } else if (c == '\n') { - // Turn LF into CRLF. - newLen += 2; - } else { - // Leave other characters alone. - newLen += 1; - } - } - if (newLen < from.length()) - return; - - if (newLen == from.length()) { - buffer.copy(from); - return; - } - - p = from.data(); - char* q = buffer.allocate(newLen); - - // Make a copy of the string. - while (char c = *p++) { - if (c == '\r') { - // Safe to look ahead because of trailing '\0'. - if (*p != '\n') { - // Turn CR into CRLF. - *q++ = '\r'; - *q++ = '\n'; - } - } else if (c == '\n') { - // Turn LF into CRLF. - *q++ = '\r'; - *q++ = '\n'; - } else { - // Leave other characters alone. - *q++ = c; - } - } -} - -}; - -namespace WebCore { - -void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR); - -// Normalize all line-endings to CR or LF. -void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR) -{ - // Compute the new length. - size_t newLen = 0; - bool needFix = false; - const char* p = from.data(); - char fromEndingChar = toCR ? '\n' : '\r'; - char toEndingChar = toCR ? '\r' : '\n'; - while (char c = *p++) { - if (c == '\r' && *p == '\n') { - // Turn CRLF into CR or LF. - p++; - needFix = true; - } else if (c == fromEndingChar) { - // Turn CR/LF into LF/CR. - needFix = true; - } - newLen += 1; - } - - // Grow the result buffer. - p = from.data(); - size_t oldResultSize = result.size(); - result.grow(oldResultSize + newLen); - char* q = result.data() + oldResultSize; - - // If no need to fix the string, just copy the string over. - if (!needFix) { - memcpy(q, p, from.length()); - return; - } - - // Make a copy of the string. - while (char c = *p++) { - if (c == '\r' && *p == '\n') { - // Turn CRLF or CR into CR or LF. 
- p++; - *q++ = toEndingChar; - } else if (c == fromEndingChar) { - // Turn CR/LF into LF/CR. - *q++ = toEndingChar; - } else { - // Leave other characters alone. - *q++ = c; - } - } -} - -CString normalizeLineEndingsToCRLF(const CString& from) -{ - CString result; - CStringBuffer buffer(result); - internalNormalizeLineEndingsToCRLF(from, buffer); - return buffer.buffer(); -} - -void normalizeLineEndingsToCR(const CString& from, Vector<char>& result) -{ - normalizeToCROrLF(from, result, true); -} - -void normalizeLineEndingsToLF(const CString& from, Vector<char>& result) -{ - normalizeToCROrLF(from, result, false); -} - -void normalizeLineEndingsToNative(const CString& from, Vector<char>& result) -{ -#if OS(WINDOWS) - VectorCharAppendBuffer buffer(result); - internalNormalizeLineEndingsToCRLF(from, buffer); -#else - normalizeLineEndingsToLF(from, result); -#endif -} - -} // namespace WebCore diff --git a/WebCore/platform/text/LineEnding.h b/WebCore/platform/text/LineEnding.h deleted file mode 100644 index 4306ce8..0000000 --- a/WebCore/platform/text/LineEnding.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved. - * Copyright (C) 2010 Google Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef LineEnding_h -#define LineEnding_h - -#include <wtf/Forward.h> -#include <wtf/Vector.h> - -namespace WebCore { - -// Normalize all line-endings in the given string to CRLF. -CString normalizeLineEndingsToCRLF(const CString& from); - -// Normalize all line-endings in the given string to CR and append the result to the given buffer. -void normalizeLineEndingsToCR(const CString& from, Vector<char>& result); - -// Normalize all line-endings in the given string to LF and append the result to the given buffer. -void normalizeLineEndingsToLF(const CString& from, Vector<char>& result); - -// Normalize all line-endings in the given string to the native line-endings and append the result to the given buffer. -// (Normalize to CRLF on Windows and normalize to LF on all other platforms.) -void normalizeLineEndingsToNative(const CString& from, Vector<char>& result); - -} // namespace WebCore - -#endif // LineEnding_h diff --git a/WebCore/platform/text/ParserUtilities.h b/WebCore/platform/text/ParserUtilities.h deleted file mode 100644 index 3105214..0000000 --- a/WebCore/platform/text/ParserUtilities.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2008 Apple Inc. 
All Rights Reserved. - * Copyright (C) 2002, 2003 The Karbon Developers - * Copyright (C) 2006, 2007 Rob Buis <buis@kde.org> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#ifndef ParserUtilities_h -#define ParserUtilities_h - -#include "PlatformString.h" - -namespace WebCore { - - inline bool skipString(const UChar*& ptr, const UChar* end, const UChar* name, int length) - { - if (end - ptr < length) - return false; - if (memcmp(name, ptr, sizeof(UChar) * length)) - return false; - ptr += length; - return true; - } - - inline bool skipString(const UChar*& ptr, const UChar* end, const char* str) - { - int length = strlen(str); - if (end - ptr < length) - return false; - for (int i = 0; i < length; ++i) { - if (ptr[i] != str[i]) - return false; - } - ptr += length; - return true; - } - -} // namspace WebCore - -#endif // ParserUtilities_h diff --git a/WebCore/platform/text/PlatformString.h b/WebCore/platform/text/PlatformString.h deleted file mode 100644 index e525bd4..0000000 --- a/WebCore/platform/text/PlatformString.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * (C) 1999 Lars Knoll (knoll@kde.org) - * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 
- * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef PlatformString_h -#define PlatformString_h - -// This file would be called String.h, but that conflicts with <string.h> -// on systems without case-sensitive file systems. - -#include <wtf/text/WTFString.h> - -namespace WebCore { - -class SharedBuffer; - -PassRefPtr<SharedBuffer> utf8Buffer(const String&); -// Counts the number of grapheme clusters. A surrogate pair or a sequence -// of a non-combining character and following combining characters is -// counted as 1 grapheme cluster. -unsigned numGraphemeClusters(const String& s); -// Returns the number of characters which will be less than or equal to -// the specified grapheme cluster length. -unsigned numCharactersInGraphemeClusters(const String& s, unsigned); - -} // namespace WebCore - -#endif diff --git a/WebCore/platform/text/RegularExpression.cpp b/WebCore/platform/text/RegularExpression.cpp deleted file mode 100644 index 9b063c9..0000000 --- a/WebCore/platform/text/RegularExpression.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved. - * Copyright (C) 2008 Collabora Ltd. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "RegularExpression.h" - -#include "Logging.h" -#include <pcre/pcre.h> - -namespace WebCore { - -class RegularExpression::Private : public RefCounted<RegularExpression::Private> { -public: - static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity); - ~Private(); - - JSRegExp* regexp() const { return m_regexp; } - int lastMatchLength; - -private: - Private(const String& pattern, TextCaseSensitivity); - static JSRegExp* compile(const String& pattern, TextCaseSensitivity); - - JSRegExp* m_regexp; -}; - -inline JSRegExp* RegularExpression::Private::compile(const String& pattern, TextCaseSensitivity caseSensitivity) -{ - const char* errorMessage; - JSRegExp* regexp = jsRegExpCompile(pattern.characters(), pattern.length(), - caseSensitivity == TextCaseSensitive ? JSRegExpDoNotIgnoreCase : JSRegExpIgnoreCase, JSRegExpSingleLine, - 0, &errorMessage); - if (!regexp) - LOG_ERROR("RegularExpression: pcre_compile failed with '%s'", errorMessage); - return regexp; -} - -inline RegularExpression::Private::Private(const String& pattern, TextCaseSensitivity caseSensitivity) - : lastMatchLength(-1) - , m_regexp(compile(pattern, caseSensitivity)) -{ -} - -inline PassRefPtr<RegularExpression::Private> RegularExpression::Private::create(const String& pattern, TextCaseSensitivity caseSensitivity) -{ - return adoptRef(new Private(pattern, caseSensitivity)); -} - -RegularExpression::Private::~Private() -{ - jsRegExpFree(m_regexp); -} - -RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity) - : d(Private::create(pattern, caseSensitivity)) -{ -} - -RegularExpression::RegularExpression(const RegularExpression& re) - : d(re.d) -{ -} - -RegularExpression::~RegularExpression() -{ -} - -RegularExpression& RegularExpression::operator=(const RegularExpression& re) -{ - d = re.d; - return *this; -} - -int RegularExpression::match(const String& str, int startFrom, int* matchLength) const -{ - if 
(!d->regexp()) - return -1; - - if (str.isNull()) - return -1; - - // First 2 offsets are start and end offsets; 3rd entry is used internally by pcre - static const size_t maxOffsets = 3; - int offsets[maxOffsets]; - int result = jsRegExpExecute(d->regexp(), str.characters(), str.length(), startFrom, offsets, maxOffsets); - if (result < 0) { - if (result != JSRegExpErrorNoMatch) - LOG_ERROR("RegularExpression: pcre_exec() failed with result %d", result); - d->lastMatchLength = -1; - return -1; - } - - // 1 means 1 match; 0 means more than one match. First match is recorded in offsets. - d->lastMatchLength = offsets[1] - offsets[0]; - if (matchLength) - *matchLength = d->lastMatchLength; - return offsets[0]; -} - -int RegularExpression::searchRev(const String& str) const -{ - // FIXME: This could be faster if it actually searched backwards. - // Instead, it just searches forwards, multiple times until it finds the last match. - - int start = 0; - int pos; - int lastPos = -1; - int lastMatchLength = -1; - do { - int matchLength; - pos = match(str, start, &matchLength); - if (pos >= 0) { - if (pos + matchLength > lastPos + lastMatchLength) { - // replace last match if this one is later and not a subset of the last match - lastPos = pos; - lastMatchLength = matchLength; - } - start = pos + 1; - } - } while (pos != -1); - d->lastMatchLength = lastMatchLength; - return lastPos; -} - -int RegularExpression::matchedLength() const -{ - return d->lastMatchLength; -} - -void replace(String& string, const RegularExpression& target, const String& replacement) -{ - int index = 0; - while (index < static_cast<int>(string.length())) { - int matchLength; - index = target.match(string, index, &matchLength); - if (index < 0) - break; - string.replace(index, matchLength, replacement); - index += replacement.length(); - if (!matchLength) - break; // Avoid infinite loop on 0-length matches, e.g. 
[a-z]* - } -} - -} // namespace WebCore diff --git a/WebCore/platform/text/RegularExpression.h b/WebCore/platform/text/RegularExpression.h deleted file mode 100644 index f1611e5..0000000 --- a/WebCore/platform/text/RegularExpression.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2003, 2008, 2009 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef RegularExpression_h -#define RegularExpression_h - -#include "PlatformString.h" - -namespace WebCore { - -class RegularExpression : public FastAllocBase { -public: - RegularExpression(const String&, TextCaseSensitivity); - ~RegularExpression(); - - RegularExpression(const RegularExpression&); - RegularExpression& operator=(const RegularExpression&); - - int match(const String&, int startFrom = 0, int* matchLength = 0) const; - int searchRev(const String&) const; - - int matchedLength() const; - -private: - class Private; - RefPtr<Private> d; -}; - -void replace(String&, const RegularExpression&, const String&); - -} // namespace WebCore - -#endif // RegularExpression_h diff --git a/WebCore/platform/text/SegmentedString.cpp b/WebCore/platform/text/SegmentedString.cpp deleted file mode 100644 index b9ff503..0000000 --- a/WebCore/platform/text/SegmentedString.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* - Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public License - along with this library; see the file COPYING.LIB. If not, write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. 
-*/ - -#include "config.h" -#include "SegmentedString.h" - -namespace WebCore { - -SegmentedString::SegmentedString(const SegmentedString &other) - : m_pushedChar1(other.m_pushedChar1) - , m_pushedChar2(other.m_pushedChar2) - , m_currentString(other.m_currentString) - , m_substrings(other.m_substrings) - , m_composite(other.m_composite) - , m_closed(other.m_closed) -{ - if (other.m_currentChar == &other.m_pushedChar1) - m_currentChar = &m_pushedChar1; - else if (other.m_currentChar == &other.m_pushedChar2) - m_currentChar = &m_pushedChar2; - else - m_currentChar = other.m_currentChar; -} - -const SegmentedString& SegmentedString::operator=(const SegmentedString &other) -{ - m_pushedChar1 = other.m_pushedChar1; - m_pushedChar2 = other.m_pushedChar2; - m_currentString = other.m_currentString; - m_substrings = other.m_substrings; - m_composite = other.m_composite; - if (other.m_currentChar == &other.m_pushedChar1) - m_currentChar = &m_pushedChar1; - else if (other.m_currentChar == &other.m_pushedChar2) - m_currentChar = &m_pushedChar2; - else - m_currentChar = other.m_currentChar; - m_closed = other.m_closed; - m_numberOfCharactersConsumedPriorToCurrentString = other.m_numberOfCharactersConsumedPriorToCurrentString; - return *this; -} - -unsigned SegmentedString::length() const -{ - unsigned length = m_currentString.m_length; - if (m_pushedChar1) { - ++length; - if (m_pushedChar2) - ++length; - } - if (m_composite) { - Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); - Deque<SegmentedSubstring>::const_iterator e = m_substrings.end(); - for (; it != e; ++it) - length += it->m_length; - } - return length; -} - -void SegmentedString::setExcludeLineNumbers() -{ - if (m_composite) { - Deque<SegmentedSubstring>::iterator it = m_substrings.begin(); - Deque<SegmentedSubstring>::iterator e = m_substrings.end(); - for (; it != e; ++it) - it->setExcludeLineNumbers(); - } else - m_currentString.setExcludeLineNumbers(); -} - -void SegmentedString::clear() -{ - 
m_pushedChar1 = 0; - m_pushedChar2 = 0; - m_currentChar = 0; - m_currentString.clear(); - m_substrings.clear(); - m_composite = false; - m_closed = false; -} - -void SegmentedString::append(const SegmentedSubstring &s) -{ - ASSERT(!m_closed); - if (s.m_length) { - if (!m_currentString.m_length) { - m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed(); - m_currentString = s; - } else { - m_substrings.append(s); - m_composite = true; - } - } -} - -void SegmentedString::prepend(const SegmentedSubstring &s) -{ - ASSERT(!escaped()); - ASSERT(!s.numberOfCharactersConsumed()); - if (s.m_length) { - // FIXME: We're assuming that the prepend were originally consumed by - // this SegmentedString. We're also ASSERTing that s is a fresh - // SegmentedSubstring. These assumptions are sufficient for our - // current use, but we might need to handle the more elaborate - // cases in the future. - m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed(); - m_numberOfCharactersConsumedPriorToCurrentString -= s.m_length; - if (!m_currentString.m_length) - m_currentString = s; - else { - // Shift our m_currentString into our list. - m_substrings.prepend(m_currentString); - m_currentString = s; - m_composite = true; - } - } -} - -void SegmentedString::close() -{ - // Closing a stream twice is likely a coding mistake. - ASSERT(!m_closed); - m_closed = true; -} - -void SegmentedString::append(const SegmentedString &s) -{ - ASSERT(!m_closed); - ASSERT(!s.escaped()); - append(s.m_currentString); - if (s.m_composite) { - Deque<SegmentedSubstring>::const_iterator it = s.m_substrings.begin(); - Deque<SegmentedSubstring>::const_iterator e = s.m_substrings.end(); - for (; it != e; ++it) - append(*it); - } - m_currentChar = m_pushedChar1 ? 
&m_pushedChar1 : m_currentString.m_current; -} - -void SegmentedString::prepend(const SegmentedString &s) -{ - ASSERT(!escaped()); - ASSERT(!s.escaped()); - if (s.m_composite) { - Deque<SegmentedSubstring>::const_reverse_iterator it = s.m_substrings.rbegin(); - Deque<SegmentedSubstring>::const_reverse_iterator e = s.m_substrings.rend(); - for (; it != e; ++it) - prepend(*it); - } - prepend(s.m_currentString); - m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current; -} - -void SegmentedString::advanceSubstring() -{ - if (m_composite) { - m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed(); - m_currentString = m_substrings.takeFirst(); - // If we've previously consumed some characters of the non-current - // string, we now account for those characters as part of the current - // string, not as part of "prior to current string." - m_numberOfCharactersConsumedPriorToCurrentString -= m_currentString.numberOfCharactersConsumed(); - if (m_substrings.isEmpty()) - m_composite = false; - } else { - m_currentString.clear(); - } -} - -String SegmentedString::toString() const -{ - String result; - if (m_pushedChar1) { - result.append(m_pushedChar1); - if (m_pushedChar2) - result.append(m_pushedChar2); - } - m_currentString.appendTo(result); - if (m_composite) { - Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); - Deque<SegmentedSubstring>::const_iterator e = m_substrings.end(); - for (; it != e; ++it) - it->appendTo(result); - } - return result; -} - -void SegmentedString::advance(unsigned count, UChar* consumedCharacters) -{ - ASSERT(count <= length()); - for (unsigned i = 0; i < count; ++i) { - consumedCharacters[i] = *current(); - advance(); - } -} - -void SegmentedString::advanceSlowCase() -{ - if (m_pushedChar1) { - m_pushedChar1 = m_pushedChar2; - m_pushedChar2 = 0; - } else if (m_currentString.m_current) { - ++m_currentString.m_current; - if (--m_currentString.m_length == 0) - 
advanceSubstring(); - } - m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current; -} - -void SegmentedString::advanceSlowCase(int& lineNumber) -{ - if (m_pushedChar1) { - m_pushedChar1 = m_pushedChar2; - m_pushedChar2 = 0; - } else if (m_currentString.m_current) { - if (*m_currentString.m_current++ == '\n' && m_currentString.doNotExcludeLineNumbers()) - ++lineNumber; - if (--m_currentString.m_length == 0) - advanceSubstring(); - } - m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current; -} - -} diff --git a/WebCore/platform/text/SegmentedString.h b/WebCore/platform/text/SegmentedString.h deleted file mode 100644 index 91c2cbe..0000000 --- a/WebCore/platform/text/SegmentedString.h +++ /dev/null @@ -1,257 +0,0 @@ -/* - Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public License - along with this library; see the file COPYING.LIB. If not, write to - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. -*/ - -#ifndef SegmentedString_h -#define SegmentedString_h - -#include "PlatformString.h" -#include <wtf/Deque.h> - -namespace WebCore { - -class SegmentedString; - -class SegmentedSubstring { -public: - SegmentedSubstring() : m_length(0), m_current(0), m_doNotExcludeLineNumbers(true) {} - SegmentedSubstring(const String& str) - : m_length(str.length()) - , m_current(str.isEmpty() ? 
0 : str.characters()) - , m_string(str) - , m_doNotExcludeLineNumbers(true) - { - } - - void clear() { m_length = 0; m_current = 0; } - - bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; } - bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; } - - void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; } - - int numberOfCharactersConsumed() const { return m_string.length() - m_length; } - - void appendTo(String& str) const - { - if (m_string.characters() == m_current) { - if (str.isEmpty()) - str = m_string; - else - str.append(m_string); - } else { - str.append(String(m_current, m_length)); - } - } - -public: - int m_length; - const UChar* m_current; - -private: - String m_string; - bool m_doNotExcludeLineNumbers; -}; - -class SegmentedString { -public: - SegmentedString() - : m_pushedChar1(0) - , m_pushedChar2(0) - , m_currentChar(0) - , m_numberOfCharactersConsumedPriorToCurrentString(0) - , m_composite(false) - , m_closed(false) - { - } - - SegmentedString(const String& str) - : m_pushedChar1(0) - , m_pushedChar2(0) - , m_currentString(str) - , m_currentChar(m_currentString.m_current) - , m_numberOfCharactersConsumedPriorToCurrentString(0) - , m_composite(false) - , m_closed(false) - { - } - - SegmentedString(const SegmentedString&); - - const SegmentedString& operator=(const SegmentedString&); - - void clear(); - void close(); - - void append(const SegmentedString&); - void prepend(const SegmentedString&); - - bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); } - void setExcludeLineNumbers(); - - void push(UChar c) - { - if (!m_pushedChar1) { - m_pushedChar1 = c; - m_currentChar = m_pushedChar1 ? 
&m_pushedChar1 : m_currentString.m_current; - } else { - ASSERT(!m_pushedChar2); - m_pushedChar2 = c; - } - } - - bool isEmpty() const { return !current(); } - unsigned length() const; - - bool isClosed() const { return m_closed; } - - enum LookAheadResult { - DidNotMatch, - DidMatch, - NotEnoughCharacters, - }; - - LookAheadResult lookAhead(const String& string) { return lookAheadInline<SegmentedString::equalsLiterally>(string); } - LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline<SegmentedString::equalsIgnoringCase>(string); } - - void advance() - { - if (!m_pushedChar1 && m_currentString.m_length > 1) { - --m_currentString.m_length; - m_currentChar = ++m_currentString.m_current; - return; - } - advanceSlowCase(); - } - - void advanceAndASSERT(UChar expectedCharacter) - { - ASSERT_UNUSED(expectedCharacter, *current() == expectedCharacter); - advance(); - } - - void advanceAndASSERTIgnoringCase(UChar expectedCharacter) - { - ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(*current()) == WTF::Unicode::foldCase(expectedCharacter)); - advance(); - } - - void advancePastNewline(int& lineNumber) - { - ASSERT(*current() == '\n'); - if (!m_pushedChar1 && m_currentString.m_length > 1) { - lineNumber += m_currentString.doNotExcludeLineNumbers(); - --m_currentString.m_length; - m_currentChar = ++m_currentString.m_current; - return; - } - advanceSlowCase(lineNumber); - } - - void advancePastNonNewline() - { - ASSERT(*current() != '\n'); - if (!m_pushedChar1 && m_currentString.m_length > 1) { - --m_currentString.m_length; - m_currentChar = ++m_currentString.m_current; - return; - } - advanceSlowCase(); - } - - void advance(int& lineNumber) - { - if (!m_pushedChar1 && m_currentString.m_length > 1) { - lineNumber += (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers(); - --m_currentString.m_length; - m_currentChar = ++m_currentString.m_current; - return; - } - advanceSlowCase(lineNumber); - } - - // Writes 
the consumed characters into consumedCharacters, which must - // have space for at least |count| characters. - void advance(unsigned count, UChar* consumedCharacters); - - bool escaped() const { return m_pushedChar1; } - - int numberOfCharactersConsumed() - { - // We don't currently handle the case when there are pushed character. - ASSERT(!m_pushedChar1); - return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed(); - } - - String toString() const; - - const UChar& operator*() const { return *current(); } - const UChar* operator->() const { return current(); } - -private: - void append(const SegmentedSubstring&); - void prepend(const SegmentedSubstring&); - - void advanceSlowCase(); - void advanceSlowCase(int& lineNumber); - void advanceSubstring(); - const UChar* current() const { return m_currentChar; } - - static bool equalsLiterally(const UChar* str1, const UChar* str2, size_t count) { return !memcmp(str1, str2, count * sizeof(UChar)); } - static bool equalsIgnoringCase(const UChar* str1, const UChar* str2, size_t count) { return !WTF::Unicode::umemcasecmp(str1, str2, count); } - - template<bool equals(const UChar* str1, const UChar* str2, size_t count)> - inline LookAheadResult lookAheadInline(const String& string) - { - if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) { - if (equals(string.characters(), m_currentString.m_current, string.length())) - return DidMatch; - return DidNotMatch; - } - return lookAheadSlowCase<equals>(string); - } - - template<bool equals(const UChar* str1, const UChar* str2, size_t count)> - LookAheadResult lookAheadSlowCase(const String& string) - { - unsigned count = string.length(); - if (count > length()) - return NotEnoughCharacters; - UChar* consumedCharacters; - String consumedString = String::createUninitialized(count, consumedCharacters); - advance(count, consumedCharacters); - LookAheadResult result = DidNotMatch; - if 
(equals(string.characters(), consumedCharacters, count)) - result = DidMatch; - prepend(SegmentedString(consumedString)); - return result; - } - - UChar m_pushedChar1; - UChar m_pushedChar2; - SegmentedSubstring m_currentString; - const UChar* m_currentChar; - int m_numberOfCharactersConsumedPriorToCurrentString; - Deque<SegmentedSubstring> m_substrings; - bool m_composite; - bool m_closed; -}; - -} - -#endif diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp deleted file mode 100644 index f2f8d2e..0000000 --- a/WebCore/platform/text/String.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * (C) 1999 Lars Knoll (knoll@kde.org) - * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" -#include "PlatformString.h" - -#include "SharedBuffer.h" -#include "TextBreakIterator.h" -#include <wtf/unicode/UTF8.h> -#include <wtf/unicode/Unicode.h> - -using namespace WTF; -using namespace WTF::Unicode; - -namespace WebCore { - -PassRefPtr<SharedBuffer> utf8Buffer(const String& string) -{ - // Allocate a buffer big enough to hold all the characters. 
- const int length = string.length(); - Vector<char> buffer(length * 3); - - // Convert to runs of 8-bit characters. - char* p = buffer.data(); - const UChar* d = string.characters(); - ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), true); - if (result != conversionOK) - return 0; - - buffer.shrink(p - buffer.data()); - return SharedBuffer::adoptVector(buffer); -} - -unsigned numGraphemeClusters(const String& s) -{ - TextBreakIterator* it = characterBreakIterator(s.characters(), s.length()); - if (!it) - return s.length(); - - unsigned num = 0; - while (textBreakNext(it) != TextBreakDone) - ++num; - return num; -} - -unsigned numCharactersInGraphemeClusters(const String& s, unsigned numGraphemeClusters) -{ - TextBreakIterator* it = characterBreakIterator(s.characters(), s.length()); - if (!it) - return min(s.length(), numGraphemeClusters); - - for (unsigned i = 0; i < numGraphemeClusters; ++i) { - if (textBreakNext(it) == TextBreakDone) - return s.length(); - } - return textBreakCurrent(it); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/SuffixTree.h b/WebCore/platform/text/SuffixTree.h deleted file mode 100644 index f11fd23..0000000 --- a/WebCore/platform/text/SuffixTree.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (C) 2010 Adam Barth. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SuffixTree_h -#define SuffixTree_h - -#include "PlatformString.h" -#include <wtf/Vector.h> - -namespace WebCore { - -class UnicodeCodebook { -public: - static int codeWord(UChar c) { return c; } - enum { codeSize = 1 << 8 * sizeof(UChar) }; -}; - -class ASCIICodebook { -public: - static int codeWord(UChar c) { return c & (codeSize - 1); } - enum { codeSize = 1 << (8 * sizeof(char) - 1) }; -}; - -template<typename Codebook> -class SuffixTree { -public: - SuffixTree(const String& text, unsigned depth) - : m_depth(depth) - , m_leaf(true) - { - build(text); - } - - bool mightContain(const String& query) - { - Node* current = &m_root; - int limit = std::min(m_depth, query.length()); - for (int i = 0; i < limit; ++i) { - current = current->at(Codebook::codeWord(query[i])); - if (!current) - return false; - } - return true; - } - -private: - class Node { - public: - Node(bool isLeaf = false) - { - m_children.resize(Codebook::codeSize); - m_children.fill(0); - m_isLeaf = isLeaf; - } - - ~Node() - { - for (unsigned i = 0; i < m_children.size(); ++i) { - Node* child = m_children.at(i); - if (child && !child->m_isLeaf) - delete child; - } - } - - Node*& at(int codeWord) { return m_children.at(codeWord); } - - private: - typedef Vector<Node*, 
Codebook::codeSize> ChildrenVector; - - ChildrenVector m_children; - bool m_isLeaf; - }; - - void build(const String& text) - { - for (unsigned base = 0; base < text.length(); ++base) { - Node* current = &m_root; - unsigned limit = std::min(base + m_depth, text.length()); - for (unsigned offset = 0; base + offset < limit; ++offset) { - ASSERT(current != &m_leaf); - Node*& child = current->at(Codebook::codeWord(text[base + offset])); - if (!child) - child = base + offset + 1 == limit ? &m_leaf : new Node(); - current = child; - } - } - } - - Node m_root; - unsigned m_depth; - - // Instead of allocating a fresh empty leaf node for ever leaf in the tree - // (there can be a lot of these), we alias all the leaves to this "static" - // leaf node. - Node m_leaf; -}; - -} // namespace WebCore - -#endif // SuffixTree_h diff --git a/WebCore/platform/text/TextBoundaries.cpp b/WebCore/platform/text/TextBoundaries.cpp deleted file mode 100644 index fbb261b..0000000 --- a/WebCore/platform/text/TextBoundaries.cpp +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (C) 2006, 2007 Apple Inc. All rights reserved. - * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextBoundaries.h" - -#include "TextBreakIterator.h" -#include <wtf/text/StringImpl.h> -#include <wtf/unicode/Unicode.h> - -using namespace WTF; -using namespace Unicode; - -namespace WebCore { - -int endOfFirstWordBoundaryContext(const UChar* characters, int length) -{ - for (int i = 0; i < length; ) { - int first = i; - UChar32 ch; - U16_NEXT(characters, i, length, ch); - if (!requiresContextForWordBoundary(ch)) - return first; - } - return length; -} - -int startOfLastWordBoundaryContext(const UChar* characters, int length) -{ - for (int i = length; i > 0; ) { - int last = i; - UChar32 ch; - U16_PREV(characters, 0, i, ch); - if (!requiresContextForWordBoundary(ch)) - return last; - } - return 0; -} - -#if !PLATFORM(BREWMP) && !PLATFORM(MAC) && !PLATFORM(QT) - -int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward) -{ - TextBreakIterator* it = wordBreakIterator(chars, len); - - if (forward) { - position = textBreakFollowing(it, position); - while (position != TextBreakDone) { - // We stop searching when the character preceeding the break - // is alphanumeric. - if (position < len && isAlphanumeric(chars[position - 1])) - return position; - - position = textBreakFollowing(it, position); - } - - return len; - } else { - position = textBreakPreceding(it, position); - while (position != TextBreakDone) { - // We stop searching when the character following the break - // is alphanumeric. 
- if (position > 0 && isAlphanumeric(chars[position])) - return position; - - position = textBreakPreceding(it, position); - } - - return 0; - } -} - -void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end) -{ - TextBreakIterator* it = wordBreakIterator(chars, len); - *end = textBreakFollowing(it, position); - if (*end < 0) - *end = textBreakLast(it); - *start = textBreakPrevious(it); -} - -#endif // !PLATFORM(BREWMP) && !PLATFORM(MAC) && !PLATFORM(QT) - -} // namespace WebCore diff --git a/WebCore/platform/text/TextBoundaries.h b/WebCore/platform/text/TextBoundaries.h deleted file mode 100644 index 870ab62..0000000 --- a/WebCore/platform/text/TextBoundaries.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TextBoundaries_h -#define TextBoundaries_h - -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - - inline bool requiresContextForWordBoundary(UChar32 ch) - { - return WTF::Unicode::hasLineBreakingPropertyComplexContext(ch); - } - - int endOfFirstWordBoundaryContext(const UChar* characters, int length); - int startOfLastWordBoundaryContext(const UChar* characters, int length); - - void findWordBoundary(const UChar*, int len, int position, int* start, int* end); - int findNextWordFromIndex(const UChar*, int len, int position, bool forward); - -} - -#endif diff --git a/WebCore/platform/text/TextBreakIterator.h b/WebCore/platform/text/TextBreakIterator.h deleted file mode 100644 index 17cf5f0..0000000 --- a/WebCore/platform/text/TextBreakIterator.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> - * Copyright (C) 2007 Apple Inc. All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. 
- * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef TextBreakIterator_h -#define TextBreakIterator_h - -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - - class TextBreakIterator; - - // Note: The returned iterator is good only until you get another iterator. - - // Iterates over "extended grapheme clusters", as defined in UAX #29. - // Note that platform implementations may be less sophisticated - e.g. ICU prior to - // version 4.0 only supports "legacy grapheme clusters". - // Use this for general text processing, e.g. string truncation. - TextBreakIterator* characterBreakIterator(const UChar*, int length); - - // This is similar to character break iterator in most cases, but is subject to - // platform UI conventions. One notable example where this can be different - // from character break iterator is Thai prepend characters, see bug 24342. - // Use this for insertion point and selection manipulations. 
- TextBreakIterator* cursorMovementIterator(const UChar*, int length); - - TextBreakIterator* wordBreakIterator(const UChar*, int length); - TextBreakIterator* lineBreakIterator(const UChar*, int length); - TextBreakIterator* sentenceBreakIterator(const UChar*, int length); - - int textBreakFirst(TextBreakIterator*); - int textBreakLast(TextBreakIterator*); - int textBreakNext(TextBreakIterator*); - int textBreakPrevious(TextBreakIterator*); - int textBreakCurrent(TextBreakIterator*); - int textBreakPreceding(TextBreakIterator*, int); - int textBreakFollowing(TextBreakIterator*, int); - bool isTextBreak(TextBreakIterator*, int); - - const int TextBreakDone = -1; - -} - -#endif diff --git a/WebCore/platform/text/TextBreakIteratorICU.cpp b/WebCore/platform/text/TextBreakIteratorICU.cpp deleted file mode 100644 index f5575ee..0000000 --- a/WebCore/platform/text/TextBreakIteratorICU.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> - * Copyright (C) 2007 Apple Inc. All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - -#include "config.h" -#include "TextBreakIterator.h" - -#include "PlatformString.h" -#include "TextBreakIteratorInternalICU.h" -#include <unicode/ubrk.h> -#include <wtf/Assertions.h> - -namespace WebCore { - -static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator, - UBreakIteratorType type, const UChar* string, int length) -{ - if (!string) - return 0; - - if (!createdIterator) { - UErrorCode openStatus = U_ZERO_ERROR; - iterator = reinterpret_cast<TextBreakIterator*>(ubrk_open(type, currentTextBreakLocaleID(), 0, 0, &openStatus)); - createdIterator = true; - ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus); - } - if (!iterator) - return 0; - - UErrorCode setTextStatus = U_ZERO_ERROR; - ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &setTextStatus); - if (U_FAILURE(setTextStatus)) - return 0; - - return iterator; -} - -TextBreakIterator* characterBreakIterator(const UChar* string, int length) -{ - static bool createdCharacterBreakIterator = false; - static TextBreakIterator* staticCharacterBreakIterator; - return setUpIterator(createdCharacterBreakIterator, - staticCharacterBreakIterator, UBRK_CHARACTER, string, length); -} - -TextBreakIterator* wordBreakIterator(const UChar* string, int length) -{ - static bool createdWordBreakIterator = false; - static TextBreakIterator* staticWordBreakIterator; - return setUpIterator(createdWordBreakIterator, - staticWordBreakIterator, UBRK_WORD, string, length); -} - -TextBreakIterator* lineBreakIterator(const UChar* string, int length) -{ - static bool createdLineBreakIterator = false; - static TextBreakIterator* staticLineBreakIterator; - return setUpIterator(createdLineBreakIterator, - staticLineBreakIterator, UBRK_LINE, string, length); -} - -TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) -{ - static bool createdSentenceBreakIterator = false; - 
static TextBreakIterator* staticSentenceBreakIterator; - return setUpIterator(createdSentenceBreakIterator, - staticSentenceBreakIterator, UBRK_SENTENCE, string, length); -} - -int textBreakFirst(TextBreakIterator* iterator) -{ - return ubrk_first(reinterpret_cast<UBreakIterator*>(iterator)); -} - -int textBreakLast(TextBreakIterator* iterator) -{ - return ubrk_last(reinterpret_cast<UBreakIterator*>(iterator)); -} - -int textBreakNext(TextBreakIterator* iterator) -{ - return ubrk_next(reinterpret_cast<UBreakIterator*>(iterator)); -} - -int textBreakPrevious(TextBreakIterator* iterator) -{ - return ubrk_previous(reinterpret_cast<UBreakIterator*>(iterator)); -} - -int textBreakPreceding(TextBreakIterator* iterator, int pos) -{ - return ubrk_preceding(reinterpret_cast<UBreakIterator*>(iterator), pos); -} - -int textBreakFollowing(TextBreakIterator* iterator, int pos) -{ - return ubrk_following(reinterpret_cast<UBreakIterator*>(iterator), pos); -} - -int textBreakCurrent(TextBreakIterator* iterator) -{ - return ubrk_current(reinterpret_cast<UBreakIterator*>(iterator)); -} - -bool isTextBreak(TextBreakIterator* iterator, int position) -{ - return ubrk_isBoundary(reinterpret_cast<UBreakIterator*>(iterator), position); -} - -#ifndef BUILDING_ON_TIGER -static TextBreakIterator* setUpIteratorWithRules(bool& createdIterator, TextBreakIterator*& iterator, - const char* breakRules, const UChar* string, int length) -{ - if (!string) - return 0; - - if (!createdIterator) { - UParseError parseStatus; - UErrorCode openStatus = U_ZERO_ERROR; - String rules(breakRules); - iterator = reinterpret_cast<TextBreakIterator*>(ubrk_openRules(rules.characters(), rules.length(), 0, 0, &parseStatus, &openStatus)); - createdIterator = true; - ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus); - } - if (!iterator) - return 0; - - UErrorCode setTextStatus = U_ZERO_ERROR; - 
ubrk_setText(reinterpret_cast<UBreakIterator*>(iterator), string, length, &setTextStatus); - if (U_FAILURE(setTextStatus)) - return 0; - - return iterator; -} -#endif // BUILDING_ON_TIGER - -TextBreakIterator* cursorMovementIterator(const UChar* string, int length) -{ -#ifdef BUILDING_ON_TIGER - // ICU 3.2 cannot compile the below rules. - return characterBreakIterator(string, length); -#else - // This rule set is based on character-break iterator rules of ICU 4.0 - // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>. - // The major differences from the original ones are listed below: - // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier; - // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342); - // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and; - // * Added rules that prevent a cursor from moving before Japanese half-width katakara voiced marks. 
- static const char* kRules = - "$CR = [\\p{Grapheme_Cluster_Break = CR}];" - "$LF = [\\p{Grapheme_Cluster_Break = LF}];" - "$Control = [\\p{Grapheme_Cluster_Break = Control}];" - "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks - "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];" - "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];" - "$L = [\\p{Grapheme_Cluster_Break = L}];" - "$V = [\\p{Grapheme_Cluster_Break = V}];" - "$T = [\\p{Grapheme_Cluster_Break = T}];" - "$LV = [\\p{Grapheme_Cluster_Break = LV}];" - "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];" - "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha - "$HinV = \\u094D;" // Devanagari Sign Virama - "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha - "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha - "$BenV = \\u09CD;" // Bengali Sign Virama - "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha - "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha - "$PanV = \\u0A4D;" // Gurmukhi Sign Virama - "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha - "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha - "$GujV = \\u0ACD;" // Gujarati Sign Virama - "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha - "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha - "$OriV = \\u0B4D;" // Oriya Sign Virama - "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha - "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha - "$TelV = \\u0C4D;" // Telugu Sign Virama - "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha - "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha - "$KanV = \\u0CCD;" // Kannada Sign Virama - "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter A,...,Ha - "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha - "$MalV = \\u0D4D;" // Malayalam Sign Virama - "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter A,...,Ha - "!!chain;" - "!!forward;" - "$CR 
$LF;" - "$L ($L | $V | $LV | $LVT);" - "($LV | $V) ($V | $T);" - "($LVT | $T) $T;" - "[^$Control $CR $LF] $Extend;" - "[^$Control $CR $LF] $SpacingMark;" - "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward) - "$Ben0 $BenV $Ben1;" // Bengali Virama (forward) - "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward) - "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward) - "$Ori0 $OriV $Ori1;" // Oriya Virama (forward) - "$Tel0 $TelV $Tel1;" // Telugu Virama (forward) - "$Kan0 $KanV $Kan1;" // Kannada Virama (forward) - "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward) - "!!reverse;" - "$LF $CR;" - "($L | $V | $LV | $LVT) $L;" - "($V | $T) ($LV | $V);" - "$T ($LVT | $T);" - "$Extend [^$Control $CR $LF];" - "$SpacingMark [^$Control $CR $LF];" - "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward) - "$Ben1 $BenV $Ben0;" // Bengali Virama (backward) - "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward) - "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward) - "$Ori1 $OriV $Ori0;" // Gujarati Virama (backward) - "$Tel1 $TelV $Tel0;" // Telugu Virama (backward) - "$Kan1 $KanV $Kan0;" // Kannada Virama (backward) - "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward) - "!!safe_reverse;" - "!!safe_forward;"; - static bool createdCursorMovementIterator = false; - static TextBreakIterator* staticCursorMovementIterator; - return setUpIteratorWithRules(createdCursorMovementIterator, staticCursorMovementIterator, kRules, string, length); -#endif // BUILDING_ON_TIGER -} - -} diff --git a/WebCore/platform/text/TextBreakIteratorInternalICU.h b/WebCore/platform/text/TextBreakIteratorInternalICU.h deleted file mode 100644 index 68b7003..0000000 --- a/WebCore/platform/text/TextBreakIteratorInternalICU.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2007 Apple Inc. All rights reserved. 
- * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef TextBreakIteratorInternalICU_h -#define TextBreakIteratorInternalICU_h - -// FIXME: Now that this handles locales for ICU, not just for text breaking, -// this file and the various implementation files should be renamed. - -namespace WebCore { - - const char* currentSearchLocaleID(); - const char* currentTextBreakLocaleID(); - -} - -#endif diff --git a/WebCore/platform/text/TextCodec.cpp b/WebCore/platform/text/TextCodec.cpp deleted file mode 100644 index 4222ee1..0000000 --- a/WebCore/platform/text/TextCodec.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextCodec.h" - -#include "PlatformString.h" -#include <wtf/StringExtras.h> - -namespace WebCore { - -TextCodec::~TextCodec() -{ -} - -int TextCodec::getUnencodableReplacement(unsigned codePoint, UnencodableHandling handling, UnencodableReplacementArray replacement) -{ - switch (handling) { - case QuestionMarksForUnencodables: - replacement[0] = '?'; - replacement[1] = 0; - return 1; - case EntitiesForUnencodables: - snprintf(replacement, sizeof(UnencodableReplacementArray), "&#%u;", codePoint); - return static_cast<int>(strlen(replacement)); - case URLEncodedEntitiesForUnencodables: - snprintf(replacement, sizeof(UnencodableReplacementArray), "%%26%%23%u%%3B", codePoint); - return static_cast<int>(strlen(replacement)); - } - ASSERT_NOT_REACHED(); - replacement[0] = 0; - return 0; -} - -} // namespace WebCore diff --git a/WebCore/platform/text/TextCodec.h b/WebCore/platform/text/TextCodec.h deleted file mode 100644 index c6af38a..0000000 --- 
a/WebCore/platform/text/TextCodec.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TextCodec_h -#define TextCodec_h - -#include <memory> -#include <wtf/Forward.h> -#include <wtf/Noncopyable.h> -#include <wtf/PassOwnPtr.h> -#include <wtf/Vector.h> -#include <wtf/unicode/Unicode.h> - -#include "PlatformString.h" - -namespace WebCore { - class TextEncoding; - - // Specifies what will happen when a character is encountered that is - // not encodable in the character set. - enum UnencodableHandling { - // Substitutes the replacement character "?". 
- QuestionMarksForUnencodables, - - // Encodes the character as an XML entity. For example, U+06DE - // would be "۞" (0x6DE = 1758 in octal). - EntitiesForUnencodables, - - // Encodes the character as en entity as above, but escaped - // non-alphanumeric characters. This is used in URLs. - // For example, U+6DE would be "%26%231758%3B". - URLEncodedEntitiesForUnencodables, - }; - - typedef char UnencodableReplacementArray[32]; - - class TextCodec : public Noncopyable { - public: - virtual ~TextCodec(); - - String decode(const char* str, size_t length, bool flush = false) - { - bool ignored; - return decode(str, length, flush, false, ignored); - } - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) = 0; - virtual CString encode(const UChar*, size_t length, UnencodableHandling) = 0; - - // Fills a null-terminated string representation of the given - // unencodable character into the given replacement buffer. - // The length of the string (not including the null) will be returned. - static int getUnencodableReplacement(unsigned codePoint, UnencodableHandling, UnencodableReplacementArray); - }; - - typedef void (*EncodingNameRegistrar)(const char* alias, const char* name); - - typedef PassOwnPtr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData); - typedef void (*TextCodecRegistrar)(const char* name, NewTextCodecFunction, const void* additionalData); - -} // namespace WebCore - -#endif // TextCodec_h diff --git a/WebCore/platform/text/TextCodecICU.cpp b/WebCore/platform/text/TextCodecICU.cpp deleted file mode 100644 index 6a579f9..0000000 --- a/WebCore/platform/text/TextCodecICU.cpp +++ /dev/null @@ -1,490 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved. 
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "TextCodecICU.h" - -#include "CharacterNames.h" -#include "PlatformString.h" -#include "ThreadGlobalData.h" -#include <unicode/ucnv.h> -#include <unicode/ucnv_cb.h> -#include <wtf/Assertions.h> -#include <wtf/text/CString.h> -#include <wtf/PassOwnPtr.h> -#include <wtf/StringExtras.h> -#include <wtf/Threading.h> - -using std::min; - -namespace WebCore { - -const size_t ConversionBufferSize = 16384; - -ICUConverterWrapper::~ICUConverterWrapper() -{ - if (converter) - ucnv_close(converter); -} - -static UConverter*& cachedConverterICU() -{ - return threadGlobalData().cachedConverterICU().converter; -} - -static PassOwnPtr<TextCodec> newTextCodecICU(const TextEncoding& encoding, const void*) -{ - return new TextCodecICU(encoding); -} - -void TextCodecICU::registerBaseEncodingNames(EncodingNameRegistrar registrar) -{ - registrar("UTF-8", "UTF-8"); -} - -void TextCodecICU::registerBaseCodecs(TextCodecRegistrar registrar) -{ - registrar("UTF-8", newTextCodecICU, 0); -} - -void TextCodecICU::registerExtendedEncodingNames(EncodingNameRegistrar registrar) -{ - // We register Hebrew with logical ordering using a separate name. - // Otherwise, this would share the same canonical name as the - // visual ordering case, and then TextEncoding could not tell them - // apart; ICU treats these names as synonyms. - registrar("ISO-8859-8-I", "ISO-8859-8-I"); - - int32_t numEncodings = ucnv_countAvailable(); - for (int32_t i = 0; i < numEncodings; ++i) { - const char* name = ucnv_getAvailableName(i); - UErrorCode error = U_ZERO_ERROR; - // Try MIME before trying IANA to pick up commonly used names like - // 'EUC-JP' instead of horrendously long names like - // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. 
- const char* standardName = ucnv_getStandardName(name, "MIME", &error); - if (!U_SUCCESS(error) || !standardName) { - error = U_ZERO_ERROR; - // Try IANA to pick up 'windows-12xx' and other names - // which are not preferred MIME names but are widely used. - standardName = ucnv_getStandardName(name, "IANA", &error); - if (!U_SUCCESS(error) || !standardName) - continue; - } - - // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers. - // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding - // for encoding GB_2312-80 and several others. So, we need to override this behavior, too. - if (strcmp(standardName, "GB2312") == 0 || strcmp(standardName, "GB_2312-80") == 0) - standardName = "GBK"; - // Similarly, EUC-KR encodings all map to an extended version. - else if (strcmp(standardName, "KSC_5601") == 0 || strcmp(standardName, "EUC-KR") == 0 || strcmp(standardName, "cp1363") == 0) - standardName = "windows-949"; - // And so on. - else if (strcasecmp(standardName, "iso-8859-9") == 0) // This name is returned in different case by ICU 3.2 and 3.6. - standardName = "windows-1254"; - else if (strcmp(standardName, "TIS-620") == 0) - standardName = "windows-874"; - - registrar(standardName, standardName); - - uint16_t numAliases = ucnv_countAliases(name, &error); - ASSERT(U_SUCCESS(error)); - if (U_SUCCESS(error)) - for (uint16_t j = 0; j < numAliases; ++j) { - error = U_ZERO_ERROR; - const char* alias = ucnv_getAlias(name, j, &error); - ASSERT(U_SUCCESS(error)); - if (U_SUCCESS(error) && alias != standardName) - registrar(alias, standardName); - } - } - - // Additional aliases. - // These are present in modern versions of ICU, but not in ICU 3.2 (shipped with Mac OS X 10.4). - registrar("macroman", "macintosh"); - registrar("maccyrillic", "x-mac-cyrillic"); - - // Additional aliases that historically were present in the encoding - // table in WebKit on Macintosh that don't seem to be present in ICU. 
- // Perhaps we can prove these are not used on the web and remove them. - // Or perhaps we can get them added to ICU. - registrar("x-mac-roman", "macintosh"); - registrar("x-mac-ukrainian", "x-mac-cyrillic"); - registrar("cn-big5", "Big5"); - registrar("x-x-big5", "Big5"); - registrar("cn-gb", "GBK"); - registrar("csgb231280", "GBK"); - registrar("x-euc-cn", "GBK"); - registrar("x-gbk", "GBK"); - registrar("csISO88598I", "ISO-8859-8-I"); - registrar("koi", "KOI8-R"); - registrar("logical", "ISO-8859-8-I"); - registrar("unicode11utf8", "UTF-8"); - registrar("unicode20utf8", "UTF-8"); - registrar("x-unicode20utf8", "UTF-8"); - registrar("visual", "ISO-8859-8"); - registrar("winarabic", "windows-1256"); - registrar("winbaltic", "windows-1257"); - registrar("wincyrillic", "windows-1251"); - registrar("iso-8859-11", "windows-874"); - registrar("iso8859-11", "windows-874"); - registrar("dos-874", "windows-874"); - registrar("wingreek", "windows-1253"); - registrar("winhebrew", "windows-1255"); - registrar("winlatin2", "windows-1250"); - registrar("winturkish", "windows-1254"); - registrar("winvietnamese", "windows-1258"); - registrar("x-cp1250", "windows-1250"); - registrar("x-cp1251", "windows-1251"); - registrar("x-euc", "EUC-JP"); - registrar("x-windows-949", "windows-949"); - registrar("x-uhc", "windows-949"); - registrar("utf8", "UTF-8"); - registrar("shift-jis", "Shift_JIS"); - - // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names. - // They are not present in ICU 3.2. - registrar("dos-720", "cp864"); - registrar("jis7", "ISO-2022-JP"); - - // Alternative spelling of ISO encoding names. 
- registrar("ISO8859-1", "ISO-8859-1"); - registrar("ISO8859-2", "ISO-8859-2"); - registrar("ISO8859-3", "ISO-8859-3"); - registrar("ISO8859-4", "ISO-8859-4"); - registrar("ISO8859-5", "ISO-8859-5"); - registrar("ISO8859-6", "ISO-8859-6"); - registrar("ISO8859-7", "ISO-8859-7"); - registrar("ISO8859-8", "ISO-8859-8"); - registrar("ISO8859-8-I", "ISO-8859-8-I"); - registrar("ISO8859-9", "ISO-8859-9"); - registrar("ISO8859-10", "ISO-8859-10"); - registrar("ISO8859-13", "ISO-8859-13"); - registrar("ISO8859-14", "ISO-8859-14"); - registrar("ISO8859-15", "ISO-8859-15"); - // Not registering ISO8859-16, because Firefox (as of version 3.6.6) doesn't know this particular alias, - // and because older versions of ICU don't support ISO-8859-16 encoding at all. -} - -void TextCodecICU::registerExtendedCodecs(TextCodecRegistrar registrar) -{ - // See comment above in registerEncodingNames. - registrar("ISO-8859-8-I", newTextCodecICU, 0); - - int32_t numEncodings = ucnv_countAvailable(); - for (int32_t i = 0; i < numEncodings; ++i) { - const char* name = ucnv_getAvailableName(i); - UErrorCode error = U_ZERO_ERROR; - const char* standardName = ucnv_getStandardName(name, "MIME", &error); - if (!U_SUCCESS(error) || !standardName) { - error = U_ZERO_ERROR; - standardName = ucnv_getStandardName(name, "IANA", &error); - if (!U_SUCCESS(error) || !standardName) - continue; - } - registrar(standardName, newTextCodecICU, 0); - } -} - -TextCodecICU::TextCodecICU(const TextEncoding& encoding) - : m_encoding(encoding) - , m_numBufferedBytes(0) - , m_converterICU(0) - , m_needsGBKFallbacks(false) -{ -} - -TextCodecICU::~TextCodecICU() -{ - releaseICUConverter(); -} - -void TextCodecICU::releaseICUConverter() const -{ - if (m_converterICU) { - UConverter*& cachedConverter = cachedConverterICU(); - if (cachedConverter) - ucnv_close(cachedConverter); - cachedConverter = m_converterICU; - m_converterICU = 0; - } -} - -void TextCodecICU::createICUConverter() const -{ - ASSERT(!m_converterICU); - 
- const char* name = m_encoding.name(); - m_needsGBKFallbacks = name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3]; - - UErrorCode err; - - UConverter*& cachedConverter = cachedConverterICU(); - if (cachedConverter) { - err = U_ZERO_ERROR; - const char* cachedName = ucnv_getName(cachedConverter, &err); - if (U_SUCCESS(err) && m_encoding == cachedName) { - m_converterICU = cachedConverter; - cachedConverter = 0; - return; - } - } - - err = U_ZERO_ERROR; - m_converterICU = ucnv_open(m_encoding.name(), &err); -#if !LOG_DISABLED - if (err == U_AMBIGUOUS_ALIAS_WARNING) - LOG_ERROR("ICU ambiguous alias warning for encoding: %s", m_encoding.name()); -#endif - if (m_converterICU) - ucnv_setFallback(m_converterICU, TRUE); -} - -int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err) -{ - UChar* targetStart = target; - err = U_ZERO_ERROR; - ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err); - return target - targetStart; -} - -class ErrorCallbackSetter { -public: - ErrorCallbackSetter(UConverter* converter, bool stopOnError) - : m_converter(converter) - , m_shouldStopOnEncodingErrors(stopOnError) - { - if (m_shouldStopOnEncodingErrors) { - UErrorCode err = U_ZERO_ERROR; - ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE, - UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction, - &m_savedContext, &err); - ASSERT(err == U_ZERO_ERROR); - } - } - ~ErrorCallbackSetter() - { - if (m_shouldStopOnEncodingErrors) { - UErrorCode err = U_ZERO_ERROR; - const void* oldContext; - UConverterToUCallback oldAction; - ucnv_setToUCallBack(m_converter, m_savedAction, - m_savedContext, &oldAction, - &oldContext, &err); - ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE); - ASSERT(!strcmp(static_cast<const char*>(oldContext), UCNV_SUB_STOP_ON_ILLEGAL)); - ASSERT(err == U_ZERO_ERROR); - } - } -private: - UConverter* m_converter; - 
bool m_shouldStopOnEncodingErrors; - const void* m_savedContext; - UConverterToUCallback m_savedAction; -}; - -String TextCodecICU::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) -{ - // Get a converter for the passed-in encoding. - if (!m_converterICU) { - createICUConverter(); - ASSERT(m_converterICU); - if (!m_converterICU) { - LOG_ERROR("error creating ICU encoder even though encoding was in table"); - return String(); - } - } - - ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError); - - Vector<UChar> result; - - UChar buffer[ConversionBufferSize]; - UChar* bufferLimit = buffer + ConversionBufferSize; - const char* source = reinterpret_cast<const char*>(bytes); - const char* sourceLimit = source + length; - int32_t* offsets = NULL; - UErrorCode err = U_ZERO_ERROR; - - do { - int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, flush, err); - result.append(buffer, ucharsDecoded); - } while (err == U_BUFFER_OVERFLOW_ERROR); - - if (U_FAILURE(err)) { - // flush the converter so it can be reused, and not be bothered by this error. - do { - decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, err); - } while (source < sourceLimit); - sawError = true; - } - - String resultString = String::adopt(result); - - // <http://bugs.webkit.org/show_bug.cgi?id=17014> - // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5. 
- if (strcmp(m_encoding.name(), "GBK") == 0 || strcasecmp(m_encoding.name(), "gb18030") == 0) - resultString.replace(0xE5E5, ideographicSpace); - - return resultString; -} - -// We need to apply these fallbacks ourselves as they are not currently supported by ICU and -// they were provided by the old TEC encoding path -// Needed to fix <rdar://problem/4708689> -static UChar getGbkEscape(UChar32 codePoint) -{ - switch (codePoint) { - case 0x01F9: - return 0xE7C8; - case 0x1E3F: - return 0xE7C7; - case 0x22EF: - return 0x2026; - case 0x301C: - return 0xFF5E; - default: - return 0; - } -} - -// Invalid character handler when writing escaped entities for unrepresentable -// characters. See the declaration of TextCodec::encode for more. -static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, - UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) -{ - if (reason == UCNV_UNASSIGNED) { - *err = U_ZERO_ERROR; - - UnencodableReplacementArray entity; - int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncodedEntitiesForUnencodables, entity); - ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err); - } else - UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err); -} - -// Substitutes special GBK characters, escaping all other unassigned entities. 
-static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, - UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) -{ - UChar outChar; - if (reason == UCNV_UNASSIGNED && (outChar = getGbkEscape(codePoint))) { - const UChar* source = &outChar; - *err = U_ZERO_ERROR; - ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); - return; - } - UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err); -} - -// Combines both gbkUrlEscapedEntityCallback and GBK character substitution. -static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, - UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) -{ - if (reason == UCNV_UNASSIGNED) { - if (UChar outChar = getGbkEscape(codePoint)) { - const UChar* source = &outChar; - *err = U_ZERO_ERROR; - ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); - return; - } - urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, reason, err); - return; - } - UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err); -} - -static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length, - UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) -{ - UChar outChar; - if (reason == UCNV_UNASSIGNED && (outChar = getGbkEscape(codePoint))) { - const UChar* source = &outChar; - *err = U_ZERO_ERROR; - ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); - return; - } - UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err); -} - -CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling) -{ - if (!length) - return ""; - - if (!m_converterICU) - createICUConverter(); - if (!m_converterICU) - 
return CString(); - - // FIXME: We should see if there is "force ASCII range" mode in ICU; - // until then, we change the backslash into a yen sign. - // Encoding will change the yen sign back into a backslash. - String copy(characters, length); - copy = m_encoding.displayString(copy.impl()); - - const UChar* source = copy.characters(); - const UChar* sourceLimit = source + copy.length(); - - UErrorCode err = U_ZERO_ERROR; - - switch (handling) { - case QuestionMarksForUnencodables: - ucnv_setSubstChars(m_converterICU, "?", 1, &err); - ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err); - break; - case EntitiesForUnencodables: - ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err); - break; - case URLEncodedEntitiesForUnencodables: - ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err); - break; - } - - ASSERT(U_SUCCESS(err)); - if (U_FAILURE(err)) - return CString(); - - Vector<char> result; - size_t size = 0; - do { - char buffer[ConversionBufferSize]; - char* target = buffer; - char* targetLimit = target + ConversionBufferSize; - err = U_ZERO_ERROR; - ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, 0, true, &err); - size_t count = target - buffer; - result.grow(size + count); - memcpy(result.data() + size, buffer, count); - size += count; - } while (err == U_BUFFER_OVERFLOW_ERROR); - - return CString(result.data(), size); -} - - -} // namespace WebCore diff --git a/WebCore/platform/text/TextCodecICU.h b/WebCore/platform/text/TextCodecICU.h deleted file mode 100644 index bf517f7..0000000 --- a/WebCore/platform/text/TextCodecICU.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved. 
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef TextCodecICU_h -#define TextCodecICU_h - -#include "TextCodec.h" -#include "TextEncoding.h" - -#include <unicode/utypes.h> - -typedef struct UConverter UConverter; - -namespace WebCore { - - class TextCodecICU : public TextCodec { - public: - static void registerBaseEncodingNames(EncodingNameRegistrar); - static void registerBaseCodecs(TextCodecRegistrar); - - static void registerExtendedEncodingNames(EncodingNameRegistrar); - static void registerExtendedCodecs(TextCodecRegistrar); - - TextCodecICU(const TextEncoding&); - virtual ~TextCodecICU(); - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); - - private: - void createICUConverter() const; - void releaseICUConverter() const; - bool needsGBKFallbacks() const { return m_needsGBKFallbacks; } - void setNeedsGBKFallbacks(bool needsFallbacks) { m_needsGBKFallbacks = needsFallbacks; } - - int decodeToBuffer(UChar* buffer, UChar* bufferLimit, const char*& source, - const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err); - - TextEncoding m_encoding; - unsigned m_numBufferedBytes; - unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character - mutable UConverter* m_converterICU; - mutable bool m_needsGBKFallbacks; - }; - - struct ICUConverterWrapper { - ICUConverterWrapper() - : converter(0) - { - } - ~ICUConverterWrapper(); - - UConverter* converter; - }; - -} // namespace WebCore - -#endif // TextCodecICU_h diff --git a/WebCore/platform/text/TextCodecLatin1.cpp b/WebCore/platform/text/TextCodecLatin1.cpp deleted file mode 100644 index 2a217c5..0000000 --- a/WebCore/platform/text/TextCodecLatin1.cpp +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "TextCodecLatin1.h" - -#include "PlatformString.h" -#include <stdio.h> -#include <wtf/text/CString.h> -#include <wtf/text/StringBuffer.h> -#include <wtf/PassOwnPtr.h> - -namespace WebCore { - -static const UChar table[256] = { - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00-07 - 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08-0F - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, // 10-17 - 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, // 18-1F - 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, // 20-27 - 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, // 28-2F - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 30-37 - 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, // 38-3F - 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, // 40-47 - 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, // 48-4F - 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // 50-57 - 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, // 58-5F - 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, // 60-67 - 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, // 68-6F - 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, // 70-77 - 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, // 78-7F - 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F - 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F - 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, // A0-A7 - 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, // A8-AF - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, // B0-B7 - 0x00B8, 
0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, // B8-BF - 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, // C0-C7 - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, // C8-CF - 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, // D0-D7 - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, // D8-DF - 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, // E0-E7 - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, // E8-EF - 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, // F0-F7 - 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF // F8-FF -}; - -void TextCodecLatin1::registerEncodingNames(EncodingNameRegistrar registrar) -{ - registrar("windows-1252", "windows-1252"); - registrar("ISO-8859-1", "ISO-8859-1"); - registrar("US-ASCII", "US-ASCII"); - - registrar("WinLatin1", "windows-1252"); - registrar("ibm-1252", "windows-1252"); - registrar("ibm-1252_P100-2000", "windows-1252"); - - registrar("CP819", "ISO-8859-1"); - registrar("IBM819", "ISO-8859-1"); - registrar("csISOLatin1", "ISO-8859-1"); - registrar("iso-ir-100", "ISO-8859-1"); - registrar("iso_8859-1:1987", "ISO-8859-1"); - registrar("l1", "ISO-8859-1"); - registrar("latin1", "ISO-8859-1"); - - registrar("ANSI_X3.4-1968", "US-ASCII"); - registrar("ANSI_X3.4-1986", "US-ASCII"); - registrar("ASCII", "US-ASCII"); - registrar("IBM367", "US-ASCII"); - registrar("ISO646-US", "US-ASCII"); - registrar("ISO_646.irv:1991", "US-ASCII"); - registrar("cp367", "US-ASCII"); - registrar("csASCII", "US-ASCII"); - registrar("ibm-367_P100-1995", "US-ASCII"); - registrar("iso-ir-6", "US-ASCII"); - registrar("iso-ir-6-us", "US-ASCII"); - registrar("us", "US-ASCII"); - registrar("x-ansi", "US-ASCII"); -} - -static PassOwnPtr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*) -{ - return new TextCodecLatin1; -} - -void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar) -{ - 
registrar("windows-1252", newStreamingTextDecoderWindowsLatin1, 0); - - // ASCII and Latin-1 both decode as Windows Latin-1 although they retain unique identities. - registrar("ISO-8859-1", newStreamingTextDecoderWindowsLatin1, 0); - registrar("US-ASCII", newStreamingTextDecoderWindowsLatin1, 0); -} - -template<size_t size> struct NonASCIIMask; -template<> struct NonASCIIMask<4> { - static unsigned value() { return 0x80808080U; } -}; -template<> struct NonASCIIMask<8> { - static unsigned long long value() { return 0x8080808080808080ULL; } -}; - -template<size_t size> struct UCharByteFiller; -template<> struct UCharByteFiller<4> { - static void copy(UChar* dest, const unsigned char* src) - { - dest[0] = src[0]; - dest[1] = src[1]; - dest[2] = src[2]; - dest[3] = src[3]; - } -}; -template<> struct UCharByteFiller<8> { - static void copy(UChar* dest, const unsigned char* src) - { - dest[0] = src[0]; - dest[1] = src[1]; - dest[2] = src[2]; - dest[3] = src[3]; - dest[4] = src[4]; - dest[5] = src[5]; - dest[6] = src[6]; - dest[7] = src[7]; - } -}; - -String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&) -{ - UChar* characters; - String result = String::createUninitialized(length, characters); - - const unsigned char* src = reinterpret_cast<const unsigned char*>(bytes); - const unsigned char* end = reinterpret_cast<const unsigned char*>(bytes + length); - const unsigned char* alignedEnd = reinterpret_cast<const unsigned char*>(reinterpret_cast<ptrdiff_t>(end) & ~(sizeof(uintptr_t) - 1)); - UChar* dest = characters; - - while (src < end) { - if (*src < 0x80) { - // Fast path for values < 0x80 (most Latin-1 text will be ASCII) - // Wait until we're at a properly aligned address, then read full CPU words. 
- if (!(reinterpret_cast<ptrdiff_t>(src) & (sizeof(uintptr_t) - 1))) { - while (src < alignedEnd) { - uintptr_t chunk = *reinterpret_cast_ptr<const uintptr_t*>(src); - - if (chunk & NonASCIIMask<sizeof(uintptr_t)>::value()) - goto useLookupTable; - - UCharByteFiller<sizeof(uintptr_t)>::copy(dest, src); - - src += sizeof(uintptr_t); - dest += sizeof(uintptr_t); - } - - if (src == end) - break; - } - *dest = *src; - } else { -useLookupTable: - *dest = table[*src]; - } - - ++src; - ++dest; - } - - return result; -} - -static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, UnencodableHandling handling) -{ - Vector<char> result(length); - char* bytes = result.data(); - - size_t resultLength = 0; - for (size_t i = 0; i < length; ) { - UChar32 c; - U16_NEXT(characters, i, length, c); - unsigned char b = c; - // Do an efficient check to detect characters other than 00-7F and A0-FF. - if (b != c || (c & 0xE0) == 0x80) { - // Look for a way to encode this with Windows Latin-1. - for (b = 0x80; b < 0xA0; ++b) - if (table[b] == c) - goto gotByte; - // No way to encode this character with Windows Latin-1. - UnencodableReplacementArray replacement; - int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement); - result.grow(resultLength + replacementLength + length - i); - bytes = result.data(); - memcpy(bytes + resultLength, replacement, replacementLength); - resultLength += replacementLength; - continue; - } - gotByte: - bytes[resultLength++] = b; - } - - return CString(bytes, resultLength); -} - -CString TextCodecLatin1::encode(const UChar* characters, size_t length, UnencodableHandling handling) -{ - { - char* bytes; - CString string = CString::newUninitialized(length, bytes); - - // Convert the string a fast way and simultaneously do an efficient check to see if it's all ASCII. 
- UChar ored = 0; - for (size_t i = 0; i < length; ++i) { - UChar c = characters[i]; - bytes[i] = c; - ored |= c; - } - - if (!(ored & 0xFF80)) - return string; - } - - // If it wasn't all ASCII, call the function that handles more-complex cases. - return encodeComplexWindowsLatin1(characters, length, handling); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/TextCodecLatin1.h b/WebCore/platform/text/TextCodecLatin1.h deleted file mode 100644 index f035d01..0000000 --- a/WebCore/platform/text/TextCodecLatin1.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef TextCodecLatin1_h -#define TextCodecLatin1_h - -#include "TextCodec.h" - -namespace WebCore { - - class TextCodecLatin1 : public TextCodec { - public: - static void registerEncodingNames(EncodingNameRegistrar); - static void registerCodecs(TextCodecRegistrar); - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); - }; - -} // namespace WebCore - -#endif // TextCodecLatin1_h diff --git a/WebCore/platform/text/TextCodecUTF16.cpp b/WebCore/platform/text/TextCodecUTF16.cpp deleted file mode 100644 index e88e83b..0000000 --- a/WebCore/platform/text/TextCodecUTF16.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextCodecUTF16.h" - -#include "PlatformString.h" -#include <wtf/text/CString.h> -#include <wtf/text/StringBuffer.h> -#include <wtf/PassOwnPtr.h> - -using namespace std; - -namespace WebCore { - -void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar) -{ - registrar("UTF-16LE", "UTF-16LE"); - registrar("UTF-16BE", "UTF-16BE"); - - registrar("ISO-10646-UCS-2", "UTF-16LE"); - registrar("UCS-2", "UTF-16LE"); - registrar("UTF-16", "UTF-16LE"); - registrar("Unicode", "UTF-16LE"); - registrar("csUnicode", "UTF-16LE"); - registrar("unicodeFEFF", "UTF-16LE"); - - registrar("unicodeFFFE", "UTF-16BE"); -} - -static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*) -{ - return new TextCodecUTF16(true); -} - -static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*) -{ - return new TextCodecUTF16(false); -} - -void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar) -{ - registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0); - registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0); -} - -String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool, bool&) -{ - if (!length) - return String(); - - const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes); - size_t numBytes = length + m_haveBufferedByte; - size_t numChars = numBytes / 2; - - StringBuffer buffer(numChars); - UChar* q = buffer.characters(); - - if 
(m_haveBufferedByte) { - UChar c; - if (m_littleEndian) - c = m_bufferedByte | (p[0] << 8); - else - c = (m_bufferedByte << 8) | p[0]; - *q++ = c; - m_haveBufferedByte = false; - p += 1; - numChars -= 1; - } - - if (m_littleEndian) { - for (size_t i = 0; i < numChars; ++i) { - UChar c = p[0] | (p[1] << 8); - p += 2; - *q++ = c; - } - } else { - for (size_t i = 0; i < numChars; ++i) { - UChar c = (p[0] << 8) | p[1]; - p += 2; - *q++ = c; - } - } - - if (numBytes & 1) { - ASSERT(!m_haveBufferedByte); - m_haveBufferedByte = true; - m_bufferedByte = p[0]; - } - - buffer.shrink(q - buffer.characters()); - - return String::adopt(buffer); -} - -CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling) -{ - // We need to be sure we can double the length without overflowing. - // Since the passed-in length is the length of an actual existing - // character buffer, each character is two bytes, and we know - // the buffer doesn't occupy the entire address space, we can - // assert here that doubling the length does not overflow size_t - // and there's no need for a runtime check. - ASSERT(length <= numeric_limits<size_t>::max() / 2); - - char* bytes; - CString string = CString::newUninitialized(length * 2, bytes); - - // FIXME: CString is not a reasonable data structure for encoded UTF-16, which will have - // null characters inside it. Perhaps the result of encode should not be a CString. 
- if (m_littleEndian) { - for (size_t i = 0; i < length; ++i) { - UChar c = characters[i]; - bytes[i * 2] = c; - bytes[i * 2 + 1] = c >> 8; - } - } else { - for (size_t i = 0; i < length; ++i) { - UChar c = characters[i]; - bytes[i * 2] = c >> 8; - bytes[i * 2 + 1] = c; - } - } - - return string; -} - -} // namespace WebCore diff --git a/WebCore/platform/text/TextCodecUTF16.h b/WebCore/platform/text/TextCodecUTF16.h deleted file mode 100644 index 8ce9476..0000000 --- a/WebCore/platform/text/TextCodecUTF16.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef TextCodecUTF16_h -#define TextCodecUTF16_h - -#include "TextCodec.h" - -namespace WebCore { - - class TextCodecUTF16 : public TextCodec { - public: - static void registerEncodingNames(EncodingNameRegistrar); - static void registerCodecs(TextCodecRegistrar); - - TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { } - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); - - private: - bool m_littleEndian; - bool m_haveBufferedByte; - unsigned char m_bufferedByte; - }; - -} // namespace WebCore - -#endif // TextCodecUTF16_h diff --git a/WebCore/platform/text/TextCodecUserDefined.cpp b/WebCore/platform/text/TextCodecUserDefined.cpp deleted file mode 100644 index 70d8673..0000000 --- a/WebCore/platform/text/TextCodecUserDefined.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2007, 2008 Apple, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextCodecUserDefined.h" - -#include "PlatformString.h" -#include <stdio.h> -#include <wtf/text/CString.h> -#include <wtf/text/StringBuffer.h> -#include <wtf/PassOwnPtr.h> - -namespace WebCore { - -void TextCodecUserDefined::registerEncodingNames(EncodingNameRegistrar registrar) -{ - registrar("x-user-defined", "x-user-defined"); -} - -static PassOwnPtr<TextCodec> newStreamingTextDecoderUserDefined(const TextEncoding&, const void*) -{ - return new TextCodecUserDefined; -} - -void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar) -{ - registrar("x-user-defined", newStreamingTextDecoderUserDefined, 0); -} - -String TextCodecUserDefined::decode(const char* bytes, size_t length, bool, bool, bool&) -{ - UChar* buffer; - String result = String::createUninitialized(length, buffer); - - for (size_t i = 0; i < length; ++i) { - signed char c = bytes[i]; - buffer[i] = c & 0xF7FF; - } - - return result; -} - -static CString encodeComplexUserDefined(const UChar* characters, size_t length, UnencodableHandling handling) -{ - Vector<char> result(length); - char* bytes = result.data(); - - size_t resultLength = 0; - for (size_t i = 0; i < length; ) { - UChar32 c; - U16_NEXT(characters, i, length, c); - signed char signedByte = c; - if ((signedByte & 0xF7FF) == c) - bytes[resultLength++] = signedByte; - else { - // No way to encode this character with x-user-defined. 
- UnencodableReplacementArray replacement; - int replacementLength = TextCodec::getUnencodableReplacement(c, handling, replacement); - result.grow(resultLength + replacementLength + length - i); - bytes = result.data(); - memcpy(bytes + resultLength, replacement, replacementLength); - resultLength += replacementLength; - } - } - - return CString(bytes, resultLength); -} - -CString TextCodecUserDefined::encode(const UChar* characters, size_t length, UnencodableHandling handling) -{ - char* bytes; - CString string = CString::newUninitialized(length, bytes); - - // Convert the string a fast way and simultaneously do an efficient check to see if it's all ASCII. - UChar ored = 0; - for (size_t i = 0; i < length; ++i) { - UChar c = characters[i]; - bytes[i] = c; - ored |= c; - } - - if (!(ored & 0xFF80)) - return string; - - // If it wasn't all ASCII, call the function that handles more-complex cases. - return encodeComplexUserDefined(characters, length, handling); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/TextCodecUserDefined.h b/WebCore/platform/text/TextCodecUserDefined.h deleted file mode 100644 index d1b3160..0000000 --- a/WebCore/platform/text/TextCodecUserDefined.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2007 Apple, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. 
namespace WebCore {

    // TextCodec subclass for the "x-user-defined" encoding.
    class TextCodecUserDefined : public TextCodec {
    public:
        // Registration hooks invoked with the registry's callbacks.
        static void registerEncodingNames(EncodingNameRegistrar);
        static void registerCodecs(TextCodecRegistrar);

        // Converts |length| bytes to a String. The accompanying implementation leaves
        // the flush, stopOnError and sawError parameters unnamed and unused.
        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
        // Converts |length| UTF-16 code units to bytes, using the UnencodableHandling
        // policy for characters that have no byte representation.
        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
    };

} // namespace WebCore
namespace WebCore {

    // Direction of text: right-to-left or left-to-right.
    // The enumerators have implicit values, so RTL == 0 and LTR == 1.
    enum TextDirection { RTL, LTR };

}
// File-local accessor for the lazily constructed UTF-7 encoding object.
static const TextEncoding& UTF7Encoding()
{
    static TextEncoding globalUTF7Encoding("UTF-7");
    return globalUTF7Encoding;
}

// Builds an encoding from a C-string name. m_name holds the registry's atomic
// canonical name; m_backslashAsCurrencySymbol is derived from m_name, so it is
// initialized second.
TextEncoding::TextEncoding(const char* name)
    : m_name(atomicCanonicalTextEncodingName(name))
    , m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
{
}

// Same as above, but the name is supplied as a String.
TextEncoding::TextEncoding(const String& name)
    : m_name(atomicCanonicalTextEncodingName(name.characters(), name.length()))
    , m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
{
}

// Decodes |length| bytes with a freshly created codec for this encoding.
// Returns a default-constructed String when the encoding has no name.
String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const
{
    if (!m_name)
        return String();

    // The third argument is hard-wired to true (presumably the codec's flush flag,
    // since the whole input is handed over in one call - confirm against TextCodec.h).
    return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError);
}

// Encodes |length| UTF-16 code units with a freshly created codec for this encoding,
// after NFC-normalizing the input with whichever Unicode backend is compiled in.
// Returns a default-constructed CString when the encoding has no name and "" for
// empty input.
// NOTE(review): there is no #else branch; if none of the backend macros is set the
// function ends without a return statement.
CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const
{
    if (!m_name)
        return CString();

    if (!length)
        return "";

#if USE(ICU_UNICODE)
    // FIXME: What's the right place to do normalization?
    // It's a little strange to do it inside the encode function.
    // Perhaps normalization should be an explicit step done before calling encode.

    const UChar* source = characters;
    size_t sourceLength = length;

    Vector<UChar> normalizedCharacters;

    UErrorCode err = U_ZERO_ERROR;
    // Only normalize when the quick check cannot confirm the text is already NFC.
    if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) {
        // First try using the length of the original string, since normalization to NFC rarely increases length.
        normalizedCharacters.grow(sourceLength);
        int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);
        if (err == U_BUFFER_OVERFLOW_ERROR) {
            // Retry with the size reported by the first attempt.
            err = U_ZERO_ERROR;
            normalizedCharacters.resize(normalizedLength);
            normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
        }
        ASSERT(U_SUCCESS(err));

        source = normalizedCharacters.data();
        sourceLength = normalizedLength;
    }
    return newTextCodec(*this)->encode(source, sourceLength, handling);
#elif USE(QT4_UNICODE)
    QString str(reinterpret_cast<const QChar*>(characters), length);
    str = str.normalized(QString::NormalizationForm_C);
    return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), handling);
#elif USE(GLIB_UNICODE)
    // GLib normalizes UTF-8, so round-trip UTF-16 -> UTF-8 -> NFC -> UTF-16.
    GOwnPtr<char> UTF8Source;
    UTF8Source.set(g_utf16_to_utf8(characters, length, 0, 0, 0));
    if (!UTF8Source) {
        // If conversion to UTF-8 failed, try with the string without normalization
        return newTextCodec(*this)->encode(characters, length, handling);
    }

    GOwnPtr<char> UTF8Normalized;
    UTF8Normalized.set(g_utf8_normalize(UTF8Source.get(), -1, G_NORMALIZE_NFC));

    long UTF16Length;
    GOwnPtr<UChar> UTF16Normalized;
    UTF16Normalized.set(g_utf8_to_utf16(UTF8Normalized.get(), -1, 0, &UTF16Length, 0));

    return newTextCodec(*this)->encode(UTF16Normalized.get(), UTF16Length, handling);
#elif OS(WINCE)
    // normalization will be done by Windows CE API
    OwnPtr<TextCodec> textCodec = newTextCodec(*this);
    return textCodec.get() ? textCodec->encode(characters, length, handling) : CString();
#elif USE(BREWMP_UNICODE)
    // FIXME: not sure if Brew MP normalizes the input string automatically
    OwnPtr<TextCodec> textCodec = newTextCodec(*this);
    return textCodec.get() ? textCodec->encode(characters, length, handling) : CString();
#endif
}

// Returns the name to expose through the DOM. It equals m_name except that
// "windows-949" is reported as "EUC-KR" once extended encoding names are in use.
const char* TextEncoding::domName() const
{
    if (noExtendedTextEncodingNameUsed())
        return m_name;

    // We treat EUC-KR as windows-949 (its superset), but need to expose
    // the name 'EUC-KR' because the name 'windows-949' is not recognized by
    // most Korean web servers even though they do use the encoding
    // 'windows-949' with the name 'EUC-KR'.
    // FIXME: This is not thread-safe. At the moment, this function is
    // only accessed in a single thread, but eventually has to be made
    // thread-safe along with usesVisualOrdering().
    static const char* const a = atomicCanonicalTextEncodingName("windows-949");
    // Names are atomic, so pointer comparison is sufficient.
    if (m_name == a)
        return "EUC-KR";
    return m_name;
}

// True only for ISO-8859-8, and only once extended encoding names are in use.
bool TextEncoding::usesVisualOrdering() const
{
    if (noExtendedTextEncodingNameUsed())
        return false;

    static const char* const a = atomicCanonicalTextEncodingName("ISO-8859-8");
    return m_name == a;
}

// Delegates to the registry's isJapaneseEncoding() check on the canonical name.
bool TextEncoding::isJapanese() const
{
    return isJapaneseEncoding(m_name);
}

// Returns U+00A5 (yen sign) for encodings the registry flags as showing backslash
// as a currency symbol, and a plain backslash otherwise.
UChar TextEncoding::backslashAsCurrencySymbol() const
{
    return shouldShowBackslashAsCurrencySymbolIn(m_name) ? 0x00A5 : '\\';
}

// True for UTF-16 (either byte order); UTF-32 is only considered once extended
// encoding names are in use.
bool TextEncoding::isNonByteBasedEncoding() const
{
    if (noExtendedTextEncodingNameUsed()) {
        return *this == UTF16LittleEndianEncoding()
            || *this == UTF16BigEndianEncoding();
    }

    return *this == UTF16LittleEndianEncoding()
        || *this == UTF16BigEndianEncoding()
        || *this == UTF32BigEndianEncoding()
        || *this == UTF32LittleEndianEncoding();
}

// True when this is UTF-7; always false until extended encoding names are in use.
bool TextEncoding::isUTF7Encoding() const
{
    if (noExtendedTextEncodingNameUsed())
        return false;

    return *this == UTF7Encoding();
}

// Returns UTF-8 in place of UTF-16/UTF-32, and the encoding itself otherwise.
const TextEncoding& TextEncoding::closestByteBasedEquivalent() const
{
    if (isNonByteBasedEncoding())
        return UTF8Encoding();
    return *this;
}

// HTML5 specifies that UTF-8 be used in form submission when a form is
// a part of a document in UTF-16 probably because UTF-16 is not a
// byte-based encoding and can contain 0x00. By extension, the same
// should be done for UTF-32. In case of UTF-7, it is a byte-based encoding,
// but it's fraught with problems and we'd rather steer clear of it.
-const TextEncoding& TextEncoding::encodingForFormSubmission() const -{ - if (isNonByteBasedEncoding() || isUTF7Encoding()) - return UTF8Encoding(); - return *this; -} - -const TextEncoding& ASCIIEncoding() -{ - static TextEncoding globalASCIIEncoding("ASCII"); - return globalASCIIEncoding; -} - -const TextEncoding& Latin1Encoding() -{ - static TextEncoding globalLatin1Encoding("latin1"); - return globalLatin1Encoding; -} - -const TextEncoding& UTF16BigEndianEncoding() -{ - static TextEncoding globalUTF16BigEndianEncoding("UTF-16BE"); - return globalUTF16BigEndianEncoding; -} - -const TextEncoding& UTF16LittleEndianEncoding() -{ - static TextEncoding globalUTF16LittleEndianEncoding("UTF-16LE"); - return globalUTF16LittleEndianEncoding; -} - -const TextEncoding& UTF32BigEndianEncoding() -{ - static TextEncoding globalUTF32BigEndianEncoding("UTF-32BE"); - return globalUTF32BigEndianEncoding; -} - -const TextEncoding& UTF32LittleEndianEncoding() -{ - static TextEncoding globalUTF32LittleEndianEncoding("UTF-32LE"); - return globalUTF32LittleEndianEncoding; -} - -const TextEncoding& UTF8Encoding() -{ - static TextEncoding globalUTF8Encoding("UTF-8"); - ASSERT(globalUTF8Encoding.isValid()); - return globalUTF8Encoding; -} - -const TextEncoding& WindowsLatin1Encoding() -{ - static TextEncoding globalWindowsLatin1Encoding("WinLatin-1"); - return globalWindowsLatin1Encoding; -} - -} // namespace WebCore diff --git a/WebCore/platform/text/TextEncoding.h b/WebCore/platform/text/TextEncoding.h deleted file mode 100644 index 675625b..0000000 --- a/WebCore/platform/text/TextEncoding.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef TextEncoding_h -#define TextEncoding_h - -#include "TextCodec.h" -#include <wtf/Forward.h> -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - - class TextEncoding { - public: - TextEncoding() : m_name(0) { } - TextEncoding(const char* name); - TextEncoding(const String& name); - - bool isValid() const { return m_name; } - const char* name() const { return m_name; } - const char* domName() const; // name exposed via DOM - bool usesVisualOrdering() const; - bool isJapanese() const; - - PassRefPtr<StringImpl> displayString(PassRefPtr<StringImpl> str) const - { - if (m_backslashAsCurrencySymbol == '\\' || !str) - return str; - return str->replace('\\', m_backslashAsCurrencySymbol); - } - void displayBuffer(UChar* characters, unsigned len) const - { - if (m_backslashAsCurrencySymbol == '\\') - return; - for (unsigned i = 0; i < len; ++i) { - if (characters[i] == '\\') - characters[i] = m_backslashAsCurrencySymbol; - } - } - - const TextEncoding& closestByteBasedEquivalent() const; - const TextEncoding& encodingForFormSubmission() const; - - String decode(const char* str, size_t length) const - { - bool ignored; - return decode(str, length, false, ignored); - } - String decode(const char*, size_t length, bool stopOnError, bool& sawError) const; - CString encode(const UChar*, size_t length, UnencodableHandling) const; - - UChar backslashAsCurrencySymbol() const; - - private: - bool isNonByteBasedEncoding() const; - bool isUTF7Encoding() const; - - const char* m_name; - UChar m_backslashAsCurrencySymbol; - }; - - inline bool operator==(const TextEncoding& a, const TextEncoding& b) { return a.name() == b.name(); } - inline bool operator!=(const TextEncoding& a, const TextEncoding& b) { return a.name() != b.name(); } - - const TextEncoding& ASCIIEncoding(); - const TextEncoding& Latin1Encoding(); - const TextEncoding& UTF16BigEndianEncoding(); - const TextEncoding& UTF16LittleEndianEncoding(); - const TextEncoding& UTF32BigEndianEncoding(); - const 
TextEncoding& UTF32LittleEndianEncoding(); - const TextEncoding& UTF8Encoding(); - const TextEncoding& WindowsLatin1Encoding(); - -} // namespace WebCore - -#endif // TextEncoding_h diff --git a/WebCore/platform/text/TextEncodingDetector.h b/WebCore/platform/text/TextEncodingDetector.h deleted file mode 100644 index 9f16ab0..0000000 --- a/WebCore/platform/text/TextEncodingDetector.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2009 Google Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef TextEncodingDetector_h -#define TextEncodingDetector_h - -namespace WebCore { - - class TextEncoding; - - // Given a sequence of bytes in |data| of length |len| and an optional - // hintEncodingName, detect the most likely character encoding. - // The way hintEncodingName is used is up to an implementation. - // Currently, the only caller sets it to the parent frame encoding. - bool detectTextEncoding(const char* data, size_t len, - const char* hintEncodingName, - TextEncoding* detectedEncoding); - -} // namespace WebCore - -#endif diff --git a/WebCore/platform/text/TextEncodingDetectorICU.cpp b/WebCore/platform/text/TextEncodingDetectorICU.cpp deleted file mode 100644 index c0d11de..0000000 --- a/WebCore/platform/text/TextEncodingDetectorICU.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (C) 2008, 2009 Google Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextEncodingDetector.h" - -#include "TextEncoding.h" -#include <wtf/UnusedParam.h> - -#ifndef BUILDING_ON_TIGER -#include "unicode/ucnv.h" -#include "unicode/ucsdet.h" -#endif - -namespace WebCore { - -bool detectTextEncoding(const char* data, size_t len, - const char* hintEncodingName, - TextEncoding* detectedEncoding) -{ - *detectedEncoding = TextEncoding(); -#ifdef BUILDING_ON_TIGER - // Tiger came with ICU 3.2 and does not have the encoding detector. - UNUSED_PARAM(data); - UNUSED_PARAM(len); - UNUSED_PARAM(hintEncodingName); - return false; -#else - int matchesCount = 0; - UErrorCode status = U_ZERO_ERROR; - UCharsetDetector* detector = ucsdet_open(&status); - if (U_FAILURE(status)) - return false; - ucsdet_enableInputFilter(detector, true); - ucsdet_setText(detector, data, static_cast<int32_t>(len), &status); - if (U_FAILURE(status)) - return false; - - // FIXME: A few things we can do other than improving - // the ICU detector itself. - // 1. Use ucsdet_detectAll and pick the most likely one given - // "the context" (parent-encoding, referrer encoding, etc). - // 2. 'Emulate' Firefox/IE's non-Universal detectors (e.g. - // Chinese, Japanese, Russian, Korean and Hebrew) by picking the - // encoding with a highest confidence among the detector-specific - // limited set of candidate encodings. - // Below is a partial implementation of the first part of what's outlined - // above. 
- const UCharsetMatch** matches = ucsdet_detectAll(detector, &matchesCount, &status); - if (U_FAILURE(status)) { - ucsdet_close(detector); - return false; - } - - const char* encoding = 0; - if (hintEncodingName) { - TextEncoding hintEncoding(hintEncodingName); - // 10 is the minimum confidence value consistent with the codepoint - // allocation in a given encoding. The size of a chunk passed to - // us varies even for the same html file (apparently depending on - // the network load). When we're given a rather short chunk, we - // don't have a sufficiently reliable signal other than the fact that - // the chunk is consistent with a set of encodings. So, instead of - // setting an arbitrary threshold, we have to scan all the encodings - // consistent with the data. - const int32_t kThresold = 10; - for (int i = 0; i < matchesCount; ++i) { - int32_t confidence = ucsdet_getConfidence(matches[i], &status); - if (U_FAILURE(status)) { - status = U_ZERO_ERROR; - continue; - } - if (confidence < kThresold) - break; - const char* matchEncoding = ucsdet_getName(matches[i], &status); - if (U_FAILURE(status)) { - status = U_ZERO_ERROR; - continue; - } - if (TextEncoding(matchEncoding) == hintEncoding) { - encoding = hintEncodingName; - break; - } - } - } - // If no match is found so far, just pick the top match. - // This can happen, say, when a parent frame in EUC-JP refers to - // a child frame in Shift_JIS and both frames do NOT specify the encoding - // making us resort to auto-detection (when it IS turned on). 
- if (!encoding && matchesCount > 0) - encoding = ucsdet_getName(matches[0], &status); - if (U_SUCCESS(status)) { - *detectedEncoding = TextEncoding(encoding); - ucsdet_close(detector); - return true; - } - ucsdet_close(detector); - return false; -#endif -} - -} diff --git a/WebCore/platform/text/TextEncodingDetectorNone.cpp b/WebCore/platform/text/TextEncodingDetectorNone.cpp deleted file mode 100644 index 3b62bc5..0000000 --- a/WebCore/platform/text/TextEncodingDetectorNone.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2009 Google Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
namespace WebCore {

// Stub used when no encoding detector is built in: ignores the data, length and hint,
// resets |*detectedEncoding| to a default-constructed TextEncoding and reports failure.
bool detectTextEncoding(const char*, size_t, const char*, TextEncoding* detectedEncoding)
{
    *detectedEncoding = TextEncoding();
    return false;
}

}
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextEncodingRegistry.h" - -#include "PlatformString.h" -#include "TextCodecLatin1.h" -#include "TextCodecUserDefined.h" -#include "TextCodecUTF16.h" -#include "TextEncoding.h" -#include <wtf/ASCIICType.h> -#include <wtf/Assertions.h> -#include <wtf/HashFunctions.h> -#include <wtf/HashMap.h> -#include <wtf/HashSet.h> -#include <wtf/StdLibExtras.h> -#include <wtf/StringExtras.h> -#include <wtf/Threading.h> - -#if USE(ICU_UNICODE) -#include "TextCodecICU.h" -#endif -#if PLATFORM(MAC) -#include "TextCodecMac.h" -#endif -#if PLATFORM(QT) -#include "qt/TextCodecQt.h" -#endif -#if USE(GLIB_UNICODE) -#include "gtk/TextCodecGtk.h" -#endif -#if USE(BREWMP_UNICODE) -#include "brew/TextCodecBrew.h" -#endif -#if OS(WINCE) && !PLATFORM(QT) -#include "TextCodecWinCE.h" -#endif - -using namespace WTF; - -namespace WebCore { - -const size_t maxEncodingNameLength = 63; - -// Hash for all-ASCII strings that does case folding. 
-struct TextEncodingNameHash { - - static bool equal(const char* s1, const char* s2) - { - char c1; - char c2; - do { - c1 = *s1++; - c2 = *s2++; - if (toASCIILower(c1) != toASCIILower(c2)) - return false; - } while (c1 && c2); - return !c1 && !c2; - } - - // This algorithm is the one-at-a-time hash from: - // http://burtleburtle.net/bob/hash/hashfaq.html - // http://burtleburtle.net/bob/hash/doobs.html - static unsigned hash(const char* s) - { - unsigned h = WTF::stringHashingStartValue; - for (;;) { - char c = *s++; - if (!c) { - h += (h << 3); - h ^= (h >> 11); - h += (h << 15); - return h; - } - h += toASCIILower(c); - h += (h << 10); - h ^= (h >> 6); - } - } - - static const bool safeToCompareToEmptyOrDeleted = false; -}; - -struct TextCodecFactory { - NewTextCodecFunction function; - const void* additionalData; - TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { } -}; - -typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap; -typedef HashMap<const char*, TextCodecFactory> TextCodecMap; - -static Mutex& encodingRegistryMutex() -{ - // We don't have to use AtomicallyInitializedStatic here because - // this function is called on the main thread for any page before - // it is used in worker threads. 
- DEFINE_STATIC_LOCAL(Mutex, mutex, ()); - return mutex; -} - -static TextEncodingNameMap* textEncodingNameMap; -static TextCodecMap* textCodecMap; -static bool didExtendTextCodecMaps; -static HashSet<const char*>* japaneseEncodings; -static HashSet<const char*>* nonBackslashEncodings; - -static const char* const textEncodingNameBlacklist[] = { - "UTF-7" -}; - -#if ERROR_DISABLED - -static inline void checkExistingName(const char*, const char*) { } - -#else - -static void checkExistingName(const char* alias, const char* atomicName) -{ - const char* oldAtomicName = textEncodingNameMap->get(alias); - if (!oldAtomicName) - return; - if (oldAtomicName == atomicName) - return; - // Keep the warning silent about one case where we know this will happen. - if (strcmp(alias, "ISO-8859-8-I") == 0 - && strcmp(oldAtomicName, "ISO-8859-8-I") == 0 - && strcasecmp(atomicName, "iso-8859-8") == 0) - return; - LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", alias, oldAtomicName, atomicName); -} - -#endif - -static bool isUndesiredAlias(const char* alias) -{ - // Reject aliases with version numbers that are supported by some back-ends (such as "ISO_2022,locale=ja,version=0" in ICU). - for (const char* p = alias; *p; ++p) { - if (*p == ',') - return true; - } - // 8859_1 is known to (at least) ICU, but other browsers don't support this name - and having it caused a compatibility - // problem, see bug 43554. 
- if (0 == strcmp(alias, "8859_1")) - return true; - return false; -} - -static void addToTextEncodingNameMap(const char* alias, const char* name) -{ - ASSERT(strlen(alias) <= maxEncodingNameLength); - if (isUndesiredAlias(alias)) - return; - const char* atomicName = textEncodingNameMap->get(name); - ASSERT(strcmp(alias, name) == 0 || atomicName); - if (!atomicName) - atomicName = name; - checkExistingName(alias, atomicName); - textEncodingNameMap->add(alias, atomicName); -} - -static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData) -{ - const char* atomicName = textEncodingNameMap->get(name); - ASSERT(atomicName); - textCodecMap->add(atomicName, TextCodecFactory(function, additionalData)); -} - -static void pruneBlacklistedCodecs() -{ - for (size_t i = 0; i < WTF_ARRAY_LENGTH(textEncodingNameBlacklist); ++i) { - const char* atomicName = textEncodingNameMap->get(textEncodingNameBlacklist[i]); - if (!atomicName) - continue; - - Vector<const char*> names; - TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); - TextEncodingNameMap::const_iterator end = textEncodingNameMap->end(); - for (; it != end; ++it) { - if (it->second == atomicName) - names.append(it->first); - } - - size_t length = names.size(); - for (size_t j = 0; j < length; ++j) - textEncodingNameMap->remove(names[j]); - - textCodecMap->remove(atomicName); - } -} - -static void buildBaseTextCodecMaps() -{ - ASSERT(isMainThread()); - ASSERT(!textCodecMap); - ASSERT(!textEncodingNameMap); - - textCodecMap = new TextCodecMap; - textEncodingNameMap = new TextEncodingNameMap; - - TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap); - TextCodecLatin1::registerCodecs(addToTextCodecMap); - - TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap); - TextCodecUTF16::registerCodecs(addToTextCodecMap); - - TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap); - 
TextCodecUserDefined::registerCodecs(addToTextCodecMap); - -#if USE(ICU_UNICODE) - TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap); - TextCodecICU::registerBaseCodecs(addToTextCodecMap); -#endif - -#if USE(GLIB_UNICODE) - TextCodecGtk::registerBaseEncodingNames(addToTextEncodingNameMap); - TextCodecGtk::registerBaseCodecs(addToTextCodecMap); -#endif - -#if USE(BREWMP_UNICODE) - TextCodecBrew::registerBaseEncodingNames(addToTextEncodingNameMap); - TextCodecBrew::registerBaseCodecs(addToTextCodecMap); -#endif - -#if OS(WINCE) && !PLATFORM(QT) - TextCodecWinCE::registerBaseEncodingNames(addToTextEncodingNameMap); - TextCodecWinCE::registerBaseCodecs(addToTextCodecMap); -#endif -} - -static void addEncodingName(HashSet<const char*>* set, const char* name) -{ - // We must not use atomicCanonicalTextEncodingName() because this function is called in it. - const char* atomicName = textEncodingNameMap->get(name); - if (atomicName) - set->add(atomicName); -} - -static void buildQuirksSets() -{ - // FIXME: Having isJapaneseEncoding() and shouldShowBackslashAsCurrencySymbolIn() - // and initializing the sets for them in TextEncodingRegistry.cpp look strange. 
- - ASSERT(!japaneseEncodings); - ASSERT(!nonBackslashEncodings); - - japaneseEncodings = new HashSet<const char*>(); - addEncodingName(japaneseEncodings, "EUC-JP"); - addEncodingName(japaneseEncodings, "ISO-2022-JP"); - addEncodingName(japaneseEncodings, "ISO-2022-JP-1"); - addEncodingName(japaneseEncodings, "ISO-2022-JP-2"); - addEncodingName(japaneseEncodings, "ISO-2022-JP-3"); - addEncodingName(japaneseEncodings, "JIS_C6226-1978"); - addEncodingName(japaneseEncodings, "JIS_X0201"); - addEncodingName(japaneseEncodings, "JIS_X0208-1983"); - addEncodingName(japaneseEncodings, "JIS_X0208-1990"); - addEncodingName(japaneseEncodings, "JIS_X0212-1990"); - addEncodingName(japaneseEncodings, "Shift_JIS"); - addEncodingName(japaneseEncodings, "Shift_JIS_X0213-2000"); - addEncodingName(japaneseEncodings, "cp932"); - addEncodingName(japaneseEncodings, "x-mac-japanese"); - - nonBackslashEncodings = new HashSet<const char*>(); - // The text encodings below treat backslash as a currency symbol for IE compatibility. - // See http://blogs.msdn.com/michkap/archive/2005/09/17/469941.aspx for more information. - addEncodingName(nonBackslashEncodings, "x-mac-japanese"); - addEncodingName(nonBackslashEncodings, "ISO-2022-JP"); - addEncodingName(nonBackslashEncodings, "EUC-JP"); - // Shift_JIS_X0213-2000 is not the same encoding as Shift_JIS on Mac. We need to register both of them. 
- addEncodingName(nonBackslashEncodings, "Shift_JIS"); - addEncodingName(nonBackslashEncodings, "Shift_JIS_X0213-2000"); -} - -bool isJapaneseEncoding(const char* canonicalEncodingName) -{ - return canonicalEncodingName && japaneseEncodings && japaneseEncodings->contains(canonicalEncodingName); -} - -bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName) -{ - return canonicalEncodingName && nonBackslashEncodings && nonBackslashEncodings->contains(canonicalEncodingName); -} - -static void extendTextCodecMaps() -{ -#if USE(ICU_UNICODE) - TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap); - TextCodecICU::registerExtendedCodecs(addToTextCodecMap); -#endif - -#if USE(QT4_UNICODE) - TextCodecQt::registerEncodingNames(addToTextEncodingNameMap); - TextCodecQt::registerCodecs(addToTextCodecMap); -#endif - -#if PLATFORM(MAC) - TextCodecMac::registerEncodingNames(addToTextEncodingNameMap); - TextCodecMac::registerCodecs(addToTextCodecMap); -#endif - -#if USE(GLIB_UNICODE) - TextCodecGtk::registerExtendedEncodingNames(addToTextEncodingNameMap); - TextCodecGtk::registerExtendedCodecs(addToTextCodecMap); -#endif - -#if OS(WINCE) && !PLATFORM(QT) - TextCodecWinCE::registerExtendedEncodingNames(addToTextEncodingNameMap); - TextCodecWinCE::registerExtendedCodecs(addToTextCodecMap); -#endif - - pruneBlacklistedCodecs(); - buildQuirksSets(); -} - -PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding) -{ - MutexLocker lock(encodingRegistryMutex()); - - ASSERT(textCodecMap); - TextCodecFactory factory = textCodecMap->get(encoding.name()); - ASSERT(factory.function); - return factory.function(encoding, factory.additionalData); -} - -const char* atomicCanonicalTextEncodingName(const char* name) -{ - if (!name || !name[0]) - return 0; - if (!textEncodingNameMap) - buildBaseTextCodecMaps(); - - MutexLocker lock(encodingRegistryMutex()); - - if (const char* atomicName = textEncodingNameMap->get(name)) - return atomicName; - if 
(didExtendTextCodecMaps) - return 0; - extendTextCodecMaps(); - didExtendTextCodecMaps = true; - return textEncodingNameMap->get(name); -} - -const char* atomicCanonicalTextEncodingName(const UChar* characters, size_t length) -{ - char buffer[maxEncodingNameLength + 1]; - size_t j = 0; - for (size_t i = 0; i < length; ++i) { - UChar c = characters[i]; - if (j == maxEncodingNameLength) - return 0; - buffer[j++] = c; - } - buffer[j] = 0; - return atomicCanonicalTextEncodingName(buffer); -} - -bool noExtendedTextEncodingNameUsed() -{ - // If the calling thread did not use extended encoding names, it is fine for it to use a stale false value. - return !didExtendTextCodecMaps; -} - -#ifndef NDEBUG -void dumpTextEncodingNameMap() -{ - unsigned size = textEncodingNameMap->size(); - fprintf(stderr, "Dumping %u entries in WebCore::textEncodingNameMap...\n", size); - - MutexLocker lock(encodingRegistryMutex()); - - TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); - TextEncodingNameMap::const_iterator end = textEncodingNameMap->end(); - for (; it != end; ++it) - fprintf(stderr, "'%s' => '%s'\n", it->first, it->second); -} -#endif - -} // namespace WebCore diff --git a/WebCore/platform/text/TextEncodingRegistry.h b/WebCore/platform/text/TextEncodingRegistry.h deleted file mode 100644 index 16844c6..0000000 --- a/WebCore/platform/text/TextEncodingRegistry.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2006, 2007 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TextEncodingRegistry_h -#define TextEncodingRegistry_h - -#include <memory> -#include <wtf/PassOwnPtr.h> -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - - class TextCodec; - class TextEncoding; - - // Use TextResourceDecoder::decode to decode resources, since it handles BOMs. - // Use TextEncoding::encode to encode, since it takes care of normalization. - PassOwnPtr<TextCodec> newTextCodec(const TextEncoding&); - - // Only TextEncoding should use the following functions directly. 
- const char* atomicCanonicalTextEncodingName(const char* alias); - const char* atomicCanonicalTextEncodingName(const UChar* aliasCharacters, size_t aliasLength); - bool noExtendedTextEncodingNameUsed(); - bool isJapaneseEncoding(const char* canonicalEncodingName); - bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName); - -#ifndef NDEBUG - void dumpTextEncodingNameMap(); -#endif -} - -#endif // TextEncodingRegistry_h diff --git a/WebCore/platform/text/TextStream.cpp b/WebCore/platform/text/TextStream.cpp deleted file mode 100644 index 1094fa4..0000000 --- a/WebCore/platform/text/TextStream.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2004, 2008, 2010 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextStream.h" - -#include "PlatformString.h" -#include <wtf/StringExtras.h> - -using namespace std; - -namespace WebCore { - -static const size_t printBufferSize = 100; // large enough for any integer or floating point value in string format, including trailing null character - -TextStream& TextStream::operator<<(bool b) -{ - return *this << (b ? "1" : "0"); -} - -TextStream& TextStream::operator<<(int i) -{ - char buffer[printBufferSize]; - snprintf(buffer, sizeof(buffer) - 1, "%d", i); - return *this << buffer; -} - -TextStream& TextStream::operator<<(unsigned i) -{ - char buffer[printBufferSize]; - snprintf(buffer, sizeof(buffer) - 1, "%u", i); - return *this << buffer; -} - -TextStream& TextStream::operator<<(long i) -{ - char buffer[printBufferSize]; - snprintf(buffer, sizeof(buffer) - 1, "%ld", i); - return *this << buffer; -} - -TextStream& TextStream::operator<<(unsigned long i) -{ - char buffer[printBufferSize]; - snprintf(buffer, sizeof(buffer) - 1, "%lu", i); - return *this << buffer; -} - -TextStream& TextStream::operator<<(float f) -{ - char buffer[printBufferSize]; - snprintf(buffer, sizeof(buffer) - 1, "%.2f", f); - return *this << buffer; -} - -TextStream& TextStream::operator<<(double d) -{ - char buffer[printBufferSize]; - snprintf(buffer, sizeof(buffer) - 1, "%.2f", d); - return *this << buffer; -} - -TextStream& TextStream::operator<<(const char* string) -{ - size_t stringLength = strlen(string); - size_t 
textLength = m_text.size(); - if (stringLength > numeric_limits<size_t>::max() - textLength) - CRASH(); - m_text.grow(textLength + stringLength); - for (size_t i = 0; i < stringLength; ++i) - m_text[textLength + i] = string[i]; - return *this; -} - -TextStream& TextStream::operator<<(const void* p) -{ - char buffer[printBufferSize]; - snprintf(buffer, sizeof(buffer) - 1, "%p", p); - return *this << buffer; -} - -TextStream& TextStream::operator<<(const String& string) -{ - append(m_text, string); - return *this; -} - -String TextStream::release() -{ - return String::adopt(m_text); -} - -#if OS(WINDOWS) && CPU(X86_64) -TextStream& TextStream::operator<<(__int64 i) -{ - char buffer[printBufferSize]; - snprintf(buffer, sizeof(buffer) - 1, "%I64i", i); - return *this << buffer; -} -TextStream& TextStream::operator<<(unsigned __int64 i) -{ - char buffer[printBufferSize]; - snprintf(buffer, sizeof(buffer) - 1, "%I64u", i); - return *this << buffer; -} -#endif - -} diff --git a/WebCore/platform/text/TextStream.h b/WebCore/platform/text/TextStream.h deleted file mode 100644 index e7e4cc0..0000000 --- a/WebCore/platform/text/TextStream.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (C) 2004, 2008 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. 
``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TextStream_h -#define TextStream_h - -#include <wtf/Forward.h> -#include <wtf/Vector.h> -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - -class TextStream { -public: - TextStream& operator<<(bool); - TextStream& operator<<(int); - TextStream& operator<<(unsigned); - TextStream& operator<<(long); - TextStream& operator<<(unsigned long); - TextStream& operator<<(float); - TextStream& operator<<(double); - TextStream& operator<<(const char*); - TextStream& operator<<(const void*); - TextStream& operator<<(const String&); -#if OS(WINDOWS) && CPU(X86_64) - TextStream& operator<<(unsigned __int64); - TextStream& operator<<(__int64); -#endif - - String release(); - -private: - Vector<UChar> m_text; -}; - -} - -#endif diff --git a/WebCore/platform/text/UnicodeRange.cpp b/WebCore/platform/text/UnicodeRange.cpp deleted file mode 100644 index 0373441..0000000 --- a/WebCore/platform/text/UnicodeRange.cpp +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Copyright (C) 2007 Apple Computer, Inc. - * - * Portions are Copyright (C) 1998 Netscape Communications Corporation. 
- * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Alternatively, the contents of this file may be used under the terms - * of either the Mozilla Public License Version 1.1, found at - * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public - * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html - * (the "GPL"), in which case the provisions of the MPL or the GPL are - * applicable instead of those above. If you wish to allow use of your - * version of this file only under the terms of one of those two - * licenses (the MPL or the GPL) and not to allow others to use your - * version of this file under the LGPL, indicate your decision by - * deletingthe provisions above and replace them with the notice and - * other provisions required by the MPL or the GPL, as the case may be. - * If you do not delete the provisions above, a recipient may use your - * version of this file under any of the LGPL, the MPL or the GPL. - */ - -#include "config.h" -#include "UnicodeRange.h" - -namespace WebCore { - -// This table depends on unicode range definitions. -// Each item's index must correspond to a unicode range value -// eg. 
x-cyrillic = LangGroupTable[cRangeCyrillic] -static const char* gUnicodeRangeToLangGroupTable[] = -{ - "x-cyrillic", - "el", - "tr", - "he", - "ar", - "x-baltic", - "th", - "ko", - "ja", - "zh-CN", - "zh-TW", - "x-devanagari", - "x-tamil", - "x-armn", - "x-beng", - "x-cans", - "x-ethi", - "x-geor", - "x-gujr", - "x-guru", - "x-khmr", - "x-mlym" -}; - -/********************************************************************** - * Unicode subranges as defined in unicode 3.0 - * x-western, x-central-euro, tr, x-baltic -> latin - * 0000 - 036f - * 1e00 - 1eff - * 2000 - 206f (general punctuation) - * 20a0 - 20cf (currency symbols) - * 2100 - 214f (letterlike symbols) - * 2150 - 218f (Number Forms) - * el -> greek - * 0370 - 03ff - * 1f00 - 1fff - * x-cyrillic -> cyrillic - * 0400 - 04ff - * he -> hebrew - * 0590 - 05ff - * ar -> arabic - * 0600 - 06ff - * fb50 - fdff (arabic presentation forms) - * fe70 - feff (arabic presentation forms b) - * th - thai - * 0e00 - 0e7f - * ko -> korean - * ac00 - d7af (hangul Syllables) - * 1100 - 11ff (jamo) - * 3130 - 318f (hangul compatibility jamo) - * ja - * 3040 - 309f (hiragana) - * 30a0 - 30ff (katakana) - * zh-CN - * zh-TW - * - * CJK - * 3100 - 312f (bopomofo) - * 31a0 - 31bf (bopomofo extended) - * 3000 - 303f (CJK Symbols and Punctuation) - * 2e80 - 2eff (CJK radicals supplement) - * 2f00 - 2fdf (Kangxi Radicals) - * 2ff0 - 2fff (Ideographic Description Characters) - * 3190 - 319f (kanbun) - * 3200 - 32ff (Enclosed CJK letters and Months) - * 3300 - 33ff (CJK compatibility) - * 3400 - 4dbf (CJK Unified Ideographs Extension A) - * 4e00 - 9faf (CJK Unified Ideographs) - * f900 - fa5f (CJK Compatibility Ideographs) - * fe30 - fe4f (CJK compatibility Forms) - * ff00 - ffef (halfwidth and fullwidth forms) - * - * Armenian - * 0530 - 058f - * Sriac - * 0700 - 074f - * Thaana - * 0780 - 07bf - * Devanagari - * 0900 - 097f - * Bengali - * 0980 - 09ff - * Gurmukhi - * 0a00 - 0a7f - * Gujarati - * 0a80 - 0aff - * Oriya - * 0b00 - 0b7f - 
* Tamil - * 0b80 - 0bff - * Telugu - * 0c00 - 0c7f - * Kannada - * 0c80 - 0cff - * Malayalam - * 0d00 - 0d7f - * Sinhala - * 0d80 - 0def - * Lao - * 0e80 - 0eff - * Tibetan - * 0f00 - 0fbf - * Myanmar - * 1000 - 109f - * Georgian - * 10a0 - 10ff - * Ethiopic - * 1200 - 137f - * Cherokee - * 13a0 - 13ff - * Canadian Aboriginal Syllabics - * 1400 - 167f - * Ogham - * 1680 - 169f - * Runic - * 16a0 - 16ff - * Khmer - * 1780 - 17ff - * Mongolian - * 1800 - 18af - * Misc - superscripts and subscripts - * 2070 - 209f - * Misc - Combining Diacritical Marks for Symbols - * 20d0 - 20ff - * Misc - Arrows - * 2190 - 21ff - * Misc - Mathematical Operators - * 2200 - 22ff - * Misc - Miscellaneous Technical - * 2300 - 23ff - * Misc - Control picture - * 2400 - 243f - * Misc - Optical character recognition - * 2440 - 2450 - * Misc - Enclose Alphanumerics - * 2460 - 24ff - * Misc - Box Drawing - * 2500 - 257f - * Misc - Block Elements - * 2580 - 259f - * Misc - Geometric Shapes - * 25a0 - 25ff - * Misc - Miscellaneous Symbols - * 2600 - 267f - * Misc - Dingbats - * 2700 - 27bf - * Misc - Braille Patterns - * 2800 - 28ff - * Yi Syllables - * a000 - a48f - * Yi radicals - * a490 - a4cf - * Alphabetic Presentation Forms - * fb00 - fb4f - * Misc - Combining half Marks - * fe20 - fe2f - * Misc - small form variants - * fe50 - fe6f - * Misc - Specials - * fff0 - ffff - *********************************************************************/ - -static const unsigned cNumSubTables = 9; -static const unsigned cSubTableSize = 16; - -static const unsigned char gUnicodeSubrangeTable[cNumSubTables][cSubTableSize] = -{ - { // table for X--- - cRangeTableBase+1, //u0xxx - cRangeTableBase+2, //u1xxx - cRangeTableBase+3, //u2xxx - cRangeSetCJK, //u3xxx - cRangeSetCJK, //u4xxx - cRangeSetCJK, //u5xxx - cRangeSetCJK, //u6xxx - cRangeSetCJK, //u7xxx - cRangeSetCJK, //u8xxx - cRangeSetCJK, //u9xxx - cRangeTableBase+4, //uaxxx - cRangeKorean, //ubxxx - cRangeKorean, //ucxxx - cRangeTableBase+5, //udxxx - 
cRangePrivate, //uexxx - cRangeTableBase+6 //ufxxx - }, - { //table for 0X-- - cRangeSetLatin, //u00xx - cRangeSetLatin, //u01xx - cRangeSetLatin, //u02xx - cRangeGreek, //u03xx XXX 0300-036f is in fact cRangeCombiningDiacriticalMarks - cRangeCyrillic, //u04xx - cRangeTableBase+7, //u05xx, includes Cyrillic supplement, Hebrew, and Armenian - cRangeArabic, //u06xx - cRangeTertiaryTable, //u07xx - cRangeUnassigned, //u08xx - cRangeTertiaryTable, //u09xx - cRangeTertiaryTable, //u0axx - cRangeTertiaryTable, //u0bxx - cRangeTertiaryTable, //u0cxx - cRangeTertiaryTable, //u0dxx - cRangeTertiaryTable, //u0exx - cRangeTibetan, //u0fxx - }, - { //table for 1x-- - cRangeTertiaryTable, //u10xx - cRangeKorean, //u11xx - cRangeEthiopic, //u12xx - cRangeTertiaryTable, //u13xx - cRangeCanadian, //u14xx - cRangeCanadian, //u15xx - cRangeTertiaryTable, //u16xx - cRangeKhmer, //u17xx - cRangeMongolian, //u18xx - cRangeUnassigned, //u19xx - cRangeUnassigned, //u1axx - cRangeUnassigned, //u1bxx - cRangeUnassigned, //u1cxx - cRangeUnassigned, //u1dxx - cRangeSetLatin, //u1exx - cRangeGreek, //u1fxx - }, - { //table for 2x-- - cRangeSetLatin, //u20xx - cRangeSetLatin, //u21xx - cRangeMathOperators, //u22xx - cRangeMiscTechnical, //u23xx - cRangeControlOpticalEnclose, //u24xx - cRangeBoxBlockGeometrics, //u25xx - cRangeMiscSymbols, //u26xx - cRangeDingbats, //u27xx - cRangeBraillePattern, //u28xx - cRangeUnassigned, //u29xx - cRangeUnassigned, //u2axx - cRangeUnassigned, //u2bxx - cRangeUnassigned, //u2cxx - cRangeUnassigned, //u2dxx - cRangeSetCJK, //u2exx - cRangeSetCJK, //u2fxx - }, - { //table for ax-- - cRangeYi, //ua0xx - cRangeYi, //ua1xx - cRangeYi, //ua2xx - cRangeYi, //ua3xx - cRangeYi, //ua4xx - cRangeUnassigned, //ua5xx - cRangeUnassigned, //ua6xx - cRangeUnassigned, //ua7xx - cRangeUnassigned, //ua8xx - cRangeUnassigned, //ua9xx - cRangeUnassigned, //uaaxx - cRangeUnassigned, //uabxx - cRangeKorean, //uacxx - cRangeKorean, //uadxx - cRangeKorean, //uaexx - cRangeKorean, 
//uafxx - }, - { //table for dx-- - cRangeKorean, //ud0xx - cRangeKorean, //ud1xx - cRangeKorean, //ud2xx - cRangeKorean, //ud3xx - cRangeKorean, //ud4xx - cRangeKorean, //ud5xx - cRangeKorean, //ud6xx - cRangeKorean, //ud7xx - cRangeSurrogate, //ud8xx - cRangeSurrogate, //ud9xx - cRangeSurrogate, //udaxx - cRangeSurrogate, //udbxx - cRangeSurrogate, //udcxx - cRangeSurrogate, //uddxx - cRangeSurrogate, //udexx - cRangeSurrogate, //udfxx - }, - { // table for fx-- - cRangePrivate, //uf0xx - cRangePrivate, //uf1xx - cRangePrivate, //uf2xx - cRangePrivate, //uf3xx - cRangePrivate, //uf4xx - cRangePrivate, //uf5xx - cRangePrivate, //uf6xx - cRangePrivate, //uf7xx - cRangePrivate, //uf8xx - cRangeSetCJK, //uf9xx - cRangeSetCJK, //ufaxx - cRangeArabic, //ufbxx, includes alphabic presentation form - cRangeArabic, //ufcxx - cRangeArabic, //ufdxx - cRangeArabic, //ufexx, includes Combining half marks, - // CJK compatibility forms, - // CJK compatibility forms, - // small form variants - cRangeTableBase+8, //uffxx, halfwidth and fullwidth forms, includes Specials - }, - { //table for 0x0500 - 0x05ff - cRangeCyrillic, //u050x - cRangeCyrillic, //u051x - cRangeCyrillic, //u052x - cRangeArmenian, //u053x - cRangeArmenian, //u054x - cRangeArmenian, //u055x - cRangeArmenian, //u056x - cRangeArmenian, //u057x - cRangeArmenian, //u058x - cRangeHebrew, //u059x - cRangeHebrew, //u05ax - cRangeHebrew, //u05bx - cRangeHebrew, //u05cx - cRangeHebrew, //u05dx - cRangeHebrew, //u05ex - cRangeHebrew, //u05fx - }, - { //table for 0xff00 - 0xffff - cRangeSetCJK, //uff0x, fullwidth latin - cRangeSetCJK, //uff1x, fullwidth latin - cRangeSetCJK, //uff2x, fullwidth latin - cRangeSetCJK, //uff3x, fullwidth latin - cRangeSetCJK, //uff4x, fullwidth latin - cRangeSetCJK, //uff5x, fullwidth latin - cRangeSetCJK, //uff6x, halfwidth katakana - cRangeSetCJK, //uff7x, halfwidth katakana - cRangeSetCJK, //uff8x, halfwidth katakana - cRangeSetCJK, //uff9x, halfwidth katakana - cRangeSetCJK, //uffax, 
halfwidth hangul jamo - cRangeSetCJK, //uffbx, halfwidth hangul jamo - cRangeSetCJK, //uffcx, halfwidth hangul jamo - cRangeSetCJK, //uffdx, halfwidth hangul jamo - cRangeSetCJK, //uffex, fullwidth symbols - cRangeSpecials, //ufffx, Specials - }, -}; - -// Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80) -// code points so that the number of entries in the tertiary range -// table for that range is obtained by dividing (0x1700 - 0x0700) by 128. -// Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian aboriginal -// syllabaries take multiple chunks and Ogham and Runic share a single chunk. -static const unsigned cTertiaryTableSize = ((0x1700 - 0x0700) / 0x80); - -static const unsigned char gUnicodeTertiaryRangeTable[cTertiaryTableSize] = -{ //table for 0x0700 - 0x1600 - cRangeSyriac, //u070x - cRangeThaana, //u078x - cRangeUnassigned, //u080x place holder(resolved in the 2ndary tab.) - cRangeUnassigned, //u088x place holder(resolved in the 2ndary tab.) - cRangeDevanagari, //u090x - cRangeBengali, //u098x - cRangeGurmukhi, //u0a0x - cRangeGujarati, //u0a8x - cRangeOriya, //u0b0x - cRangeTamil, //u0b8x - cRangeTelugu, //u0c0x - cRangeKannada, //u0c8x - cRangeMalayalam, //u0d0x - cRangeSinhala, //u0d8x - cRangeThai, //u0e0x - cRangeLao, //u0e8x - cRangeTibetan, //u0f0x place holder(resolved in the 2ndary tab.) - cRangeTibetan, //u0f8x place holder(resolved in the 2ndary tab.) - cRangeMyanmar, //u100x - cRangeGeorgian, //u108x - cRangeKorean, //u110x place holder(resolved in the 2ndary tab.) - cRangeKorean, //u118x place holder(resolved in the 2ndary tab.) - cRangeEthiopic, //u120x place holder(resolved in the 2ndary tab.) - cRangeEthiopic, //u128x place holder(resolved in the 2ndary tab.) - cRangeEthiopic, //u130x - cRangeCherokee, //u138x - cRangeCanadian, //u140x place holder(resolved in the 2ndary tab.) - cRangeCanadian, //u148x place holder(resolved in the 2ndary tab.) - cRangeCanadian, //u150x place holder(resolved in the 2ndary tab.) 
- cRangeCanadian, //u158x place holder(resolved in the 2ndary tab.) - cRangeCanadian, //u160x - cRangeOghamRunic, //u168x this contains two scripts, Ogham & Runic -}; - -// A two level index is almost enough for locating a range, with the -// exception of u03xx and u05xx. Since we don't really care about range for -// combining diacritical marks in our font application, they are -// not discriminated further. Future adoption of this method for other use -// should be aware of this limitation. The implementation can be extended if -// there is such a need. -// For Indic, Southeast Asian scripts and some other scripts between -// U+0700 and U+16FF, it's extended to the third level. -unsigned int findCharUnicodeRange(UChar32 ch) -{ - if (ch >= 0xFFFF) - return 0; - - unsigned int range; - - //search the first table - range = gUnicodeSubrangeTable[0][ch >> 12]; - - if (range < cRangeTableBase) - // we try to get a specific range - return range; - - // otherwise, we have one more table to look at - range = gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x0f00) >> 8]; - if (range < cRangeTableBase) - return range; - if (range < cRangeTertiaryTable) - return gUnicodeSubrangeTable[range - cRangeTableBase][(ch & 0x00f0) >> 4]; - - // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks - return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7]; -} - -const char* langGroupFromUnicodeRange(unsigned char unicodeRange) -{ - if (cRangeSpecificItemNum > unicodeRange) - return gUnicodeRangeToLangGroupTable[unicodeRange]; - return 0; -} - -} diff --git a/WebCore/platform/text/UnicodeRange.h b/WebCore/platform/text/UnicodeRange.h deleted file mode 100644 index 2278a0e..0000000 --- a/WebCore/platform/text/UnicodeRange.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (C) 2007 Apple Computer, Inc. - * - * Portions are Copyright (C) 1998 Netscape Communications Corporation. 
- * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - * Alternatively, the contents of this file may be used under the terms - * of either the Mozilla Public License Version 1.1, found at - * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public - * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html - * (the "GPL"), in which case the provisions of the MPL or the GPL are - * applicable instead of those above. If you wish to allow use of your - * version of this file only under the terms of one of those two - * licenses (the MPL or the GPL) and not to allow others to use your - * version of this file under the LGPL, indicate your decision by - * deletingthe provisions above and replace them with the notice and - * other provisions required by the MPL or the GPL, as the case may be. - * If you do not delete the provisions above, a recipient may use your - * version of this file under any of the LGPL, the MPL or the GPL. - */ - -#ifndef UnicodeRange_H -#define UnicodeRange_H - -#if PLATFORM(HAIKU) -#include "stdint.h" -#endif - -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - -// The following constants define unicode subranges -// values below cRangeNum must be continuous so that we can map to -// a lang group directly. 
-// All ranges we care about should fit within 32 bits. - -// Frequently used range definitions -const unsigned char cRangeCyrillic = 0; -const unsigned char cRangeGreek = 1; -const unsigned char cRangeTurkish = 2; -const unsigned char cRangeHebrew = 3; -const unsigned char cRangeArabic = 4; -const unsigned char cRangeBaltic = 5; -const unsigned char cRangeThai = 6; -const unsigned char cRangeKorean = 7; -const unsigned char cRangeJapanese = 8; -const unsigned char cRangeSChinese = 9; -const unsigned char cRangeTChinese = 10; -const unsigned char cRangeDevanagari = 11; -const unsigned char cRangeTamil = 12; -const unsigned char cRangeArmenian = 13; -const unsigned char cRangeBengali = 14; -const unsigned char cRangeCanadian = 15; -const unsigned char cRangeEthiopic = 16; -const unsigned char cRangeGeorgian = 17; -const unsigned char cRangeGujarati = 18; -const unsigned char cRangeGurmukhi = 19; -const unsigned char cRangeKhmer = 20; -const unsigned char cRangeMalayalam = 21; - -const unsigned char cRangeSpecificItemNum = 22; - -//range/rangeSet grow to this place 22-29 - -const unsigned char cRangeSetStart = 30; // range set definition starts from here -const unsigned char cRangeSetLatin = 30; -const unsigned char cRangeSetCJK = 31; -const unsigned char cRangeSetEnd = 31; // range set definition ends here - -// less frequently used range definition -const unsigned char cRangeSurrogate = 32; -const unsigned char cRangePrivate = 33; -const unsigned char cRangeMisc = 34; -const unsigned char cRangeUnassigned = 35; -const unsigned char cRangeSyriac = 36; -const unsigned char cRangeThaana = 37; -const unsigned char cRangeOriya = 38; -const unsigned char cRangeTelugu = 39; -const unsigned char cRangeKannada = 40; -const unsigned char cRangeSinhala = 41; -const unsigned char cRangeLao = 42; -const unsigned char cRangeTibetan = 43; -const unsigned char cRangeMyanmar = 44; -const unsigned char cRangeCherokee = 45; -const unsigned char cRangeOghamRunic = 46; -const unsigned 
char cRangeMongolian = 47; -const unsigned char cRangeMathOperators = 48; -const unsigned char cRangeMiscTechnical = 49; -const unsigned char cRangeControlOpticalEnclose = 50; -const unsigned char cRangeBoxBlockGeometrics = 51; -const unsigned char cRangeMiscSymbols = 52; -const unsigned char cRangeDingbats = 53; -const unsigned char cRangeBraillePattern = 54; -const unsigned char cRangeYi = 55; -const unsigned char cRangeCombiningDiacriticalMarks = 56; -const unsigned char cRangeSpecials = 57; - -const unsigned char cRangeTableBase = 128; //values over 127 are reserved for internal use only -const unsigned char cRangeTertiaryTable = 145; // leave room for 16 subtable - // indices (cRangeTableBase + 1 .. - // cRangeTableBase + 16) - - - -unsigned int findCharUnicodeRange(UChar32 ch); -const char* langGroupFromUnicodeRange(unsigned char unicodeRange); - -} - -#endif // UnicodeRange_H diff --git a/WebCore/platform/text/android/HyphenationAndroid.cpp b/WebCore/platform/text/android/HyphenationAndroid.cpp deleted file mode 100644 index d1bd839..0000000 --- a/WebCore/platform/text/android/HyphenationAndroid.cpp +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2010, The Android Open Source Project - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "Hyphenation.h" - -// For external hyphenation library. -#include "hyphen.h" -#include <utils/AssetManager.h> -#include <wtf/text/CString.h> -#include <wtf/text/WTFString.h> - -extern android::AssetManager* globalAssetManager(); - -using namespace WTF; - -namespace WebCore { - -static HyphenDict* loadHyphenationDictionary() -{ - android::AssetManager* am = globalAssetManager(); - // Only support English for now. - android::Asset* a = am->open("webkit/hyph_en_US.dic", - android::Asset::ACCESS_BUFFER); - if (!a) { - // Asset webkit/hyph_en_US.dic not found! - return 0; - } - const CString dictContents = String(static_cast<const char*>(a->getBuffer(false)), - a->getLength()).utf8(); - HyphenDict* dict = hnj_hyphen_load_from_buffer(dictContents.data(), - dictContents.length()); - delete a; - - return dict; -} - -bool canHyphenate(const AtomicString& /* localeIdentifier */) -{ - // FIXME: Check that the locale identifier matches the available dictionary. 
- return true; -} - -size_t lastHyphenLocation(const UChar* characters, size_t length, size_t beforeIndex, const AtomicString& /* localeIdentifier */) -{ - static const size_t minWordLen = 5; - static const size_t maxWordLen = 100; - if (beforeIndex <= 0 || length < minWordLen || length > maxWordLen) - return 0; - - static HyphenDict* dict = loadHyphenationDictionary(); - if (!dict) - return 0; - - char word[maxWordLen]; - size_t wordLength = 0; - for (size_t i = 0; i < length; ++i) { - const UChar ch = characters[i]; - // Only English for now. - // To really make it language aware, we need something like language - // detection or rely on the langAttr in the html element. Though - // seems right now the langAttr is not used or quite implemented in - // webkit. - if (!isASCIIAlpha(ch)) { - // Bypass leading spaces. - if (isASCIISpace(ch) && !wordLength) - continue; - return 0; - } - word[wordLength++] = ch; - } - if (wordLength < minWordLen) - return 0; - - static const int extraBuffer = 5; - const int leadingSpacesCount = length - wordLength; - char hyphens[maxWordLen + extraBuffer]; - if (!hnj_hyphen_hyphenate(dict, word, wordLength, hyphens)) { - for (size_t i = beforeIndex - 2 - leadingSpacesCount; i > 0; --i) { - if (hyphens[i] & 1) - return i + 1 + leadingSpacesCount; - } - } - - return 0; -} - -} // namespace WebCore diff --git a/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp b/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp deleted file mode 100644 index 9732e92..0000000 --- a/WebCore/platform/text/android/TextBreakIteratorInternalICU.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2007, The Android Open Source Project - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextBreakIteratorInternalICU.h" - -namespace WebCore { - -const char* currentSearchLocaleID() -{ - // FIXME: Should use system locale. - return ""; -} - -const char* currentTextBreakLocaleID() -{ - // FIXME: Should use system locale. - return "en_us"; -} - -} diff --git a/WebCore/platform/text/brew/TextBoundariesBrew.cpp b/WebCore/platform/text/brew/TextBoundariesBrew.cpp deleted file mode 100644 index 506bdcf..0000000 --- a/WebCore/platform/text/brew/TextBoundariesBrew.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2006 Zack Rusin <zack@kde.org> - * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextBoundaries.h" - -#include "NotImplemented.h" -#include "PlatformString.h" - -using namespace WTF::Unicode; - -namespace WebCore { - -int findNextWordFromIndex(const UChar* buffer, int len, int position, bool forward) -{ - notImplemented(); - return 0; -} - -void findWordBoundary(const UChar* buffer, int len, int position, int* start, int* end) -{ - if (position > len) { - *start = 0; - *end = 0; - return; - } - - String str(buffer, len); - - int currentPosition = position - 1; - String foundWord; - while (currentPosition >= 0 && isLetter(str[currentPosition])) { - UChar c = str[currentPosition]; - foundWord.insert(&c, 1, 0); - --currentPosition; - } - - // currentPosition == 0 means the first char is not letter - // currentPosition == -1 means we reached the beginning - int startPos = (currentPosition < 0) ? 
0 : ++currentPosition; - currentPosition = position; - while (isLetter(str[currentPosition])) { - foundWord.append(str[currentPosition]); - ++currentPosition; - } - - *start = startPos; - *end = currentPosition; -} - -} diff --git a/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp b/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp deleted file mode 100644 index 7f46e4f..0000000 --- a/WebCore/platform/text/brew/TextBreakIteratorBrew.cpp +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. 
- * - */ - -#include "config.h" -#include "TextBreakIterator.h" - -#include "PlatformString.h" -#include <wtf/StdLibExtras.h> -#include <wtf/unicode/Unicode.h> - -using namespace WTF::Unicode; - -namespace WebCore { - -// Hack, not entirely correct -static inline bool isCharStop(UChar c) -{ - CharCategory charCategory = category(c); - return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00)); -} - -static inline bool isLineStop(UChar c) -{ - return category(c) != Separator_Line; -} - -static inline bool isSentenceStop(UChar c) -{ - return isPunct(c); -} - -class TextBreakIterator { -public: - void reset(const UChar* str, int len) - { - string = str; - length = len; - currentPos = 0; - } - virtual int first() = 0; - virtual int next() = 0; - virtual int previous() = 0; - int following(int position) - { - currentPos = position; - return next(); - } - int preceding(int position) - { - currentPos = position; - return previous(); - } - - int currentPos; - const UChar* string; - int length; -}; - -struct WordBreakIterator: TextBreakIterator { - virtual int first(); - virtual int next(); - virtual int previous(); -}; - -struct CharBreakIterator: TextBreakIterator { - virtual int first(); - virtual int next(); - virtual int previous(); -}; - -struct LineBreakIterator: TextBreakIterator { - virtual int first(); - virtual int next(); - virtual int previous(); -}; - -struct SentenceBreakIterator : TextBreakIterator { - virtual int first(); - virtual int next(); - virtual int previous(); -}; - -int WordBreakIterator::first() -{ - currentPos = 0; - return currentPos; -} - -int WordBreakIterator::next() -{ - if (currentPos == length) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos < length) { - if (haveSpace && !isSpace(string[currentPos])) - break; - if (isSpace(string[currentPos])) - haveSpace = true; - ++currentPos; - } - return currentPos; -} - -int WordBreakIterator::previous() -{ - 
if (!currentPos) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos > 0) { - if (haveSpace && !isSpace(string[currentPos])) - break; - if (isSpace(string[currentPos])) - haveSpace = true; - --currentPos; - } - return currentPos; -} - -int CharBreakIterator::first() -{ - currentPos = 0; - return currentPos; -} - -int CharBreakIterator::next() -{ - if (currentPos >= length) - return -1; - ++currentPos; - while (currentPos < length && !isCharStop(string[currentPos])) - ++currentPos; - return currentPos; -} - -int CharBreakIterator::previous() -{ - if (currentPos <= 0) - return -1; - if (currentPos > length) - currentPos = length; - --currentPos; - while (currentPos > 0 && !isCharStop(string[currentPos])) - --currentPos; - return currentPos; -} - -int LineBreakIterator::first() -{ - currentPos = 0; - return currentPos; -} - -int LineBreakIterator::next() -{ - if (currentPos == length) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos < length) { - if (haveSpace && !isLineStop(string[currentPos])) - break; - if (isLineStop(string[currentPos])) - haveSpace = true; - ++currentPos; - } - return currentPos; -} - -int LineBreakIterator::previous() -{ - if (!currentPos) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos > 0) { - if (haveSpace && !isLineStop(string[currentPos])) - break; - if (isLineStop(string[currentPos])) - haveSpace = true; - --currentPos; - } - return currentPos; -} - -int SentenceBreakIterator::first() -{ - currentPos = 0; - return currentPos; -} - -int SentenceBreakIterator::next() -{ - if (currentPos == length) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos < length) { - if (haveSpace && !isSentenceStop(string[currentPos])) - break; - if (isSentenceStop(string[currentPos])) - haveSpace = true; - ++currentPos; - } - return currentPos; -} - -int SentenceBreakIterator::previous() -{ - if 
(!currentPos) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos > 0) { - if (haveSpace && !isSentenceStop(string[currentPos])) - break; - if (isSentenceStop(string[currentPos])) - haveSpace = true; - --currentPos; - } - return currentPos; -} - -TextBreakIterator* wordBreakIterator(const UChar* string, int length) -{ - DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ()); - iterator.reset(string, length); - return &iterator; -} - -TextBreakIterator* characterBreakIterator(const UChar* string, int length) -{ - DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ()); - iterator.reset(string, length); - return &iterator; -} - -TextBreakIterator* lineBreakIterator(const UChar* string, int length) -{ - DEFINE_STATIC_LOCAL(LineBreakIterator , iterator, ()); - iterator.reset(string, length); - return &iterator; -} - -TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) -{ - DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ()); - iterator.reset(string, length); - return &iterator; -} - -int textBreakFirst(TextBreakIterator* breakIterator) -{ - return breakIterator->first(); -} - -int textBreakNext(TextBreakIterator* breakIterator) -{ - return breakIterator->next(); -} - -int textBreakPreceding(TextBreakIterator* breakIterator, int position) -{ - return breakIterator->preceding(position); -} - -int textBreakFollowing(TextBreakIterator* breakIterator, int position) -{ - return breakIterator->following(position); -} - -int textBreakCurrent(TextBreakIterator* breakIterator) -{ - return breakIterator->currentPos; -} - -bool isTextBreak(TextBreakIterator*, int) -{ - return true; -} - -TextBreakIterator* cursorMovementIterator(const UChar* string, int length) -{ - return characterBreakIterator(string, length); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/brew/TextCodecBrew.cpp b/WebCore/platform/text/brew/TextCodecBrew.cpp deleted file mode 100644 index 1f32298..0000000 --- 
a/WebCore/platform/text/brew/TextCodecBrew.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (C) 2010 Company 100, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextCodecBrew.h" - -#include "AEEAppGen.h" -#include "AEEICharsetConv.h" -#include "NotImplemented.h" -#include "PlatformString.h" -#include <wtf/Assertions.h> -#include <wtf/text/CString.h> - -namespace WebCore { - -// FIXME: Not sure if there are Brew MP devices which use big endian. 
-const char* WebCore::TextCodecBrew::m_internalEncodingName = "UTF-16LE"; - -static PassOwnPtr<TextCodec> newTextCodecBrew(const TextEncoding& encoding, const void*) -{ - return new TextCodecBrew(encoding); -} - -void TextCodecBrew::registerBaseEncodingNames(EncodingNameRegistrar registrar) -{ - registrar("UTF-8", "UTF-8"); -} - -void TextCodecBrew::registerBaseCodecs(TextCodecRegistrar registrar) -{ - registrar("UTF-8", newTextCodecBrew, 0); -} - -void TextCodecBrew::registerExtendedEncodingNames(EncodingNameRegistrar registrar) -{ - // FIXME: Not sure how to enumerate all available encodings. - notImplemented(); -} - -void TextCodecBrew::registerExtendedCodecs(TextCodecRegistrar registrar) -{ - notImplemented(); -} - -TextCodecBrew::TextCodecBrew(const TextEncoding& encoding) - : m_charsetConverter(0) - , m_encoding(encoding) - , m_numBufferedBytes(0) -{ - String format = String::format("%s>%s", encoding.name(), m_internalEncodingName); - - IShell* shell = reinterpret_cast<AEEApplet*>(GETAPPINSTANCE())->m_pIShell; - AEECLSID classID = ISHELL_GetHandler(shell, AEEIID_ICharsetConv, format.latin1().data()); - ISHELL_CreateInstance(shell, classID, reinterpret_cast<void**>(&m_charsetConverter)); - - ASSERT(m_charsetConverter); -} - -TextCodecBrew::~TextCodecBrew() -{ - if (m_charsetConverter) - ICharsetConv_Release(m_charsetConverter); -} - -String TextCodecBrew::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) -{ - int code = ICharsetConv_Initialize(m_charsetConverter, m_encoding.name(), m_internalEncodingName, 0); - ASSERT(code == AEE_SUCCESS); - - Vector<UChar> result; - Vector<unsigned char> prefixedBytes(length); - - int srcSize; - unsigned char* srcBegin; - - if (m_numBufferedBytes) { - srcSize = length + m_numBufferedBytes; - prefixedBytes.grow(srcSize); - memcpy(prefixedBytes.data(), m_bufferedBytes, m_numBufferedBytes); - memcpy(prefixedBytes.data() + m_numBufferedBytes, bytes, length); - - srcBegin = 
prefixedBytes.data(); - - // all buffered bytes are consumed now - m_numBufferedBytes = 0; - } else { - srcSize = length; - srcBegin = const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(bytes)); - } - - unsigned char* src = srcBegin; - unsigned char* srcEnd = srcBegin + srcSize; - - Vector<UChar> dstBuffer(srcSize); - - while (src < srcEnd) { - int numCharsConverted; - unsigned char* dstBegin = reinterpret_cast<unsigned char*>(dstBuffer.data()); - unsigned char* dst = dstBegin; - int dstSize = dstBuffer.size() * sizeof(UChar); - - code = ICharsetConv_CharsetConvert(m_charsetConverter, &src, &srcSize, &dst, &dstSize, &numCharsConverted); - ASSERT(code != AEE_ENOSUCH); - - if (code == AEE_EBUFFERTOOSMALL) { - // Increase the buffer and try it again. - dstBuffer.grow(dstBuffer.size() * 2); - continue; - } - - if (code == AEE_EBADITEM) { - sawError = true; - if (stopOnError) { - result.append(L'?'); - break; - } - - src++; - } - - if (code == AEE_EINCOMPLETEITEM) { - if (flush) { - LOG_ERROR("Partial bytes at end of input while flush requested."); - sawError = true; - return String(); - } - - m_numBufferedBytes = srcEnd - src; - memcpy(m_bufferedBytes, src, m_numBufferedBytes); - break; - } - - int numChars = (dst - dstBegin) / sizeof(UChar); - if (numChars > 0) - result.append(dstBuffer.data(), numChars); - } - - return String::adopt(result); -} - -CString TextCodecBrew::encode(const UChar* characters, size_t length, UnencodableHandling handling) -{ - if (!length) - return ""; - - unsigned int replacementCharacter = '?'; - - // FIXME: Impossible to handle EntitiesForUnencodables or URLEncodedEntitiesForUnencodables with ICharsetConv. 
- int code = ICharsetConv_Initialize(m_charsetConverter, m_internalEncodingName, m_encoding.name(), replacementCharacter); - ASSERT(code == AEE_SUCCESS); - - Vector<char> result; - - int srcSize = length * sizeof(UChar); - unsigned char* srcBegin = const_cast<unsigned char*>(reinterpret_cast<const unsigned char*>(characters)); - unsigned char* src = srcBegin; - unsigned char* srcEnd = srcBegin + srcSize; - - Vector<unsigned char> dstBuffer(length * sizeof(UChar)); - - while (src < srcEnd) { - int numCharsConverted; - unsigned char* dstBegin = dstBuffer.data(); - unsigned char* dst = dstBegin; - int dstSize = dstBuffer.size(); - - code = ICharsetConv_CharsetConvert(m_charsetConverter, &src, &srcSize, &dst, &dstSize, &numCharsConverted); - ASSERT(code != AEE_EINCOMPLETEITEM); - - if (code == AEE_ENOSUCH) { - LOG_ERROR("Conversion error, Code=%d", code); - return CString(); - } - - if (code == AEE_EBUFFERTOOSMALL) { - // Increase the buffer and try it again. - dstBuffer.grow(dstBuffer.size() * 2); - continue; - } - - if (code == AEE_EBADITEM) - src += sizeof(UChar); // Skip the invalid character - - int numBytes = dst - dstBegin; - if (numBytes > 0) - result.append(dstBuffer.data(), numBytes); - } - - return CString(result.data(), result.size()); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/brew/TextCodecBrew.h b/WebCore/platform/text/brew/TextCodecBrew.h deleted file mode 100644 index 97e2c87..0000000 --- a/WebCore/platform/text/brew/TextCodecBrew.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2010 Company 100, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TextCodecBrew_h -#define TextCodecBrew_h - -#include "TextCodec.h" -#include "TextEncoding.h" - -typedef struct ICharsetConv ICharsetConv; - -namespace WebCore { - -class TextCodecBrew : public TextCodec { -public: - static void registerBaseEncodingNames(EncodingNameRegistrar); - static void registerBaseCodecs(TextCodecRegistrar); - - static void registerExtendedEncodingNames(EncodingNameRegistrar); - static void registerExtendedCodecs(TextCodecRegistrar); - - TextCodecBrew(const TextEncoding&); - virtual ~TextCodecBrew(); - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); - -private: - TextEncoding m_encoding; - size_t m_numBufferedBytes; - unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character - ICharsetConv* m_charsetConverter; - - static const char* m_internalEncodingName; -}; - -} // namespace WebCore - -#endif // 
TextCodecBrew_h diff --git a/WebCore/platform/text/cf/HyphenationCF.cpp b/WebCore/platform/text/cf/HyphenationCF.cpp deleted file mode 100644 index 3adacad..0000000 --- a/WebCore/platform/text/cf/HyphenationCF.cpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2010 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "Hyphenation.h" - -#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD) - -#include "AtomicStringKeyedMRUCache.h" -#include "TextBreakIteratorInternalICU.h" -#include <wtf/ListHashSet.h> -#include <wtf/RetainPtr.h> - -namespace WebCore { - -#if !PLATFORM(WIN) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7) - -template<> -RetainPtr<CFLocaleRef> AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >::createValueForNullKey() -{ - RetainPtr<CFLocaleRef> locale(AdoptCF, CFLocaleCopyCurrent()); - - return CFStringIsHyphenationAvailableForLocale(locale.get()) ? locale : 0; -} - -template<> -RetainPtr<CFLocaleRef> AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >::createValueForKey(const AtomicString& localeIdentifier) -{ - RetainPtr<CFStringRef> cfLocaleIdentifier(AdoptCF, localeIdentifier.createCFString()); - RetainPtr<CFLocaleRef> locale(AdoptCF, CFLocaleCreate(kCFAllocatorDefault, cfLocaleIdentifier.get())); - - return CFStringIsHyphenationAvailableForLocale(locale.get()) ? locale : 0; -} - -static AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >& cfLocaleCache() -{ - DEFINE_STATIC_LOCAL(AtomicStringKeyedMRUCache<RetainPtr<CFLocaleRef> >, cache, ()); - return cache; -} - -bool canHyphenate(const AtomicString& localeIdentifier) -{ - return cfLocaleCache().get(localeIdentifier); -} - -size_t lastHyphenLocation(const UChar* characters, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier) -{ - RetainPtr<CFStringRef> string(AdoptCF, CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, reinterpret_cast<const UniChar*>(characters), length, kCFAllocatorNull)); - - RetainPtr<CFLocaleRef> locale = cfLocaleCache().get(localeIdentifier); - ASSERT(locale); - - CFIndex result = CFStringGetHyphenationLocationBeforeIndex(string.get(), beforeIndex, CFRangeMake(0, length), 0, locale.get(), 0); - return result == kCFNotFound ? 
0 : result; -} - -#else - -bool canHyphenate(const AtomicString&) -{ - return false; -} - -size_t lastHyphenLocation(const UChar*, size_t, size_t, const AtomicString&) -{ - ASSERT_NOT_REACHED(); - return 0; -} - -#endif // PLATFORM(WIN) && (!defined(MAC_OS_X_VERSION_10_7) || MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7) - -} // namespace WebCore - -#endif // !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !defined(BUILDING_ON_SNOW_LEOPARD) diff --git a/WebCore/platform/text/cf/StringCF.cpp b/WebCore/platform/text/cf/StringCF.cpp deleted file mode 100644 index dcaf8fb..0000000 --- a/WebCore/platform/text/cf/StringCF.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Copyright (C) 2006 Apple Computer, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - -#include "config.h" -#include "PlatformString.h" - -#if PLATFORM(CF) - -#include <CoreFoundation/CoreFoundation.h> - -namespace WTF { - -String::String(CFStringRef str) -{ - if (!str) - return; - - CFIndex size = CFStringGetLength(str); - if (size == 0) - m_impl = StringImpl::empty(); - else { - Vector<UChar, 1024> buffer(size); - CFStringGetCharacters(str, CFRangeMake(0, size), (UniChar*)buffer.data()); - m_impl = StringImpl::create(buffer.data(), size); - } -} - -CFStringRef String::createCFString() const -{ - if (!m_impl) - return static_cast<CFStringRef>(CFRetain(CFSTR(""))); - - return m_impl->createCFString(); -} - -} - -#endif // PLATFORM(CF) diff --git a/WebCore/platform/text/cf/StringImplCF.cpp b/WebCore/platform/text/cf/StringImplCF.cpp deleted file mode 100644 index 0157918..0000000 --- a/WebCore/platform/text/cf/StringImplCF.cpp +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (C) 2006, 2009 Apple Inc. All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - -#include "config.h" -#include <wtf/text/StringImpl.h> - -#if PLATFORM(CF) - -#include <CoreFoundation/CoreFoundation.h> -#include <wtf/MainThread.h> -#include <wtf/PassRefPtr.h> -#include <wtf/Threading.h> - -#if PLATFORM(MAC) && !defined(BUILDING_ON_TIGER) -#include <objc/objc-auto.h> -#endif - -namespace WTF { - -namespace StringWrapperCFAllocator { - - static StringImpl* currentString; - - static const void* retain(const void* info) - { - return info; - } - - static void release(const void*) - { - ASSERT_NOT_REACHED(); - } - - static CFStringRef copyDescription(const void*) - { - return CFSTR("WTF::String-based allocator"); - } - - static void* allocate(CFIndex size, CFOptionFlags, void*) - { - StringImpl* underlyingString = 0; - if (isMainThread()) { - underlyingString = currentString; - if (underlyingString) { - currentString = 0; - underlyingString->ref(); // Balanced by call to deref in deallocate below. - } - } - StringImpl** header = static_cast<StringImpl**>(fastMalloc(sizeof(StringImpl*) + size)); - *header = underlyingString; - return header + 1; - } - - static void* reallocate(void* pointer, CFIndex newSize, CFOptionFlags, void*) - { - size_t newAllocationSize = sizeof(StringImpl*) + newSize; - StringImpl** header = static_cast<StringImpl**>(pointer) - 1; - ASSERT(!*header); - header = static_cast<StringImpl**>(fastRealloc(header, newAllocationSize)); - return header + 1; - } - - static void deallocateOnMainThread(void* headerPointer) - { - StringImpl** header = static_cast<StringImpl**>(headerPointer); - StringImpl* underlyingString = *header; - ASSERT(underlyingString); - underlyingString->deref(); // Balanced by call to ref in allocate above. 
- fastFree(header); - } - - static void deallocate(void* pointer, void*) - { - StringImpl** header = static_cast<StringImpl**>(pointer) - 1; - StringImpl* underlyingString = *header; - if (!underlyingString) - fastFree(header); - else { - if (!isMainThread()) - callOnMainThread(deallocateOnMainThread, header); - else { - underlyingString->deref(); // Balanced by call to ref in allocate above. - fastFree(header); - } - } - } - - static CFIndex preferredSize(CFIndex size, CFOptionFlags, void*) - { - // FIXME: If FastMalloc provided a "good size" callback, we'd want to use it here. - // Note that this optimization would help performance for strings created with the - // allocator that are mutable, and those typically are only created by callers who - // make a new string using the old string's allocator, such as some of the call - // sites in CFURL. - return size; - } - - static CFAllocatorRef create() - { -#if PLATFORM(MAC) && !defined(BUILDING_ON_TIGER) - // Since garbage collection isn't compatible with custom allocators, don't use this at all when garbage collection is active. - if (objc_collectingEnabled()) - return 0; -#endif - CFAllocatorContext context = { 0, 0, retain, release, copyDescription, allocate, reallocate, deallocate, preferredSize }; - return CFAllocatorCreate(0, &context); - } - - static CFAllocatorRef allocator() - { - static CFAllocatorRef allocator = create(); - return allocator; - } - -} - -CFStringRef StringImpl::createCFString() -{ - CFAllocatorRef allocator = (m_length && isMainThread()) ? StringWrapperCFAllocator::allocator() : 0; - if (!allocator) - return CFStringCreateWithCharacters(0, reinterpret_cast<const UniChar*>(m_data), m_length); - - // Put pointer to the StringImpl in a global so the allocator can store it with the CFString. 
- ASSERT(!StringWrapperCFAllocator::currentString); - StringWrapperCFAllocator::currentString = this; - - CFStringRef string = CFStringCreateWithCharactersNoCopy(allocator, reinterpret_cast<const UniChar*>(m_data), m_length, kCFAllocatorNull); - - // The allocator cleared the global when it read it, but also clear it here just in case. - ASSERT(!StringWrapperCFAllocator::currentString); - StringWrapperCFAllocator::currentString = 0; - - return string; -} - -// On StringImpl creation we could check if the allocator is the StringWrapperCFAllocator. -// If it is, then we could find the original StringImpl and just return that. But to -// do that we'd have to compute the offset from CFStringRef to the allocated block; -// the CFStringRef is *not* at the start of an allocated block. Testing shows 1000x -// more calls to createCFString than calls to the create functions with the appropriate -// allocator, so it's probably not urgent optimize that case. - -} - -#endif // PLATFORM(CF) diff --git a/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp b/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp deleted file mode 100644 index e390a65..0000000 --- a/WebCore/platform/text/chromium/TextBreakIteratorInternalICUChromium.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2007 Apple Inc. All rights reserved. - * Copyright (C) 2008, 2009 Google Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. 
- * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. - * - */ - -#include "config.h" -#include "TextBreakIteratorInternalICU.h" - -#include "Language.h" -#include "PlatformString.h" -#include <wtf/StdLibExtras.h> -#include <wtf/text/CString.h> - -namespace WebCore { - -static const char* UILanguage() -{ - // Chrome's UI language can be different from the OS UI language on Windows. - // We want to return Chrome's UI language here. - DEFINE_STATIC_LOCAL(CString, locale, (defaultLanguage().latin1())); - return locale.data(); -} - -const char* currentSearchLocaleID() -{ - return UILanguage(); -} - -const char* currentTextBreakLocaleID() -{ - return UILanguage(); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp b/WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp deleted file mode 100644 index 0056869..0000000 --- a/WebCore/platform/text/efl/TextBreakIteratorInternalICUEfl.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2007 Alp Toker <alp@atoker.com> - * Copyright (C) 2009-2010 ProFUSION embedded systems - * Copyright (C) 2009-2010 Samsung Electronics - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. 
- * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" -#include "TextBreakIteratorInternalICU.h" - -namespace WebCore { - -const char* currentSearchLocaleID() -{ - // FIXME: Should use system locale. - return ""; -} - -const char* currentTextBreakLocaleID() -{ - return "en_us"; -} - -} diff --git a/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp b/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp deleted file mode 100644 index 990e331..0000000 --- a/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp +++ /dev/null @@ -1,365 +0,0 @@ -/* - * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> - * Copyright (C) 2007 Apple Inc. All rights reserved. - * Copyright (C) 2008 JĂĽrg Billeter <j@bitron.ch> - * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> - * Copyright (C) 2010 Igalia S.L. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - -#include "config.h" - -#include "TextBreakIterator.h" - -#include "GOwnPtr.h" -#include <pango/pango.h> -using namespace std; - -#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF) - -namespace WebCore { - -class CharacterIterator { -public: - bool setText(const UChar* string, int length); - const gchar* getText() { return m_utf8.get(); } - int getLength() { return m_length; } - glong getSize() { return m_size; } - void setIndex(int index); - int getIndex() { return m_index; } - void setUTF16Index(int index); - int getUTF16Index() { return m_utf16Index; } - int getUTF16Length() { return m_utf16Length; } - int first(); - int last(); - int next(); - int previous(); -private: - int characterSize(int index); - - GOwnPtr<char> m_utf8; - int m_length; - long m_size; - int m_index; - int m_utf16Index; - int m_utf16Length; -}; - -int CharacterIterator::characterSize(int index) -{ - if (index == m_length || index < 0) - return 0; - if (m_length == m_utf16Length) - return 1; - - gchar* indexPtr = g_utf8_offset_to_pointer(m_utf8.get(), index); - gunichar character = g_utf8_get_char(indexPtr); - return UTF8_IS_SURROGATE(character) ? 
2 : 1; -} - -bool CharacterIterator::setText(const UChar* string, int length) -{ - long utf8Size = 0; - m_utf8.set(g_utf16_to_utf8(string, length, 0, &utf8Size, 0)); - if (!utf8Size) - return false; - - m_utf16Length = length; - m_length = g_utf8_strlen(m_utf8.get(), utf8Size); - m_size = utf8Size; - m_index = 0; - m_utf16Index = 0; - - return true; -} - -void CharacterIterator::setIndex(int index) -{ - if (index == m_index) - return; - if (index <= 0) - m_index = m_utf16Index = 0; - else if (index >= m_length) { - m_index = m_length; - m_utf16Index = m_utf16Length; - } else if (m_length == m_utf16Length) - m_index = m_utf16Index = index; - else { - m_index = index; - int utf16Index = 0; - int utf8Index = 0; - while (utf8Index < index) { - utf16Index += characterSize(utf8Index); - utf8Index++; - } - m_utf16Index = utf16Index; - } -} - -void CharacterIterator::setUTF16Index(int index) -{ - if (index == m_utf16Index) - return; - if (index <= 0) - m_utf16Index = m_index = 0; - else if (index >= m_utf16Length) { - m_utf16Index = m_utf16Length; - m_index = m_length; - } else if (m_length == m_utf16Length) - m_utf16Index = m_index = index; - else { - m_utf16Index = index; - int utf16Index = 0; - int utf8Index = 0; - while (utf16Index < index) { - utf16Index += characterSize(utf8Index); - utf8Index++; - } - m_index = utf8Index; - } -} - -int CharacterIterator::first() -{ - m_index = m_utf16Index = 0; - return m_index; -} - -int CharacterIterator::last() -{ - m_index = m_length; - m_utf16Index = m_utf16Length; - return m_index; -} - -int CharacterIterator::next() -{ - int next = m_index + 1; - - if (next <= m_length) { - m_utf16Index = min(m_utf16Index + characterSize(m_index), m_utf16Length); - m_index = next; - } else { - m_index = TextBreakDone; - m_utf16Index = TextBreakDone; - } - - return m_index; -} - -int CharacterIterator::previous() -{ - int previous = m_index - 1; - - if (previous >= 0) { - m_utf16Index = max(m_utf16Index - characterSize(previous), 0); - m_index 
= previous; - } else { - m_index = TextBreakDone; - m_utf16Index = TextBreakDone; - } - - return m_index; -} - -enum UBreakIteratorType { - UBRK_CHARACTER, - UBRK_WORD, - UBRK_LINE, - UBRK_SENTENCE -}; - -class TextBreakIterator { -public: - UBreakIteratorType m_type; - PangoLogAttr* m_logAttrs; - CharacterIterator m_charIterator; -}; - -static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator, - UBreakIteratorType type, const UChar* string, int length) -{ - if (!string) - return 0; - - if (!createdIterator) { - iterator = new TextBreakIterator(); - createdIterator = true; - } - if (!iterator) - return 0; - - if (!iterator->m_charIterator.setText(string, length)) - return 0; - - int charLength = iterator->m_charIterator.getLength(); - - iterator->m_type = type; - if (createdIterator) - g_free(iterator->m_logAttrs); - iterator->m_logAttrs = g_new0(PangoLogAttr, charLength + 1); - pango_get_log_attrs(iterator->m_charIterator.getText(), iterator->m_charIterator.getSize(), - -1, 0, iterator->m_logAttrs, charLength + 1); - - return iterator; -} - -TextBreakIterator* characterBreakIterator(const UChar* string, int length) -{ - static bool createdCharacterBreakIterator = false; - static TextBreakIterator* staticCharacterBreakIterator; - return setUpIterator(createdCharacterBreakIterator, staticCharacterBreakIterator, UBRK_CHARACTER, string, length); -} - -TextBreakIterator* cursorMovementIterator(const UChar* string, int length) -{ - // FIXME: This needs closer inspection to achieve behaviour identical to the ICU version. 
- return characterBreakIterator(string, length); -} - -TextBreakIterator* wordBreakIterator(const UChar* string, int length) -{ - static bool createdWordBreakIterator = false; - static TextBreakIterator* staticWordBreakIterator; - return setUpIterator(createdWordBreakIterator, staticWordBreakIterator, UBRK_WORD, string, length); -} - -TextBreakIterator* lineBreakIterator(const UChar* string, int length) -{ - static bool createdLineBreakIterator = false; - static TextBreakIterator* staticLineBreakIterator; - return setUpIterator(createdLineBreakIterator, staticLineBreakIterator, UBRK_LINE, string, length); -} - -TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) -{ - static bool createdSentenceBreakIterator = false; - static TextBreakIterator* staticSentenceBreakIterator; - return setUpIterator(createdSentenceBreakIterator, staticSentenceBreakIterator, UBRK_SENTENCE, string, length); -} - -int textBreakFirst(TextBreakIterator* iterator) -{ - iterator->m_charIterator.first(); - return iterator->m_charIterator.getUTF16Index(); -} - -int textBreakLast(TextBreakIterator* iterator) -{ - // TextBreakLast is not meant to find just any break according to bi->m_type - // but really the one near the last character. - // (cmp ICU documentation for ubrk_first and ubrk_last) - // From ICU docs for ubrk_last: - // "Determine the index immediately beyond the last character in the text being scanned." - - // So we should advance or traverse back based on bi->m_logAttrs cursor positions. - // If last character position in the original string is a whitespace, - // traverse to the left until the first non-white character position is found - // and return the position of the first white-space char after this one. - // Otherwise return m_length, as "the first character beyond the last" is outside our string. 
- - bool whiteSpaceAtTheEnd = true; - int nextWhiteSpacePos = iterator->m_charIterator.getLength(); - - int pos = iterator->m_charIterator.last(); - while (pos >= 0 && whiteSpaceAtTheEnd) { - if (iterator->m_logAttrs[pos].is_cursor_position) { - if (whiteSpaceAtTheEnd = iterator->m_logAttrs[pos].is_white) - nextWhiteSpacePos = pos; - } - pos = iterator->m_charIterator.previous(); - } - iterator->m_charIterator.setIndex(nextWhiteSpacePos); - return iterator->m_charIterator.getUTF16Index(); -} - -int textBreakNext(TextBreakIterator* iterator) -{ - while (iterator->m_charIterator.next() != TextBreakDone) { - int index = iterator->m_charIterator.getIndex(); - - // FIXME: UBRK_WORD case: Single multibyte characters (i.e. white space around them), such as the euro symbol €, - // are not marked as word_start & word_end as opposed to the way ICU does it. - // This leads to - for example - different word selection behaviour when right clicking. - - if ((iterator->m_type == UBRK_LINE && iterator->m_logAttrs[index].is_line_break) - || (iterator->m_type == UBRK_WORD && (iterator->m_logAttrs[index].is_word_start || iterator->m_logAttrs[index].is_word_end)) - || (iterator->m_type == UBRK_CHARACTER && iterator->m_logAttrs[index].is_cursor_position) - || (iterator->m_type == UBRK_SENTENCE && iterator->m_logAttrs[index].is_sentence_boundary)) { - break; - } - } - return iterator->m_charIterator.getUTF16Index(); -} - -int textBreakPrevious(TextBreakIterator* iterator) -{ - while (iterator->m_charIterator.previous() != TextBreakDone) { - int index = iterator->m_charIterator.getIndex(); - - if ((iterator->m_type == UBRK_LINE && iterator->m_logAttrs[index].is_line_break) - || (iterator->m_type == UBRK_WORD && (iterator->m_logAttrs[index].is_word_start || iterator->m_logAttrs[index].is_word_end)) - || (iterator->m_type == UBRK_CHARACTER && iterator->m_logAttrs[index].is_cursor_position) - || (iterator->m_type == UBRK_SENTENCE && iterator->m_logAttrs[index].is_sentence_boundary)) { - 
break; - } - } - return iterator->m_charIterator.getUTF16Index(); -} - -int textBreakPreceding(TextBreakIterator* iterator, int offset) -{ - if (offset > iterator->m_charIterator.getUTF16Length()) - return TextBreakDone; - if (offset < 0) - return 0; - iterator->m_charIterator.setUTF16Index(offset); - return textBreakPrevious(iterator); -} - -int textBreakFollowing(TextBreakIterator* iterator, int offset) -{ - if (offset > iterator->m_charIterator.getUTF16Length()) - return TextBreakDone; - if (offset < 0) - return 0; - iterator->m_charIterator.setUTF16Index(offset); - return textBreakNext(iterator); -} - -int textBreakCurrent(TextBreakIterator* iterator) -{ - return iterator->m_charIterator.getUTF16Index(); -} - -bool isTextBreak(TextBreakIterator* iterator, int offset) -{ - if (!offset) - return true; - if (offset > iterator->m_charIterator.getUTF16Length()) - return false; - - iterator->m_charIterator.setUTF16Index(offset); - - int index = iterator->m_charIterator.getIndex(); - iterator->m_charIterator.previous(); - textBreakNext(iterator); - return iterator->m_charIterator.getIndex() == index; -} - -} diff --git a/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp b/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp deleted file mode 100644 index 35e5a05..0000000 --- a/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2007 Alp Toker <alp@atoker.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. 
- * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" -#include "TextBreakIteratorInternalICU.h" - -namespace WebCore { - -const char* currentSearchLocaleID() -{ - // FIXME: Should use system locale. - return ""; -} - -const char* currentTextBreakLocaleID() -{ - // FIXME: Should use system locale. - return "en_us"; -} - -} diff --git a/WebCore/platform/text/gtk/TextCodecGtk.cpp b/WebCore/platform/text/gtk/TextCodecGtk.cpp deleted file mode 100644 index 9308b33..0000000 --- a/WebCore/platform/text/gtk/TextCodecGtk.cpp +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved. - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2008 JĂĽrg Billeter <j@bitron.ch> - * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextCodecGtk.h" - -#include <gio/gio.h> -#include "GOwnPtr.h" -#include "Logging.h" -#include "PlatformString.h" -#include <wtf/Assertions.h> -#include <wtf/HashMap.h> -#include <wtf/text/CString.h> - -using std::min; - -namespace WebCore { - -// TextCodec's appendOmittingBOM() is gone (http://trac.webkit.org/changeset/33380). -// That's why we need to avoid generating extra BOM's for the conversion result. -// This can be achieved by specifying the UTF-16 codecs' endianness explicitly when initializing GLib. 
- -#if (G_BYTE_ORDER == G_BIG_ENDIAN) -static const gchar* internalEncodingName = "UTF-16BE"; -#else -static const gchar* internalEncodingName = "UTF-16LE"; -#endif - - -const size_t ConversionBufferSize = 16384; - - -static PassOwnPtr<TextCodec> newTextCodecGtk(const TextEncoding& encoding, const void*) -{ - return new TextCodecGtk(encoding); -} - -static bool isEncodingAvailable(const gchar* encodingName) -{ - GIConv tester; - // test decoding - tester = g_iconv_open(internalEncodingName, encodingName); - if (tester == reinterpret_cast<GIConv>(-1)) { - return false; - } else { - g_iconv_close(tester); - // test encoding - tester = g_iconv_open(encodingName, internalEncodingName); - if (tester == reinterpret_cast<GIConv>(-1)) { - return false; - } else { - g_iconv_close(tester); - return true; - } - } -} - -static bool registerEncodingNameIfAvailable(EncodingNameRegistrar registrar, const char* canonicalName) -{ - if (isEncodingAvailable(canonicalName)) { - registrar(canonicalName, canonicalName); - return true; - } - - return false; -} - -static void registerEncodingAliasIfAvailable(EncodingNameRegistrar registrar, const char* canonicalName, const char* aliasName) -{ - if (isEncodingAvailable(aliasName)) - registrar(aliasName, canonicalName); -} - -static void registerCodecIfAvailable(TextCodecRegistrar registrar, const char* codecName) -{ - if (isEncodingAvailable(codecName)) - registrar(codecName, newTextCodecGtk, 0); -} - -void TextCodecGtk::registerBaseEncodingNames(EncodingNameRegistrar registrar) -{ - // Unicode - registerEncodingNameIfAvailable(registrar, "UTF-8"); - registerEncodingNameIfAvailable(registrar, "UTF-32"); - registerEncodingNameIfAvailable(registrar, "UTF-32BE"); - registerEncodingNameIfAvailable(registrar, "UTF-32LE"); - - // Western - if (registerEncodingNameIfAvailable(registrar, "ISO-8859-1")) { - registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "CP819"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "IBM819"); - 
registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "ISO-IR-100"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "ISO8859-1"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "ISO_8859-1"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "ISO_8859-1:1987"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "L1"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "LATIN1"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-1", "CSISOLATIN1"); - } -} - -void TextCodecGtk::registerBaseCodecs(TextCodecRegistrar registrar) -{ - // Unicode - registerCodecIfAvailable(registrar, "UTF-8"); - registerCodecIfAvailable(registrar, "UTF-32"); - registerCodecIfAvailable(registrar, "UTF-32BE"); - registerCodecIfAvailable(registrar, "UTF-32LE"); - - // Western - registerCodecIfAvailable(registrar, "ISO-8859-1"); -} - -void TextCodecGtk::registerExtendedEncodingNames(EncodingNameRegistrar registrar) -{ - // Western - if (registerEncodingNameIfAvailable(registrar, "MACROMAN")) { - registerEncodingAliasIfAvailable(registrar, "MACROMAN", "MAC"); - registerEncodingAliasIfAvailable(registrar, "MACROMAN", "MACINTOSH"); - registerEncodingAliasIfAvailable(registrar, "MACROMAN", "CSMACINTOSH"); - } - - // Japanese - if (registerEncodingNameIfAvailable(registrar, "Shift_JIS")) { - registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "MS_KANJI"); - registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "SHIFT-JIS"); - registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "SJIS"); - registerEncodingAliasIfAvailable(registrar, "Shift_JIS", "CSSHIFTJIS"); - } - if (registerEncodingNameIfAvailable(registrar, "EUC-JP")) { - registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EUC_JP"); - registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EUCJP"); - registerEncodingAliasIfAvailable(registrar, "EUC-JP", "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE"); - registerEncodingAliasIfAvailable(registrar, "EUC-JP", 
"CSEUCPKDFMTJAPANESE"); - } - registerEncodingNameIfAvailable(registrar, "ISO-2022-JP"); - - // Traditional Chinese - if (registerEncodingNameIfAvailable(registrar, "BIG5")) { - registerEncodingAliasIfAvailable(registrar, "BIG5", "BIG-5"); - registerEncodingAliasIfAvailable(registrar, "BIG5", "BIG-FIVE"); - registerEncodingAliasIfAvailable(registrar, "BIG5", "BIGFIVE"); - registerEncodingAliasIfAvailable(registrar, "BIG5", "CN-BIG5"); - registerEncodingAliasIfAvailable(registrar, "BIG5", "CSBIG5"); - } - if (registerEncodingNameIfAvailable(registrar, "BIG5-HKSCS")) { - registerEncodingAliasIfAvailable(registrar, "BIG5-HKSCS", "BIG5-HKSCS:2004"); - registerEncodingAliasIfAvailable(registrar, "BIG5-HKSCS", "BIG5HKSCS"); - } - registerEncodingNameIfAvailable(registrar, "CP950"); - - // Korean - if (registerEncodingNameIfAvailable(registrar, "ISO-2022-KR")) - registerEncodingAliasIfAvailable(registrar, "ISO-2022-KR", "CSISO2022KR"); - if (registerEncodingNameIfAvailable(registrar, "CP949")) - registerEncodingAliasIfAvailable(registrar, "CP949", "UHC"); - if (registerEncodingNameIfAvailable(registrar, "EUC-KR")) - registerEncodingAliasIfAvailable(registrar, "EUC-KR", "CSEUCKR"); - - // Arabic - if (registerEncodingNameIfAvailable(registrar, "ISO-8859-6")) { - registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ARABIC"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ASMO-708"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ECMA-114"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO-IR-127"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO8859-6"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO_8859-6"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "ISO_8859-6:1987"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-6", "CSISOLATINARABIC"); - } - // rearranged, windows-1256 now declared the canonical name and put to lowercase to fix 
/fast/encoding/ahram-org-eg.html test case - if (registerEncodingNameIfAvailable(registrar, "windows-1256")) { - registerEncodingAliasIfAvailable(registrar, "windows-1256", "CP1256"); - registerEncodingAliasIfAvailable(registrar, "windows-1256", "MS-ARAB"); - } - - // Hebrew - if (registerEncodingNameIfAvailable(registrar, "ISO-8859-8")) { - registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "HEBREW"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO-8859-8"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO-IR-138"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO8859-8"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO_8859-8"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "ISO_8859-8:1988"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-8", "CSISOLATINHEBREW"); - } - // rearranged, moved windows-1255 as canonical and lowercased, fixing /fast/encoding/meta-charset.html - if (registerEncodingNameIfAvailable(registrar, "windows-1255")) { - registerEncodingAliasIfAvailable(registrar, "windows-1255", "CP1255"); - registerEncodingAliasIfAvailable(registrar, "windows-1255", "MS-HEBR"); - } - - // Greek - if (registerEncodingNameIfAvailable(registrar, "ISO-8859-7")) { - registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ECMA-118"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ELOT_928"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "GREEK"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "GREEK8"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO-IR-126"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO8859-7"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7:1987"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-7", "ISO_8859-7:2003"); - registerEncodingAliasIfAvailable(registrar, 
"ISO-8859-7", "CSI"); - } - if (registerEncodingNameIfAvailable(registrar, "CP869")) { - registerEncodingAliasIfAvailable(registrar, "CP869", "869"); - registerEncodingAliasIfAvailable(registrar, "CP869", "CP-GR"); - registerEncodingAliasIfAvailable(registrar, "CP869", "IBM869"); - registerEncodingAliasIfAvailable(registrar, "CP869", "CSIBM869"); - } - registerEncodingNameIfAvailable(registrar, "WINDOWS-1253"); - - // Cyrillic - if (registerEncodingNameIfAvailable(registrar, "ISO-8859-5")) { - registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "CYRILLIC"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO-IR-144"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO8859-5"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO_8859-5"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "ISO_8859-5:1988"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-5", "CSISOLATINCYRILLIC"); - } - if (registerEncodingNameIfAvailable(registrar, "KOI8-R")) - registerEncodingAliasIfAvailable(registrar, "KOI8-R", "CSKOI8R"); - if (registerEncodingNameIfAvailable(registrar, "CP866")) { - registerEncodingAliasIfAvailable(registrar, "CP866", "866"); - registerEncodingAliasIfAvailable(registrar, "CP866", "IBM866"); - registerEncodingAliasIfAvailable(registrar, "CP866", "CSIBM866"); - } - registerEncodingNameIfAvailable(registrar, "KOI8-U"); - // CP1251 added to pass /fast/encoding/charset-cp1251.html - if (registerEncodingNameIfAvailable(registrar, "windows-1251")) - registerEncodingAliasIfAvailable(registrar, "windows-1251", "CP1251"); - if (registerEncodingNameIfAvailable(registrar, "mac-cyrillic")) { - registerEncodingAliasIfAvailable(registrar, "mac-cyrillic", "MACCYRILLIC"); - registerEncodingAliasIfAvailable(registrar, "mac-cyrillic", "x-mac-cyrillic"); - } - - // Thai - if (registerEncodingNameIfAvailable(registrar, "CP874")) - registerEncodingAliasIfAvailable(registrar, "CP874", "WINDOWS-874"); - 
registerEncodingNameIfAvailable(registrar, "TIS-620"); - - // Simplified Chinese - registerEncodingNameIfAvailable(registrar, "GBK"); - if (registerEncodingNameIfAvailable(registrar, "HZ")) - registerEncodingAliasIfAvailable(registrar, "HZ", "HZ-GB-2312"); - registerEncodingNameIfAvailable(registrar, "GB18030"); - if (registerEncodingNameIfAvailable(registrar, "EUC-CN")) { - registerEncodingAliasIfAvailable(registrar, "EUC-CN", "EUCCN"); - registerEncodingAliasIfAvailable(registrar, "EUC-CN", "GB2312"); - registerEncodingAliasIfAvailable(registrar, "EUC-CN", "CN-GB"); - registerEncodingAliasIfAvailable(registrar, "EUC-CN", "CSGB2312"); - registerEncodingAliasIfAvailable(registrar, "EUC-CN", "EUC_CN"); - } - if (registerEncodingNameIfAvailable(registrar, "GB_2312-80")) { - registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "CHINESE"); - registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "csISO58GB231280"); - registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "GB2312.1980-0"); - registerEncodingAliasIfAvailable(registrar, "GB_2312-80", "ISO-IR-58"); - } - - // Central European - if (registerEncodingNameIfAvailable(registrar, "ISO-8859-2")) { - registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO-IR-101"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO8859-2"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO_8859-2"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "ISO_8859-2:1987"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "L2"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "LATIN2"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-2", "CSISOLATIN2"); - } - if (registerEncodingNameIfAvailable(registrar, "CP1250")) { - registerEncodingAliasIfAvailable(registrar, "CP1250", "MS-EE"); - registerEncodingAliasIfAvailable(registrar, "CP1250", "WINDOWS-1250"); - } - registerEncodingNameIfAvailable(registrar, "MAC-CENTRALEUROPE"); - - // Vietnamese - if 
(registerEncodingNameIfAvailable(registrar, "CP1258")) - registerEncodingAliasIfAvailable(registrar, "CP1258", "WINDOWS-1258"); - - // Turkish - if (registerEncodingNameIfAvailable(registrar, "CP1254")) { - registerEncodingAliasIfAvailable(registrar, "CP1254", "MS-TURK"); - registerEncodingAliasIfAvailable(registrar, "CP1254", "WINDOWS-1254"); - } - if (registerEncodingNameIfAvailable(registrar, "ISO-8859-9")) { - registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO-IR-148"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO8859-9"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO_8859-9"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "ISO_8859-9:1989"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "L5"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "LATIN5"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-9", "CSISOLATIN5"); - } - - // Baltic - if (registerEncodingNameIfAvailable(registrar, "CP1257")) { - registerEncodingAliasIfAvailable(registrar, "CP1257", "WINBALTRIM"); - registerEncodingAliasIfAvailable(registrar, "CP1257", "WINDOWS-1257"); - } - if (registerEncodingNameIfAvailable(registrar, "ISO-8859-4")) { - registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO-IR-110"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO8859-4"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO_8859-4"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "ISO_8859-4:1988"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "L4"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "LATIN4"); - registerEncodingAliasIfAvailable(registrar, "ISO-8859-4", "CSISOLATIN4"); - } -} - -void TextCodecGtk::registerExtendedCodecs(TextCodecRegistrar registrar) -{ - // Western - registerCodecIfAvailable(registrar, "MACROMAN"); - - // Japanese - registerCodecIfAvailable(registrar, "Shift_JIS"); - 
registerCodecIfAvailable(registrar, "EUC-JP"); - registerCodecIfAvailable(registrar, "ISO-2022-JP"); - - // Traditional Chinese - registerCodecIfAvailable(registrar, "BIG5"); - registerCodecIfAvailable(registrar, "BIG5-HKSCS"); - registerCodecIfAvailable(registrar, "CP950"); - - // Korean - registerCodecIfAvailable(registrar, "ISO-2022-KR"); - registerCodecIfAvailable(registrar, "CP949"); - registerCodecIfAvailable(registrar, "EUC-KR"); - - // Arabic - registerCodecIfAvailable(registrar, "ISO-8859-6"); - // rearranged, windows-1256 now declared the canonical name and put to lowercase to fix /fast/encoding/ahram-org-eg.html test case - registerCodecIfAvailable(registrar, "windows-1256"); - - // Hebrew - registerCodecIfAvailable(registrar, "ISO-8859-8"); - // rearranged, moved windows-1255 as canonical and lowercased, fixing /fast/encoding/meta-charset.html - registerCodecIfAvailable(registrar, "windows-1255"); - - // Greek - registerCodecIfAvailable(registrar, "ISO-8859-7"); - registerCodecIfAvailable(registrar, "CP869"); - registerCodecIfAvailable(registrar, "WINDOWS-1253"); - - // Cyrillic - registerCodecIfAvailable(registrar, "ISO-8859-5"); - registerCodecIfAvailable(registrar, "KOI8-R"); - registerCodecIfAvailable(registrar, "CP866"); - registerCodecIfAvailable(registrar, "KOI8-U"); - // CP1251 added to pass /fast/encoding/charset-cp1251.html - registerCodecIfAvailable(registrar, "windows-1251"); - registerCodecIfAvailable(registrar, "mac-cyrillic"); - - // Thai - registerCodecIfAvailable(registrar, "CP874"); - registerCodecIfAvailable(registrar, "TIS-620"); - - // Simplified Chinese - registerCodecIfAvailable(registrar, "GBK"); - registerCodecIfAvailable(registrar, "HZ"); - registerCodecIfAvailable(registrar, "GB18030"); - registerCodecIfAvailable(registrar, "EUC-CN"); - registerCodecIfAvailable(registrar, "GB_2312-80"); - - // Central European - registerCodecIfAvailable(registrar, "ISO-8859-2"); - registerCodecIfAvailable(registrar, "CP1250"); - 
registerCodecIfAvailable(registrar, "MAC-CENTRALEUROPE"); - - // Vietnamese - registerCodecIfAvailable(registrar, "CP1258"); - - // Turkish - registerCodecIfAvailable(registrar, "CP1254"); - registerCodecIfAvailable(registrar, "ISO-8859-9"); - - // Baltic - registerCodecIfAvailable(registrar, "CP1257"); - registerCodecIfAvailable(registrar, "ISO-8859-4"); -} - -TextCodecGtk::TextCodecGtk(const TextEncoding& encoding) - : m_encoding(encoding) - , m_numBufferedBytes(0) -{ -} - -TextCodecGtk::~TextCodecGtk() -{ -} - -void TextCodecGtk::createIConvDecoder() const -{ - ASSERT(!m_iconvDecoder); - - m_iconvDecoder = adoptPlatformRef(g_charset_converter_new(internalEncodingName, m_encoding.name(), 0)); -} - -void TextCodecGtk::createIConvEncoder() const -{ - ASSERT(!m_iconvEncoder); - - m_iconvEncoder = adoptPlatformRef(g_charset_converter_new(m_encoding.name(), internalEncodingName, 0)); -} - -String TextCodecGtk::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) -{ - // Get a converter for the passed-in encoding. - if (!m_iconvDecoder) - createIConvDecoder(); - if (!m_iconvDecoder) { - LOG_ERROR("Error creating IConv encoder even though encoding was in table."); - return String(); - } - - Vector<UChar> result; - - gsize bytesRead = 0; - gsize bytesWritten = 0; - const gchar* input = bytes; - gsize inputLength = length; - gchar buffer[ConversionBufferSize]; - int flags = !length ? 
G_CONVERTER_INPUT_AT_END : G_CONVERTER_NO_FLAGS; - if (flush) - flags |= G_CONVERTER_FLUSH; - - bool bufferWasFull = false; - char* prefixedBytes = 0; - - if (m_numBufferedBytes) { - inputLength = length + m_numBufferedBytes; - prefixedBytes = static_cast<char*>(fastMalloc(inputLength)); - memcpy(prefixedBytes, m_bufferedBytes, m_numBufferedBytes); - memcpy(prefixedBytes + m_numBufferedBytes, bytes, length); - - input = prefixedBytes; - - // all buffered bytes are consumed now - m_numBufferedBytes = 0; - } - - do { - GOwnPtr<GError> error; - GConverterResult res = g_converter_convert(G_CONVERTER(m_iconvDecoder.get()), - input, inputLength, - buffer, sizeof(buffer), - static_cast<GConverterFlags>(flags), - &bytesRead, &bytesWritten, - &error.outPtr()); - input += bytesRead; - inputLength -= bytesRead; - - if (res == G_CONVERTER_ERROR) { - if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT)) { - // There is not enough input to fully determine what the conversion should produce, - // save it to a buffer to prepend it to the next input. - memcpy(m_bufferedBytes, input, inputLength); - m_numBufferedBytes = inputLength; - inputLength = 0; - } else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_NO_SPACE)) - bufferWasFull = true; - else if (g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) { - if (stopOnError) - sawError = true; - if (inputLength) { - // Ignore invalid character. - input += 1; - inputLength -= 1; - } - } else { - sawError = true; - LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message); - m_numBufferedBytes = 0; // Reset state for subsequent calls to decode. 
- fastFree(prefixedBytes); - return String(); - } - } - - result.append(reinterpret_cast<UChar*>(buffer), bytesWritten / sizeof(UChar)); - } while ((inputLength || bufferWasFull) && !sawError); - - fastFree(prefixedBytes); - - return String::adopt(result); -} - -CString TextCodecGtk::encode(const UChar* characters, size_t length, UnencodableHandling handling) -{ - if (!length) - return ""; - - if (!m_iconvEncoder) - createIConvEncoder(); - if (!m_iconvEncoder) { - LOG_ERROR("Error creating IConv encoder even though encoding was in table."); - return CString(); - } - - gsize bytesRead = 0; - gsize bytesWritten = 0; - const gchar* input = reinterpret_cast<const char*>(characters); - gsize inputLength = length * sizeof(UChar); - gchar buffer[ConversionBufferSize]; - Vector<char> result; - GOwnPtr<GError> error; - - size_t size = 0; - do { - g_converter_convert(G_CONVERTER(m_iconvEncoder.get()), - input, inputLength, - buffer, sizeof(buffer), - G_CONVERTER_INPUT_AT_END, - &bytesRead, &bytesWritten, - &error.outPtr()); - input += bytesRead; - inputLength -= bytesRead; - if (bytesWritten > 0) { - result.grow(size + bytesWritten); - memcpy(result.data() + size, buffer, bytesWritten); - size += bytesWritten; - } - - if (error && g_error_matches(error.get(), G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) { - UChar codePoint = reinterpret_cast<const UChar*>(input)[0]; - UnencodableReplacementArray replacement; - int replacementLength = TextCodec::getUnencodableReplacement(codePoint, handling, replacement); - - // Consume the invalid character. - input += sizeof(UChar); - inputLength -= sizeof(UChar); - - // Append replacement string to result buffer. 
- result.grow(size + replacementLength); - memcpy(result.data() + size, replacement, replacementLength); - size += replacementLength; - - error.clear(); - } - } while (inputLength && !error.get()); - - if (error) { - LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", error->code, error->message); - return CString(); - } - - return CString(result.data(), size); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/gtk/TextCodecGtk.h b/WebCore/platform/text/gtk/TextCodecGtk.h deleted file mode 100644 index 1fb8df9..0000000 --- a/WebCore/platform/text/gtk/TextCodecGtk.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved. - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> - * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TextCodecGTK_h -#define TextCodecGTK_h - -#include "GRefPtr.h" -#include <glib.h> -#include "TextCodec.h" -#include "TextEncoding.h" - -namespace WebCore { - - class TextCodecGtk : public TextCodec { - public: - static void registerBaseEncodingNames(EncodingNameRegistrar); - static void registerBaseCodecs(TextCodecRegistrar); - - static void registerExtendedEncodingNames(EncodingNameRegistrar); - static void registerExtendedCodecs(TextCodecRegistrar); - - TextCodecGtk(const TextEncoding&); - virtual ~TextCodecGtk(); - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); - - private: - void createIConvDecoder() const; - void createIConvEncoder() const; - - TextEncoding m_encoding; - size_t m_numBufferedBytes; - unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character - mutable PlatformRefPtr<GCharsetConverter> m_iconvDecoder; - mutable PlatformRefPtr<GCharsetConverter> m_iconvEncoder; - }; - -} // namespace WebCore - -#endif // TextCodecGTK_h diff --git a/WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp b/WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp deleted file mode 100644 index 8bb8c70..0000000 --- a/WebCore/platform/text/haiku/TextBreakIteratorInternalICUHaiku.cpp +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2007 Apple Inc. All rights reserved. 
- * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. - * - */ - -#include "config.h" -#include "TextBreakIteratorInternalICU.h" - -#include "NotImplemented.h" - - -namespace WebCore { - -const char* currentSearchLocaleID() -{ - notImplemented(); - return ""; -} - -const char* currentTextBreakLocaleID() -{ - notImplemented(); - return "en_us"; -} - -} // namespace WebCore - diff --git a/WebCore/platform/text/mac/CharsetData.h b/WebCore/platform/text/mac/CharsetData.h deleted file mode 100644 index 458cecb..0000000 --- a/WebCore/platform/text/mac/CharsetData.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. 
``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -namespace WebCore { - - #define kTextEncodingISOLatinThai kCFStringEncodingISOLatinThai - - struct CharsetEntry { - const char* name; - ::TextEncoding encoding; - }; - - extern const CharsetEntry CharsetTable[]; - -} diff --git a/WebCore/platform/text/mac/HyphenationMac.mm b/WebCore/platform/text/mac/HyphenationMac.mm deleted file mode 100644 index d5c9283..0000000 --- a/WebCore/platform/text/mac/HyphenationMac.mm +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (C) 2010 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#import "config.h" -#import "Hyphenation.h" - -#if defined(BUILDING_ON_TIGER) || defined(BUILDING_ON_LEOPARD) || defined(BUILDING_ON_SNOW_LEOPARD) - -#import "AtomicStringKeyedMRUCache.h" -#import "TextBreakIteratorInternalICU.h" -#import "WebCoreSystemInterface.h" -#import <wtf/RetainPtr.h> - -namespace WebCore { - -template<> -bool AtomicStringKeyedMRUCache<bool>::createValueForNullKey() -{ - return !strcmp(currentSearchLocaleID(), "en"); -} - -template<> -bool AtomicStringKeyedMRUCache<bool>::createValueForKey(const AtomicString& localeIdentifier) -{ - RetainPtr<CFStringRef> cfLocaleIdentifier(AdoptCF, localeIdentifier.createCFString()); - RetainPtr<CFDictionaryRef> components(AdoptCF, CFLocaleCreateComponentsFromLocaleIdentifier(kCFAllocatorDefault, cfLocaleIdentifier.get())); - CFStringRef language = reinterpret_cast<CFStringRef>(CFDictionaryGetValue(components.get(), kCFLocaleLanguageCode)); - static CFStringRef englishLanguage = CFSTR("en"); - return language && CFEqual(language, englishLanguage); -} - -bool canHyphenate(const AtomicString& localeIdentifier) -{ - DEFINE_STATIC_LOCAL(AtomicStringKeyedMRUCache<bool>, isEnglishCache, ()); - return isEnglishCache.get(localeIdentifier); -} - -size_t lastHyphenLocation(const 
UChar* characters, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier) -{ - ASSERT_UNUSED(localeIdentifier, canHyphenate(localeIdentifier)); - - RetainPtr<CFStringRef> string(AdoptCF, CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, characters, length, kCFAllocatorNull)); - return wkGetHyphenationLocationBeforeIndex(string.get(), beforeIndex); -} - -} // namespace WebCore - -#endif // defined(BUILDING_ON_TIGER) || defined(BUILDING_ON_LEOPARD) || defined(BUILDING_ON_SNOW_LEOPARD) diff --git a/WebCore/platform/text/mac/ShapeArabic.c b/WebCore/platform/text/mac/ShapeArabic.c deleted file mode 100644 index dd61ce5..0000000 --- a/WebCore/platform/text/mac/ShapeArabic.c +++ /dev/null @@ -1,556 +0,0 @@ -/* -****************************************************************************** -* -* Copyright (C) 2000-2004, International Business Machines -* Corporation and others. All Rights Reserved. -* Copyright (C) 2007 Apple Inc. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a copy of this -* software and associated documentation files (the "Software"), to deal in the Software -* without restriction, including without limitation the rights to use, copy, modify, -* merge, publish, distribute, and/or sell copies of the Software, and to permit persons -* to whom the Software is furnished to do so, provided that the above copyright notice(s) -* and this permission notice appear in all copies of the Software and that both the above -* copyright notice(s) and this permission notice appear in supporting documentation. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -* PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER -* OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR -* CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR -* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING -* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -* -* Except as contained in this notice, the name of a copyright holder shall not be used in -* advertising or otherwise to promote the sale, use or other dealings in this Software -* without prior written authorization of the copyright holder. -* -****************************************************************************** -* -* Arabic letter shaping implemented by Ayman Roshdy -*/ - -#include "config.h" - -#if USE(ATSUI) - -#include "ShapeArabic.h" - -#include <stdbool.h> -#include <string.h> -#include <unicode/utypes.h> -#include <unicode/uchar.h> -#include <unicode/ustring.h> -#include <unicode/ushape.h> -#include <wtf/Assertions.h> - -/* - * ### TODO in general for letter shaping: - * - the letter shaping code is UTF-16-unaware; needs update - * + especially invertBuffer()?! 
- * - needs to handle the "Arabic Tail" that is used in some legacy codepages - * as a glyph fragment of wide-glyph letters - * + IBM Unicode conversion tables map it to U+200B (ZWSP) - * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms - */ - -/* definitions for Arabic letter shaping ------------------------------------ */ - -#define IRRELEVANT 4 -#define LAMTYPE 16 -#define ALEFTYPE 32 -#define LINKR 1 -#define LINKL 2 - -static const UChar IrrelevantPos[] = { - 0x0, 0x2, 0x4, 0x6, - 0x8, 0xA, 0xC, 0xE, -}; - -static const UChar araLink[178]= -{ - 1 + 32 + 256 * 0x11,/*0x0622*/ - 1 + 32 + 256 * 0x13,/*0x0623*/ - 1 + 256 * 0x15,/*0x0624*/ - 1 + 32 + 256 * 0x17,/*0x0625*/ - 1 + 2 + 256 * 0x19,/*0x0626*/ - 1 + 32 + 256 * 0x1D,/*0x0627*/ - 1 + 2 + 256 * 0x1F,/*0x0628*/ - 1 + 256 * 0x23,/*0x0629*/ - 1 + 2 + 256 * 0x25,/*0x062A*/ - 1 + 2 + 256 * 0x29,/*0x062B*/ - 1 + 2 + 256 * 0x2D,/*0x062C*/ - 1 + 2 + 256 * 0x31,/*0x062D*/ - 1 + 2 + 256 * 0x35,/*0x062E*/ - 1 + 256 * 0x39,/*0x062F*/ - 1 + 256 * 0x3B,/*0x0630*/ - 1 + 256 * 0x3D,/*0x0631*/ - 1 + 256 * 0x3F,/*0x0632*/ - 1 + 2 + 256 * 0x41,/*0x0633*/ - 1 + 2 + 256 * 0x45,/*0x0634*/ - 1 + 2 + 256 * 0x49,/*0x0635*/ - 1 + 2 + 256 * 0x4D,/*0x0636*/ - 1 + 2 + 256 * 0x51,/*0x0637*/ - 1 + 2 + 256 * 0x55,/*0x0638*/ - 1 + 2 + 256 * 0x59,/*0x0639*/ - 1 + 2 + 256 * 0x5D,/*0x063A*/ - 0, 0, 0, 0, 0, /*0x063B-0x063F*/ - 1 + 2, /*0x0640*/ - 1 + 2 + 256 * 0x61,/*0x0641*/ - 1 + 2 + 256 * 0x65,/*0x0642*/ - 1 + 2 + 256 * 0x69,/*0x0643*/ - 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/ - 1 + 2 + 256 * 0x71,/*0x0645*/ - 1 + 2 + 256 * 0x75,/*0x0646*/ - 1 + 2 + 256 * 0x79,/*0x0647*/ - 1 + 256 * 0x7D,/*0x0648*/ - 1 + 256 * 0x7F,/*0x0649*/ - 1 + 2 + 256 * 0x81,/*0x064A*/ - 4, 4, 4, 4, /*0x064B-0x064E*/ - 4, 4, 4, 4, /*0x064F-0x0652*/ - 4, 4, 4, 0, 0, /*0x0653-0x0657*/ - 0, 0, 0, 0, /*0x0658-0x065B*/ - 1 + 256 * 0x85,/*0x065C*/ - 1 + 256 * 0x87,/*0x065D*/ - 1 + 256 * 0x89,/*0x065E*/ - 1 + 256 * 0x8B,/*0x065F*/ - 0, 0, 0, 
0, 0, /*0x0660-0x0664*/ - 0, 0, 0, 0, 0, /*0x0665-0x0669*/ - 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ - 4, /*0x0670*/ - 0, /*0x0671*/ - 1 + 32, /*0x0672*/ - 1 + 32, /*0x0673*/ - 0, /*0x0674*/ - 1 + 32, /*0x0675*/ - 1, 1, /*0x0676-0x0677*/ - 1+2, /*0x0678*/ - 1+2 + 256 * 0x16,/*0x0679*/ - 1+2 + 256 * 0x0E,/*0x067A*/ - 1+2 + 256 * 0x02,/*0x067B*/ - 1+2, 1+2, /*0x067C-0x067D*/ - 1+2 + 256 * 0x06,/*0x067E*/ - 1+2 + 256 * 0x12,/*0x067F*/ - 1+2 + 256 * 0x0A,/*0x0680*/ - 1+2, 1+2, /*0x0681-0x0682*/ - 1+2 + 256 * 0x26,/*0x0683*/ - 1+2 + 256 * 0x22,/*0x0684*/ - 1+2, /*0x0685*/ - 1+2 + 256 * 0x2A,/*0x0686*/ - 1+2 + 256 * 0x2E,/*0x0687*/ - 1 + 256 * 0x38,/*0x0688*/ - 1, 1, 1, /*0x0689-0x068B*/ - 1 + 256 * 0x34,/*0x068C*/ - 1 + 256 * 0x32,/*0x068D*/ - 1 + 256 * 0x36,/*0x068E*/ - 1, 1, /*0x068F-0x0690*/ - 1 + 256 * 0x3C,/*0x0691*/ - 1, 1, 1, 1, 1, 1, /*0x0692-0x0697*/ - 1 + 256 * 0x3A,/*0x0698*/ - 1, /*0x0699*/ - 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x069F*/ - 1+2, 1+2, 1+2, 1+2, /*0x06A0-0x06A3*/ - 1+2 + 256 * 0x2E,/*0x06A4*/ - 1+2, /*0x06A5*/ - 1+2 + 256 * 0x1E,/*0x06A6*/ - 1+2, 1+2, /*0x06A7-0x06A8*/ - 1+2 + 256 * 0x3E,/*0x06A9*/ - 1+2, 1+2, 1+2, /*0x06AA-0x06AC*/ - 1+2 + 256 * 0x83,/*0x06AD*/ - 1+2, /*0x06AE*/ - 1+2 + 256 * 0x42,/*0x06AF*/ - 1+2, /*0x06B0*/ - 1+2 + 256 * 0x4A,/*0x06B1*/ - 1+2, /*0x06B2*/ - 1+2 + 256 * 0x46,/*0x06B3*/ - 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B4-0x06B9*/ - 1+2, /*0x06BA*/ // FIXME: Seems to have a final form - 1+2 + 256 * 0x50,/*0x06BB*/ - 1+2, 1+2, /*0x06BC-0x06BD*/ - 1+2 + 256 * 0x5A,/*0x06BE*/ - 1+2, /*0x06BF*/ - 1, /*0x06C0*/ - 1+2 + 256 * 0x56,/*0x06C1*/ - 1+2, /*0x06C2*/ - 1, 1, /*0x06C3-0x06C4*/ - 1 + 256 * 0x90,/*0x06C5*/ - 1 + 256 * 0x89,/*0x06C6*/ - 1 + 256 * 0x87,/*0x06C7*/ - 1 + 256 * 0x8B,/*0x06C8*/ - 1 + 256 * 0x92,/*0x06C9*/ - 1, /*0x06CA*/ - 1 + 256 * 0x8E,/*0x06CB*/ - 1+2 + 256 * 0xAC,/*0x06CC*/ - 1, /*0x06CD*/ - 1+2, /*0x06CE*/ - 1, /*0x06CF*/ - 1+2 + 256 * 0x94,/*0x06D0*/ - 1+2, /*0x06D1*/ - 1 + 256 * 0x5E,/*0x06D2*/ - 1 + 256 * 
0x60 /*0x06D3*/ -}; - -static const UChar presLink[141]= -{ - 1 + 2, /*0xFE70*/ - 1 + 2, /*0xFE71*/ - 1 + 2, 0, 1+ 2, 0, 1+ 2, /*0xFE72-0xFE76*/ - 1 + 2, /*0xFE77*/ - 1+ 2, 1 + 2, 1+2, 1 + 2, /*0xFE78-0xFE81*/ - 1+ 2, 1 + 2, 1+2, 1 + 2, /*0xFE82-0xFE85*/ - 0, 0 + 32, 1 + 32, 0 + 32, /*0xFE86-0xFE89*/ - 1 + 32, 0, 1, 0 + 32, /*0xFE8A-0xFE8D*/ - 1 + 32, 0, 2, 1 + 2, /*0xFE8E-0xFE91*/ - 1, 0 + 32, 1 + 32, 0, /*0xFE92-0xFE95*/ - 2, 1 + 2, 1, 0, /*0xFE96-0xFE99*/ - 1, 0, 2, 1 + 2, /*0xFE9A-0xFE9D*/ - 1, 0, 2, 1 + 2, /*0xFE9E-0xFEA1*/ - 1, 0, 2, 1 + 2, /*0xFEA2-0xFEA5*/ - 1, 0, 2, 1 + 2, /*0xFEA6-0xFEA9*/ - 1, 0, 2, 1 + 2, /*0xFEAA-0xFEAD*/ - 1, 0, 1, 0, /*0xFEAE-0xFEB1*/ - 1, 0, 1, 0, /*0xFEB2-0xFEB5*/ - 1, 0, 2, 1+2, /*0xFEB6-0xFEB9*/ - 1, 0, 2, 1+2, /*0xFEBA-0xFEBD*/ - 1, 0, 2, 1+2, /*0xFEBE-0xFEC1*/ - 1, 0, 2, 1+2, /*0xFEC2-0xFEC5*/ - 1, 0, 2, 1+2, /*0xFEC6-0xFEC9*/ - 1, 0, 2, 1+2, /*0xFECA-0xFECD*/ - 1, 0, 2, 1+2, /*0xFECE-0xFED1*/ - 1, 0, 2, 1+2, /*0xFED2-0xFED5*/ - 1, 0, 2, 1+2, /*0xFED6-0xFED9*/ - 1, 0, 2, 1+2, /*0xFEDA-0xFEDD*/ - 1, 0, 2, 1+2, /*0xFEDE-0xFEE1*/ - 1, 0 + 16, 2 + 16, 1 + 2 +16, /*0xFEE2-0xFEE5*/ - 1 + 16, 0, 2, 1+2, /*0xFEE6-0xFEE9*/ - 1, 0, 2, 1+2, /*0xFEEA-0xFEED*/ - 1, 0, 2, 1+2, /*0xFEEE-0xFEF1*/ - 1, 0, 1, 0, /*0xFEF2-0xFEF5*/ - 1, 0, 2, 1+2, /*0xFEF6-0xFEF9*/ - 1, 0, 1, 0, /*0xFEFA-0xFEFD*/ - 1, 0, 1, 0, - 1 -}; - -static const UChar convertFEto06[] = -{ -/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ -/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652, -/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628, -/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, -/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 
0x631, 0x632, -/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636, -/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A, -/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644, -/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649, -/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F -}; - -static const UChar shapeTable[4][4][4]= -{ - { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} }, - { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }, - { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} }, - { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} } -}; - -/* - *Name : changeLamAlef - *Function : Converts the Alef characters into an equivalent - * LamAlef location in the 0x06xx Range, this is an - * intermediate stage in the operation of the program - * later it'll be converted into the 0xFExx LamAlefs - * in the shaping function. 
- */ -static UChar -changeLamAlef(UChar ch) { - - switch(ch) { - case 0x0622 : - return(0x065C); - break; - case 0x0623 : - return(0x065D); - break; - case 0x0625 : - return(0x065E); - break; - case 0x0627 : - return(0x065F); - break; - default : - return(0); - break; - } -} - -/* - *Name : specialChar - *Function : Special Arabic characters need special handling in the shapeUnicode - * function, this function returns 1 or 2 for these special characters - */ -static int32_t -specialChar(UChar ch) { - - if( (ch>0x0621 && ch<0x0626)||(ch==0x0627)||(ch>0x062e && ch<0x0633)|| - (ch>0x0647 && ch<0x064a)||(ch==0x0629) ) { - return (1); - } - else - if( ch>=0x064B && ch<= 0x0652 ) - return (2); - else - if( (ch>=0x0653 && ch<= 0x0655) || ch == 0x0670 || - (ch>=0xFE70 && ch<= 0xFE7F) ) - return (3); - else - return (0); -} - -/* - *Name : getLink - *Function : Resolves the link between the characters as - * Arabic characters have four forms : - * Isolated, Initial, Middle and Final Form - */ -static UChar -getLink(UChar ch) { - - if(ch >= 0x0622 && ch <= 0x06D3) { - return(araLink[ch-0x0622]); - } else if(ch == 0x200D) { - return(3); - } else if(ch >= 0x206D && ch <= 0x206F) { - return(4); - } else if(ch >= 0xFE70 && ch <= 0xFEFC) { - return(presLink[ch-0xFE70]); - } else { - return(0); - } -} - -/* - *Name : isTashkeelChar - *Function : Returns 1 for Tashkeel characters else return 0 - */ -static int32_t -isTashkeelChar(UChar ch) { - - if( ch>=0x064B && ch<= 0x0652 ) - return (1); - else - return (0); -} - -/* - *Name : shapeUnicode - *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped - * arabic Unicode buffer in FExx Range - */ -static int32_t -shapeUnicode(UChar *dest, int32_t sourceLength, - int32_t destSize, - int tashkeelFlag) { - - int32_t i, iend; - int32_t prevPos, lastPos,Nx, Nw; - unsigned int Shape; - int32_t flag; - int32_t lamalef_found = 0; - UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0; - UChar wLamalef; - - /* - * 
Converts the input buffer from FExx Range into 06xx Range - * to make sure that all characters are in the 06xx range - * even the lamalef is converted to the special region in - * the 06xx range - */ - for (i = 0; i < sourceLength; i++) { - UChar inputChar = dest[i]; - if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) { - dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ; - } - } - - /* sets the index to the end of the buffer, together with the step point to -1 */ - i = 0; - iend = sourceLength; - - /* - * This function resolves the link between the characters . - * Arabic characters have four forms : - * Isolated Form, Initial Form, Middle Form and Final Form - */ - currLink = getLink(dest[i]); - - prevPos = i; - lastPos = i; - Nx = sourceLength + 2, Nw = 0; - - while (i != iend) { - /* If high byte of currLink > 0 then more than one shape */ - if ((currLink & 0xFF00) > 0 || isTashkeelChar(dest[i])) { - Nw = i + 1; - while (Nx >= sourceLength) { /* we need to know about next char */ - if(Nw == iend) { - nextLink = 0; - Nx = -1; - } else { - nextLink = getLink(dest[Nw]); - if((nextLink & IRRELEVANT) == 0) { - Nx = Nw; - } else { - Nw = Nw + 1; - } - } - } - - if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) { - lamalef_found = 1; - wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */ - if ( wLamalef != 0) { - dest[i] = ' '; /* The default case is to drop the Alef and replace */ - dest[lastPos] =wLamalef; /* it by a space. */ - i=lastPos; - } - lastLink = prevLink; - currLink = getLink(wLamalef); - } - /* - * get the proper shape according to link ability of neighbors - * and of character; depends on the order of the shapes - * (isolated, initial, middle, final) in the compatibility area - */ - flag = specialChar(dest[i]); - - Shape = shapeTable[nextLink & (LINKR + LINKL)] - [lastLink & (LINKR + LINKL)] - [currLink & (LINKR + LINKL)]; - - if (flag == 1) { - Shape = (Shape == 1 || Shape == 3) ? 
1 : 0; - } - else - if(flag == 2) { - if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) && - dest[i] != 0x064C && dest[i] != 0x064D ) { - Shape = 1; - if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) - Shape = 0; - } - else { - Shape = 0; - } - } - - if(flag == 2) { - dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape; - } - else - dest[i] = (UChar)((dest[i] < 0x0670 ? 0xFE70 : 0xFB50) + (currLink >> 8) + Shape); - } - - /* move one notch forward */ - if ((currLink & IRRELEVANT) == 0) { - prevLink = lastLink; - lastLink = currLink; - prevPos = lastPos; - lastPos = i; - } - - i++; - if (i == Nx) { - currLink = nextLink; - Nx = sourceLength + 2; - } - else if(i != iend) { - currLink = getLink(dest[i]); - } - } - - destSize = sourceLength; - - return destSize; -} - -int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode) { - int32_t destLength; - - /* usual error checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */ - if( source==NULL || sourceLength<-1 || - (dest==NULL && destCapacity!=0) || destCapacity<0 || - options>=U_SHAPE_DIGIT_TYPE_RESERVED || - (options&U_SHAPE_DIGITS_MASK)>=U_SHAPE_DIGITS_RESERVED - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* determine the source length */ - if(sourceLength==-1) { - sourceLength=u_strlen(source); - } - if(sourceLength==0) { - return 0; - } - - /* check that source and destination do not overlap */ - if( dest!=NULL && - ((source<=dest && dest<source+sourceLength) || - (dest<=source && source<dest+destCapacity)) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) { - int32_t outputSize = sourceLength; - - /* calculate destination size */ - /* TODO: do we ever need to do this pure 
preflighting? */ - ASSERT((options&U_SHAPE_LENGTH_MASK) != U_SHAPE_LENGTH_GROW_SHRINK); - - if(outputSize>destCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return outputSize; - } - - /* Start of Arabic letter shaping part */ - memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR); - - ASSERT((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL); - - switch(options&U_SHAPE_LETTERS_MASK) { - case U_SHAPE_LETTERS_SHAPE : - /* Call the shaping function with tashkeel flag == 1 */ - destLength = shapeUnicode(dest,sourceLength,destCapacity,1); - break; - case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED : - /* Call the shaping function with tashkeel flag == 0 */ - destLength = shapeUnicode(dest,sourceLength,destCapacity,0); - break; - case U_SHAPE_LETTERS_UNSHAPE : - ASSERT_NOT_REACHED(); - break; - default : - /* will never occur because of validity checks above */ - destLength = 0; - break; - } - - /* End of Arabic letter shaping part */ - } else - ASSERT_NOT_REACHED(); - - ASSERT((options & U_SHAPE_DIGITS_MASK) == U_SHAPE_DIGITS_NOOP); - - return sourceLength; -} - -#endif // USE(ATSUI) diff --git a/WebCore/platform/text/mac/ShapeArabic.h b/WebCore/platform/text/mac/ShapeArabic.h deleted file mode 100644 index 8aa577d..0000000 --- a/WebCore/platform/text/mac/ShapeArabic.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2007 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ShapeArabic_h -#define ShapeArabic_h - -#if USE(ATSUI) - -#include <unicode/ushape.h> - -#ifdef __cplusplus -extern "C" { -#endif - -int32_t shapeArabic(const UChar *source, int32_t sourceLength, UChar *dest, int32_t destCapacity, uint32_t options, UErrorCode *pErrorCode); - -#ifdef __cplusplus -} -#endif - -#endif // USE(ATSUI) -#endif // ShapeArabic_h diff --git a/WebCore/platform/text/mac/StringImplMac.mm b/WebCore/platform/text/mac/StringImplMac.mm deleted file mode 100644 index 6f5e953..0000000 --- a/WebCore/platform/text/mac/StringImplMac.mm +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 2006, 2009 Apple Inc. All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. 
- * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#include "config.h" -#include <wtf/text/StringImpl.h> - -#include "FoundationExtras.h" - -namespace WTF { - -StringImpl::operator NSString *() -{ - return HardAutorelease(createCFString()); -} - -} diff --git a/WebCore/platform/text/mac/StringMac.mm b/WebCore/platform/text/mac/StringMac.mm deleted file mode 100644 index 7e98b2b..0000000 --- a/WebCore/platform/text/mac/StringMac.mm +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Copyright (C) 2006 Apple Computer, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - -#include "config.h" -#include "PlatformString.h" -#include <CoreFoundation/CFString.h> - -namespace WTF { - -String::String(NSString* str) -{ - if (!str) - return; - - CFIndex size = CFStringGetLength(reinterpret_cast<CFStringRef>(str)); - if (size == 0) - m_impl = StringImpl::empty(); - else { - Vector<UChar, 1024> buffer(size); - CFStringGetCharacters(reinterpret_cast<CFStringRef>(str), CFRangeMake(0, size), buffer.data()); - m_impl = StringImpl::create(buffer.data(), size); - } -} - -} diff --git a/WebCore/platform/text/mac/TextBoundaries.mm b/WebCore/platform/text/mac/TextBoundaries.mm deleted file mode 100644 index bd7ddf8..0000000 --- a/WebCore/platform/text/mac/TextBoundaries.mm +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#import "config.h" -#import "TextBoundaries.h" - -using namespace WTF::Unicode; - -namespace WebCore { - -void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end) -{ - NSString* string = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars) - length:len freeWhenDone:NO]; - NSAttributedString* attr = [[NSAttributedString alloc] initWithString:string]; - NSRange range = [attr doubleClickAtIndex:(position >= len) ? len - 1 : position]; - [attr release]; - [string release]; - *start = range.location; - *end = range.location + range.length; -} - -int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward) -{ - NSString* string = [[NSString alloc] initWithCharactersNoCopy:const_cast<unichar*>(chars) - length:len freeWhenDone:NO]; - NSAttributedString* attr = [[NSAttributedString alloc] initWithString:string]; - int result = [attr nextWordFromIndex:position forward:forward]; - [attr release]; - [string release]; - return result; -} - -} diff --git a/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm b/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm deleted file mode 100644 index 6af5616..0000000 --- a/WebCore/platform/text/mac/TextBreakIteratorInternalICUMac.mm +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2007, 2009 Apple Inc. All rights reserved. 
- * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#include "config.h" -#include "TextBreakIteratorInternalICU.h" - -#include <wtf/RetainPtr.h> - -namespace WebCore { - -static const int maxLocaleStringLength = 32; - -static inline RetainPtr<CFStringRef> textBreakLocalePreference() -{ - RetainPtr<CFPropertyListRef> locale(AdoptCF, CFPreferencesCopyValue(CFSTR("AppleTextBreakLocale"), - kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost)); - if (!locale || CFGetTypeID(locale.get()) != CFStringGetTypeID()) - return 0; - return static_cast<CFStringRef>(locale.get()); -} - -static RetainPtr<CFStringRef> topLanguagePreference() -{ - NSArray *languagesArray = [[NSUserDefaults standardUserDefaults] arrayForKey:@"AppleLanguages"]; - if (!languagesArray) - return 0; - if ([languagesArray count] < 1) - return 0; - NSString *value = [languagesArray objectAtIndex:0]; - if (![value isKindOfClass:[NSString class]]) - return 0; - return reinterpret_cast<CFStringRef>(value); -} - -static RetainPtr<CFStringRef> canonicalLanguageIdentifier(CFStringRef locale) -{ - if (!locale) - return 0; - RetainPtr<CFStringRef> canonicalLocale(AdoptCF, - CFLocaleCreateCanonicalLanguageIdentifierFromString(kCFAllocatorDefault, locale)); 
- if (!canonicalLocale) - return locale; - return canonicalLocale; -} - -static void getLocale(CFStringRef locale, char localeStringBuffer[maxLocaleStringLength]) -{ - // Empty string means "root locale", and that is what we use if we can't get a preference. - localeStringBuffer[0] = 0; - if (!locale) - return; - CFStringGetCString(locale, localeStringBuffer, maxLocaleStringLength, kCFStringEncodingASCII); -} - -static void getSearchLocale(char localeStringBuffer[maxLocaleStringLength]) -{ - getLocale(canonicalLanguageIdentifier(topLanguagePreference().get()).get(), localeStringBuffer); -} - -const char* currentSearchLocaleID() -{ - static char localeStringBuffer[maxLocaleStringLength]; - static bool gotSearchLocale = false; - if (!gotSearchLocale) { - getSearchLocale(localeStringBuffer); - gotSearchLocale = true; - } - return localeStringBuffer; -} - -static void getTextBreakLocale(char localeStringBuffer[maxLocaleStringLength]) -{ - // If there is no text break locale, use the top language preference. - RetainPtr<CFStringRef> locale = textBreakLocalePreference(); - if (!locale) - locale = topLanguagePreference(); - getLocale(canonicalLanguageIdentifier(locale.get()).get(), localeStringBuffer); -} - -const char* currentTextBreakLocaleID() -{ - static char localeStringBuffer[maxLocaleStringLength]; - static bool gotTextBreakLocale = false; - if (!gotTextBreakLocale) { - getTextBreakLocale(localeStringBuffer); - gotTextBreakLocale = true; - } - return localeStringBuffer; -} - -} diff --git a/WebCore/platform/text/mac/TextCodecMac.cpp b/WebCore/platform/text/mac/TextCodecMac.cpp deleted file mode 100644 index b743f3d..0000000 --- a/WebCore/platform/text/mac/TextCodecMac.cpp +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved. 
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextCodecMac.h" - -#include "CharacterNames.h" -#include "CharsetData.h" -#include "PlatformString.h" -#include "ThreadGlobalData.h" -#include <wtf/Assertions.h> -#include <wtf/text/CString.h> -#include <wtf/PassOwnPtr.h> -#include <wtf/RetainPtr.h> -#include <wtf/Threading.h> - -using namespace std; - -namespace WebCore { - -// We need to keep this because ICU doesn't support some of the encodings that we need: -// <http://bugs.webkit.org/show_bug.cgi?id=4195>. 
- -const size_t ConversionBufferSize = 16384; - -static TECConverterWrapper& cachedConverterTEC() -{ - return threadGlobalData().cachedConverterTEC(); -} - -void TextCodecMac::registerEncodingNames(EncodingNameRegistrar registrar) -{ - TECTextEncodingID lastEncoding = invalidEncoding; - const char* lastName = 0; - - for (size_t i = 0; CharsetTable[i].name; ++i) { - if (CharsetTable[i].encoding != lastEncoding) { - lastEncoding = CharsetTable[i].encoding; - lastName = CharsetTable[i].name; - } - registrar(CharsetTable[i].name, lastName); - } -} - -static PassOwnPtr<TextCodec> newTextCodecMac(const TextEncoding&, const void* additionalData) -{ - return new TextCodecMac(*static_cast<const TECTextEncodingID*>(additionalData)); -} - -void TextCodecMac::registerCodecs(TextCodecRegistrar registrar) -{ - TECTextEncodingID lastEncoding = invalidEncoding; - - for (size_t i = 0; CharsetTable[i].name; ++i) - if (CharsetTable[i].encoding != lastEncoding) { - registrar(CharsetTable[i].name, newTextCodecMac, &CharsetTable[i].encoding); - lastEncoding = CharsetTable[i].encoding; - } -} - -TextCodecMac::TextCodecMac(TECTextEncodingID encoding) - : m_encoding(encoding) - , m_numBufferedBytes(0) - , m_converterTEC(0) -{ -} - -TextCodecMac::~TextCodecMac() -{ - releaseTECConverter(); -} - -void TextCodecMac::releaseTECConverter() const -{ - if (m_converterTEC) { - TECConverterWrapper& cachedConverter = cachedConverterTEC(); - if (cachedConverter.converter) - TECDisposeConverter(cachedConverter.converter); - cachedConverter.converter = m_converterTEC; - cachedConverter.encoding = m_encoding; - m_converterTEC = 0; - } -} - -OSStatus TextCodecMac::createTECConverter() const -{ - TECConverterWrapper& cachedConverter = cachedConverterTEC(); - - bool cachedEncodingEqual = cachedConverter.encoding == m_encoding; - cachedConverter.encoding = invalidEncoding; - - if (cachedEncodingEqual && cachedConverter.converter) { - m_converterTEC = cachedConverter.converter; - cachedConverter.converter = 
0; - - TECClearConverterContextInfo(m_converterTEC); - } else { - OSStatus status = TECCreateConverter(&m_converterTEC, m_encoding, - CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat)); - if (status) - return status; - - TECSetBasicOptions(m_converterTEC, kUnicodeForceASCIIRangeMask); - } - - return noErr; -} - -OSStatus TextCodecMac::decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength, - void *outputBuffer, int outputBufferLength, int& outputLength) -{ - OSStatus status; - unsigned long bytesRead = 0; - unsigned long bytesWritten = 0; - - if (m_numBufferedBytes != 0) { - // Finish converting a partial character that's in our buffer. - - // First, fill the partial character buffer with as many bytes as are available. - ASSERT(m_numBufferedBytes < sizeof(m_bufferedBytes)); - const int spaceInBuffer = sizeof(m_bufferedBytes) - m_numBufferedBytes; - const int bytesToPutInBuffer = min(spaceInBuffer, inputBufferLength); - ASSERT(bytesToPutInBuffer != 0); - memcpy(m_bufferedBytes + m_numBufferedBytes, inputBuffer, bytesToPutInBuffer); - - // Now, do a conversion on the buffer. - status = TECConvertText(m_converterTEC, m_bufferedBytes, m_numBufferedBytes + bytesToPutInBuffer, &bytesRead, - reinterpret_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten); - ASSERT(bytesRead <= m_numBufferedBytes + bytesToPutInBuffer); - - if (status == kTECPartialCharErr && bytesRead == 0) { - // Handle the case where the partial character was not converted. - if (bytesToPutInBuffer >= spaceInBuffer) { - LOG_ERROR("TECConvertText gave a kTECPartialCharErr but read none of the %zu bytes in the buffer", sizeof(m_bufferedBytes)); - m_numBufferedBytes = 0; - status = kTECUnmappableElementErr; // should never happen, but use this error code - } else { - // Tell the caller we read all the source bytes and keep them in the buffer. 
- m_numBufferedBytes += bytesToPutInBuffer; - bytesRead = bytesToPutInBuffer; - status = noErr; - } - } else { - // We are done with the partial character buffer. - // Also, we have read some of the bytes from the main buffer. - if (bytesRead > m_numBufferedBytes) { - bytesRead -= m_numBufferedBytes; - } else { - LOG_ERROR("TECConvertText accepted some bytes it previously rejected with kTECPartialCharErr"); - bytesRead = 0; - } - m_numBufferedBytes = 0; - if (status == kTECPartialCharErr) { - // While there may be a partial character problem in the small buffer, - // we have to try again and not get confused and think there is a partial - // character problem in the large buffer. - status = noErr; - } - } - } else { - status = TECConvertText(m_converterTEC, inputBuffer, inputBufferLength, &bytesRead, - static_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten); - ASSERT(static_cast<int>(bytesRead) <= inputBufferLength); - } - - // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus. - if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0) - status = kTECOutputBufferFullStatus; - - inputLength = bytesRead; - outputLength = bytesWritten; - return status; -} - -String TextCodecMac::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) -{ - // Get a converter for the passed-in encoding. 
- if (!m_converterTEC && createTECConverter() != noErr) - return String(); - - Vector<UChar> result; - - const unsigned char* sourcePointer = reinterpret_cast<const unsigned char*>(bytes); - int sourceLength = length; - bool bufferWasFull = false; - UniChar buffer[ConversionBufferSize]; - - while ((sourceLength || bufferWasFull) && !sawError) { - int bytesRead = 0; - int bytesWritten = 0; - OSStatus status = decode(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten); - ASSERT(bytesRead <= sourceLength); - sourcePointer += bytesRead; - sourceLength -= bytesRead; - - switch (status) { - case noErr: - case kTECOutputBufferFullStatus: - break; - case kTextMalformedInputErr: - case kTextUndefinedElementErr: - // FIXME: Put FFFD character into the output string in this case? - TECClearConverterContextInfo(m_converterTEC); - if (stopOnError) { - sawError = true; - break; - } - if (sourceLength) { - sourcePointer += 1; - sourceLength -= 1; - } - break; - case kTECPartialCharErr: { - // Put the partial character into the buffer. 
- ASSERT(m_numBufferedBytes == 0); - const int bufferSize = sizeof(m_numBufferedBytes); - if (sourceLength < bufferSize) { - memcpy(m_bufferedBytes, sourcePointer, sourceLength); - m_numBufferedBytes = sourceLength; - } else { - LOG_ERROR("TECConvertText gave a kTECPartialCharErr, but left %u bytes in the buffer", sourceLength); - } - sourceLength = 0; - break; - } - default: - sawError = true; - return String(); - } - - ASSERT(!(bytesWritten % sizeof(UChar))); - result.append(buffer, bytesWritten / sizeof(UChar)); - - bufferWasFull = status == kTECOutputBufferFullStatus; - } - - if (flush) { - unsigned long bytesWritten = 0; - TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten); - ASSERT(!(bytesWritten % sizeof(UChar))); - result.append(buffer, bytesWritten / sizeof(UChar)); - } - - String resultString = String::adopt(result); - - // <rdar://problem/3225472> - // Simplified Chinese pages use the code A3A0 to mean "full-width space". - // But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice. - // To work around, just change all occurences of U+E5E5 to U+3000 (ideographic space). - if (m_encoding == kCFStringEncodingGB_18030_2000) - resultString.replace(0xE5E5, ideographicSpace); - - return resultString; -} - -CString TextCodecMac::encode(const UChar* characters, size_t length, UnencodableHandling handling) -{ - // FIXME: We should really use TEC here instead of CFString for consistency with the other direction. - - // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign. - // Encoding will change the yen sign back into a backslash. 
- String copy(characters, length); - copy.replace('\\', m_backslashAsCurrencySymbol); - RetainPtr<CFStringRef> cfs(AdoptCF, copy.createCFString()); - - CFIndex startPos = 0; - CFIndex charactersLeft = CFStringGetLength(cfs.get()); - Vector<char> result; - size_t size = 0; - UInt8 lossByte = handling == QuestionMarksForUnencodables ? '?' : 0; - while (charactersLeft > 0) { - CFRange range = CFRangeMake(startPos, charactersLeft); - CFIndex bufferLength; - CFStringGetBytes(cfs.get(), range, m_encoding, lossByte, false, NULL, 0x7FFFFFFF, &bufferLength); - - result.grow(size + bufferLength); - unsigned char* buffer = reinterpret_cast<unsigned char*>(result.data() + size); - CFIndex charactersConverted = CFStringGetBytes(cfs.get(), range, m_encoding, lossByte, false, buffer, bufferLength, &bufferLength); - size += bufferLength; - - if (charactersConverted != charactersLeft) { - unsigned badChar = CFStringGetCharacterAtIndex(cfs.get(), startPos + charactersConverted); - ++charactersConverted; - if ((badChar & 0xFC00) == 0xD800 && charactersConverted != charactersLeft) { // is high surrogate - UniChar low = CFStringGetCharacterAtIndex(cfs.get(), startPos + charactersConverted); - if ((low & 0xFC00) == 0xDC00) { // is low surrogate - badChar <<= 10; - badChar += low; - badChar += 0x10000 - (0xD800 << 10) - 0xDC00; - ++charactersConverted; - } - } - UnencodableReplacementArray entity; - int entityLength = getUnencodableReplacement(badChar, handling, entity); - result.grow(size + entityLength); - memcpy(result.data() + size, entity, entityLength); - size += entityLength; - } - - startPos += charactersConverted; - charactersLeft -= charactersConverted; - } - return CString(result.data(), size); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/mac/TextCodecMac.h b/WebCore/platform/text/mac/TextCodecMac.h deleted file mode 100644 index 3e7a237..0000000 --- a/WebCore/platform/text/mac/TextCodecMac.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2004, 2006 
Apple Computer, Inc. All rights reserved. - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef TextCodecMac_h -#define TextCodecMac_h - -#include "TextCodec.h" -#include <CoreServices/CoreServices.h> - -namespace WebCore { - - typedef ::TextEncoding TECTextEncodingID; - const TECTextEncodingID invalidEncoding = kCFStringEncodingInvalidId; - - class TextCodecMac : public TextCodec { - public: - static void registerEncodingNames(EncodingNameRegistrar); - static void registerCodecs(TextCodecRegistrar); - - explicit TextCodecMac(TECTextEncodingID); - virtual ~TextCodecMac(); - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); - - private: - OSStatus decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength, - void* outputBuffer, int outputBufferLength, int& outputLength); - - OSStatus createTECConverter() const; - void releaseTECConverter() const; - - TECTextEncodingID m_encoding; - UChar m_backslashAsCurrencySymbol; - unsigned m_numBufferedBytes; - unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character - mutable TECObjectRef m_converterTEC; - }; - - struct TECConverterWrapper { - TECConverterWrapper() : converter(0), encoding(invalidEncoding) { } - ~TECConverterWrapper() { if (converter) TECDisposeConverter(converter); } - - TECObjectRef converter; - TECTextEncodingID encoding; - }; - -} // namespace WebCore - -#endif // TextCodecMac_h diff --git a/WebCore/platform/text/mac/character-sets.txt b/WebCore/platform/text/mac/character-sets.txt deleted file mode 100644 index 475e78e..0000000 --- a/WebCore/platform/text/mac/character-sets.txt +++ /dev/null @@ -1,1868 +0,0 @@ - -=================================================================== -CHARACTER SETS - -(last updated 28 January 2005) - -These are the official names for character sets that may be used in -the Internet and may be referred to in Internet documentation. 
These -names are expressed in ANSI_X3.4-1968 which is commonly called -US-ASCII or simply ASCII. The character set most commonly used in the -Internet and used especially in protocol standards is US-ASCII; this -is strongly encouraged. The use of the name US-ASCII is also -encouraged. - -The character set names may be up to 40 characters taken from the -printable characters of US-ASCII. However, no distinction is made -between use of upper and lower case letters. - -The MIBenum value is a unique value for use in MIBs to identify coded -character sets. - -The value space for MIBenum values has been divided into three -regions. The first region (3-999) consists of coded character sets -that have been standardized by some standard setting organization. -This region is intended for standards that do not have subset -implementations. The second region (1000-1999) is for the Unicode and -ISO/IEC 10646 coded character sets together with a specification of a -(set of) sub-repertoires that may occur. The third region (>1999) is -intended for vendor specific coded character sets. - - Assigned MIB enum Numbers - ------------------------- - 0-2 Reserved - 3-999 Set By Standards Organizations - 1000-1999 Unicode / 10646 - 2000-2999 Vendor - -The aliases that start with "cs" have been added for use with the -IANA-CHARSET-MIB as originally defined in RFC3808, and as currently -maintained by IANA at http://www.iana.org/assignments/ianacharset-mib. -Note that the ianacharset-mib needs to be kept in sync with this -registry. These aliases that start with "cs" contain the standard -numbers along with suggestive names in order to facilitate applications -that want to display the names in user interfaces. The "cs" stands -for character set and is provided for applications that need a lower -case first letter but want to use mixed case thereafter that cannot -contain any special characters, such as underbar ("_") and dash ("-"). 
- -If the character set is from an ISO standard, its cs alias is the ISO -standard number or name. If the character set is not from an ISO -standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO -Registration Authority), the ISO Registry number is specified as -ISOnnn followed by letters suggestive of the name or standards number -of the code set. When a national or international standard is -revised, the year of revision is added to the cs alias of the new -character set entry in the IANA Registry in order to distinguish the -revised character set from the original character set. - - -Character Set Reference -------------- --------- - -Name: ANSI_X3.4-1968 [RFC1345,KXS2] -MIBenum: 3 -Source: ECMA registry -Alias: iso-ir-6 -Alias: ANSI_X3.4-1986 -Alias: ISO_646.irv:1991 -Alias: ASCII -Alias: ISO646-US -Alias: US-ASCII (preferred MIME name) -Alias: us -Alias: IBM367 -Alias: cp367 -Alias: csASCII - -Name: ISO-10646-UTF-1 -MIBenum: 27 -Source: Universal Transfer Format (1), this is the multibyte - encoding, that subsets ASCII-7. It does not have byte - ordering issues. 
-Alias: csISO10646UTF1 - -Name: ISO_646.basic:1983 [RFC1345,KXS2] -MIBenum: 28 -Source: ECMA registry -Alias: ref -Alias: csISO646basic1983 - -Name: INVARIANT [RFC1345,KXS2] -MIBenum: 29 -Alias: csINVARIANT - -Name: ISO_646.irv:1983 [RFC1345,KXS2] -MIBenum: 30 -Source: ECMA registry -Alias: iso-ir-2 -Alias: irv -Alias: csISO2IntlRefVersion - -Name: BS_4730 [RFC1345,KXS2] -MIBenum: 20 -Source: ECMA registry -Alias: iso-ir-4 -Alias: ISO646-GB -Alias: gb -Alias: uk -Alias: csISO4UnitedKingdom - -Name: NATS-SEFI [RFC1345,KXS2] -MIBenum: 31 -Source: ECMA registry -Alias: iso-ir-8-1 -Alias: csNATSSEFI - -Name: NATS-SEFI-ADD [RFC1345,KXS2] -MIBenum: 32 -Source: ECMA registry -Alias: iso-ir-8-2 -Alias: csNATSSEFIADD - -Name: NATS-DANO [RFC1345,KXS2] -MIBenum: 33 -Source: ECMA registry -Alias: iso-ir-9-1 -Alias: csNATSDANO - -Name: NATS-DANO-ADD [RFC1345,KXS2] -MIBenum: 34 -Source: ECMA registry -Alias: iso-ir-9-2 -Alias: csNATSDANOADD - -Name: SEN_850200_B [RFC1345,KXS2] -MIBenum: 35 -Source: ECMA registry -Alias: iso-ir-10 -Alias: FI -Alias: ISO646-FI -Alias: ISO646-SE -Alias: se -Alias: csISO10Swedish - -Name: SEN_850200_C [RFC1345,KXS2] -MIBenum: 21 -Source: ECMA registry -Alias: iso-ir-11 -Alias: ISO646-SE2 -Alias: se2 -Alias: csISO11SwedishForNames - -Name: KS_C_5601-1987 [RFC1345,KXS2] -MIBenum: 36 -Source: ECMA registry -Alias: iso-ir-149 -Alias: KS_C_5601-1989 -Alias: KSC_5601 -Alias: korean -Alias: csKSC56011987 - -Name: ISO-2022-KR (preferred MIME name) [RFC1557,Choi] -MIBenum: 37 -Source: RFC-1557 (see also KS_C_5601-1987) -Alias: csISO2022KR - -Name: EUC-KR (preferred MIME name) [RFC1557,Choi] -MIBenum: 38 -Source: RFC-1557 (see also KS_C_5861-1992) -Alias: csEUCKR - -Name: ISO-2022-JP (preferred MIME name) [RFC1468,Murai] -MIBenum: 39 -Source: RFC-1468 (see also RFC-2237) -Alias: csISO2022JP - -Name: ISO-2022-JP-2 (preferred MIME name) [RFC1554,Ohta] -MIBenum: 40 -Source: RFC-1554 -Alias: csISO2022JP2 - -Name: ISO-2022-CN [RFC1922] -MIBenum: 104 -Source: 
RFC-1922 - -Name: ISO-2022-CN-EXT [RFC1922] -MIBenum: 105 -Source: RFC-1922 - -Name: JIS_C6220-1969-jp [RFC1345,KXS2] -MIBenum: 41 -Source: ECMA registry -Alias: JIS_C6220-1969 -Alias: iso-ir-13 -Alias: katakana -Alias: x0201-7 -Alias: csISO13JISC6220jp - -Name: JIS_C6220-1969-ro [RFC1345,KXS2] -MIBenum: 42 -Source: ECMA registry -Alias: iso-ir-14 -Alias: jp -Alias: ISO646-JP -Alias: csISO14JISC6220ro - -Name: IT [RFC1345,KXS2] -MIBenum: 22 -Source: ECMA registry -Alias: iso-ir-15 -Alias: ISO646-IT -Alias: csISO15Italian - -Name: PT [RFC1345,KXS2] -MIBenum: 43 -Source: ECMA registry -Alias: iso-ir-16 -Alias: ISO646-PT -Alias: csISO16Portuguese - -Name: ES [RFC1345,KXS2] -MIBenum: 23 -Source: ECMA registry -Alias: iso-ir-17 -Alias: ISO646-ES -Alias: csISO17Spanish - -Name: greek7-old [RFC1345,KXS2] -MIBenum: 44 -Source: ECMA registry -Alias: iso-ir-18 -Alias: csISO18Greek7Old - -Name: latin-greek [RFC1345,KXS2] -MIBenum: 45 -Source: ECMA registry -Alias: iso-ir-19 -Alias: csISO19LatinGreek - -Name: DIN_66003 [RFC1345,KXS2] -MIBenum: 24 -Source: ECMA registry -Alias: iso-ir-21 -Alias: de -Alias: ISO646-DE -Alias: csISO21German - -Name: NF_Z_62-010_(1973) [RFC1345,KXS2] -MIBenum: 46 -Source: ECMA registry -Alias: iso-ir-25 -Alias: ISO646-FR1 -Alias: csISO25French - -Name: Latin-greek-1 [RFC1345,KXS2] -MIBenum: 47 -Source: ECMA registry -Alias: iso-ir-27 -Alias: csISO27LatinGreek1 - -Name: ISO_5427 [RFC1345,KXS2] -MIBenum: 48 -Source: ECMA registry -Alias: iso-ir-37 -Alias: csISO5427Cyrillic - -Name: JIS_C6226-1978 [RFC1345,KXS2] -MIBenum: 49 -Source: ECMA registry -Alias: iso-ir-42 -Alias: csISO42JISC62261978 - -Name: BS_viewdata [RFC1345,KXS2] -MIBenum: 50 -Source: ECMA registry -Alias: iso-ir-47 -Alias: csISO47BSViewdata - -Name: INIS [RFC1345,KXS2] -MIBenum: 51 -Source: ECMA registry -Alias: iso-ir-49 -Alias: csISO49INIS - -Name: INIS-8 [RFC1345,KXS2] -MIBenum: 52 -Source: ECMA registry -Alias: iso-ir-50 -Alias: csISO50INIS8 - -Name: INIS-cyrillic [RFC1345,KXS2] 
-MIBenum: 53 -Source: ECMA registry -Alias: iso-ir-51 -Alias: csISO51INISCyrillic - -Name: ISO_5427:1981 [RFC1345,KXS2] -MIBenum: 54 -Source: ECMA registry -Alias: iso-ir-54 -Alias: ISO5427Cyrillic1981 - -Name: ISO_5428:1980 [RFC1345,KXS2] -MIBenum: 55 -Source: ECMA registry -Alias: iso-ir-55 -Alias: csISO5428Greek - -Name: GB_1988-80 [RFC1345,KXS2] -MIBenum: 56 -Source: ECMA registry -Alias: iso-ir-57 -Alias: cn -Alias: ISO646-CN -Alias: csISO57GB1988 - -Name: GB_2312-80 [RFC1345,KXS2] -MIBenum: 57 -Source: ECMA registry -Alias: iso-ir-58 -Alias: chinese -Alias: csISO58GB231280 - -Name: NS_4551-1 [RFC1345,KXS2] -MIBenum: 25 -Source: ECMA registry -Alias: iso-ir-60 -Alias: ISO646-NO -Alias: no -Alias: csISO60DanishNorwegian -Alias: csISO60Norwegian1 - -Name: NS_4551-2 [RFC1345,KXS2] -MIBenum: 58 -Source: ECMA registry -Alias: ISO646-NO2 -Alias: iso-ir-61 -Alias: no2 -Alias: csISO61Norwegian2 - -Name: NF_Z_62-010 [RFC1345,KXS2] -MIBenum: 26 -Source: ECMA registry -Alias: iso-ir-69 -Alias: ISO646-FR -Alias: fr -Alias: csISO69French - -Name: videotex-suppl [RFC1345,KXS2] -MIBenum: 59 -Source: ECMA registry -Alias: iso-ir-70 -Alias: csISO70VideotexSupp1 - -Name: PT2 [RFC1345,KXS2] -MIBenum: 60 -Source: ECMA registry -Alias: iso-ir-84 -Alias: ISO646-PT2 -Alias: csISO84Portuguese2 - -Name: ES2 [RFC1345,KXS2] -MIBenum: 61 -Source: ECMA registry -Alias: iso-ir-85 -Alias: ISO646-ES2 -Alias: csISO85Spanish2 - -Name: MSZ_7795.3 [RFC1345,KXS2] -MIBenum: 62 -Source: ECMA registry -Alias: iso-ir-86 -Alias: ISO646-HU -Alias: hu -Alias: csISO86Hungarian - -Name: JIS_C6226-1983 [RFC1345,KXS2] -MIBenum: 63 -Source: ECMA registry -Alias: iso-ir-87 -Alias: x0208 -Alias: JIS_X0208-1983 -Alias: csISO87JISX0208 - -Name: greek7 [RFC1345,KXS2] -MIBenum: 64 -Source: ECMA registry -Alias: iso-ir-88 -Alias: csISO88Greek7 - -Name: ASMO_449 [RFC1345,KXS2] -MIBenum: 65 -Source: ECMA registry -Alias: ISO_9036 -Alias: arabic7 -Alias: iso-ir-89 -Alias: csISO89ASMO449 - -Name: iso-ir-90 
[RFC1345,KXS2] -MIBenum: 66 -Source: ECMA registry -Alias: csISO90 - -Name: JIS_C6229-1984-a [RFC1345,KXS2] -MIBenum: 67 -Source: ECMA registry -Alias: iso-ir-91 -Alias: jp-ocr-a -Alias: csISO91JISC62291984a - -Name: JIS_C6229-1984-b [RFC1345,KXS2] -MIBenum: 68 -Source: ECMA registry -Alias: iso-ir-92 -Alias: ISO646-JP-OCR-B -Alias: jp-ocr-b -Alias: csISO92JISC62991984b - -Name: JIS_C6229-1984-b-add [RFC1345,KXS2] -MIBenum: 69 -Source: ECMA registry -Alias: iso-ir-93 -Alias: jp-ocr-b-add -Alias: csISO93JIS62291984badd - -Name: JIS_C6229-1984-hand [RFC1345,KXS2] -MIBenum: 70 -Source: ECMA registry -Alias: iso-ir-94 -Alias: jp-ocr-hand -Alias: csISO94JIS62291984hand - -Name: JIS_C6229-1984-hand-add [RFC1345,KXS2] -MIBenum: 71 -Source: ECMA registry -Alias: iso-ir-95 -Alias: jp-ocr-hand-add -Alias: csISO95JIS62291984handadd - -Name: JIS_C6229-1984-kana [RFC1345,KXS2] -MIBenum: 72 -Source: ECMA registry -Alias: iso-ir-96 -Alias: csISO96JISC62291984kana - -Name: ISO_2033-1983 [RFC1345,KXS2] -MIBenum: 73 -Source: ECMA registry -Alias: iso-ir-98 -Alias: e13b -Alias: csISO2033 - -Name: ANSI_X3.110-1983 [RFC1345,KXS2] -MIBenum: 74 -Source: ECMA registry -Alias: iso-ir-99 -Alias: CSA_T500-1983 -Alias: NAPLPS -Alias: csISO99NAPLPS - -Name: ISO_8859-1:1987 [RFC1345,KXS2] -MIBenum: 4 -Source: ECMA registry -Alias: iso-ir-100 -Alias: ISO_8859-1 -Alias: ISO-8859-1 (preferred MIME name) -Alias: latin1 -Alias: l1 -Alias: IBM819 -Alias: CP819 -Alias: csISOLatin1 - -Name: ISO_8859-2:1987 [RFC1345,KXS2] -MIBenum: 5 -Source: ECMA registry -Alias: iso-ir-101 -Alias: ISO_8859-2 -Alias: ISO-8859-2 (preferred MIME name) -Alias: latin2 -Alias: l2 -Alias: csISOLatin2 - -Name: T.61-7bit [RFC1345,KXS2] -MIBenum: 75 -Source: ECMA registry -Alias: iso-ir-102 -Alias: csISO102T617bit - -Name: T.61-8bit [RFC1345,KXS2] -MIBenum: 76 -Alias: T.61 -Source: ECMA registry -Alias: iso-ir-103 -Alias: csISO103T618bit - -Name: ISO_8859-3:1988 [RFC1345,KXS2] -MIBenum: 6 -Source: ECMA registry -Alias: 
iso-ir-109 -Alias: ISO_8859-3 -Alias: ISO-8859-3 (preferred MIME name) -Alias: latin3 -Alias: l3 -Alias: csISOLatin3 - -Name: ISO_8859-4:1988 [RFC1345,KXS2] -MIBenum: 7 -Source: ECMA registry -Alias: iso-ir-110 -Alias: ISO_8859-4 -Alias: ISO-8859-4 (preferred MIME name) -Alias: latin4 -Alias: l4 -Alias: csISOLatin4 - -Name: ECMA-cyrillic -MIBenum: 77 -Source: ISO registry (formerly ECMA registry) - http://www.itscj.ipsj.jp/ISO-IR/111.pdf -Alias: iso-ir-111 -Alias: KOI8-E -Alias: csISO111ECMACyrillic - -Name: CSA_Z243.4-1985-1 [RFC1345,KXS2] -MIBenum: 78 -Source: ECMA registry -Alias: iso-ir-121 -Alias: ISO646-CA -Alias: csa7-1 -Alias: ca -Alias: csISO121Canadian1 - -Name: CSA_Z243.4-1985-2 [RFC1345,KXS2] -MIBenum: 79 -Source: ECMA registry -Alias: iso-ir-122 -Alias: ISO646-CA2 -Alias: csa7-2 -Alias: csISO122Canadian2 - -Name: CSA_Z243.4-1985-gr [RFC1345,KXS2] -MIBenum: 80 -Source: ECMA registry -Alias: iso-ir-123 -Alias: csISO123CSAZ24341985gr - -Name: ISO_8859-6:1987 [RFC1345,KXS2] -MIBenum: 9 -Source: ECMA registry -Alias: iso-ir-127 -Alias: ISO_8859-6 -Alias: ISO-8859-6 (preferred MIME name) -Alias: ECMA-114 -Alias: ASMO-708 -Alias: arabic -Alias: csISOLatinArabic - -Name: ISO_8859-6-E [RFC1556,IANA] -MIBenum: 81 -Source: RFC1556 -Alias: csISO88596E -Alias: ISO-8859-6-E (preferred MIME name) - -Name: ISO_8859-6-I [RFC1556,IANA] -MIBenum: 82 -Source: RFC1556 -Alias: csISO88596I -Alias: ISO-8859-6-I (preferred MIME name) - -Name: ISO_8859-7:1987 [RFC1947,RFC1345,KXS2] -MIBenum: 10 -Source: ECMA registry -Alias: iso-ir-126 -Alias: ISO_8859-7 -Alias: ISO-8859-7 (preferred MIME name) -Alias: ELOT_928 -Alias: ECMA-118 -Alias: greek -Alias: greek8 -Alias: csISOLatinGreek - -Name: T.101-G2 [RFC1345,KXS2] -MIBenum: 83 -Source: ECMA registry -Alias: iso-ir-128 -Alias: csISO128T101G2 - -Name: ISO_8859-8:1988 [RFC1345,KXS2] -MIBenum: 11 -Source: ECMA registry -Alias: iso-ir-138 -Alias: ISO_8859-8 -Alias: ISO-8859-8 (preferred MIME name) -Alias: hebrew -Alias: 
csISOLatinHebrew - -Name: ISO_8859-8-E [RFC1556,Nussbacher] -MIBenum: 84 -Source: RFC1556 -Alias: csISO88598E -Alias: ISO-8859-8-E (preferred MIME name) - -Name: ISO_8859-8-I [RFC1556,Nussbacher] -MIBenum: 85 -Source: RFC1556 -Alias: csISO88598I -Alias: ISO-8859-8-I (preferred MIME name) - -Name: CSN_369103 [RFC1345,KXS2] -MIBenum: 86 -Source: ECMA registry -Alias: iso-ir-139 -Alias: csISO139CSN369103 - -Name: JUS_I.B1.002 [RFC1345,KXS2] -MIBenum: 87 -Source: ECMA registry -Alias: iso-ir-141 -Alias: ISO646-YU -Alias: js -Alias: yu -Alias: csISO141JUSIB1002 - -Name: ISO_6937-2-add [RFC1345,KXS2] -MIBenum: 14 -Source: ECMA registry and ISO 6937-2:1983 -Alias: iso-ir-142 -Alias: csISOTextComm - -Name: IEC_P27-1 [RFC1345,KXS2] -MIBenum: 88 -Source: ECMA registry -Alias: iso-ir-143 -Alias: csISO143IECP271 - -Name: ISO_8859-5:1988 [RFC1345,KXS2] -MIBenum: 8 -Source: ECMA registry -Alias: iso-ir-144 -Alias: ISO_8859-5 -Alias: ISO-8859-5 (preferred MIME name) -Alias: cyrillic -Alias: csISOLatinCyrillic - -Name: JUS_I.B1.003-serb [RFC1345,KXS2] -MIBenum: 89 -Source: ECMA registry -Alias: iso-ir-146 -Alias: serbian -Alias: csISO146Serbian - -Name: JUS_I.B1.003-mac [RFC1345,KXS2] -MIBenum: 90 -Source: ECMA registry -Alias: macedonian -Alias: iso-ir-147 -Alias: csISO147Macedonian - -Name: ISO_8859-9:1989 [RFC1345,KXS2] -MIBenum: 12 -Source: ECMA registry -Alias: iso-ir-148 -Alias: ISO_8859-9 -Alias: ISO-8859-9 (preferred MIME name) -Alias: latin5 -Alias: l5 -Alias: csISOLatin5 - -Name: greek-ccitt [RFC1345,KXS2] -MIBenum: 91 -Source: ECMA registry -Alias: iso-ir-150 -Alias: csISO150 -Alias: csISO150GreekCCITT - -Name: NC_NC00-10:81 [RFC1345,KXS2] -MIBenum: 92 -Source: ECMA registry -Alias: cuba -Alias: iso-ir-151 -Alias: ISO646-CU -Alias: csISO151Cuba - -Name: ISO_6937-2-25 [RFC1345,KXS2] -MIBenum: 93 -Source: ECMA registry -Alias: iso-ir-152 -Alias: csISO6937Add - -Name: GOST_19768-74 [RFC1345,KXS2] -MIBenum: 94 -Source: ECMA registry -Alias: ST_SEV_358-88 -Alias: iso-ir-153 
-Alias: csISO153GOST1976874 - -Name: ISO_8859-supp [RFC1345,KXS2] -MIBenum: 95 -Source: ECMA registry -Alias: iso-ir-154 -Alias: latin1-2-5 -Alias: csISO8859Supp - -Name: ISO_10367-box [RFC1345,KXS2] -MIBenum: 96 -Source: ECMA registry -Alias: iso-ir-155 -Alias: csISO10367Box - -Name: ISO-8859-10 (preferred MIME name) [RFC1345,KXS2] -MIBenum: 13 -Source: ECMA registry -Alias: iso-ir-157 -Alias: l6 -Alias: ISO_8859-10:1992 -Alias: csISOLatin6 -Alias: latin6 - -Name: latin-lap [RFC1345,KXS2] -MIBenum: 97 -Source: ECMA registry -Alias: lap -Alias: iso-ir-158 -Alias: csISO158Lap - -Name: JIS_X0212-1990 [RFC1345,KXS2] -MIBenum: 98 -Source: ECMA registry -Alias: x0212 -Alias: iso-ir-159 -Alias: csISO159JISX02121990 - -Name: DS_2089 [RFC1345,KXS2] -MIBenum: 99 -Source: Danish Standard, DS 2089, February 1974 -Alias: DS2089 -Alias: ISO646-DK -Alias: dk -Alias: csISO646Danish - -Name: us-dk [RFC1345,KXS2] -MIBenum: 100 -Alias: csUSDK - -Name: dk-us [RFC1345,KXS2] -MIBenum: 101 -Alias: csDKUS - -Name: JIS_X0201 [RFC1345,KXS2] -MIBenum: 15 -Source: JIS X 0201-1976. One byte only, this is equivalent to - JIS/Roman (similar to ASCII) plus eight-bit half-width - Katakana -Alias: X0201 -Alias: csHalfWidthKatakana - -Name: KSC5636 [RFC1345,KXS2] -MIBenum: 102 -Alias: ISO646-KR -Alias: csKSC5636 - -Name: ISO-10646-UCS-2 -MIBenum: 1000 -Source: the 2-octet Basic Multilingual Plane, aka Unicode - this needs to specify network byte order: the standard - does not specify (it is a 16-bit integer space) -Alias: csUnicode - -Name: ISO-10646-UCS-4 -MIBenum: 1001 -Source: the full code space. (same comment about byte order, - these are 31-bit numbers.) -Alias: csUCS4 - -Name: DEC-MCS [RFC1345,KXS2] -MIBenum: 2008 -Source: VAX/VMS User's Manual, - Order Number: AI-Y517A-TE, April 1986. -Alias: dec -Alias: csDECMCS - -Name: hp-roman8 [HP-PCL5,RFC1345,KXS2] -MIBenum: 2004 -Source: LaserJet IIP Printer User's Manual, - HP part no 33471-90901, Hewlett-Packard, June 1989. 
-Alias: roman8 -Alias: r8 -Alias: csHPRoman8 - -Name: macintosh [RFC1345,KXS2] -MIBenum: 2027 -Source: The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991 -Alias: mac -Alias: csMacintosh - -Name: IBM037 [RFC1345,KXS2] -MIBenum: 2028 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp037 -Alias: ebcdic-cp-us -Alias: ebcdic-cp-ca -Alias: ebcdic-cp-wt -Alias: ebcdic-cp-nl -Alias: csIBM037 - -Name: IBM038 [RFC1345,KXS2] -MIBenum: 2029 -Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990 -Alias: EBCDIC-INT -Alias: cp038 -Alias: csIBM038 - -Name: IBM273 [RFC1345,KXS2] -MIBenum: 2030 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP273 -Alias: csIBM273 - -Name: IBM274 [RFC1345,KXS2] -MIBenum: 2031 -Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990 -Alias: EBCDIC-BE -Alias: CP274 -Alias: csIBM274 - -Name: IBM275 [RFC1345,KXS2] -MIBenum: 2032 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: EBCDIC-BR -Alias: cp275 -Alias: csIBM275 - -Name: IBM277 [RFC1345,KXS2] -MIBenum: 2033 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: EBCDIC-CP-DK -Alias: EBCDIC-CP-NO -Alias: csIBM277 - -Name: IBM278 [RFC1345,KXS2] -MIBenum: 2034 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP278 -Alias: ebcdic-cp-fi -Alias: ebcdic-cp-se -Alias: csIBM278 - -Name: IBM280 [RFC1345,KXS2] -MIBenum: 2035 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP280 -Alias: ebcdic-cp-it -Alias: csIBM280 - -Name: IBM281 [RFC1345,KXS2] -MIBenum: 2036 -Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990 -Alias: EBCDIC-JP-E -Alias: cp281 -Alias: csIBM281 - -Name: IBM284 [RFC1345,KXS2] -MIBenum: 2037 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP284 -Alias: ebcdic-cp-es -Alias: csIBM284 - -Name: IBM285 [RFC1345,KXS2] -MIBenum: 2038 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP285 -Alias: ebcdic-cp-gb -Alias: csIBM285 - -Name: IBM290 [RFC1345,KXS2] -MIBenum: 2039 -Source: IBM 3174 Character Set 
Ref, GA27-3831-02, March 1990 -Alias: cp290 -Alias: EBCDIC-JP-kana -Alias: csIBM290 - -Name: IBM297 [RFC1345,KXS2] -MIBenum: 2040 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp297 -Alias: ebcdic-cp-fr -Alias: csIBM297 - -Name: IBM420 [RFC1345,KXS2] -MIBenum: 2041 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990, - IBM NLS RM p 11-11 -Alias: cp420 -Alias: ebcdic-cp-ar1 -Alias: csIBM420 - -Name: IBM423 [RFC1345,KXS2] -MIBenum: 2042 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp423 -Alias: ebcdic-cp-gr -Alias: csIBM423 - -Name: IBM424 [RFC1345,KXS2] -MIBenum: 2043 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp424 -Alias: ebcdic-cp-he -Alias: csIBM424 - -Name: IBM437 [RFC1345,KXS2] -MIBenum: 2011 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp437 -Alias: 437 -Alias: csPC8CodePage437 - -Name: IBM500 [RFC1345,KXS2] -MIBenum: 2044 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP500 -Alias: ebcdic-cp-be -Alias: ebcdic-cp-ch -Alias: csIBM500 - -Name: IBM775 [HP-PCL5] -MIBenum: 2087 -Source: HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996 -Alias: cp775 -Alias: csPC775Baltic - -Name: IBM850 [RFC1345,KXS2] -MIBenum: 2009 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp850 -Alias: 850 -Alias: csPC850Multilingual - -Name: IBM851 [RFC1345,KXS2] -MIBenum: 2045 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp851 -Alias: 851 -Alias: csIBM851 - -Name: IBM852 [RFC1345,KXS2] -MIBenum: 2010 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp852 -Alias: 852 -Alias: csPCp852 - -Name: IBM855 [RFC1345,KXS2] -MIBenum: 2046 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp855 -Alias: 855 -Alias: csIBM855 - -Name: IBM857 [RFC1345,KXS2] -MIBenum: 2047 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp857 -Alias: 857 -Alias: csIBM857 - -Name: IBM860 [RFC1345,KXS2] -MIBenum: 2048 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp860 -Alias: 860 -Alias: 
csIBM860 - -Name: IBM861 [RFC1345,KXS2] -MIBenum: 2049 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp861 -Alias: 861 -Alias: cp-is -Alias: csIBM861 - -Name: IBM862 [RFC1345,KXS2] -MIBenum: 2013 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp862 -Alias: 862 -Alias: csPC862LatinHebrew - -Name: IBM863 [RFC1345,KXS2] -MIBenum: 2050 -Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991 -Alias: cp863 -Alias: 863 -Alias: csIBM863 - -Name: IBM864 [RFC1345,KXS2] -MIBenum: 2051 -Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991 -Alias: cp864 -Alias: csIBM864 - -Name: IBM865 [RFC1345,KXS2] -MIBenum: 2052 -Source: IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987) -Alias: cp865 -Alias: 865 -Alias: csIBM865 - -Name: IBM866 [Pond] -MIBenum: 2086 -Source: IBM NLDG Volume 2 (SE09-8002-03) August 1994 -Alias: cp866 -Alias: 866 -Alias: csIBM866 - -Name: IBM868 [RFC1345,KXS2] -MIBenum: 2053 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP868 -Alias: cp-ar -Alias: csIBM868 - -Name: IBM869 [RFC1345,KXS2] -MIBenum: 2054 -Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991 -Alias: cp869 -Alias: 869 -Alias: cp-gr -Alias: csIBM869 - -Name: IBM870 [RFC1345,KXS2] -MIBenum: 2055 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP870 -Alias: ebcdic-cp-roece -Alias: ebcdic-cp-yu -Alias: csIBM870 - -Name: IBM871 [RFC1345,KXS2] -MIBenum: 2056 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP871 -Alias: ebcdic-cp-is -Alias: csIBM871 - -Name: IBM880 [RFC1345,KXS2] -MIBenum: 2057 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp880 -Alias: EBCDIC-Cyrillic -Alias: csIBM880 - -Name: IBM891 [RFC1345,KXS2] -MIBenum: 2058 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp891 -Alias: csIBM891 - -Name: IBM903 [RFC1345,KXS2] -MIBenum: 2059 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: cp903 -Alias: csIBM903 - -Name: IBM904 [RFC1345,KXS2] -MIBenum: 2060 -Source: IBM NLS RM Vol2 
SE09-8002-01, March 1990 -Alias: cp904 -Alias: 904 -Alias: csIBBM904 - -Name: IBM905 [RFC1345,KXS2] -MIBenum: 2061 -Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990 -Alias: CP905 -Alias: ebcdic-cp-tr -Alias: csIBM905 - -Name: IBM918 [RFC1345,KXS2] -MIBenum: 2062 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP918 -Alias: ebcdic-cp-ar2 -Alias: csIBM918 - -Name: IBM1026 [RFC1345,KXS2] -MIBenum: 2063 -Source: IBM NLS RM Vol2 SE09-8002-01, March 1990 -Alias: CP1026 -Alias: csIBM1026 - -Name: EBCDIC-AT-DE [RFC1345,KXS2] -MIBenum: 2064 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csIBMEBCDICATDE - -Name: EBCDIC-AT-DE-A [RFC1345,KXS2] -MIBenum: 2065 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICATDEA - -Name: EBCDIC-CA-FR [RFC1345,KXS2] -MIBenum: 2066 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICCAFR - -Name: EBCDIC-DK-NO [RFC1345,KXS2] -MIBenum: 2067 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICDKNO - -Name: EBCDIC-DK-NO-A [RFC1345,KXS2] -MIBenum: 2068 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICDKNOA - -Name: EBCDIC-FI-SE [RFC1345,KXS2] -MIBenum: 2069 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICFISE - -Name: EBCDIC-FI-SE-A [RFC1345,KXS2] -MIBenum: 2070 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICFISEA - -Name: EBCDIC-FR [RFC1345,KXS2] -MIBenum: 2071 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICFR - -Name: EBCDIC-IT [RFC1345,KXS2] -MIBenum: 2072 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICIT - -Name: EBCDIC-PT [RFC1345,KXS2] -MIBenum: 2073 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICPT - -Name: EBCDIC-ES [RFC1345,KXS2] -MIBenum: 2074 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICES - -Name: 
EBCDIC-ES-A [RFC1345,KXS2] -MIBenum: 2075 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICESA - -Name: EBCDIC-ES-S [RFC1345,KXS2] -MIBenum: 2076 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICESS - -Name: EBCDIC-UK [RFC1345,KXS2] -MIBenum: 2077 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICUK - -Name: EBCDIC-US [RFC1345,KXS2] -MIBenum: 2078 -Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 -Alias: csEBCDICUS - -Name: UNKNOWN-8BIT [RFC1428] -MIBenum: 2079 -Alias: csUnknown8BiT - -Name: MNEMONIC [RFC1345,KXS2] -MIBenum: 2080 -Source: RFC 1345, also known as "mnemonic+ascii+38" -Alias: csMnemonic - -Name: MNEM [RFC1345,KXS2] -MIBenum: 2081 -Source: RFC 1345, also known as "mnemonic+ascii+8200" -Alias: csMnem - -Name: VISCII [RFC1456] -MIBenum: 2082 -Source: RFC 1456 -Alias: csVISCII - -Name: VIQR [RFC1456] -MIBenum: 2083 -Source: RFC 1456 -Alias: csVIQR - -Name: KOI8-R (preferred MIME name) [RFC1489] -MIBenum: 2084 -Source: RFC 1489, based on GOST-19768-74, ISO-6937/8, - INIS-Cyrillic, ISO-5427. 
-Alias: csKOI8R - -Name: KOI8-U [RFC2319] -MIBenum: 2088 -Source: RFC 2319 - -Name: IBM00858 -MIBenum: 2089 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00858) [Mahdi] -Alias: CCSID00858 -Alias: CP00858 -Alias: PC-Multilingual-850+euro - -Name: IBM00924 -MIBenum: 2090 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00924) [Mahdi] -Alias: CCSID00924 -Alias: CP00924 -Alias: ebcdic-Latin9--euro - -Name: IBM01140 -MIBenum: 2091 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01140) [Mahdi] -Alias: CCSID01140 -Alias: CP01140 -Alias: ebcdic-us-37+euro - -Name: IBM01141 -MIBenum: 2092 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01141) [Mahdi] -Alias: CCSID01141 -Alias: CP01141 -Alias: ebcdic-de-273+euro - -Name: IBM01142 -MIBenum: 2093 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01142) [Mahdi] -Alias: CCSID01142 -Alias: CP01142 -Alias: ebcdic-dk-277+euro -Alias: ebcdic-no-277+euro - -Name: IBM01143 -MIBenum: 2094 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01143) [Mahdi] -Alias: CCSID01143 -Alias: CP01143 -Alias: ebcdic-fi-278+euro -Alias: ebcdic-se-278+euro - -Name: IBM01144 -MIBenum: 2095 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01144) [Mahdi] -Alias: CCSID01144 -Alias: CP01144 -Alias: ebcdic-it-280+euro - -Name: IBM01145 -MIBenum: 2096 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01145) [Mahdi] -Alias: CCSID01145 -Alias: CP01145 -Alias: ebcdic-es-284+euro - -Name: IBM01146 -MIBenum: 2097 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01146) [Mahdi] -Alias: CCSID01146 -Alias: CP01146 -Alias: ebcdic-gb-285+euro - -Name: IBM01147 -MIBenum: 2098 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01147) [Mahdi] -Alias: CCSID01147 -Alias: CP01147 -Alias: ebcdic-fr-297+euro - -Name: IBM01148 -MIBenum: 2099 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01148) [Mahdi] 
-Alias: CCSID01148 -Alias: CP01148 -Alias: ebcdic-international-500+euro - -Name: IBM01149 -MIBenum: 2100 -Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01149) [Mahdi] -Alias: CCSID01149 -Alias: CP01149 -Alias: ebcdic-is-871+euro - -Name: Big5-HKSCS [Yick] -MIBenum: 2101 -Source: See (http://www.iana.org/assignments/charset-reg/Big5-HKSCS) -Alias: None - -Name: IBM1047 [Robrigado] -MIBenum: 2102 -Source: IBM1047 (EBCDIC Latin 1/Open Systems) -http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf -Alias: IBM-1047 - -Name: PTCP154 [Uskov] -MIBenum: 2103 -Source: See (http://www.iana.org/assignments/charset-reg/PTCP154) -Alias: csPTCP154 -Alias: PT154 -Alias: CP154 -Alias: Cyrillic-Asian - -Name: Amiga-1251 -MIBenum: 2104 -Source: See (http://www.amiga.ultranet.ru/Amiga-1251.html) -Alias: Ami1251 -Alias: Amiga1251 -Alias: Ami-1251 -(Aliases are provided for historical reasons and should not be used) - [Malyshev] - -Name: KOI7-switched -MIBenum: 2105 -Source: See <http://www.iana.org/assignments/charset-reg/KOI7-switched> -Aliases: None - -Name: UNICODE-1-1 [RFC1641] -MIBenum: 1010 -Source: RFC 1641 -Alias: csUnicode11 - -Name: SCSU -MIBenum: 1011 -Source: SCSU See (http://www.iana.org/assignments/charset-reg/SCSU) [Scherer] -Alias: None - -Name: UTF-7 [RFC2152] -MIBenum: 1012 -Source: RFC 2152 -Alias: None - -Name: UTF-16BE [RFC2781] -MIBenum: 1013 -Source: RFC 2781 -Alias: None - -Name: UTF-16LE [RFC2781] -MIBenum: 1014 -Source: RFC 2781 -Alias: None - -Name: UTF-16 [RFC2781] -MIBenum: 1015 -Source: RFC 2781 -Alias: None - -Name: CESU-8 [Phipps] -MIBenum: 1016 -Source: <http://www.unicode.org/unicode/reports/tr26> -Alias: csCESU-8 - -Name: UTF-32 [Davis] -MIBenum: 1017 -Source: <http://www.unicode.org/unicode/reports/tr19/> -Alias: None - -Name: UTF-32BE [Davis] -MIBenum: 1018 -Source: <http://www.unicode.org/unicode/reports/tr19/> -Alias: None - -Name: UTF-32LE [Davis] -MIBenum: 1019 -Source: 
<http://www.unicode.org/unicode/reports/tr19/> -Alias: None - -Name: BOCU-1 [Scherer] -MIBenum: 1020 -Source: http://www.unicode.org/notes/tn6/ -Alias: csBOCU-1 - -Name: UNICODE-1-1-UTF-7 [RFC1642] -MIBenum: 103 -Source: RFC 1642 -Alias: csUnicode11UTF7 - -Name: UTF-8 [RFC3629] -MIBenum: 106 -Source: RFC 3629 -Alias: None - -Name: ISO-8859-13 -MIBenum: 109 -Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-13)[Tumasonis] -Alias: None - -Name: ISO-8859-14 -MIBenum: 110 -Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-14) [Simonsen] -Alias: iso-ir-199 -Alias: ISO_8859-14:1998 -Alias: ISO_8859-14 -Alias: latin8 -Alias: iso-celtic -Alias: l8 - -Name: ISO-8859-15 -MIBenum: 111 -Source: ISO - Please see: <http://www.iana.org/assignments/charset-reg/ISO-8859-15> -Alias: ISO_8859-15 -Alias: Latin-9 - -Name: ISO-8859-16 -MIBenum: 112 -Source: ISO -Alias: iso-ir-226 -Alias: ISO_8859-16:2001 -Alias: ISO_8859-16 -Alias: latin10 -Alias: l10 - -Name: GBK -MIBenum: 113 -Source: Chinese IT Standardization Technical Committee - Please see: <http://www.iana.org/assignments/charset-reg/GBK> -Alias: CP936 -Alias: MS936 -Alias: windows-936 - -Name: GB18030 -MIBenum: 114 -Source: Chinese IT Standardization Technical Committee - Please see: <http://www.iana.org/assignments/charset-reg/GB18030> -Alias: None - -Name: OSD_EBCDIC_DF04_15 -MIBenum: 115 -Source: Fujitsu-Siemens standard mainframe EBCDIC encoding - Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15> -Alias: None - -Name: OSD_EBCDIC_DF03_IRV -MIBenum: 116 -Source: Fujitsu-Siemens standard mainframe EBCDIC encoding - Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV> -Alias: None - -Name: OSD_EBCDIC_DF04_1 -MIBenum: 117 -Source: Fujitsu-Siemens standard mainframe EBCDIC encoding - Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1> -Alias: None - -Name: JIS_Encoding -MIBenum: 16 -Source: JIS X 0202-1991. 
Uses ISO 2022 escape sequences to - shift code sets as documented in JIS X 0202-1991. -Alias: csJISEncoding - -Name: Shift_JIS (preferred MIME name) -MIBenum: 17 -Source: This charset is an extension of csHalfWidthKatakana by - adding graphic characters in JIS X 0208. The CCS's are - JIS X0201:1997 and JIS X0208:1997. The - complete definition is shown in Appendix 1 of JIS - X0208:1997. - This charset can be used for the top-level media type "text". -Alias: MS_Kanji -Alias: csShiftJIS - -Name: Extended_UNIX_Code_Packed_Format_for_Japanese -MIBenum: 18 -Source: Standardized by OSF, UNIX International, and UNIX Systems - Laboratories Pacific. Uses ISO 2022 rules to select - code set 0: US-ASCII (a single 7-bit byte set) - code set 1: JIS X0208-1990 (a double 8-bit byte set) - restricted to A0-FF in both bytes - code set 2: Half Width Katakana (a single 7-bit byte set) - requiring SS2 as the character prefix - code set 3: JIS X0212-1990 (a double 7-bit byte set) - restricted to A0-FF in both bytes - requiring SS3 as the character prefix -Alias: csEUCPkdFmtJapanese -Alias: EUC-JP (preferred MIME name) - -Name: Extended_UNIX_Code_Fixed_Width_for_Japanese -MIBenum: 19 -Source: Used in Japan. Each character is 2 octets. - code set 0: US-ASCII (a single 7-bit byte set) - 1st byte = 00 - 2nd byte = 20-7E - code set 1: JIS X0208-1990 (a double 7-bit byte set) - restricted to A0-FF in both bytes - code set 2: Half Width Katakana (a single 7-bit byte set) - 1st byte = 00 - 2nd byte = A0-FF - code set 3: JIS X0212-1990 (a double 7-bit byte set) - restricted to A0-FF in - the first byte - and 21-7E in the second byte -Alias: csEUCFixWidJapanese - -Name: ISO-10646-UCS-Basic -MIBenum: 1002 -Source: ASCII subset of Unicode. Basic Latin = collection 1 - See ISO 10646, Appendix A -Alias: csUnicodeASCII - -Name: ISO-10646-Unicode-Latin1 -MIBenum: 1003 -Source: ISO Latin-1 subset of Unicode. Basic Latin and Latin-1 - Supplement = collections 1 and 2. See ISO 10646, - Appendix A. 
See RFC 1815. -Alias: csUnicodeLatin1 -Alias: ISO-10646 - -Name: ISO-10646-J-1 -Source: ISO 10646 Japanese, see RFC 1815. - -Name: ISO-Unicode-IBM-1261 -MIBenum: 1005 -Source: IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261 -Alias: csUnicodeIBM1261 - -Name: ISO-Unicode-IBM-1268 -MIBenum: 1006 -Source: IBM Latin-4 Extended Presentation Set, GCSGID: 1268 -Alias: csUnicodeIBM1268 - -Name: ISO-Unicode-IBM-1276 -MIBenum: 1007 -Source: IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276 -Alias: csUnicodeIBM1276 - -Name: ISO-Unicode-IBM-1264 -MIBenum: 1008 -Source: IBM Arabic Presentation Set, GCSGID: 1264 -Alias: csUnicodeIBM1264 - -Name: ISO-Unicode-IBM-1265 -MIBenum: 1009 -Source: IBM Hebrew Presentation Set, GCSGID: 1265 -Alias: csUnicodeIBM1265 - -Name: ISO-8859-1-Windows-3.0-Latin-1 [HP-PCL5] -MIBenum: 2000 -Source: Extended ISO 8859-1 Latin-1 for Windows 3.0. - PCL Symbol Set id: 9U -Alias: csWindows30Latin1 - -Name: ISO-8859-1-Windows-3.1-Latin-1 [HP-PCL5] -MIBenum: 2001 -Source: Extended ISO 8859-1 Latin-1 for Windows 3.1. - PCL Symbol Set id: 19U -Alias: csWindows31Latin1 - -Name: ISO-8859-2-Windows-Latin-2 [HP-PCL5] -MIBenum: 2002 -Source: Extended ISO 8859-2. Latin-2 for Windows 3.1. - PCL Symbol Set id: 9E -Alias: csWindows31Latin2 - -Name: ISO-8859-9-Windows-Latin-5 [HP-PCL5] -MIBenum: 2003 -Source: Extended ISO 8859-9. Latin-5 for Windows 3.1 - PCL Symbol Set id: 5T -Alias: csWindows31Latin5 - -Name: Adobe-Standard-Encoding [Adobe] -MIBenum: 2005 -Source: PostScript Language Reference Manual - PCL Symbol Set id: 10J -Alias: csAdobeStandardEncoding - -Name: Ventura-US [HP-PCL5] -MIBenum: 2006 -Source: Ventura US. ASCII plus characters typically used in - publishing, like pilcrow, copyright, registered, trade mark, - section, dagger, and double dagger in the range A0 (hex) - to FF (hex). - PCL Symbol Set id: 14J -Alias: csVenturaUS - -Name: Ventura-International [HP-PCL5] -MIBenum: 2007 -Source: Ventura International. 
ASCII plus coded characters similar - to Roman8. - PCL Symbol Set id: 13J -Alias: csVenturaInternational - -Name: PC8-Danish-Norwegian [HP-PCL5] -MIBenum: 2012 -Source: PC Danish Norwegian - 8-bit PC set for Danish Norwegian - PCL Symbol Set id: 11U -Alias: csPC8DanishNorwegian - -Name: PC8-Turkish [HP-PCL5] -MIBenum: 2014 -Source: PC Latin Turkish. PCL Symbol Set id: 9T -Alias: csPC8Turkish - -Name: IBM-Symbols [IBM-CIDT] -MIBenum: 2015 -Source: Presentation Set, CPGID: 259 -Alias: csIBMSymbols - -Name: IBM-Thai [IBM-CIDT] -MIBenum: 2016 -Source: Presentation Set, CPGID: 838 -Alias: csIBMThai - -Name: HP-Legal [HP-PCL5] -MIBenum: 2017 -Source: PCL 5 Comparison Guide, Hewlett-Packard, - HP part number 5961-0510, October 1992 - PCL Symbol Set id: 1U -Alias: csHPLegal - -Name: HP-Pi-font [HP-PCL5] -MIBenum: 2018 -Source: PCL 5 Comparison Guide, Hewlett-Packard, - HP part number 5961-0510, October 1992 - PCL Symbol Set id: 15U -Alias: csHPPiFont - -Name: HP-Math8 [HP-PCL5] -MIBenum: 2019 -Source: PCL 5 Comparison Guide, Hewlett-Packard, - HP part number 5961-0510, October 1992 - PCL Symbol Set id: 8M -Alias: csHPMath8 - -Name: Adobe-Symbol-Encoding [Adobe] -MIBenum: 2020 -Source: PostScript Language Reference Manual - PCL Symbol Set id: 5M -Alias: csHPPSMath - -Name: HP-DeskTop [HP-PCL5] -MIBenum: 2021 -Source: PCL 5 Comparison Guide, Hewlett-Packard, - HP part number 5961-0510, October 1992 - PCL Symbol Set id: 7J -Alias: csHPDesktop - -Name: Ventura-Math [HP-PCL5] -MIBenum: 2022 -Source: PCL 5 Comparison Guide, Hewlett-Packard, - HP part number 5961-0510, October 1992 - PCL Symbol Set id: 6M -Alias: csVenturaMath - -Name: Microsoft-Publishing [HP-PCL5] -MIBenum: 2023 -Source: PCL 5 Comparison Guide, Hewlett-Packard, - HP part number 5961-0510, October 1992 - PCL Symbol Set id: 6J -Alias: csMicrosoftPublishing - -Name: Windows-31J -MIBenum: 2024 -Source: Windows Japanese. 
A further extension of Shift_JIS - to include NEC special characters (Row 13), NEC - selection of IBM extensions (Rows 89 to 92), and IBM - extensions (Rows 115 to 119). The CCS's are - JIS X0201:1997, JIS X0208:1997, and these extensions. - This charset can be used for the top-level media type "text", - but it is of limited or specialized use (see RFC2278). - PCL Symbol Set id: 19K -Alias: csWindows31J - -Name: GB2312 (preferred MIME name) -MIBenum: 2025 -Source: Chinese for People's Republic of China (PRC) mixed one byte, - two byte set: - 20-7E = one byte ASCII - A1-FE = two byte PRC Kanji - See GB 2312-80 - PCL Symbol Set Id: 18C -Alias: csGB2312 - -Name: Big5 (preferred MIME name) -MIBenum: 2026 -Source: Chinese for Taiwan Multi-byte set. - PCL Symbol Set Id: 18T -Alias: csBig5 - -Name: windows-1250 -MIBenum: 2250 -Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1250) [Lazhintseva] -Alias: None - -Name: windows-1251 -MIBenum: 2251 -Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1251) [Lazhintseva] -Alias: None - -Name: windows-1252 -MIBenum: 2252 -Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1252) [Wendt] -Alias: None - -Name: windows-1253 -MIBenum: 2253 -Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1253) [Lazhintseva] -Alias: None - -Name: windows-1254 -MIBenum: 2254 -Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1254) [Lazhintseva] -Alias: None - -Name: windows-1255 -MIBenum: 2255 -Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1255) [Lazhintseva] -Alias: None - -Name: windows-1256 -MIBenum: 2256 -Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1256) [Lazhintseva] -Alias: None - -Name: windows-1257 -MIBenum: 2257 -Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1257) [Lazhintseva] -Alias: None - -Name: windows-1258 -MIBenum: 2258 -Source: Microsoft 
(http://www.iana.org/assignments/charset-reg/windows-1258) [Lazhintseva] -Alias: None - -Name: TIS-620 -MIBenum: 2259 -Source: Thai Industrial Standards Institute (TISI) [Tantsetthi] - -Name: HZ-GB-2312 -MIBenum: 2085 -Source: RFC 1842, RFC 1843 [RFC1842, RFC1843] - - -REFERENCES ----------- - -[RFC1345] Simonsen, K., "Character Mnemonics & Character Sets", - RFC 1345, Rationel Almen Planlaegning, Rationel Almen - Planlaegning, June 1992. - -[RFC1428] Vaudreuil, G., "Transition of Internet Mail from - Just-Send-8 to 8bit-SMTP/MIME", RFC1428, CNRI, February - 1993. - -[RFC1456] Vietnamese Standardization Working Group, "Conventions for - Encoding the Vietnamese Language VISCII: VIetnamese - Standard Code for Information Interchange VIQR: VIetnamese - Quoted-Readable Specification Revision 1.1", RFC 1456, May - 1993. - -[RFC1468] Murai, J., Crispin, M., and E. van der Poel, "Japanese - Character Encoding for Internet Messages", RFC 1468, - Keio University, Panda Programming, June 1993. - -[RFC1489] Chernov, A., "Registration of a Cyrillic Character Set", - RFC1489, RELCOM Development Team, July 1993. - -[RFC1554] Ohta, M., and K. Handa, "ISO-2022-JP-2: Multilingual - Extension of ISO-2022-JP", RFC1554, Tokyo Institute of - Technology, ETL, December 1993. - -[RFC1556] Nussbacher, H., "Handling of Bi-directional Texts in MIME", - RFC1556, Israeli Inter-University, December 1993. - -[RFC1557] Choi, U., Chon, K., and H. Park, "Korean Character Encoding - for Internet Messages", KAIST, Solvit Chosun Media, - December 1993. - -[RFC1641] Goldsmith, D., and M. Davis, "Using Unicode with MIME", - RFC1641, Taligent, Inc., July 1994. - -[RFC1642] Goldsmith, D., and M. Davis, "UTF-7", RFC1642, Taligent, - Inc., July 1994. - -[RFC1815] Ohta, M., "Character Sets ISO-10646 and ISO-10646-J-1", - RFC 1815, Tokyo Institute of Technology, July 1995. 
- - -[Adobe] Adobe Systems Incorporated, PostScript Language Reference - Manual, second edition, Addison-Wesley Publishing Company, - Inc., 1990. - -[ECMA Registry] ISO-IR: International Register of Escape Sequences - http://www.itscj.ipsj.or.jp/ISO-IE/ Note: The current - registration authority is IPSJ/ITSCJ, Japan. - -[HP-PCL5] Hewlett-Packard Company, "HP PCL 5 Comparison Guide", - (P/N 5021-0329) pp B-13, 1996. - -[IBM-CIDT] IBM Corporation, "ABOUT TYPE: IBM's Technical Reference - for Core Interchange Digitized Type", Publication number - S544-3708-01 - -[RFC1842] Wei, Y., J. Li, and Y. Jiang, "ASCII Printable - Characters-Based Chinese Character Encoding for Internet - Messages", RFC 1842, Harvard University, Rice University, - University of Maryland, August 1995. - -[RFC1843] Lee, F., "HZ - A Data Format for Exchanging Files of - Arbitrarily Mixed Chinese and ASCII Characters", RFC 1843, - Stanford University, August 1995. - -[RFC2152] Goldsmith, D., M. Davis, "UTF-7: A Mail-Safe Transformation - Format of Unicode", RFC 2152, Apple Computer, Inc., - Taligent Inc., May 1997. - -[RFC2279] Yergeau, F., "UTF-8, A Transformation Format of ISO 10646", - RFC 2279, Alis Technologies, January, 1998. - -[RFC2781] Hoffman, P., Yergeau, F., "UTF-16, an encoding of ISO 10646", - RFC 2781, February 2000. - -[RFC3629] Yergeau, F., "UTF-8, a transformation format of ISO 10646", - RFC3629, November 2003. - -PEOPLE ------- - -[KXS2] Keld Simonsen <Keld.Simonsen@dkuug.dk> - -[Choi] Woohyong Choi <whchoi@cosmos.kaist.ac.kr> - -[Davis] Mark Davis, <mark@unicode.org>, April 2002. - -[Lazhintseva] Katya Lazhintseva, <katyal@MICROSOFT.com>, May 1996. - -[Mahdi] Tamer Mahdi, <tamer@ca.ibm.com>, August 2000. - -[Malyshev] Michael Malyshev, <michael_malyshev@mail.ru>, January 2004 - -[Murai] Jun Murai <jun@wide.ad.jp> - -[Nussbacher] Hank Nussbacher, <hank@vm.tau.ac.il> - -[Ohta] Masataka Ohta, <mohta@cc.titech.ac.jp>, July 1995. 
- -[Phipps] Toby Phipps, <tphipps@peoplesoft.com>, March 2002. - -[Pond] Rick Pond, <rickpond@vnet.ibm.com>, March 1997. - -[Robrigado] Reuel Robrigado, <reuelr@ca.ibm.com>, September 2002. - -[Scherer] Markus Scherer, <markus.scherer@jtcsv.com>, August 2000, - September 2002. - -[Simonsen] Keld Simonsen, <Keld.Simonsen@rap.dk>, August 2000. - -[Tantsetthi] Trin Tantsetthi, <trin@mozart.inet.co.th>, September 1998. - -[Tumasonis] Vladas Tumasonis, <vladas.tumasonis@maf.vu.lt>, August 2000. - -[Uskov] Alexander Uskov, <auskov@idc.kz>, September 2002. - -[Wendt] Chris Wendt, <christw@microsoft.com>, December 1999. - -[Yick] Nicky Yick, <cliac@itsd.gcn.gov.hk>, October 2000. - -[] - - - - - - - diff --git a/WebCore/platform/text/mac/mac-encodings.txt b/WebCore/platform/text/mac/mac-encodings.txt deleted file mode 100644 index bb45e22..0000000 --- a/WebCore/platform/text/mac/mac-encodings.txt +++ /dev/null @@ -1,45 +0,0 @@ -# We'd like to eliminate this file. -# It would be nice to get rid of dependence on the TextEncodingConvert entirely. -# Perhaps we can prove these are not used on the web and remove them. -# Or perhaps we can get them added to ICU. - -# The items on the left are names of TEC TextEncoding values (without the leading kTextEncoding). -# The items on the right are IANA character set names. Names listed in character-sets.txt are not -# repeated here; mentioning any one character set from a group in there pulls in all the aliases in -# that group. 
- -DOSChineseTrad: cp950 -DOSGreek: cp737, ibm737 -EUC_TW: EUC-TW -ISOLatin10: ISO-8859-16 -ISOLatin6: ISO-8859-10 -ISOLatin8: ISO-8859-14 -ISOLatinThai: ISO-8859-11 -ISO_2022_JP_3: ISO-2022-JP-3 -JIS_C6226_78: JIS_C6226-1978 -JIS_X0208_83: JIS_X0208-1983 -JIS_X0208_90: JIS_X0208-1990 -JIS_X0212_90: JIS_X0212-1990 -KOI8_U: KOI8-U -MacArabic: x-mac-arabic -MacChineseSimp: x-mac-chinesesimp, xmacsimpchinese -MacChineseTrad: x-mac-chinesetrad, xmactradchinese -MacCroatian: x-mac-croatian -MacDevanagari: x-mac-devanagari -MacDingbats: x-mac-dingbats -MacFarsi: x-mac-farsi -MacGujarati: x-mac-gujarati -MacGurmukhi: x-mac-gurmukhi -MacHebrew: x-mac-hebrew -MacIcelandic: x-mac-icelandic -MacJapanese: x-mac-japanese -MacKorean: x-mac-korean -MacRomanLatin1: x-mac-roman-latin1 -MacRomanian: x-mac-romanian -MacSymbol: x-mac-symbol -MacThai: x-mac-thai -MacTibetan: x-mac-tibetan -MacVT100: x-mac-vt100 -NextStepLatin: x-nextstep -ShiftJIS_X0213_00: Shift_JIS_X0213-2000 -WindowsKoreanJohab: johab diff --git a/WebCore/platform/text/mac/make-charset-table.pl b/WebCore/platform/text/mac/make-charset-table.pl deleted file mode 100755 index 16fd25a..0000000 --- a/WebCore/platform/text/mac/make-charset-table.pl +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/perl -w - -# Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. Neither the name of Apple Computer, Inc. 
("Apple") nor the names of -# its contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -use strict; - -my %aliasesFromCharsetsFile; -my %namesWritten; - -my $output = ""; - -my $error = 0; - -sub error ($) -{ - print STDERR @_, "\n"; - $error = 1; -} - -sub emit_line -{ - my ($name, $prefix, $encoding, $flags) = @_; - - error "$name shows up twice in output" if $namesWritten{$name}; - $namesWritten{$name} = 1; - - $output .= " { \"$name\", $prefix$encoding },\n"; -} - -sub process_platform_encodings -{ - my ($filename, $PlatformPrefix) = @_; - my $baseFilename = $filename; - $baseFilename =~ s|.*/||; - - my %seenPlatformNames; - my %seenIANANames; - - open PLATFORM_ENCODINGS, $filename or die; - - while (<PLATFORM_ENCODINGS>) { - chomp; - s/\#.*$//; - s/\s+$//; - if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) { - my %aliases; - - my $PlatformNameWithFlags = $PlatformName; - if ($flags) { - $PlatformNameWithFlags .= ", " . 
$flags; - } else { - $flags = "NoEncodingFlags"; - } - error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags}; - $seenPlatformNames{$PlatformNameWithFlags} = 1; - - # Build the aliases list. - # Also check that no two names are part of the same entry in the charsets file. - my @IANANames = split ", ", $IANANames; - my $firstName = ""; - my $canonicalFirstName = ""; - my $prevName = ""; - for my $name (@IANANames) { - if ($firstName eq "") { - if ($name !~ /^[-A-Za-z0-9_]+$/) { - error "$name, in $baseFilename, has illegal characters in it"; - next; - } - $firstName = $name; - } else { - if ($name !~ /^[a-z0-9]+$/) { - error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)"; - next; - } - if ($name le $prevName) { - error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order"; - } - $prevName = $name; - } - - my $canonicalName = lc $name; - $canonicalName =~ tr/-_//d; - - $canonicalFirstName = $canonicalName if $canonicalFirstName eq ""; - - error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName}; - $seenIANANames{$canonicalName} = 1; - - $aliases{$canonicalName} = 1; - next if !$aliasesFromCharsetsFile{$canonicalName}; - for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) { - $aliases{$alias} = 1; - } - for my $otherName (@IANANames) { - next if $canonicalName eq $otherName; - if ($aliasesFromCharsetsFile{$otherName} - && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$otherName} - && $canonicalName le $otherName) { - error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt"; - } - } - } - - # write out - emit_line($firstName, $PlatformPrefix, $PlatformName, $flags); - for my $alias (sort keys %aliases) { - emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne 
$canonicalFirstName; - } - } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) { - my $PlatformName = $1; - - error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName}; - $seenPlatformNames{$PlatformName} = 1; - } elsif (/./) { - error "syntax error in $baseFilename, line $."; - } - } - - close PLATFORM_ENCODINGS; -} - -sub process_iana_charset -{ - my ($canonical_name, @aliases) = @_; - - return if !$canonical_name; - - my @names = sort $canonical_name, @aliases; - - for my $name (@names) { - $aliasesFromCharsetsFile{$name} = \@names; - } -} - -sub process_iana_charsets -{ - my ($filename) = @_; - - open CHARSETS, $filename or die; - - my %seen; - - my $canonical_name; - my @aliases; - - my %exceptions = ( isoir91 => 1, isoir92 => 1 ); - - while (<CHARSETS>) { - chomp; - if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) { - $new_canonical_name = lc $new_canonical_name; - $new_canonical_name =~ tr/a-z0-9//cd; - - error "saw $new_canonical_name twice in character-sets.txt", if $seen{$new_canonical_name}; - $seen{$new_canonical_name} = $new_canonical_name; - - process_iana_charset $canonical_name, @aliases; - - $canonical_name = $new_canonical_name; - @aliases = (); - } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) { - $new_alias = lc $new_alias; - $new_alias =~ tr/a-z0-9//cd; - - # do this after normalizing the alias, sometimes character-sets.txt - # has weird escape characters, e.g. 
\b after None - next if $new_alias eq "none"; - - error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name", if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias}; - push @aliases, $new_alias if !$seen{$new_alias}; - $seen{$new_alias} = $canonical_name; - } - } - - process_iana_charset $canonical_name, @aliases; - - close CHARSETS; -} - -# Program body - -process_iana_charsets($ARGV[0]); -process_platform_encodings($ARGV[1], $ARGV[2]); - -exit 1 if $error; - -print <<EOF -// File generated by make-charset-table.pl. Do not edit! - -#include "config.h" -#include "CharsetData.h" - -namespace WebCore { - - const CharsetEntry CharsetTable[] = { -$output - { 0, 0 } - }; - -} -EOF diff --git a/WebCore/platform/text/qt/TextBoundariesQt.cpp b/WebCore/platform/text/qt/TextBoundariesQt.cpp deleted file mode 100644 index a354ca6..0000000 --- a/WebCore/platform/text/qt/TextBoundariesQt.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) 2006 Zack Rusin <zack@kde.org> - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include "TextBoundaries.h" -#include "NotImplemented.h" - -#include <QString> -#include <QChar> - -#include <QDebug> -#include <stdio.h> - -#include <qtextboundaryfinder.h> - -namespace WebCore { - -int findNextWordFromIndex(UChar const* buffer, int len, int position, bool forward) -{ - QString str(reinterpret_cast<QChar const*>(buffer), len); - QTextBoundaryFinder iterator(QTextBoundaryFinder::Word, str); - iterator.setPosition(position >= len ? len - 1 : position); - if (forward) { - int pos = iterator.toNextBoundary(); - while (pos > 0) { - if (QChar(buffer[pos-1]).isLetterOrNumber()) - return pos; - pos = iterator.toNextBoundary(); - } - return len; - } else { - int pos = iterator.toPreviousBoundary(); - while (pos > 0) { - if (QChar(buffer[pos]).isLetterOrNumber()) - return pos; - pos = iterator.toPreviousBoundary(); - } - return 0; - } -} - -void findWordBoundary(UChar const* buffer, int len, int position, int* start, int* end) -{ - QString str(reinterpret_cast<QChar const*>(buffer), len); - QTextBoundaryFinder iterator(QTextBoundaryFinder::Word, str); - iterator.setPosition(position); - *start = position > 0 ? iterator.toPreviousBoundary() : 0; - *end = position == len ? 
len : iterator.toNextBoundary(); -} - -} - diff --git a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp deleted file mode 100644 index b9f5a9e..0000000 --- a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - */ - -#include "config.h" -#include "TextBreakIterator.h" - -#include <QtCore/qtextboundaryfinder.h> -#include <qdebug.h> - -// #define DEBUG_TEXT_ITERATORS -#ifdef DEBUG_TEXT_ITERATORS -#define DEBUG qDebug -#else -#define DEBUG if (1) {} else qDebug -#endif - -namespace WebCore { - -#if USE(QT_ICU_TEXT_BREAKING) -const char* currentTextBreakLocaleID() -{ - return QLocale::system().name().toLatin1(); -} -#else - static unsigned char buffer[1024]; - - class TextBreakIterator : public QTextBoundaryFinder { - public: - TextBreakIterator(QTextBoundaryFinder::BoundaryType type, const UChar* string, int length) - : QTextBoundaryFinder(type, (const QChar*)string, length, buffer, sizeof(buffer)) - , length(length) - , string(string) {} - TextBreakIterator() - : QTextBoundaryFinder() - , length(0) - , string(0) {} - - int length; - const UChar* string; - }; - - TextBreakIterator* setUpIterator(TextBreakIterator& iterator, QTextBoundaryFinder::BoundaryType type, const UChar* string, int length) - { - if (!string || !length) - return 0; - - if (iterator.isValid() && type == iterator.type() && length == iterator.length - && memcmp(string, iterator.string, length) == 0) { - iterator.toStart(); - return &iterator; - } - - iterator = TextBreakIterator(type, string, length); - - return &iterator; - } - - TextBreakIterator* wordBreakIterator(const UChar* string, int length) - { - static TextBreakIterator staticWordBreakIterator; - return setUpIterator(staticWordBreakIterator, QTextBoundaryFinder::Word, string, length); - } - - TextBreakIterator* characterBreakIterator(const UChar* string, int length) - { - static TextBreakIterator staticCharacterBreakIterator; - return setUpIterator(staticCharacterBreakIterator, QTextBoundaryFinder::Grapheme, string, length); - } - - TextBreakIterator* cursorMovementIterator(const UChar* string, int length) - { - return characterBreakIterator(string, length); - } - - TextBreakIterator* lineBreakIterator(const UChar* string, int length) - { - 
static TextBreakIterator staticLineBreakIterator; - return setUpIterator(staticLineBreakIterator, QTextBoundaryFinder::Line, string, length); - } - - TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) - { - static TextBreakIterator staticSentenceBreakIterator; - return setUpIterator(staticSentenceBreakIterator, QTextBoundaryFinder::Sentence, string, length); - - } - - int textBreakFirst(TextBreakIterator* bi) - { - bi->toStart(); - DEBUG() << "textBreakFirst" << bi->position(); - return bi->position(); - } - - int textBreakNext(TextBreakIterator* bi) - { - int pos = bi->toNextBoundary(); - DEBUG() << "textBreakNext" << pos; - return pos; - } - - int textBreakPreceding(TextBreakIterator* bi, int pos) - { - bi->setPosition(pos); - int newpos = bi->toPreviousBoundary(); - DEBUG() << "textBreakPreceding" << pos << newpos; - return newpos; - } - - int textBreakFollowing(TextBreakIterator* bi, int pos) - { - bi->setPosition(pos); - int newpos = bi->toNextBoundary(); - DEBUG() << "textBreakFollowing" << pos << newpos; - return newpos; - } - - int textBreakCurrent(TextBreakIterator* bi) - { - return bi->position(); - } - - bool isTextBreak(TextBreakIterator*, int) - { - return true; - } -#endif - -} diff --git a/WebCore/platform/text/qt/TextCodecQt.cpp b/WebCore/platform/text/qt/TextCodecQt.cpp deleted file mode 100644 index 1e95d87..0000000 --- a/WebCore/platform/text/qt/TextCodecQt.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> - * Copyright (C) 2008 Holger Hans Peter Freyther - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextCodecQt.h" -#include "PlatformString.h" -#include <wtf/text/CString.h> -#include <qset.h> - -namespace WebCore { - -static QSet<QByteArray> *unique_names = 0; - -static const char *getAtomicName(const QByteArray &name) -{ - if (!unique_names) - unique_names = new QSet<QByteArray>; - - unique_names->insert(name); - return unique_names->find(name)->constData(); -} - -void TextCodecQt::registerEncodingNames(EncodingNameRegistrar registrar) -{ - QList<int> mibs = QTextCodec::availableMibs(); -// qDebug() << ">>>>>>>>> registerEncodingNames"; - - for (int i = 0; i < mibs.size(); ++i) { - QTextCodec *c = QTextCodec::codecForMib(mibs.at(i)); - const char *name = getAtomicName(c->name()); - registrar(name, name); -// qDebug() << " " << name << name; - QList<QByteArray> aliases = c->aliases(); - for (int i = 0; i < aliases.size(); ++i) { - const char *a = getAtomicName(aliases.at(i)); -// qDebug() << " (a) " << a << name; - registrar(a, name); - } - } 
-} - -static PassOwnPtr<TextCodec> newTextCodecQt(const TextEncoding& encoding, const void*) -{ - return new TextCodecQt(encoding); -} - -void TextCodecQt::registerCodecs(TextCodecRegistrar registrar) -{ - QList<int> mibs = QTextCodec::availableMibs(); -// qDebug() << ">>>>>>>>> registerCodecs"; - - for (int i = 0; i < mibs.size(); ++i) { - QTextCodec *c = QTextCodec::codecForMib(mibs.at(i)); - const char *name = getAtomicName(c->name()); -// qDebug() << " " << name; - registrar(name, newTextCodecQt, 0); - } -} - -TextCodecQt::TextCodecQt(const TextEncoding& encoding) - : m_encoding(encoding) -{ - m_codec = QTextCodec::codecForName(m_encoding.name()); -} - -TextCodecQt::~TextCodecQt() -{ -} - - -String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*stopOnError*/, bool& sawError) -{ - // We chop input buffer to smaller buffers to avoid excessive memory consumption - // when the input buffer is big. This helps reduce peak memory consumption in - // mobile devices where system RAM is limited. 
-#if OS(SYMBIAN) - static const int MaxInputChunkSize = 32 * 1024; -#else - static const int MaxInputChunkSize = 1024 * 1024; -#endif - const char* buf = bytes; - const char* end = buf + length; - String unicode(""); // a non-null string is expected - - while (buf < end) { - int size = end - buf; - size = qMin(size, MaxInputChunkSize); - QString decoded = m_codec->toUnicode(buf, size, &m_state); - unicode.append(reinterpret_cast_ptr<const UChar*>(decoded.unicode()), decoded.length()); - buf += size; - } - - sawError = m_state.invalidChars != 0; - - if (flush) { - m_state.flags = QTextCodec::DefaultConversion; - m_state.remainingChars = 0; - m_state.invalidChars = 0; - } - - return unicode; -} - -CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling handling) -{ - QTextCodec::ConverterState state; - state.flags = QTextCodec::ConversionFlags(QTextCodec::ConvertInvalidToNull | QTextCodec::IgnoreHeader); - - if (!length) - return ""; - - QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, &state); - - // If some <b> characters </b> are unencodable, escape them as specified by <b> handling </b> - // We append one valid encoded chunk to a QByteArray at a time. When we encounter an unencodable chunk we - // escape it with getUnencodableReplacement, append it, then move to the next chunk. 
- if (state.invalidChars) { - state.invalidChars = 0; - state.remainingChars = 0; - int len = 0; - ba.clear(); - for (size_t pos = 0; pos < length; ++pos) { - QByteArray tba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), ++len, &state); - if (state.remainingChars) - continue; - if (state.invalidChars) { - UnencodableReplacementArray replacement; - getUnencodableReplacement(characters[0], handling, replacement); - tba.replace('\0', replacement); - state.invalidChars = 0; - } - ba.append(tba); - characters += len; - len = 0; - state.remainingChars = 0; - } - } - - return CString(ba.constData(), ba.length()); -} - - -} // namespace WebCore diff --git a/WebCore/platform/text/qt/TextCodecQt.h b/WebCore/platform/text/qt/TextCodecQt.h deleted file mode 100644 index f28f0bb..0000000 --- a/WebCore/platform/text/qt/TextCodecQt.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TextCodecQt_h -#define TextCodecQt_h - -#include "TextCodec.h" -#include "TextEncoding.h" -#include <QTextCodec> - -namespace WebCore { - - class TextCodecQt : public TextCodec { - public: - static void registerEncodingNames(EncodingNameRegistrar); - static void registerCodecs(TextCodecRegistrar); - - TextCodecQt(const TextEncoding&); - virtual ~TextCodecQt(); - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); - - private: - TextEncoding m_encoding; - QTextCodec *m_codec; - QTextCodec::ConverterState m_state; - }; - -} // namespace WebCore - -#endif // TextCodecICU_h diff --git a/WebCore/platform/text/transcoder/FontTranscoder.cpp b/WebCore/platform/text/transcoder/FontTranscoder.cpp deleted file mode 100644 index 68601f9..0000000 --- a/WebCore/platform/text/transcoder/FontTranscoder.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2010, Google Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "config.h" -#include "FontTranscoder.h" - -#include "CharacterNames.h" -#include "FontDescription.h" -#include "TextEncoding.h" - -namespace WebCore { - -FontTranscoder::FontTranscoder() -{ - m_converterTypes.add("MS PGothic", BackslashToYenSign); - UChar unicodeNameMSPGothic[] = {0xFF2D, 0xFF33, 0x0020, 0xFF30, 0x30B4, 0x30B7, 0x30C3, 0x30AF}; - m_converterTypes.add(AtomicString(unicodeNameMSPGothic, WTF_ARRAY_LENGTH(unicodeNameMSPGothic)), BackslashToYenSign); - - m_converterTypes.add("MS PMincho", BackslashToYenSign); - UChar unicodeNameMSPMincho[] = {0xFF2D, 0xFF33, 0x0020, 0xFF30, 0x660E, 0x671D}; - m_converterTypes.add(AtomicString(unicodeNameMSPMincho, WTF_ARRAY_LENGTH(unicodeNameMSPMincho)), BackslashToYenSign); - - m_converterTypes.add("MS Gothic", BackslashToYenSign); - UChar unicodeNameMSGothic[] = {0xFF2D, 0xFF33, 0x0020, 0x30B4, 0x30B7, 0x30C3, 0x30AF}; - m_converterTypes.add(AtomicString(unicodeNameMSGothic, WTF_ARRAY_LENGTH(unicodeNameMSGothic)), BackslashToYenSign); - - m_converterTypes.add("MS Mincho", BackslashToYenSign); - UChar unicodeNameMSMincho[] = {0xFF2D, 0xFF33, 0x0020, 0x660E, 0x671D}; - m_converterTypes.add(AtomicString(unicodeNameMSMincho, WTF_ARRAY_LENGTH(unicodeNameMSMincho)), BackslashToYenSign); - - m_converterTypes.add("Meiryo", BackslashToYenSign); - UChar unicodeNameMeiryo[] = {0x30E1, 0x30A4, 0x30EA, 0x30AA}; - m_converterTypes.add(AtomicString(unicodeNameMeiryo, WTF_ARRAY_LENGTH(unicodeNameMeiryo)), BackslashToYenSign); -} - -FontTranscoder::ConverterType FontTranscoder::converterType(const FontDescription& fontDescription, const TextEncoding* encoding) const -{ - const AtomicString& fontFamily = fontDescription.family().family().string(); - if (!fontFamily.isNull()) { - HashMap<AtomicString, ConverterType>::const_iterator found = m_converterTypes.find(fontFamily); - if (found != m_converterTypes.end()) - return found->second; - } - - // IE's default fonts for Japanese encodings change backslashes into yen 
signs. - // We emulate this behavior only when no font is explicitly specified. - if (encoding && encoding->backslashAsCurrencySymbol() != '\\' && !fontDescription.isSpecifiedFont()) - return BackslashToYenSign; - - return NoConversion; -} - -void FontTranscoder::convert(String& text, const FontDescription& fontDescription, const TextEncoding* encoding) const -{ - switch (converterType(fontDescription, encoding)) { - case BackslashToYenSign: { - // FIXME: TextEncoding.h has similar code. We need to factor them out. - text.replace('\\', yenSign); - break; - } - case NoConversion: - default: - ASSERT_NOT_REACHED(); - } -} - -bool FontTranscoder::needsTranscoding(const FontDescription& fontDescription, const TextEncoding* encoding) const -{ - ConverterType type = converterType(fontDescription, encoding); - return type != NoConversion; -} - -FontTranscoder& fontTranscoder() -{ - static FontTranscoder* transcoder = new FontTranscoder; - return *transcoder; -} - -} // namespace WebCore diff --git a/WebCore/platform/text/transcoder/FontTranscoder.h b/WebCore/platform/text/transcoder/FontTranscoder.h deleted file mode 100644 index 67db977..0000000 --- a/WebCore/platform/text/transcoder/FontTranscoder.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2010, Google Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. 
nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef FontTranscoder_h -#define FontTranscoder_h - -#include <wtf/HashMap.h> -#include <wtf/Noncopyable.h> -#include <wtf/text/AtomicStringHash.h> - -namespace WebCore { - -class FontDescription; -class TextEncoding; - -class FontTranscoder : public Noncopyable { -public: - void convert(String& text, const FontDescription&, const TextEncoding* = 0) const; - bool needsTranscoding(const FontDescription&, const TextEncoding* = 0) const; - -private: - FontTranscoder(); - ~FontTranscoder(); // Not implemented to make sure nobody accidentally calls delete -- WebCore does not delete singletons. 
- - enum ConverterType { - NoConversion, BackslashToYenSign, - }; - - ConverterType converterType(const FontDescription&, const TextEncoding*) const; - - HashMap<AtomicString, ConverterType> m_converterTypes; - - friend FontTranscoder& fontTranscoder(); -}; - -FontTranscoder& fontTranscoder(); - -} // namespace WebCore - -#endif // FontTranscoder_h diff --git a/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp b/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp deleted file mode 100644 index e417e17..0000000 --- a/WebCore/platform/text/win/TextBreakIteratorInternalICUWin.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2007 Apple Inc. All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#include "config.h" -#include "TextBreakIteratorInternalICU.h" - -namespace WebCore { - -const char* currentSearchLocaleID() -{ - // FIXME: Should use system locale. - return ""; -} - -const char* currentTextBreakLocaleID() -{ - // Using en_US_POSIX now so word selection in address field works as expected as before (double-clicking - // in a URL selects a word delimited by periods rather than selecting the entire URL). 
- // However, this is not entirely correct - we should honor the system locale in the normal case. - // FIXME: <rdar://problem/6786703> Should use system locale for text breaking - return "en_US_POSIX"; -} - -} diff --git a/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp b/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp deleted file mode 100644 index 96488c0..0000000 --- a/WebCore/platform/text/wince/TextBreakIteratorWinCE.cpp +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. 
- * - */ - -#include "config.h" -#include "TextBreakIterator.h" - -#include "PlatformString.h" -#include <wtf/StdLibExtras.h> -#include <wtf/unicode/Unicode.h> - -using namespace WTF::Unicode; - -namespace WebCore { - -// Hack, not entirely correct -static inline bool isCharStop(UChar c) -{ - CharCategory charCategory = category(c); - return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00)); -} - -static inline bool isLineStop(UChar c) -{ - return category(c) != Separator_Line; -} - -static inline bool isSentenceStop(UChar c) -{ - return isPunct(c); -} - -class TextBreakIterator { -public: - void reset(const UChar* str, int len) - { - string = str; - length = len; - currentPos = 0; - } - int first() - { - currentPos = 0; - return currentPos; - } - int last() - { - currentPos = length; - return currentPos; - } - virtual int next() = 0; - virtual int previous() = 0; - int following(int position) - { - currentPos = position; - return next(); - } - int preceding(int position) - { - currentPos = position; - return previous(); - } - - int currentPos; - const UChar* string; - int length; -}; - -struct WordBreakIterator: TextBreakIterator { - virtual int next(); - virtual int previous(); -}; - -struct CharBreakIterator: TextBreakIterator { - virtual int next(); - virtual int previous(); -}; - -struct LineBreakIterator: TextBreakIterator { - virtual int next(); - virtual int previous(); -}; - -struct SentenceBreakIterator : TextBreakIterator { - virtual int next(); - virtual int previous(); -}; - -int WordBreakIterator::next() -{ - if (currentPos == length) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos < length) { - if (haveSpace && !isSpace(string[currentPos])) - break; - if (isSpace(string[currentPos])) - haveSpace = true; - ++currentPos; - } - return currentPos; -} - -int WordBreakIterator::previous() -{ - if (!currentPos) { - currentPos = -1; - return currentPos; - } - bool 
haveSpace = false; - while (currentPos > 0) { - if (haveSpace && !isSpace(string[currentPos])) - break; - if (isSpace(string[currentPos])) - haveSpace = true; - --currentPos; - } - return currentPos; -} - -int CharBreakIterator::next() -{ - if (currentPos >= length) - return -1; - ++currentPos; - while (currentPos < length && !isCharStop(string[currentPos])) - ++currentPos; - return currentPos; -} - -int CharBreakIterator::previous() -{ - if (currentPos <= 0) - return -1; - if (currentPos > length) - currentPos = length; - --currentPos; - while (currentPos > 0 && !isCharStop(string[currentPos])) - --currentPos; - return currentPos; -} - -int LineBreakIterator::next() -{ - if (currentPos == length) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos < length) { - if (haveSpace && !isLineStop(string[currentPos])) - break; - if (isLineStop(string[currentPos])) - haveSpace = true; - ++currentPos; - } - return currentPos; -} - -int LineBreakIterator::previous() -{ - if (!currentPos) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos > 0) { - if (haveSpace && !isLineStop(string[currentPos])) - break; - if (isLineStop(string[currentPos])) - haveSpace = true; - --currentPos; - } - return currentPos; -} - -int SentenceBreakIterator::next() -{ - if (currentPos == length) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos < length) { - if (haveSpace && !isSentenceStop(string[currentPos])) - break; - if (isSentenceStop(string[currentPos])) - haveSpace = true; - ++currentPos; - } - return currentPos; -} - -int SentenceBreakIterator::previous() -{ - if (!currentPos) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos > 0) { - if (haveSpace && !isSentenceStop(string[currentPos])) - break; - if (isSentenceStop(string[currentPos])) - haveSpace = true; - --currentPos; - } - return currentPos; -} - -TextBreakIterator* 
wordBreakIterator(const UChar* string, int length) -{ - DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ()); - iterator.reset(string, length); - return &iterator; -} - -TextBreakIterator* characterBreakIterator(const UChar* string, int length) -{ - DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ()); - iterator.reset(string, length); - return &iterator; -} - -TextBreakIterator* lineBreakIterator(const UChar* string, int length) -{ - DEFINE_STATIC_LOCAL(LineBreakIterator , iterator, ()); - iterator.reset(string, length); - return &iterator; -} - -TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) -{ - DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ()); - iterator.reset(string, length); - return &iterator; -} - -int textBreakFirst(TextBreakIterator* breakIterator) -{ - return breakIterator->first(); -} - -int textBreakLast(TextBreakIterator* breakIterator) -{ - return breakIterator->last(); -} - -int textBreakNext(TextBreakIterator* breakIterator) -{ - return breakIterator->next(); -} - -int textBreakPrevious(TextBreakIterator* breakIterator) -{ - return breakIterator->previous(); -} - -int textBreakPreceding(TextBreakIterator* breakIterator, int position) -{ - return breakIterator->preceding(position); -} - -int textBreakFollowing(TextBreakIterator* breakIterator, int position) -{ - return breakIterator->following(position); -} - -int textBreakCurrent(TextBreakIterator* breakIterator) -{ - return breakIterator->currentPos; -} - -bool isTextBreak(TextBreakIterator*, int) -{ - return true; -} - -TextBreakIterator* cursorMovementIterator(const UChar* string, int length) -{ - return characterBreakIterator(string, length); -} - -} // namespace WebCore diff --git a/WebCore/platform/text/wince/TextCodecWinCE.cpp b/WebCore/platform/text/wince/TextCodecWinCE.cpp deleted file mode 100644 index 3532e74..0000000 --- a/WebCore/platform/text/wince/TextCodecWinCE.cpp +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (C) 2007-2009 Torch Mobile, Inc. 
All rights reserved. - * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * This library is distributed in the hope that i will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" -#include "TextCodecWinCE.h" - -#include "FontCache.h" -#include "PlatformString.h" -#include <mlang.h> -#include <winbase.h> -#include <winnls.h> -#include <wtf/HashMap.h> -#include <wtf/HashSet.h> -#include <wtf/text/CString.h> -#include <wtf/text/StringConcatenate.h> -#include <wtf/text/StringHash.h> -#include <wtf/unicode/UTF8.h> - -namespace WebCore { - -struct CharsetInfo { - CString m_name; - String m_friendlyName; - UINT m_codePage; - Vector<CString> m_aliases; -}; - -class LanguageManager { -private: - LanguageManager(); - - friend LanguageManager& languageManager(); -}; - -// Usage: a lookup table used to get CharsetInfo with code page ID. -// Key: code page ID. Value: charset information. 
-static HashMap<UINT, CString>& codePageCharsets() -{ - static HashMap<UINT, CString> cc; - return cc; -} - -static HashMap<String, CharsetInfo>& knownCharsets() -{ - static HashMap<String, CharsetInfo> kc; - return kc; -} - -// Usage: a map that stores charsets that are supported by system. Sorted by name. -// Key: charset. Value: code page ID. -typedef HashSet<String> CharsetSet; -static CharsetSet& supportedCharsets() -{ - static CharsetSet sl; - return sl; -} - -static LanguageManager& languageManager() -{ - static LanguageManager lm; - return lm; -} - -LanguageManager::LanguageManager() -{ - IEnumCodePage* enumInterface; - IMultiLanguage* mli = FontCache::getMultiLanguageInterface(); - if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) { - MIMECPINFO cpInfo; - ULONG ccpInfo; - while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) { - if (!IsValidCodePage(cpInfo.uiCodePage)) - continue; - - HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage); - - CString name(String(cpInfo.wszWebCharset).latin1()); - if (i == codePageCharsets().end()) { - CharsetInfo info; - info.m_codePage = cpInfo.uiCodePage; - knownCharsets().set(name.data(), info); - i = codePageCharsets().set(cpInfo.uiCodePage, name).first; - } - if (i != codePageCharsets().end()) { - HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length())); - ASSERT(j != knownCharsets().end()); - CharsetInfo& info = j->second; - info.m_name = i->second.data(); - info.m_friendlyName = cpInfo.wszDescription; - info.m_aliases.append(name); - info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1()); - info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1()); - String cpName = makeString("cp", String::number(cpInfo.uiCodePage)); - info.m_aliases.append(cpName.latin1()); - supportedCharsets().add(i->second.data()); - } - } - enumInterface->Release(); - } -} - -static UINT getCodePage(const char* 
name) -{ - if (!strcmp(name, "UTF-8")) - return CP_UTF8; - - // Explicitly use a "const" reference to fix the silly VS build error - // saying "==" is not found for const_iterator and iterator - const HashMap<String, CharsetInfo>& charsets = knownCharsets(); - HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name); - return i == charsets.end() ? CP_ACP : i->second.m_codePage; -} - -static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*) -{ - return new TextCodecWinCE(getCodePage(encoding.name())); -} - -TextCodecWinCE::TextCodecWinCE(UINT codePage) - : m_codePage(codePage) -{ -} - -TextCodecWinCE::~TextCodecWinCE() -{ -} - -void TextCodecWinCE::registerBaseEncodingNames(EncodingNameRegistrar registrar) -{ - registrar("UTF-8", "UTF-8"); -} - -void TextCodecWinCE::registerBaseCodecs(TextCodecRegistrar registrar) -{ - registrar("UTF-8", newTextCodecWinCE, 0); -} - -void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar) -{ - languageManager(); - for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) { - HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); - if (j != knownCharsets().end()) { - registrar(j->second.m_name.data(), j->second.m_name.data()); - for (Vector<CString>::const_iterator alias = j->second.m_aliases.begin(); alias != j->second.m_aliases.end(); ++alias) - registrar(alias->data(), j->second.m_name.data()); - } - } -} - -void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar) -{ - languageManager(); - for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) { - HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); - if (j != knownCharsets().end()) - registrar(j->second.m_name.data(), newTextCodecWinCE, 0); - } -} - -static DWORD getCodePageFlags(UINT codePage) -{ - if (codePage == CP_UTF8) - return MB_ERR_INVALID_CHARS; - - if (codePage == 42) // 
Symbol - return 0; - - // Microsoft says the flag must be 0 for the following code pages - if (codePage > 50000) { - if ((codePage >= 50220 && codePage <= 50222) - || codePage == 50225 - || codePage == 50227 - || codePage == 50229 - || codePage == 52936 - || codePage == 54936 - || (codePage >= 57002 && codePage <= 57001) - || codePage == 65000 // UTF-7 - ) - return 0; - } - - return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS; -} - -static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length) -{ - for (const char* bytesEnd = bytes + length; bytes < bytesEnd; ++bytes) { - if (*bytes & 0x80) - break; - } - return bytes; -} - -static void decode(Vector<UChar, 8192>& result, UINT codePage, const char* bytes, size_t length, size_t* left, bool canBeFirstTime, bool& sawInvalidChar) -{ - *left = length; - if (!bytes || !length) - return; - - DWORD flags = getCodePageFlags(codePage); - - if (codePage == CP_UTF8) { - if (canBeFirstTime) { - // Handle BOM. - if (length > 3) { - if (bytes[0] == (char)0xEF && bytes[1] == (char)0xBB && bytes[2] == (char)0xBF) { - // BOM found! - length -= 3; - bytes += 3; - *left = length; - } - } else if (bytes[0] == 0xEF && (length < 2 || bytes[1] == (char)0xBB) && (length < 3 || bytes[2] == (char)0xBF)) { - if (length == 3) - *left = 0; - return; - } - } - - // Process ASCII characters at beginning. 
- const char* firstNonAsciiChar = findFirstNonAsciiCharacter(bytes, length); - int numAsciiCharacters = firstNonAsciiChar - bytes; - if (numAsciiCharacters) { - result.append(bytes, numAsciiCharacters); - length -= numAsciiCharacters; - if (!length) { - *left = 0; - return; - } - bytes = firstNonAsciiChar; - } - - int oldSize = result.size(); - result.resize(oldSize + length); - UChar* resultStart = result.data() + oldSize; - const char* sourceStart = bytes; - const char* const sourceEnd = bytes + length; - for (;;) { - using namespace WTF::Unicode; - ConversionResult convRes = convertUTF8ToUTF16(&sourceStart - , sourceEnd - , &resultStart - , result.data() + result.size() - , true); - - // FIXME: is it possible? - if (convRes == targetExhausted && sourceStart < sourceEnd) { - oldSize = result.size(); - result.resize(oldSize + 256); - resultStart = result.data() + oldSize; - continue; - } - - if (convRes != conversionOK) - sawInvalidChar = true; - - break; - } - - *left = sourceEnd - sourceStart; - result.resize(resultStart - result.data()); - } else { - int testLength = length; - int untestedLength = length; - for (;;) { - int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0); - - if (resultLength > 0) { - int oldSize = result.size(); - result.resize(oldSize + resultLength); - - MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength); - - if (testLength == untestedLength) { - *left = length - testLength; - break; - } - untestedLength -= testLength; - length -= testLength; - bytes += testLength; - } else { - untestedLength = testLength - 1; - if (!untestedLength) { - *left = length; - break; - } - } - testLength = (untestedLength + 1) / 2; - } - } -} - -String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) -{ - if (!m_decodeBuffer.isEmpty()) { - m_decodeBuffer.append(bytes, length); - bytes = m_decodeBuffer.data(); - length = 
m_decodeBuffer.size(); - } - - size_t left; - Vector<UChar, 8192> result; - for (;;) { - bool sawInvalidChar = false; - WebCore::decode(result, m_codePage, bytes, length, &left, m_decodeBuffer.isEmpty(), sawInvalidChar); - if (!left) - break; - - if (!sawInvalidChar && !flush && left < 16) - break; - - result.append(L'?'); - sawError = true; - if (stopOnError) - return String::adopt(result); - - if (left == 1) - break; - - bytes += length - left + 1; - length = left - 1; - } - if (left && !flush) { - if (m_decodeBuffer.isEmpty()) - m_decodeBuffer.append(bytes + length - left, left); - else { - memmove(m_decodeBuffer.data(), bytes + length - left, left); - m_decodeBuffer.resize(left); - } - } else - m_decodeBuffer.clear(); - - return String::adopt(result); -} - -CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling) -{ - if (!characters || !length) - return CString(); - - DWORD flags = m_codePage == CP_UTF8 ? 0 : WC_COMPOSITECHECK; - - int resultLength = WideCharToMultiByte(m_codePage, flags, characters, length, 0, 0, 0, 0); - - // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables. 
- - if (resultLength <= 0) - return "?"; - - char* characterBuffer; - CString result = CString::newUninitialized(resultLength, characterBuffer); - - WideCharToMultiByte(m_codePage, flags, characters, length, characterBuffer, resultLength, 0, 0); - - return result; -} - -void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver) -{ - languageManager(); - for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) { - HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); - if (j != knownCharsets().end() && !receiver.receive(j->second.m_name.data(), j->second.m_friendlyName.charactersWithNullTermination(), j->second.m_codePage)) - break; - } -} - -} // namespace WebCore diff --git a/WebCore/platform/text/wince/TextCodecWinCE.h b/WebCore/platform/text/wince/TextCodecWinCE.h deleted file mode 100644 index 8d332a6..0000000 --- a/WebCore/platform/text/wince/TextCodecWinCE.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved. - * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL APPLE COMPUTER, INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TextCodecWinCE_h -#define TextCodecWinCE_h - -#include "PlatformString.h" -#include "TextCodec.h" -#include "TextEncoding.h" -#include <wtf/Vector.h> -#include <windows.h> - -namespace WebCore { - -class TextCodecWinCE : public TextCodec { -public: - static void registerBaseEncodingNames(EncodingNameRegistrar); - static void registerBaseCodecs(TextCodecRegistrar); - - static void registerExtendedEncodingNames(EncodingNameRegistrar); - static void registerExtendedCodecs(TextCodecRegistrar); - - TextCodecWinCE(UINT codePage); - virtual ~TextCodecWinCE(); - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); - - struct EncodingInfo { - String m_encoding; - String m_friendlyName; - }; - - struct EncodingReceiver { - // Return false to stop enumerating. - virtual bool receive(const char* encoding, const wchar_t* friendlyName, unsigned int codePage) = 0; - }; - - static void enumerateSupportedEncodings(EncodingReceiver& receiver); - -private: - UINT m_codePage; - Vector<char> m_decodeBuffer; -}; - -} // namespace WebCore - -#endif // TextCodecWinCE_h |