diff options
author | Steve Block <steveblock@google.com> | 2009-12-15 10:12:09 +0000 |
---|---|---|
committer | Steve Block <steveblock@google.com> | 2009-12-17 17:41:10 +0000 |
commit | 643ca7872b450ea4efacab6188849e5aac2ba161 (patch) | |
tree | 6982576c228bcd1a7efe98afed544d840751094c /WebCore/platform/text | |
parent | d026980fde6eb3b01c1fe49441174e89cd1be298 (diff) | |
download | external_webkit-643ca7872b450ea4efacab6188849e5aac2ba161.zip external_webkit-643ca7872b450ea4efacab6188849e5aac2ba161.tar.gz external_webkit-643ca7872b450ea4efacab6188849e5aac2ba161.tar.bz2 |
Merge webkit.org at r51976 : Initial merge by git.
Change-Id: Ib0e7e2f0fb4bee5a186610272edf3186f0986b43
Diffstat (limited to 'WebCore/platform/text')
22 files changed, 1293 insertions, 265 deletions
diff --git a/WebCore/platform/text/AtomicString.h b/WebCore/platform/text/AtomicString.h index 8805f4c..47d07c5 100644 --- a/WebCore/platform/text/AtomicString.h +++ b/WebCore/platform/text/AtomicString.h @@ -24,6 +24,14 @@ #include "AtomicStringImpl.h" #include "PlatformString.h" +// Define 'NO_IMPLICIT_ATOMICSTRING' before including this header, +// to disallow (expensive) implicit String-->AtomicString conversions. +#ifdef NO_IMPLICIT_ATOMICSTRING +#define ATOMICSTRING_CONVERSION explicit +#else +#define ATOMICSTRING_CONVERSION +#endif + namespace WebCore { struct AtomicStringHash; @@ -40,9 +48,9 @@ public: AtomicString(const JSC::UString& s) : m_string(add(s)) { } AtomicString(const JSC::Identifier& s) : m_string(add(s)) { } #endif - AtomicString(StringImpl* imp) : m_string(add(imp)) { } + ATOMICSTRING_CONVERSION AtomicString(StringImpl* imp) : m_string(add(imp)) { } AtomicString(AtomicStringImpl* imp) : m_string(imp) { } - AtomicString(const String& s) : m_string(add(s.impl())) { } + ATOMICSTRING_CONVERSION AtomicString(const String& s) : m_string(add(s.impl())) { } // Hash table deleted values, which are only constructed and never copied or destroyed. AtomicString(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { } @@ -96,7 +104,7 @@ public: static void remove(StringImpl*); -#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) +#if PLATFORM(CF) AtomicString(CFStringRef s) : m_string(add(String(s).impl())) { } CFStringRef createCFString() const { return m_string.createCFString(); } #endif diff --git a/WebCore/platform/text/AtomicStringImpl.h b/WebCore/platform/text/AtomicStringImpl.h index d905afc..ba1c72c 100644 --- a/WebCore/platform/text/AtomicStringImpl.h +++ b/WebCore/platform/text/AtomicStringImpl.h @@ -1,6 +1,4 @@ /* - * This file is part of the DOM implementation for KDE. - * * Copyright (C) 2006 Apple Computer, Inc. 
* * This library is free software; you can redistribute it and/or diff --git a/WebCore/platform/text/BidiContext.cpp b/WebCore/platform/text/BidiContext.cpp index 546571e..59db7bd 100644 --- a/WebCore/platform/text/BidiContext.cpp +++ b/WebCore/platform/text/BidiContext.cpp @@ -30,7 +30,7 @@ using namespace WTF::Unicode; PassRefPtr<BidiContext> BidiContext::create(unsigned char level, Direction direction, bool override, BidiContext* parent) { - ASSERT(direction == level % 2 ? RightToLeft : LeftToRight); + ASSERT(direction == (level % 2 ? RightToLeft : LeftToRight)); if (parent) return adoptRef(new BidiContext(level, direction, override, parent)); diff --git a/WebCore/platform/text/PlatformString.h b/WebCore/platform/text/PlatformString.h index 8d19c17..247536a 100644 --- a/WebCore/platform/text/PlatformString.h +++ b/WebCore/platform/text/PlatformString.h @@ -41,7 +41,7 @@ #include <wtf/OwnPtr.h> #endif -#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) +#if PLATFORM(CF) typedef const struct __CFString * CFStringRef; #endif @@ -206,7 +206,7 @@ public: StringImpl* impl() const { return m_impl.get(); } -#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) +#if PLATFORM(CF) String(CFStringRef); CFStringRef createCFString() const; #endif @@ -286,6 +286,11 @@ inline bool equalIgnoringCase(const String& a, const String& b) { return equalIg inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); } inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); } +inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase) +{ + return ignoreCase ? 
equalIgnoringCase(a, b) : (a == b); +} + inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); } inline bool operator!(const String& str) { return str.isNull(); } diff --git a/WebCore/platform/text/RegularExpression.h b/WebCore/platform/text/RegularExpression.h index 3254067..f1611e5 100644 --- a/WebCore/platform/text/RegularExpression.h +++ b/WebCore/platform/text/RegularExpression.h @@ -30,7 +30,7 @@ namespace WebCore { -class RegularExpression { +class RegularExpression : public FastAllocBase { public: RegularExpression(const String&, TextCaseSensitivity); ~RegularExpression(); diff --git a/WebCore/platform/text/String.cpp b/WebCore/platform/text/String.cpp index 44582a9..24659a4 100644 --- a/WebCore/platform/text/String.cpp +++ b/WebCore/platform/text/String.cpp @@ -81,6 +81,9 @@ String::String(const char* str, unsigned length) void String::append(const String& str) { + if (str.isEmpty()) + return; + // FIXME: This is extremely inefficient. So much so that we might want to take this // out of String's API. We can make it better by optimizing the case where exactly // one String is pointing at this StringImpl, but even then it's going to require a diff --git a/WebCore/platform/text/StringHash.h b/WebCore/platform/text/StringHash.h index fc6cb3c..21a478e 100644 --- a/WebCore/platform/text/StringHash.h +++ b/WebCore/platform/text/StringHash.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved + * Copyright (C) Research In Motion Limited 2009. All rights reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -29,6 +30,10 @@ namespace WebCore { + // The hash() functions on StringHash and CaseFoldingHash do not support + // null strings. get(), contains(), and add() on HashMap<String,..., StringHash> + // cause a null-pointer dereference when passed null strings. 
+ // FIXME: We should really figure out a way to put the computeHash function that's // currently a member function of StringImpl into this file so we can be a little // closer to having all the nearly-identical hash functions in one place. diff --git a/WebCore/platform/text/StringImpl.h b/WebCore/platform/text/StringImpl.h index dac25b2..5155fa5 100644 --- a/WebCore/platform/text/StringImpl.h +++ b/WebCore/platform/text/StringImpl.h @@ -37,7 +37,7 @@ #include <runtime/UString.h> #endif -#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) +#if PLATFORM(CF) typedef const struct __CFString * CFStringRef; #endif @@ -168,7 +168,7 @@ public: WTF::Unicode::Direction defaultWritingDirection(); -#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) +#if PLATFORM(CF) CFStringRef createCFString(); #endif #ifdef __OBJC__ diff --git a/WebCore/platform/text/TextBoundariesICU.cpp b/WebCore/platform/text/TextBoundaries.cpp index b1e8ee2..2455f6d 100644 --- a/WebCore/platform/text/TextBoundariesICU.cpp +++ b/WebCore/platform/text/TextBoundaries.cpp @@ -1,5 +1,6 @@ /* * Copyright (C) 2006, 2007 Apple Inc. All rights reserved. 
+ * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,39 +27,40 @@ #include "config.h" #include "TextBoundaries.h" -#include <unicode/ubrk.h> -#include <unicode/uchar.h> - #include "StringImpl.h" #include "TextBreakIterator.h" +#include <wtf/unicode/Unicode.h> + +using namespace WTF; +using namespace Unicode; namespace WebCore { int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward) { - UBreakIterator* it = wordBreakIterator(chars, len); + TextBreakIterator* it = wordBreakIterator(chars, len); if (forward) { - position = ubrk_following(it, position); - while (position != UBRK_DONE) { + position = textBreakFollowing(it, position); + while (position != TextBreakDone) { // We stop searching when the character preceeding the break // is alphanumeric. - if (position < len && u_isalnum(chars[position - 1])) + if (position < len && isAlphanumeric(chars[position - 1])) return position; - position = ubrk_following(it, position); + position = textBreakFollowing(it, position); } return len; } else { - position = ubrk_preceding(it, position); - while (position != UBRK_DONE) { + position = textBreakPreceding(it, position); + while (position != TextBreakDone) { // We stop searching when the character following the break // is alphanumeric. 
- if (position > 0 && u_isalnum(chars[position])) + if (position > 0 && isAlphanumeric(chars[position])) return position; - position = ubrk_preceding(it, position); + position = textBreakPreceding(it, position); } return 0; @@ -67,11 +69,11 @@ int findNextWordFromIndex(const UChar* chars, int len, int position, bool forwar void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end) { - UBreakIterator* it = wordBreakIterator(chars, len); - *end = ubrk_following(it, position); + TextBreakIterator* it = wordBreakIterator(chars, len); + *end = textBreakFollowing(it, position); if (*end < 0) - *end = ubrk_last(it); - *start = ubrk_previous(it); + *end = textBreakLast(it); + *start = textBreakPrevious(it); } } // namespace WebCore diff --git a/WebCore/platform/text/TextBreakIterator.h b/WebCore/platform/text/TextBreakIterator.h index 7b3b963..17cf5f0 100644 --- a/WebCore/platform/text/TextBreakIterator.h +++ b/WebCore/platform/text/TextBreakIterator.h @@ -47,7 +47,9 @@ namespace WebCore { TextBreakIterator* sentenceBreakIterator(const UChar*, int length); int textBreakFirst(TextBreakIterator*); + int textBreakLast(TextBreakIterator*); int textBreakNext(TextBreakIterator*); + int textBreakPrevious(TextBreakIterator*); int textBreakCurrent(TextBreakIterator*); int textBreakPreceding(TextBreakIterator*, int); int textBreakFollowing(TextBreakIterator*, int); diff --git a/WebCore/platform/text/TextBreakIteratorICU.cpp b/WebCore/platform/text/TextBreakIteratorICU.cpp index c922fbc..44423c0 100644 --- a/WebCore/platform/text/TextBreakIteratorICU.cpp +++ b/WebCore/platform/text/TextBreakIteratorICU.cpp @@ -90,11 +90,21 @@ int textBreakFirst(TextBreakIterator* bi) return ubrk_first(bi); } +int textBreakLast(TextBreakIterator* bi) +{ + return ubrk_last(bi); +} + int textBreakNext(TextBreakIterator* bi) { return ubrk_next(bi); } +int textBreakPrevious(TextBreakIterator* bi) +{ + return ubrk_previous(bi); +} + int textBreakPreceding(TextBreakIterator* bi, 
int pos) { return ubrk_preceding(bi, pos); diff --git a/WebCore/platform/text/TextEncoding.cpp b/WebCore/platform/text/TextEncoding.cpp index c5c8cfd..ec9a8b0 100644 --- a/WebCore/platform/text/TextEncoding.cpp +++ b/WebCore/platform/text/TextEncoding.cpp @@ -32,10 +32,13 @@ #include "PlatformString.h" #include "TextCodec.h" #include "TextEncodingRegistry.h" -#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID) +#if USE(ICU_UNICODE) #include <unicode/unorm.h> #elif USE(QT4_UNICODE) #include <QString> +#elif USE(GLIB_UNICODE) +#include <glib.h> +#include <wtf/gtk/GOwnPtr.h> #endif #include <wtf/HashSet.h> #include <wtf/OwnPtr.h> @@ -84,7 +87,7 @@ CString TextEncoding::encode(const UChar* characters, size_t length, Unencodable if (!length) return ""; -#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID) +#if USE(ICU_UNICODE) // FIXME: What's the right place to do normalization? // It's a little strange to do it inside the encode function. // Perhaps normalization should be an explicit step done before calling encode. 
@@ -114,6 +117,18 @@ CString TextEncoding::encode(const UChar* characters, size_t length, Unencodable QString str(reinterpret_cast<const QChar*>(characters), length); str = str.normalized(QString::NormalizationForm_C); return newTextCodec(*this)->encode(reinterpret_cast<const UChar *>(str.utf16()), str.length(), handling); +#elif USE(GLIB_UNICODE) + GOwnPtr<char> UTF8Source; + UTF8Source.set(g_utf16_to_utf8(characters, length, 0, 0, 0)); + + GOwnPtr<char> UTF8Normalized; + UTF8Normalized.set(g_utf8_normalize(UTF8Source.get(), -1, G_NORMALIZE_NFC)); + + long UTF16Length; + GOwnPtr<UChar> UTF16Normalized; + UTF16Normalized.set(g_utf8_to_utf16(UTF8Normalized.get(), -1, 0, &UTF16Length, 0)); + + return newTextCodec(*this)->encode(UTF16Normalized.get(), UTF16Length, handling); #elif PLATFORM(WINCE) // normalization will be done by Windows CE API OwnPtr<TextCodec> textCodec = newTextCodec(*this); diff --git a/WebCore/platform/text/TextEncodingRegistry.cpp b/WebCore/platform/text/TextEncodingRegistry.cpp index d3e2965..a4be520 100644 --- a/WebCore/platform/text/TextEncodingRegistry.cpp +++ b/WebCore/platform/text/TextEncodingRegistry.cpp @@ -39,7 +39,7 @@ #include <wtf/StringExtras.h> #include <wtf/Threading.h> -#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID) +#if USE(ICU_UNICODE) #include "TextCodecICU.h" #endif #if PLATFORM(MAC) @@ -48,6 +48,9 @@ #if PLATFORM(QT) #include "qt/TextCodecQt.h" #endif +#if USE(GLIB_UNICODE) +#include "gtk/TextCodecGtk.h" +#endif #if PLATFORM(WINCE) && !PLATFORM(QT) #include "TextCodecWince.h" #endif @@ -217,11 +220,16 @@ static void buildBaseTextCodecMaps() TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap); TextCodecUserDefined::registerCodecs(addToTextCodecMap); -#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID) +#if USE(ICU_UNICODE) TextCodecICU::registerBaseEncodingNames(addToTextEncodingNameMap); TextCodecICU::registerBaseCodecs(addToTextCodecMap); #endif +#if USE(GLIB_UNICODE) + 
TextCodecGtk::registerBaseEncodingNames(addToTextEncodingNameMap); + TextCodecGtk::registerBaseCodecs(addToTextCodecMap); +#endif + #if PLATFORM(WINCE) && !PLATFORM(QT) TextCodecWince::registerBaseEncodingNames(addToTextEncodingNameMap); TextCodecWince::registerBaseCodecs(addToTextCodecMap); @@ -230,7 +238,7 @@ static void buildBaseTextCodecMaps() static void extendTextCodecMaps() { -#if USE(ICU_UNICODE) || USE(GLIB_ICU_UNICODE_HYBRID) +#if USE(ICU_UNICODE) TextCodecICU::registerExtendedEncodingNames(addToTextEncodingNameMap); TextCodecICU::registerExtendedCodecs(addToTextCodecMap); #endif @@ -245,6 +253,11 @@ static void extendTextCodecMaps() TextCodecMac::registerCodecs(addToTextCodecMap); #endif +#if USE(GLIB_UNICODE) + TextCodecGtk::registerExtendedEncodingNames(addToTextEncodingNameMap); + TextCodecGtk::registerExtendedCodecs(addToTextCodecMap); +#endif + #if PLATFORM(WINCE) && !PLATFORM(QT) TextCodecWince::registerExtendedEncodingNames(addToTextEncodingNameMap); TextCodecWince::registerExtendedCodecs(addToTextCodecMap); diff --git a/WebCore/platform/text/cf/StringCF.cpp b/WebCore/platform/text/cf/StringCF.cpp index b770d0e..97691e5 100644 --- a/WebCore/platform/text/cf/StringCF.cpp +++ b/WebCore/platform/text/cf/StringCF.cpp @@ -21,7 +21,7 @@ #include "config.h" #include "PlatformString.h" -#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) +#if PLATFORM(CF) #include <CoreFoundation/CoreFoundation.h> @@ -52,4 +52,4 @@ CFStringRef String::createCFString() const } -#endif // PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) +#endif // PLATFORM(CF) diff --git a/WebCore/platform/text/cf/StringImplCF.cpp b/WebCore/platform/text/cf/StringImplCF.cpp index 8a2ae79..aff45b3 100644 --- a/WebCore/platform/text/cf/StringImplCF.cpp +++ b/WebCore/platform/text/cf/StringImplCF.cpp @@ -21,7 +21,7 @@ #include "config.h" #include "StringImpl.h" -#if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) +#if PLATFORM(CF) #include <CoreFoundation/CoreFoundation.h> #include 
<wtf/MainThread.h> @@ -159,4 +159,4 @@ CFStringRef StringImpl::createCFString() } -#endif // PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN)) +#endif // PLATFORM(CF) diff --git a/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp b/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp new file mode 100644 index 0000000..7a10b41 --- /dev/null +++ b/WebCore/platform/text/gtk/TextBreakIteratorGtk.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> + * Copyright (C) 2007 Apple Inc. All rights reserved. + * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> + * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + +#include "config.h" +#include "TextBreakIterator.h" + +#include <pango/pango.h> +#include <wtf/gtk/GOwnPtr.h> + +namespace WebCore { + +enum UBreakIteratorType { + UBRK_CHARACTER, + UBRK_WORD, + UBRK_LINE, + UBRK_SENTENCE +}; + +class TextBreakIterator { +public: + UBreakIteratorType m_type; + int m_length; + PangoLogAttr* m_logAttrs; + int m_index; +}; + +static TextBreakIterator* setUpIterator(bool& createdIterator, TextBreakIterator*& iterator, + UBreakIteratorType type, const UChar* string, int length) +{ + if (!string) + return 0; + + if (!createdIterator) { + iterator = new TextBreakIterator(); + createdIterator = true; + } + if (!iterator) + return 0; + + long utf8len; + GOwnPtr<char> utf8; + utf8.set(g_utf16_to_utf8(string, length, 0, &utf8len, 0)); + + // FIXME: assumes no surrogate pairs + + iterator->m_type = type; + iterator->m_length = length; + if (createdIterator) + g_free(iterator->m_logAttrs); + iterator->m_logAttrs = g_new0(PangoLogAttr, length + 1); + iterator->m_index = -1; + pango_get_log_attrs(utf8.get(), utf8len, -1, 0, iterator->m_logAttrs, length + 1); + + return iterator; +} + +TextBreakIterator* characterBreakIterator(const UChar* string, int length) +{ + static bool createdCharacterBreakIterator = false; + static TextBreakIterator* staticCharacterBreakIterator; + return setUpIterator(createdCharacterBreakIterator, staticCharacterBreakIterator, UBRK_CHARACTER, string, length); +} + +TextBreakIterator* cursorMovementIterator(const UChar* string, int length) +{ + // FIXME: This needs closer inspection to achieve behaviour identical to the ICU version. 
+ return characterBreakIterator(string, length); +} + +TextBreakIterator* wordBreakIterator(const UChar* string, int length) +{ + static bool createdWordBreakIterator = false; + static TextBreakIterator* staticWordBreakIterator; + return setUpIterator(createdWordBreakIterator, staticWordBreakIterator, UBRK_WORD, string, length); +} + +TextBreakIterator* lineBreakIterator(const UChar* string, int length) +{ + static bool createdLineBreakIterator = false; + static TextBreakIterator* staticLineBreakIterator; + return setUpIterator(createdLineBreakIterator, staticLineBreakIterator, UBRK_LINE, string, length); +} + +TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) +{ + static bool createdSentenceBreakIterator = false; + static TextBreakIterator* staticSentenceBreakIterator; + return setUpIterator(createdSentenceBreakIterator, staticSentenceBreakIterator, UBRK_SENTENCE, string, length); +} + +int textBreakFirst(TextBreakIterator* bi) +{ + // see textBreakLast + + int firstCursorPosition = -1; + int pos = 0; + while (pos <= bi->m_length && (firstCursorPosition < 0)) { + if (bi->m_logAttrs[pos].is_cursor_position) + firstCursorPosition = pos; + } + bi->m_index = firstCursorPosition; + return firstCursorPosition; +} + +int textBreakLast(TextBreakIterator* bi) +{ + // TextBreakLast is not meant to find just any break according to bi->m_type + // but really the one near the last character. + // (cmp ICU documentation for ubrk_first and ubrk_last) + // From ICU docs for ubrk_last: + // "Determine the index immediately beyond the last character in the text being scanned." + + // So we should advance or traverse back based on bi->m_logAttrs cursor positions. + // If last character position in the original string is a whitespace, + // traverse to the left until the first non-white character position is found + // and return the position of the first white-space char after this one. 
+ // Otherwise return m_length, as "the first character beyond the last" is outside our string. + + bool whiteSpaceAtTheEnd = true; + int nextWhiteSpacePos = bi->m_length; + + int pos = bi->m_length; + while (pos >= 0 && whiteSpaceAtTheEnd) { + if (bi->m_logAttrs[pos].is_cursor_position) { + if (whiteSpaceAtTheEnd = bi->m_logAttrs[pos].is_white) + nextWhiteSpacePos = pos; + } + pos--; + } + bi->m_index = nextWhiteSpacePos; + return nextWhiteSpacePos; +} + +int textBreakNext(TextBreakIterator* bi) +{ + for (int i = bi->m_index + 1; i <= bi->m_length; i++) { + + // FIXME: UBRK_WORD case: Single multibyte characters (i.e. white space around them), such as the euro symbol €, + // are not marked as word_start & word_end as opposed to the way ICU does it. + // This leads to - for example - different word selection behaviour when right clicking. + + if ((bi->m_type == UBRK_LINE && bi->m_logAttrs[i].is_line_break) + || (bi->m_type == UBRK_WORD && (bi->m_logAttrs[i].is_word_start || bi->m_logAttrs[i].is_word_end)) + || (bi->m_type == UBRK_CHARACTER && bi->m_logAttrs[i].is_cursor_position) + || (bi->m_type == UBRK_SENTENCE && (bi->m_logAttrs[i].is_sentence_start || bi->m_logAttrs[i].is_sentence_end)) ) { + bi->m_index = i; + return i; + } + } + return TextBreakDone; +} + +int textBreakPrevious(TextBreakIterator* bi) +{ + for (int i = bi->m_index - 1; i >= 0; i--) { + if ((bi->m_type == UBRK_LINE && bi->m_logAttrs[i].is_line_break) + || (bi->m_type == UBRK_WORD && (bi->m_logAttrs[i].is_word_start || bi->m_logAttrs[i].is_word_end)) + || (bi->m_type == UBRK_CHARACTER && bi->m_logAttrs[i].is_cursor_position) + || (bi->m_type == UBRK_SENTENCE && (bi->m_logAttrs[i].is_sentence_start || bi->m_logAttrs[i].is_sentence_end)) ) { + bi->m_index = i; + return i; + } + } + return textBreakFirst(bi); +} + +int textBreakPreceding(TextBreakIterator* bi, int pos) +{ + bi->m_index = pos; + return textBreakPrevious(bi); +} + +int textBreakFollowing(TextBreakIterator* bi, int pos) +{ + if (pos < 
0) + pos = -1; + bi->m_index = pos; + return textBreakNext(bi); +} + +int textBreakCurrent(TextBreakIterator* bi) +{ + return bi->m_index; +} + +bool isTextBreak(TextBreakIterator* bi, int pos) +{ + if (bi->m_index < 0) + return false; + + return ((bi->m_type == UBRK_LINE && bi->m_logAttrs[bi->m_index].is_line_break) + || (bi->m_type == UBRK_WORD && bi->m_logAttrs[bi->m_index].is_word_end) + || (bi->m_type == UBRK_CHARACTER && bi->m_logAttrs[bi->m_index].is_char_break) + || (bi->m_type == UBRK_SENTENCE && bi->m_logAttrs[bi->m_index].is_sentence_end) ); +} + +} diff --git a/WebCore/platform/text/gtk/TextCodecGtk.cpp b/WebCore/platform/text/gtk/TextCodecGtk.cpp new file mode 100644 index 0000000..31da3b7 --- /dev/null +++ b/WebCore/platform/text/gtk/TextCodecGtk.cpp @@ -0,0 +1,446 @@ +/* + * Copyright (C) 2004, 2006, 2007, 2008 Apple Inc. All rights reserved. + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> + * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "TextCodecGtk.h" + +#include "CString.h" +#include "PlatformString.h" +#include <wtf/Assertions.h> +#include <wtf/HashMap.h> +#include <wtf/gtk/GOwnPtr.h> +#include "Logging.h" + +using std::min; + +namespace WebCore { + +// TextCodec's appendOmittingBOM() is gone (http://trac.webkit.org/changeset/33380). +// That's why we need to avoid generating extra BOM's for the conversion result. +// This can be achieved by specifying the UTF-16 codecs' endianness explicitly when initializing GLib. + +#if (G_BYTE_ORDER == G_BIG_ENDIAN) + const gchar* WebCore::TextCodecGtk::m_internalEncodingName = "UTF-16BE"; +#else + const gchar* WebCore::TextCodecGtk::m_internalEncodingName = "UTF-16LE"; +#endif + + +// We're specifying the list of text codecs and their aliases here. +// For each codec the first entry is the canonical name, remaining ones are used as aliases. +// Each alias list must be terminated by a 0. 
+ +// Unicode +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_UTF_8 = { "UTF-8", 0 }; + +// Western +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_1 = { "ISO-8859-1", "CP819", "IBM819", "ISO-IR-100", "ISO8859-1", "ISO_8859-1", "ISO_8859-1:1987", "L1", "LATIN1", "CSISOLATIN1", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_MACROMAN = { "MACROMAN", "MAC", "MACINTOSH", "CSMACINTOSH", 0 }; + +// Japanese +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_SHIFT_JIS = { "Shift_JIS", "MS_KANJI", "SHIFT-JIS", "SJIS", "CSSHIFTJIS", 0 }; + TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_EUC_JP = { "EUC-JP", "EUC_JP", "EUCJP", "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE", "CSEUCPKDFMTJAPANESE", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_2022_JP = { "ISO-2022-JP", 0 }; + +// Traditional Chinese +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_BIG5 = { "BIG5", "BIG-5", "BIG-FIVE", "BIG5", "BIGFIVE", "CN-BIG5", "CSBIG5", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_BIG5_HKSCS = { "BIG5-HKSCS", "BIG5-HKSCS:2004", "BIG5HKSCS", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP950 = { "CP950", 0 }; + +// Korean +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_2022_KR = { "ISO-2022-KR", "CSISO2022KR", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP949 = { "CP949", "UHC", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_EUC_KR = { "EUC-KR", "CSEUCKR", 0 }; + +// Arabic +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_6 = { "ISO-8859-6", "ARABIC", "ASMO-708", "ECMA-114", "ISO-IR-127", "ISO8859-6", "ISO_8859-6", "ISO_8859-6:1987", "CSISOLATINARABIC", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1256 = { "windows-1256", "CP1256", "MS-ARAB", 0 }; // rearranged, windows-1256 now declared the canonical name and put to lowercase to fix 
/fast/encoding/ahram-org-eg.html test case + +// Hebrew +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_8 = { "ISO-8859-8", "HEBREW", "ISO-8859-8", "ISO-IR-138", "ISO8859-8", "ISO_8859-8", "ISO_8859-8:1988", "CSISOLATINHEBREW", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1255 = { "windows-1255", "CP1255", "MS-HEBR", 0 }; // rearranged, moved windows-1255 as canonical and lowercased, fixing /fast/encoding/meta-charset.html + +// Greek +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_7 = { "ISO-8859-7", "ECMA-118", "ELOT_928", "GREEK", "GREEK8", "ISO-IR-126", "ISO8859-7", "ISO_8859-7", "ISO_8859-7:1987", "ISO_8859-7:2003", "CSI", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP869 = { "CP869", "869", "CP-GR", "IBM869", "CSIBM869", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_WINDOWS_1253 = { "WINDOWS-1253", 0 }; + +// Cyrillic +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_5 = { "ISO-8859-5", "CYRILLIC", "ISO-IR-144", "ISO8859-5", "ISO_8859-5", "ISO_8859-5:1988", "CSISOLATINCYRILLIC", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_KOI8_R = { "KOI8-R", "CSKOI8R", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP866 = { "CP866", "866", "IBM866", "CSIBM866", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_KOI8_U = { "KOI8-U", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_WINDOWS_1251 = { "windows-1251", "CP1251", 0 }; // CP1251 added to pass /fast/encoding/charset-cp1251.html +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_MACCYRILLIC = { "mac-cyrillic", "MACCYRILLIC", "x-mac-cyrillic", 0 }; + +// Thai +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP874 = { "CP874", "WINDOWS-874", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_TIS_620 = { "TIS-620", 0 }; + +// Simplified Chinese +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_GBK = { 
"GBK", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_HZ = { "HZ", "HZ-GB-2312", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_GB18030 = { "GB18030", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_EUC_CN = { "EUC-CN", "EUCCN", "GB2312", "CN-GB", "CSGB2312", "EUC_CN", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_2312_80 = { "GB_2312-80", "CHINESE", "csISO58GB231280", "GB2312.1980-0", "ISO-IR-58" }; + +// Central European +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_2 = { "ISO-8859-2", "ISO-IR-101", "ISO8859-2", "ISO_8859-2", "ISO_8859-2:1987", "L2", "LATIN2", "CSISOLATIN2", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1250 = { "CP1250", "MS-EE", "WINDOWS-1250", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_MACCENTRALEUROPE = { "MAC-CENTRALEUROPE", 0 }; + +// Vietnamese +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1258 = { "CP1258", "WINDOWS-1258", 0 }; + +// Turkish +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1254 = { "CP1254", "MS-TURK", "WINDOWS-1254", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_9 = { "ISO-8859-9", "ISO-IR-148", "ISO8859-9", "ISO_8859-9", "ISO_8859-9:1989", "L5", "LATIN5", "CSISOLATIN5", 0 }; + +// Baltic +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_CP1257 = { "CP1257", "WINBALTRIM", "WINDOWS-1257", 0 }; +TextCodecGtk::codecAliasList TextCodecGtk::m_codecAliases_ISO_8859_4 = { "ISO-8859-4", "ISO-IR-110", "ISO8859-4", "ISO_8859-4", "ISO_8859-4:1988", "L4", "LATIN4", "CSISOLATIN4", 0 }; + +gconstpointer const TextCodecGtk::m_iconvBaseCodecList[] = { + // Unicode + &m_codecAliases_UTF_8, + + // Western + &m_codecAliases_ISO_8859_1 +}; + +gconstpointer const TextCodecGtk::m_iconvExtendedCodecList[] = +{ + // Western + &m_codecAliases_MACROMAN, + + // Japanese + &m_codecAliases_SHIFT_JIS, + &m_codecAliases_EUC_JP, + &m_codecAliases_ISO_2022_JP, + 
+ // Traditional Chinese + &m_codecAliases_BIG5, + &m_codecAliases_BIG5_HKSCS, + &m_codecAliases_CP950, + + // Korean + &m_codecAliases_ISO_2022_KR, + &m_codecAliases_CP949, + &m_codecAliases_EUC_KR, + + // Arabic + &m_codecAliases_ISO_8859_6, + &m_codecAliases_CP1256, + + // Hebrew + &m_codecAliases_ISO_8859_8, + &m_codecAliases_CP1255, + + // Greek + &m_codecAliases_ISO_8859_7, + &m_codecAliases_CP869, + &m_codecAliases_WINDOWS_1253, + + // Cyrillic + &m_codecAliases_ISO_8859_5, + &m_codecAliases_KOI8_R, + &m_codecAliases_CP866, + &m_codecAliases_KOI8_U, + &m_codecAliases_WINDOWS_1251, + &m_codecAliases_MACCYRILLIC, + + // Thai + &m_codecAliases_CP874, + &m_codecAliases_TIS_620, + + // Simplified Chinese + &m_codecAliases_GBK, + &m_codecAliases_HZ, + &m_codecAliases_GB18030, + &m_codecAliases_EUC_CN, + &m_codecAliases_2312_80, + + // Central European + &m_codecAliases_ISO_8859_2, + &m_codecAliases_CP1250, + &m_codecAliases_MACCENTRALEUROPE, + + // Vietnamese + &m_codecAliases_CP1258, + + // Turkish + &m_codecAliases_CP1254, + &m_codecAliases_ISO_8859_9, + + // Baltic + &m_codecAliases_CP1257, + &m_codecAliases_ISO_8859_4 +}; + + +const size_t ConversionBufferSize = 16384; + + +static PassOwnPtr<TextCodec> newTextCodecGtk(const TextEncoding& encoding, const void*) +{ + return new TextCodecGtk(encoding); +} + +gboolean TextCodecGtk::isEncodingAvailable(const gchar* encName) +{ + GIConv tester; + // test decoding + tester = g_iconv_open(m_internalEncodingName, encName); + if (tester == reinterpret_cast<GIConv>(-1)) { + return false; + } else { + g_iconv_close(tester); + // test encoding + tester = g_iconv_open(encName, m_internalEncodingName); + if (tester == reinterpret_cast<GIConv>(-1)) { + return false; + } else { + g_iconv_close(tester); + return true; + } + } +} + +void TextCodecGtk::registerEncodingNames(EncodingNameRegistrar registrar, bool extended) +{ + const void* const* encodingList; + unsigned int listLength = 0; + if (extended) { + encodingList = 
m_iconvExtendedCodecList; + listLength = sizeof(m_iconvExtendedCodecList)/sizeof(gpointer); + } else { + encodingList = m_iconvBaseCodecList; + listLength = sizeof(m_iconvBaseCodecList)/sizeof(gpointer); + } + + for (unsigned int i = 0; i < listLength; ++i) { + codecAliasList *codecAliases = static_cast<codecAliasList*>(encodingList[i]); + + // Our convention is, the first entry in codecAliases is the canonical name, + // see above in the list of declarations. + // Probe GLib for this one first. If it's not available, we skip the whole group of aliases. + + int codecCount = 0; + const char *canonicalName; + canonicalName = (*codecAliases)[codecCount]; + + if(!isEncodingAvailable(canonicalName)) { + LOG(TextConversion, "Canonical encoding %s not available, skipping.", canonicalName); + continue; + } + registrar(canonicalName, canonicalName); + + const char *currentAlias; + while ((currentAlias = (*codecAliases)[++codecCount])) { + if (isEncodingAvailable(currentAlias)) { + LOG(TextConversion, "Registering encoding name alias %s to canonical %s", currentAlias, canonicalName); + registrar(currentAlias, canonicalName); + } + } + + } +} + +void TextCodecGtk::registerCodecs(TextCodecRegistrar registrar, bool extended) +{ + const void* const* encodingList; + unsigned int listLength = 0; + if (extended) { + encodingList = m_iconvExtendedCodecList; + listLength = sizeof(m_iconvExtendedCodecList)/sizeof(gpointer); + } else { + encodingList = m_iconvBaseCodecList; + listLength = sizeof(m_iconvBaseCodecList)/sizeof(gpointer); + } + + for (unsigned int i = 0; i < listLength; ++i) { + codecAliasList *codecAliases = static_cast<codecAliasList*>(encodingList[i]); + // by convention, the first "alias" should be the canonical name, see the definition of the alias lists + const gchar *codecName = (*codecAliases)[0]; + if (isEncodingAvailable(codecName)) + registrar(codecName, newTextCodecGtk, 0); + } +} + +void TextCodecGtk::registerBaseEncodingNames(EncodingNameRegistrar registrar) 
+{ + registerEncodingNames(registrar, false); +} + +void TextCodecGtk::registerBaseCodecs(TextCodecRegistrar registrar) +{ + registerCodecs(registrar, false); +} + +void TextCodecGtk::registerExtendedEncodingNames(EncodingNameRegistrar registrar) +{ + registerEncodingNames(registrar, true); +} + +void TextCodecGtk::registerExtendedCodecs(TextCodecRegistrar registrar) +{ + registerCodecs(registrar, true); +} + +TextCodecGtk::TextCodecGtk(const TextEncoding& encoding) + : m_encoding(encoding) + , m_numBufferedBytes(0) + , m_iconvDecoder(reinterpret_cast<GIConv>(-1)) + , m_iconvEncoder(reinterpret_cast<GIConv>(-1)) +{ +} + +TextCodecGtk::~TextCodecGtk() +{ + if (m_iconvDecoder != reinterpret_cast<GIConv>(-1)) { + g_iconv_close(m_iconvDecoder); + m_iconvDecoder = reinterpret_cast<GIConv>(-1); + } + if (m_iconvEncoder != reinterpret_cast<GIConv>(-1)) { + g_iconv_close(m_iconvEncoder); + m_iconvEncoder = reinterpret_cast<GIConv>(-1); + } +} + +void TextCodecGtk::createIConvDecoder() const +{ + ASSERT(m_iconvDecoder == reinterpret_cast<GIConv>(-1)); + + m_iconvDecoder = g_iconv_open(m_internalEncodingName, m_encoding.name()); +} + +void TextCodecGtk::createIConvEncoder() const +{ + // The encoder is created lazily; check the encoder handle (not the decoder, which may already exist). + ASSERT(m_iconvEncoder == reinterpret_cast<GIConv>(-1)); + + m_iconvEncoder = g_iconv_open(m_encoding.name(), m_internalEncodingName); +} + +String TextCodecGtk::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError) +{ + // Get a converter for the passed-in encoding. 
+ if (m_iconvDecoder == reinterpret_cast<GIConv>(-1)) { + createIConvDecoder(); + ASSERT(m_iconvDecoder != reinterpret_cast<GIConv>(-1)); + if (m_iconvDecoder == reinterpret_cast<GIConv>(-1)) { + LOG_ERROR("Error creating IConv encoder even though encoding was in table."); + return String(); + } + } + + size_t countWritten, countRead, conversionLength; + const char* conversionBytes; + char* prefixedBytes = 0; + + if (m_numBufferedBytes) { + conversionLength = length + m_numBufferedBytes; + prefixedBytes = static_cast<char*>(fastMalloc(conversionLength)); + memcpy(prefixedBytes, m_bufferedBytes, m_numBufferedBytes); + memcpy(prefixedBytes + m_numBufferedBytes, bytes, length); + + conversionBytes = prefixedBytes; + + // all buffered bytes are consumed now + m_numBufferedBytes = 0; + } else { + // no previously buffered partial data, + // just convert the data that was passed in + conversionBytes = bytes; + conversionLength = length; + } + + GOwnPtr<GError> err; + GOwnPtr<UChar> buffer; + + buffer.outPtr() = reinterpret_cast<UChar*>(g_convert_with_iconv(conversionBytes, conversionLength, m_iconvDecoder, &countRead, &countWritten, &err.outPtr())); + + + if (err) { + LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", err->code, err->message); + m_numBufferedBytes = 0; // reset state for subsequent calls to decode + fastFree(prefixedBytes); + sawError = true; + return String(); + } + + // Partial input at the end of the string may not result in an error being raised. + // From the gnome library documentation on g_convert_with_iconv: + // "Even if the conversion was successful, this may be less than len if there were partial characters at the end of the input." 
+ // That's why we need to compare conversionLength against countRead + + m_numBufferedBytes = conversionLength - countRead; + if (m_numBufferedBytes > 0) { + if (flush) { + LOG_ERROR("Partial bytes at end of input while flush requested."); + m_numBufferedBytes = 0; // reset state for subsequent calls to decode + fastFree(prefixedBytes); + sawError = true; + return String(); + } + memcpy(m_bufferedBytes, conversionBytes + countRead, m_numBufferedBytes); + } + + fastFree(prefixedBytes); + + Vector<UChar> result; + + result.append(buffer.get(), countWritten / sizeof(UChar)); + + return String::adopt(result); +} + +CString TextCodecGtk::encode(const UChar* characters, size_t length, UnencodableHandling handling) +{ + if (!length) + return ""; + + if (m_iconvEncoder == reinterpret_cast<GIConv>(-1)) + createIConvEncoder(); + if (m_iconvEncoder == reinterpret_cast<GIConv>(-1)) + return CString(); + + size_t count; + + GOwnPtr<GError> err; + GOwnPtr<char> buffer; + + buffer.outPtr() = g_convert_with_iconv(reinterpret_cast<const char*>(characters), length * sizeof(UChar), m_iconvEncoder, 0, &count, &err.outPtr()); + if (err) { + LOG_ERROR("GIConv conversion error, Code %d: \"%s\"", err->code, err->message); + return CString(); + } + + return CString(buffer.get(), count); +} + +} // namespace WebCore diff --git a/WebCore/platform/text/gtk/TextCodecGtk.h b/WebCore/platform/text/gtk/TextCodecGtk.h new file mode 100644 index 0000000..a8af752 --- /dev/null +++ b/WebCore/platform/text/gtk/TextCodecGtk.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2004, 2006, 2007 Apple Inc. All rights reserved. + * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> + * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> + * Copyright (C) 2009 Dominik Röttsches <dominik.roettsches@access-company.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef TextCodecGTK_h +#define TextCodecGTK_h + +#include <glib.h> +#include "TextCodec.h" +#include "TextEncoding.h" + +namespace WebCore { + + class TextCodecGtk : public TextCodec { + public: + static void registerBaseEncodingNames(EncodingNameRegistrar); + static void registerBaseCodecs(TextCodecRegistrar); + + static void registerExtendedEncodingNames(EncodingNameRegistrar); + static void registerExtendedCodecs(TextCodecRegistrar); + + TextCodecGtk(const TextEncoding&); + virtual ~TextCodecGtk(); + + virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); + virtual CString encode(const UChar*, size_t length, UnencodableHandling); + + private: + void createIConvDecoder() const; + void createIConvEncoder() const; + + static void registerEncodingNames(EncodingNameRegistrar registrar, bool extended); + static void registerCodecs(TextCodecRegistrar registrar, bool extended); + static gboolean isEncodingAvailable(const gchar*); + + TextEncoding m_encoding; + size_t m_numBufferedBytes; + unsigned char m_bufferedBytes[16]; // bigger than any single multi-byte character + mutable GIConv m_iconvDecoder; + mutable GIConv m_iconvEncoder; + + static const gchar* m_internalEncodingName; + + typedef const gchar* const codecAliasList[]; + + // Unicode + static codecAliasList m_codecAliases_UTF_8; + + // Western + static codecAliasList m_codecAliases_ISO_8859_1; + static codecAliasList m_codecAliases_MACROMAN; + + // Japanese + static codecAliasList m_codecAliases_SHIFT_JIS; + static codecAliasList m_codecAliases_EUC_JP; + static codecAliasList m_codecAliases_ISO_2022_JP; + + // Traditional Chinese + static codecAliasList m_codecAliases_BIG5; + static codecAliasList m_codecAliases_BIG5_HKSCS; + static codecAliasList m_codecAliases_CP950; + + // Korean + static codecAliasList m_codecAliases_ISO_2022_KR; + static codecAliasList m_codecAliases_CP949; + static codecAliasList m_codecAliases_EUC_KR; + + // Arabic + static 
codecAliasList m_codecAliases_ISO_8859_6; + static codecAliasList m_codecAliases_CP1256; + + // Hebrew + static codecAliasList m_codecAliases_ISO_8859_8; + static codecAliasList m_codecAliases_CP1255; + + // Greek + static codecAliasList m_codecAliases_ISO_8859_7; + static codecAliasList m_codecAliases_CP869; + static codecAliasList m_codecAliases_WINDOWS_1253; + + // Cyrillic + static codecAliasList m_codecAliases_ISO_8859_5; + static codecAliasList m_codecAliases_KOI8_R; + static codecAliasList m_codecAliases_CP866; + static codecAliasList m_codecAliases_KOI8_U; + static codecAliasList m_codecAliases_WINDOWS_1251; + static codecAliasList m_codecAliases_MACCYRILLIC; + + // Thai + static codecAliasList m_codecAliases_CP874; + static codecAliasList m_codecAliases_TIS_620; + + // Simplified Chinese + static codecAliasList m_codecAliases_GBK; + static codecAliasList m_codecAliases_HZ; + static codecAliasList m_codecAliases_GB18030; + static codecAliasList m_codecAliases_EUC_CN; + static codecAliasList m_codecAliases_2312_80; + + // Central European + static codecAliasList m_codecAliases_ISO_8859_2; + static codecAliasList m_codecAliases_CP1250; + static codecAliasList m_codecAliases_MACCENTRALEUROPE; + + // Vietnamese + static codecAliasList m_codecAliases_CP1258; + + // Turkish + static codecAliasList m_codecAliases_CP1254; + static codecAliasList m_codecAliases_ISO_8859_9; + + // Baltic + static codecAliasList m_codecAliases_CP1257; + static codecAliasList m_codecAliases_ISO_8859_4; + + static gconstpointer const m_iconvBaseCodecList[]; + static gconstpointer const m_iconvExtendedCodecList[]; + + }; + +} // namespace WebCore + +#endif // TextCodecGTK_h diff --git a/WebCore/platform/text/qt/TextBoundaries.cpp b/WebCore/platform/text/qt/TextBoundariesQt.cpp index ffc4c44..a354ca6 100644 --- a/WebCore/platform/text/qt/TextBoundaries.cpp +++ b/WebCore/platform/text/qt/TextBoundariesQt.cpp @@ -36,7 +36,6 @@ #include <QDebug> #include <stdio.h> -#if QT_VERSION >= 0x040400 
#include <qtextboundaryfinder.h> namespace WebCore { @@ -76,48 +75,3 @@ void findWordBoundary(UChar const* buffer, int len, int position, int* start, in } -#else -namespace WebCore { - -int findNextWordFromIndex(UChar const* buffer, int len, int position, bool forward) -{ - QString str(reinterpret_cast<QChar const*>(buffer), len); - notImplemented(); - return 0; -} - -void findWordBoundary(UChar const* buffer, int len, int position, int* start, int* end) -{ - QString str(reinterpret_cast<QChar const*>(buffer), len); - - if (position > str.length()) { - *start = 0; - *end = 0; - return; - } - - int currentPosition = position - 1; - QString foundWord; - while (currentPosition >= 0 && - str[currentPosition].isLetter()) { - foundWord.prepend(str[currentPosition]); - --currentPosition; - } - - // currentPosition == 0 means the first char is not letter - // currentPosition == -1 means we reached the beginning - int startPos = (currentPosition < 0) ? 0 : ++currentPosition; - currentPosition = position; - if (str[currentPosition].isLetter()) { - while (str[currentPosition].isLetter()) { - foundWord.append(str[currentPosition]); - ++currentPosition; - } - } - - *start = startPos; - *end = currentPosition; -} - -} -#endif diff --git a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp index d80e270..101947c 100644 --- a/WebCore/platform/text/qt/TextBreakIteratorQt.cpp +++ b/WebCore/platform/text/qt/TextBreakIteratorQt.cpp @@ -1,6 +1,4 @@ /* - * This file is part of the DOM implementation for KDE. 
- * * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> * * This library is free software; you can redistribute it and/or @@ -23,7 +21,6 @@ #include "config.h" #include "TextBreakIterator.h" -#if QT_VERSION >= 0x040400 #include <QtCore/qtextboundaryfinder.h> #include <qdebug.h> @@ -132,183 +129,3 @@ namespace WebCore { } } -#else -#include <qtextlayout.h> - -namespace WebCore { - - class TextBreakIterator { - public: - virtual int first() = 0; - virtual int next() = 0; - virtual int previous() = 0; - inline int following(int pos) - { - currentPos = pos; - return next(); - } - inline int preceding(int pos) - { - currentPos = pos; - return previous(); - } - int currentPos; - const UChar *string; - int length; - }; - - class WordBreakIteratorQt : public TextBreakIterator { - public: - virtual int first(); - virtual int next(); - virtual int previous(); - }; - - class CharBreakIteratorQt : public TextBreakIterator { - public: - virtual int first(); - virtual int next(); - virtual int previous(); - QTextLayout layout; - }; - - int WordBreakIteratorQt::first() - { - currentPos = 0; - return currentPos; - } - - int WordBreakIteratorQt::next() - { - if (currentPos >= length) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos < length) { - if (haveSpace && !QChar(string[currentPos]).isSpace()) - break; - if (QChar(string[currentPos]).isSpace()) - haveSpace = true; - ++currentPos; - } - return currentPos; - } - - int WordBreakIteratorQt::previous() - { - if (currentPos <= 0) { - currentPos = -1; - return currentPos; - } - bool haveSpace = false; - while (currentPos > 0) { - if (haveSpace && !QChar(string[currentPos]).isSpace()) - break; - if (QChar(string[currentPos]).isSpace()) - haveSpace = true; - --currentPos; - } - return currentPos; - } - - int CharBreakIteratorQt::first() - { - currentPos = 0; - return currentPos; - } - - int CharBreakIteratorQt::next() - { - if (currentPos >= length) - return -1; - currentPos = 
layout.nextCursorPosition(currentPos); - return currentPos; - } - - int CharBreakIteratorQt::previous() - { - if (currentPos <= 0) - return -1; - currentPos = layout.previousCursorPosition(currentPos); - return currentPos; - } - - -TextBreakIterator* wordBreakIterator(const UChar* string, int length) -{ - static WordBreakIteratorQt *iterator = 0; - if (!iterator) - iterator = new WordBreakIteratorQt; - - iterator->string = string; - iterator->length = length; - iterator->currentPos = 0; - - return iterator; -} - -TextBreakIterator* characterBreakIterator(const UChar* string, int length) -{ - static CharBreakIteratorQt *iterator = 0; - if (!iterator) - iterator = new CharBreakIteratorQt; - - iterator->string = string; - iterator->length = length; - iterator->currentPos = 0; - iterator->layout.setText(QString(reinterpret_cast<const QChar*>(string), length)); - - return iterator; -} - -TextBreakIterator* cursorMovementIterator(const UChar* string, int length) -{ - return characterBreakIterator(string, length); -} - -TextBreakIterator* lineBreakIterator(const UChar*, int) -{ - // not yet implemented - return 0; -} - -TextBreakIterator* sentenceBreakIterator(const UChar*, int) -{ - // not yet implemented - return 0; -} - -int textBreakFirst(TextBreakIterator* bi) -{ - return bi->first(); -} - -int textBreakNext(TextBreakIterator* bi) -{ - return bi->next(); -} - -int textBreakPreceding(TextBreakIterator* bi, int pos) -{ - return bi->preceding(pos); -} - -int textBreakFollowing(TextBreakIterator* bi, int pos) -{ - return bi->following(pos); -} - -int textBreakCurrent(TextBreakIterator* bi) -{ - return bi->currentPos; -} - -bool isTextBreak(TextBreakIterator*, int) -{ - return true; -} - -} - -#endif diff --git a/WebCore/platform/text/wince/TextBoundariesWince.cpp b/WebCore/platform/text/wince/TextBoundariesWince.cpp new file mode 100644 index 0000000..df6f757 --- /dev/null +++ b/WebCore/platform/text/wince/TextBoundariesWince.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (C) 
2006 Zack Rusin <zack@kde.org> + * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "TextBoundaries.h" + +#include "NotImplemented.h" +#include "PlatformString.h" + +using namespace WTF::Unicode; + +namespace WebCore { + +int findNextWordFromIndex(const UChar * buffer, int len, int position, bool forward) +{ + notImplemented(); + return 0; +} + +void findWordBoundary(const UChar * buffer, int len, int position, int* start, int* end) +{ + if (position > len) { + *start = 0; + *end = 0; + return; + } + + String str(buffer, len); + + int currentPosition = position - 1; + String foundWord; + while (currentPosition >= 0 && isLetter(str[currentPosition])) { + UChar c = str[currentPosition]; + foundWord.insert(&c, 1, 0); + --currentPosition; + } + + // currentPosition == 0 means the first char is not letter + // currentPosition == -1 means we reached the beginning + int startPos = (currentPosition < 0) ? 0 : ++currentPosition; + currentPosition = position; + while (isLetter(str[currentPosition])) { + foundWord.append(str[currentPosition]); + ++currentPosition; + } + + *start = startPos; + *end = currentPosition; +} + + +} diff --git a/WebCore/platform/text/wince/TextBreakIteratorWince.cpp b/WebCore/platform/text/wince/TextBreakIteratorWince.cpp new file mode 100644 index 0000000..26a5be2 --- /dev/null +++ b/WebCore/platform/text/wince/TextBreakIteratorWince.cpp @@ -0,0 +1,311 @@ +/* + * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> + * Copyright (C) 2007-2009 Torch Mobile, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#include "config.h" +#include "TextBreakIterator.h" + +#include "PlatformString.h" +#include <wtf/unicode/Unicode.h> + +using namespace WTF::Unicode; + +namespace WebCore { + +// Hack, not entirely correct +static inline bool isCharStop(UChar c) +{ + CharCategory charCategory = category(c); + return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00)); +} + +static inline bool isLineStop(UChar c) +{ + return category(c) != Separator_Line; +} + +static inline bool isSentenceStop(UChar c) +{ + return isPunct(c); +} + +class TextBreakIterator { +public: + void reset(const UChar* str, int len) + { + string = str; + length = len; + currentPos = 0; + } + virtual int first() = 0; + virtual int next() = 0; + virtual int previous() = 0; + int following(int position) + { + currentPos = position; + return next(); + } + int preceding(int position) + { + currentPos = position; + return previous(); + } + + int currentPos; + const UChar* string; + int length; +}; + +struct WordBreakIterator: TextBreakIterator { + virtual int first(); + virtual int next(); + virtual int previous(); +}; + +struct CharBreakIterator: TextBreakIterator { + virtual int first(); + virtual int next(); + virtual int previous(); +}; + +struct LineBreakIterator: TextBreakIterator { + virtual int first(); + virtual int next(); + virtual int previous(); +}; + +struct SentenceBreakIterator : TextBreakIterator { + virtual int first(); + virtual int next(); + virtual int previous(); +}; + +int WordBreakIterator::first() +{ + currentPos = 0; + return currentPos; +} + +int WordBreakIterator::next() +{ + if (currentPos == length) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + 
while (currentPos < length) { + if (haveSpace && !isSpace(string[currentPos])) + break; + if (isSpace(string[currentPos])) + haveSpace = true; + ++currentPos; + } + return currentPos; +} + +int WordBreakIterator::previous() +{ + if (!currentPos) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos > 0) { + if (haveSpace && !isSpace(string[currentPos])) + break; + if (isSpace(string[currentPos])) + haveSpace = true; + --currentPos; + } + return currentPos; +} + +int CharBreakIterator::first() +{ + currentPos = 0; + return currentPos; +} + +int CharBreakIterator::next() +{ + if (currentPos >= length) + return -1; + ++currentPos; + while (currentPos < length && !isCharStop(string[currentPos])) + ++currentPos; + return currentPos; +} + +int CharBreakIterator::previous() +{ + if (currentPos <= 0) + return -1; + if (currentPos > length) + currentPos = length; + --currentPos; + while (currentPos > 0 && !isCharStop(string[currentPos])) + --currentPos; + return currentPos; +} + +int LineBreakIterator::first() +{ + currentPos = 0; + return currentPos; +} + +int LineBreakIterator::next() +{ + if (currentPos == length) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos < length) { + if (haveSpace && !isLineStop(string[currentPos])) + break; + if (isLineStop(string[currentPos])) + haveSpace = true; + ++currentPos; + } + return currentPos; +} + +int LineBreakIterator::previous() +{ + if (!currentPos) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos > 0) { + if (haveSpace && !isLineStop(string[currentPos])) + break; + if (isLineStop(string[currentPos])) + haveSpace = true; + --currentPos; + } + return currentPos; +} + +int SentenceBreakIterator::first() +{ + currentPos = 0; + return currentPos; +} + +int SentenceBreakIterator::next() +{ + if (currentPos == length) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos < 
length) { + if (haveSpace && !isSentenceStop(string[currentPos])) + break; + if (isSentenceStop(string[currentPos])) + haveSpace = true; + ++currentPos; + } + return currentPos; +} + +int SentenceBreakIterator::previous() +{ + if (!currentPos) { + currentPos = -1; + return currentPos; + } + bool haveSpace = false; + while (currentPos > 0) { + if (haveSpace && !isSentenceStop(string[currentPos])) + break; + if (isSentenceStop(string[currentPos])) + haveSpace = true; + --currentPos; + } + return currentPos; +} + +TextBreakIterator* wordBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +TextBreakIterator* characterBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +TextBreakIterator* lineBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(LineBreakIterator , iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +TextBreakIterator* sentenceBreakIterator(const UChar* string, int length) +{ + DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ()); + iterator.reset(string, length); + return &iterator; +} + +int textBreakFirst(TextBreakIterator* breakIterator) +{ + return breakIterator->first(); +} + +int textBreakNext(TextBreakIterator* breakIterator) +{ + return breakIterator->next(); +} + +int textBreakPreceding(TextBreakIterator* breakIterator, int position) +{ + return breakIterator->preceding(position); +} + +int textBreakFollowing(TextBreakIterator* breakIterator, int position) +{ + return breakIterator->following(position); +} + +int textBreakCurrent(TextBreakIterator* breakIterator) +{ + return breakIterator->currentPos; +} + +bool isTextBreak(TextBreakIterator*, int) +{ + return true; +} + +TextBreakIterator* cursorMovementIterator(const UChar* string, int length) +{ + return 
characterBreakIterator(string, length); +} + +} |